Skip to content

Commit c5a2008

Browse files
committed
Introduce demonstrator for background caches in embedding
This demonstrates how we can incorporate background caches into the MC pipeline. We use a staged scheme in which background files are copied in multiple stages, since not all files are needed at the same time. This hides the copy latency: for example, the copy of large hit files can overlap with the signal transport simulation. Moreover, we copy only what is needed by a workflow. On a bookkeeping level, we still need to verify the compatibility of the background files with the current software stack etc.
1 parent 6c26061 commit c5a2008

File tree

1 file changed

+110
-38
lines changed

1 file changed

+110
-38
lines changed

MC/run/PWGHF/create_embedding_workflow.py

Lines changed: 110 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
parser.add_argument('-j',help='number of workers (if applicable)', default=8)
1616
parser.add_argument('-e',help='simengine', default='TGeant4')
1717
parser.add_argument('-o',help='output workflow file', default='workflow.json')
18+
parser.add_argument('--rest-digi',action='store_true',help='treat smaller sensors in a single digitization')
1819
parser.add_argument('--embedding',help='whether to embedd into background', default=True)
19-
parser.add_argument('--noIPC',help='disable shared memory in DPL')
20+
parser.add_argument('--noIPC',help='disable shared memory in DPL')
21+
parser.add_argument('--upload-bkg-to',help='where to upload background files (alien)')
22+
parser.add_argument('--use-bkg-from',help='use background from GRID instead of simulating from scratch')
2023
args = parser.parse_args()
2124
print (args)
2225

@@ -59,12 +62,51 @@ def getDPL_global_options(bigshm=False,nosmallrate=False):
5962
return "-b --run --session " + str(taskcounter) + ' --driver-client-backend ws://' + (' --rate 1','')[nosmallrate]
6063

6164
doembedding=True if args.embedding=='True' or args.embedding==True else False
65+
usebkgcache=args.use_bkg_from!=None
6266

6367
if doembedding:
64-
# ---- background transport task -------
65-
BKGtask=createTask(name='bkgsim', lab=["GEANT"], cpu='8')
66-
BKGtask['cmd']='o2-sim -e ' + SIMENGINE + ' -j ' + str(NWORKERS) + ' -n ' + str(NBKGEVENTS) + ' -g pythia8hi ' + str(MODULES) + ' -o bkg --configFile ${O2DPG_ROOT}/MC/config/common/ini/basic.ini'
67-
workflow['stages'].append(BKGtask)
68+
if not usebkgcache:
69+
# ---- background transport task -------
70+
BKGtask=createTask(name='bkgsim', lab=["GEANT"], cpu='8')
71+
BKGtask['cmd']='o2-sim -e ' + SIMENGINE + ' -j ' + str(NWORKERS) + ' -n ' + str(NBKGEVENTS) + ' -g pythia8hi ' + str(MODULES) + ' -o bkg --configFile ${O2DPG_ROOT}/MC/config/common/ini/basic.ini; for d in tf*; do ln -nfs bkg* ${d}/; done'
72+
workflow['stages'].append(BKGtask)
73+
74+
if args.upload_bkg_to!=None:
75+
BKGuploadtask=createTask(name='bkgupload', needs=[BKGtask['name']], cpu='0')
76+
BKGuploadtask['cmd']='alien.py mkdir ' + args.upload_bkg_to + ';'
77+
BKGuploadtask['cmd']+='alien.py cp -f bkg* ' + args.upload_bkg_to + ';'
78+
workflow['stages'].append(BKGuploadtask)
79+
80+
else:
81+
# when using background caches, we have multiple smaller tasks
82+
# this split makes sense as they are needed at different stages
83+
# 1: --> download bkg_MCHeader.root + grp + geometry
84+
# 2: --> download bkg_Hit files (individually)
85+
# 3: --> download bkg_Kinematics
86+
87+
# Step 1: header and link files
88+
BKG_HEADER_task=createTask(name='bkgdownloadheader', cpu='0', lab=['BKGCACHE'])
89+
BKG_HEADER_task['cmd']='alien.py cp ' + args.use_bkg_from + 'bkg_MCHeader.root .'
90+
BKG_HEADER_task['cmd']=BKG_HEADER_task['cmd'] + ';alien.py cp ' + args.use_bkg_from + 'bkg_geometry.root .'
91+
BKG_HEADER_task['cmd']=BKG_HEADER_task['cmd'] + ';alien.py cp ' + args.use_bkg_from + 'bkg_grp.root .'
92+
workflow['stages'].append(BKG_HEADER_task)
93+
94+
# we split some detectors for improved load balancing --> the precise list needs to be made consistent with geometry and active sensors
95+
smallsensorlist = [ "ITS", "TOF", "FT0", "FV0", "FDD", "MCH", "MID", "MFT", "HMP", "EMC", "PHS", "CPV" ]
96+
97+
BKG_HITDOWNLOADER_TASKS={}
98+
for det in [ 'TPC', 'TRD' ] + smallsensorlist:
99+
if usebkgcache:
100+
BKG_HITDOWNLOADER_TASKS[det] = createTask(str(det) + 'hitdownload', cpu='0', lab=['BKGCACHE'])
101+
BKG_HITDOWNLOADER_TASKS[det]['cmd'] = 'alien.py cp ' + args.use_bkg_from + 'bkg_Hits' + str(det) + '.root .'
102+
workflow['stages'].append(BKG_HITDOWNLOADER_TASKS[det])
103+
else:
104+
BKG_HITDOWNLOADER_TASKS[det] = None
105+
106+
if usebkgcache:
107+
BKG_KINEDOWNLOADER_TASK = createTask(name='bkgkinedownload', cpu='0', lab=['BKGCACHE'])
108+
BKG_KINEDOWNLOADER_TASK['cmd'] = 'alien.py cp ' + args.use_bkg_from + 'bkg_Kine.root .'
109+
workflow['stages'].append(BKG_KINEDOWNLOADER_TASK)
68110

69111
# loop over timeframes
70112
for tf in range(1, NTIMEFRAMES + 1):
@@ -91,18 +133,15 @@ def getDPL_global_options(bigshm=False,nosmallrate=False):
91133
--ptHatMax=' + str(PTHATMAX)
92134
workflow['stages'].append(SGN_CONFIG_task)
93135

94-
if doembedding:
95-
# link background files to current working dir for this timeframe
96-
LinkBKGtask=createTask(name='linkbkg_'+str(tf), needs=[BKGtask['name']], tf=tf, cwd=timeframeworkdir)
97-
LinkBKGtask['cmd']='ln -nsf ../bkg*.root .'
98-
workflow['stages'].append(LinkBKGtask)
99-
100136
# transport signals
101137
signalprefix='sgn_' + str(tf)
102138
signalneeds=[ SGN_CONFIG_task['name'] ]
103-
embeddinto= "--embedIntoFile bkg_Kine.root" if doembedding else ""
139+
embeddinto= "--embedIntoFile ../bkg_MCHeader.root" if doembedding else ""
104140
if doembedding:
105-
signalneeds = signalneeds + [ BKGtask['name'], LinkBKGtask['name'] ]
141+
if not usebkgcache:
142+
signalneeds = signalneeds + [ BKGtask['name'] ]
143+
else:
144+
signalneeds = signalneeds + [ BKG_HEADER_task['name'] ]
106145
SGNtask=createTask(name='sgnsim_'+str(tf), needs=signalneeds, tf=tf, cwd='tf'+str(tf), lab=["GEANT"], cpu='5.')
107146
SGNtask['cmd']='o2-sim -e '+str(SIMENGINE) + ' ' + str(MODULES) + ' -n ' + str(NSIGEVENTS) + ' -j ' + str(NWORKERS) + ' -g pythia8 '\
108147
+ ' -o ' + signalprefix + ' ' + embeddinto
@@ -114,8 +153,13 @@ def getDPL_global_options(bigshm=False,nosmallrate=False):
114153
# We need to be careful here and distinguish between embedding and non-embedding cases
115154
# (otherwise it can confuse itstpcmatching, see O2-2026). This is because only one of the GRPs is updated during digitization.
116155
if doembedding:
117-
LinkGRPFileTask=createTask(name='linkGRP_'+str(tf), needs=[BKGtask['name']], tf=tf, cwd=timeframeworkdir)
118-
LinkGRPFileTask['cmd']='ln -nsf bkg_grp.root o2sim_grp.root ; ln -nsf bkg_geometry.root o2sim_geometry.root'
156+
LinkGRPFileTask=createTask(name='linkGRP_'+str(tf), needs=[BKG_HEADER_task['name'] if usebkgcache else BKGtask['name'] ], tf=tf, cwd=timeframeworkdir)
157+
LinkGRPFileTask['cmd']='''
158+
ln -nsf ../bkg_grp.root o2sim_grp.root;
159+
ln -nsf ../bkg_geometry.root o2sim_geometry.root;
160+
ln -nsf ../bkg_geometry.root bkg_geometry.root;
161+
ln -nsf ../bkg_grp.root bkg_grp.root
162+
'''
119163
else:
120164
LinkGRPFileTask=createTask(name='linkGRP_'+str(tf), needs=[SGNtask['name']], tf=tf, cwd=timeframeworkdir)
121165
LinkGRPFileTask['cmd']='ln -nsf ' + signalprefix + '_grp.root o2sim_grp.root ; ln -nsf ' + signalprefix + '_geometry.root o2sim_geometry.root'
@@ -126,37 +170,61 @@ def getDPL_global_options(bigshm=False,nosmallrate=False):
126170
simsoption=' --sims ' + ('bkg,'+signalprefix if doembedding else signalprefix)
127171

128172
ContextTask=createTask(name='digicontext_'+str(tf), needs=[SGNtask['name'], LinkGRPFileTask['name']], tf=tf,
129-
cwd=timeframeworkdir, lab=["DIGI"], cpu='8')
173+
cwd=timeframeworkdir, lab=["DIGI"], cpu='1')
130174
ContextTask['cmd'] = 'o2-sim-digitizer-workflow --only-context --interactionRate 50000 ' + getDPL_global_options() + ' -n ' + str(args.ns) + simsoption
131175
workflow['stages'].append(ContextTask)
132176

133-
TPCDigitask=createTask(name='tpcdigi_'+str(tf), needs=[ContextTask['name'], LinkGRPFileTask['name']],
177+
tpcdigineeds=[ContextTask['name'], LinkGRPFileTask['name']]
178+
if usebkgcache:
179+
tpcdigineeds += [ BKG_HITDOWNLOADER_TASKS['TPC']['name'] ]
180+
181+
TPCDigitask=createTask(name='tpcdigi_'+str(tf), needs=tpcdigineeds,
134182
tf=tf, cwd=timeframeworkdir, lab=["DIGI"], cpu='8', mem='16000')
135-
TPCDigitask['cmd'] = 'o2-sim-digitizer-workflow ' + getDPL_global_options(bigshm=True) + ' -n ' + str(args.ns) + simsoption + ' --onlyDet TPC --interactionRate 50000 --tpc-lanes ' + str(NWORKERS) + ' --incontext ' + str(CONTEXTFILE)
183+
TPCDigitask['cmd'] = ('','ln -nfs ../bkg_HitsTPC.root . ;')[doembedding]
184+
TPCDigitask['cmd'] += 'o2-sim-digitizer-workflow ' + getDPL_global_options(bigshm=True) + ' -n ' + str(args.ns) + simsoption + ' --onlyDet TPC --interactionRate 50000 --tpc-lanes ' + str(NWORKERS) + ' --incontext ' + str(CONTEXTFILE)
136185
workflow['stages'].append(TPCDigitask)
137186

138-
TRDDigitask=createTask(name='trddigi_'+str(tf), needs=[ContextTask['name']], tf=tf, cwd=timeframeworkdir, lab=["DIGI"], cpu='8', mem='8000')
139-
TRDDigitask['cmd'] = 'o2-sim-digitizer-workflow ' + getDPL_global_options() + ' -n ' + str(args.ns) + simsoption + ' --onlyDet TRD --interactionRate 50000 --configKeyValues \"TRDSimParams.digithreads=' + str(NWORKERS) + '\" --incontext ' + str(CONTEXTFILE)
187+
trddigineeds = [ContextTask['name']]
188+
if usebkgcache:
189+
trddigineeds += [ BKG_HITDOWNLOADER_TASKS['TRD']['name'] ]
190+
TRDDigitask=createTask(name='trddigi_'+str(tf), needs=trddigineeds,
191+
tf=tf, cwd=timeframeworkdir, lab=["DIGI"], cpu='8', mem='8000')
192+
TRDDigitask['cmd'] = ('','ln -nfs ../bkg_HitsTRD.root . ;')[doembedding]
193+
TRDDigitask['cmd'] += 'o2-sim-digitizer-workflow ' + getDPL_global_options() + ' -n ' + str(args.ns) + simsoption + ' --onlyDet TRD --interactionRate 50000 --configKeyValues \"TRDSimParams.digithreads=' + str(NWORKERS) + '\" --incontext ' + str(CONTEXTFILE)
140194
workflow['stages'].append(TRDDigitask)
141195

142-
# RESTDigitask=createTask(name='restdigi_'+str(tf), needs=[ContextTask['name'], LinkGRPFileTask['name']], tf=tf, cwd=timeframeworkdir, lab=["DIGI"], cpu='medium', mem='8000')
143-
# RESTDigitask['cmd'] = 'o2-sim-digitizer-workflow ' + getDPL_global_options() + ' -n ' + str(args.ns) + simsoption + ' --skipDet TRD,TPC --interactionRate 50000 --incontext ' + str(CONTEXTFILE)
144-
# workflow['stages'].append(RESTDigitask)
145-
146-
# we split the digitizers for improved load balancing --> the precise list needs to be made consistent with geometry and active sensors
147-
sensorlist = [ "ITS", "TOF", "FT0", "FV0", "FDD", "MCH", "MID", "MFT", "HMP", "EMC", "PHS", "CPV" ]
148196
# these are digitizers which are single threaded
149-
def createRestDigiTask(name):
150-
t = createTask(name=name, needs=[ContextTask['name']], tf=tf, cwd=timeframeworkdir, lab=["DIGI","SMALLDIGI"], cpu='1')
151-
t['cmd'] = 'o2-sim-digitizer-workflow ' + getDPL_global_options() + ' -n ' + str(args.ns) + simsoption + ' --onlyDet ' + str(det) + ' --interactionRate 50000 --incontext ' + str(CONTEXTFILE)
152-
workflow['stages'].append(t)
153-
return t
197+
def createRestDigiTask(name, det='ALLSMALLER'):
198+
tneeds = needs=[ContextTask['name']]
199+
if det=='ALLSMALLER':
200+
if usebkgcache:
201+
for d in smallsensorlist:
202+
tneeds += [ BKG_HITDOWNLOADER_TASKS[d]['name'] ]
203+
t = createTask(name=name, needs=tneeds,
204+
tf=tf, cwd=timeframeworkdir, lab=["DIGI","SMALLDIGI"], cpu='8')
205+
t['cmd'] = ('','ln -nfs ../bkg_Hits*.root . ;')[doembedding]
206+
t['cmd'] += 'o2-sim-digitizer-workflow ' + getDPL_global_options(nosmallrate=True) + ' -n ' + str(args.ns) + simsoption + ' --skipDet TPC,TRD --interactionRate 50000 --incontext ' + str(CONTEXTFILE)
207+
workflow['stages'].append(t)
208+
return t
209+
210+
else:
211+
if usebkgcache:
212+
tneeds += [ BKG_HITDOWNLOADER_TASKS[det]['name'] ]
213+
t = createTask(name=name, needs=tneeds,
214+
tf=tf, cwd=timeframeworkdir, lab=["DIGI","SMALLDIGI"], cpu='1')
215+
t['cmd'] = ('','ln -nfs ../bkg_Hits' + str(det) + '.root . ;')[doembedding]
216+
t['cmd'] += 'o2-sim-digitizer-workflow ' + getDPL_global_options() + ' -n ' + str(args.ns) + simsoption + ' --onlyDet ' + str(det) + ' --interactionRate 50000 --incontext ' + str(CONTEXTFILE)
217+
workflow['stages'].append(t)
218+
return t
154219

155220
det_to_digitask={}
156221

157-
for det in sensorlist:
158-
name=str(det).lower() + "digi_"+str(tf)
159-
t=createRestDigiTask(name)
222+
if args.rest_digi==True:
223+
det_to_digitask['ALLSMALLER']=createRestDigiTask("restdigi_"+str(tf))
224+
225+
for det in smallsensorlist:
226+
name=str(det).lower() + "digi_" + str(tf)
227+
t = det_to_digitask['ALLSMALLER'] if args.rest_digi==True else createRestDigiTask(name, det)
160228
det_to_digitask[det]=t
161229

162230
# -----------
@@ -204,12 +272,16 @@ def createRestDigiTask(name):
204272
# -----------
205273
# produce AOD
206274
# -----------
207-
208-
AODtask = createTask(name='aod_'+str(tf), needs=[PVFINDERtask['name'], TOFRECOtask['name'], TRDTRACKINGtask['name']], tf=tf, cwd=timeframeworkdir, lab=["AOD"])
209-
AODtask['cmd'] = 'o2-aod-producer-workflow --aod-writer-keep dangling --aod-writer-resfile \"AO2D\" --aod-writer-resmode UPDATE --aod-timeframe-id ' + str(tf) + ' ' + getDPL_global_options(bigshm=True)
275+
aodneeds = [PVFINDERtask['name'], TOFRECOtask['name'], TRDTRACKINGtask['name']]
276+
if usebkgcache:
277+
aodneeds += [ BKG_KINEDOWNLOADER_TASK['name'] ]
278+
279+
AODtask = createTask(name='aod_'+str(tf), needs=aodneeds, tf=tf, cwd=timeframeworkdir, lab=["AOD"], mem='16000', cpu='1')
280+
AODtask['cmd'] = ('','ln -nfs ../bkg_Kine.root . ;')[doembedding]
281+
AODtask['cmd'] += 'o2-aod-producer-workflow --aod-writer-keep dangling --aod-writer-resfile \"AO2D\" --aod-writer-resmode UPDATE --aod-timeframe-id ' + str(tf) + ' ' + getDPL_global_options(bigshm=True)
282+
AODtask['cmd'] = 'echo \"hello\"' #-> skipping for moment since not optimized
210283
workflow['stages'].append(AODtask)
211284

212-
213285
def trimString(cmd):
214286
return ' '.join(cmd.split())
215287

0 commit comments

Comments
 (0)