From 953eae23f802d9b5793ba104ce71b6981e513f46 Mon Sep 17 00:00:00 2001 From: swenzel Date: Thu, 13 Feb 2025 15:23:02 +0100 Subject: [PATCH 1/2] ability to specify requirement for grid_submit --- GRID/utils/grid_submit.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/GRID/utils/grid_submit.sh b/GRID/utils/grid_submit.sh index 84cc73fcc..ba4b2a843 100755 --- a/GRID/utils/grid_submit.sh +++ b/GRID/utils/grid_submit.sh @@ -313,6 +313,17 @@ if [[ "${IS_ALIEN_JOB_SUBMITTER}" ]]; then # -) Special singularity / Apptainer image [[ ! ${IMAGESPEC} ]] && IMAGESPEC=$(grep "^#JDL_IMAGE=" ${SCRIPT} | sed 's/#JDL_IMAGE=//') echo "Found Container Image to be ${IMAGESPEC}" + + # -) Requirements-Spec + REQUIRESPEC=$(grep "^#JDL_REQUIRE=" ${SCRIPT} | sed 's/#JDL_REQUIRE=//') + if [ ! "${REQUIRESPEC}" ]; then + echo "No Requirement setting found; Setting to default" + REQUIRESPEC="{member(other.GridPartitions,"${GRIDPARTITION:-multicore_8}")};" + echo "Requirement is ${REQUIRESPEC}" + fi + + echo "Requirements JDL entry is ${REQUIRESPEC}" + # -) PackageSpec [[ ! 
${PACKAGESPEC} ]] && PACKAGESPEC=$(grep "^#JDL_PACKAGE=" ${SCRIPT} | sed 's/#JDL_PACKAGE=//') echo "Found PackagesSpec to be ${PACKAGESPEC}" @@ -358,6 +369,8 @@ EOF echo "Packages = {"${PACKAGESPEC}"};" >> "${MY_JOBNAMEDATE}.jdl" # add package spec [ $ERROROUTPUTSPEC ] && echo "OutputErrorE = {"${ERROROUTPUTSPEC}"};" >> "${MY_JOBNAMEDATE}.jdl" # add error output files [ $IMAGESPEC ] && echo "DebugTag = {\"${IMAGESPEC}\"};" >> "${MY_JOBNAMEDATE}.jdl" # use special singularity image to run job + # echo "Requirements = {"${REQUIREMENTSSPEC}"} >> "${MY_JOBNAMEDATE}.jdl" + [ $REQUIRESPEC ] && echo "Requirements = ${REQUIRESPEC}" >> "${MY_JOBNAMEDATE}.jdl" # "output_arch.zip:output/*@disk=2", # "checkpoint*.tar@disk=2" From a916c239bc453ece31220b99ce0cf30e8bef209f Mon Sep 17 00:00:00 2001 From: swenzel Date: Wed, 12 Feb 2025 15:17:11 +0100 Subject: [PATCH 2/2] Ability to query DPL options in a specific environment Also fixing subprocess invocation to prevent DPL hang --- MC/bin/o2dpg_sim_config.py | 45 +++++++++++++++++++++++++++++++----- MC/bin/o2dpg_sim_workflow.py | 3 ++- MC/run/ANCHOR/anchorMC.sh | 16 +++++++++++++ 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/MC/bin/o2dpg_sim_config.py b/MC/bin/o2dpg_sim_config.py index 8ff128663..b971a182d 100755 --- a/MC/bin/o2dpg_sim_config.py +++ b/MC/bin/o2dpg_sim_config.py @@ -1,6 +1,7 @@ from functools import lru_cache import subprocess import re +import os def create_sim_config(args): # creates a generic simulation config @@ -143,11 +144,43 @@ def constructConfigKeyArg(config): arg = arg + '"' return arg +def load_env_file(env_file): + """Transform an environment file generated with 'export > env.txt' into a python dictionary.""" + env_vars = {} + with open(env_file, "r") as f: + for line in f: + line = line.strip() + + # Ignore empty lines or comments + if not line or line.startswith("#"): + continue + + # Remove 'declare -x ' if present + if line.startswith("declare -x "): + line = line.replace("declare -x 
", "", 1) + + # Handle case: "FOO" without "=" (assign empty string) + if "=" not in line: + key, value = line.strip(), "" + else: + key, value = line.split("=", 1) + value = value.strip('"') # Remove surrounding quotes if present + + env_vars[key.strip()] = value + return env_vars + # some functions to determine dpl option availability on the fly -def parse_dpl_help_output(executable): +def parse_dpl_help_output(executable, envfile): """Parses the --help full output of an executable to extract available options.""" try: - output = subprocess.check_output([executable, "--help", "full"], text=True) + env = os.environ.copy() + if envfile != None: + print ("Loading from alternative environment") + env = load_env_file(envfile) + + # the DEVNULL is important for o2-dpl workflows not to hang on non-interactive missing tty environments + # it is cleaner than the echo | trick + output = subprocess.check_output([executable, "--help", "full"], env=env, text=True, stdin=subprocess.DEVNULL, timeout = 10) except subprocess.CalledProcessError: return {}, {} @@ -172,11 +205,11 @@ def parse_dpl_help_output(executable): return sections, inverse_lookup @lru_cache(maxsize=10) -def get_dpl_options_for_executable(executable): +def get_dpl_options_for_executable(executable, envfile): """Returns available options and inverse lookup for a given executable, caching the result.""" - return parse_dpl_help_output(executable) + return parse_dpl_help_output(executable, envfile) -def option_if_available(executable, option): +def option_if_available(executable, option, envfile = None): """Checks if an option is available for a given executable and returns it as a string. 
Otherwise empty string""" - _, inverse_lookup = get_dpl_options_for_executable(executable) + _, inverse_lookup = get_dpl_options_for_executable(executable, envfile) return ' ' + option if option in inverse_lookup else '' diff --git a/MC/bin/o2dpg_sim_workflow.py b/MC/bin/o2dpg_sim_workflow.py index 090dc5dea..0731784db 100755 --- a/MC/bin/o2dpg_sim_workflow.py +++ b/MC/bin/o2dpg_sim_workflow.py @@ -1154,11 +1154,12 @@ def getDigiTaskName(det): # TODO: Is this still used? tpc_corr_scaling_options = anchorConfig.get('tpc-corr-scaling','') + tpc_envfile = 'env_async.env' if environ.get('ALIEN_JDL_O2DPG_ASYNC_RECO_TAG') is not None else None TPCRECOtask=createTask(name='tpcreco_'+str(tf), needs=tpcreconeeds, tf=tf, cwd=timeframeworkdir, lab=["RECO"], relative_cpu=3/8, mem='16000') TPCRECOtask['cmd'] = '${O2_ROOT}/bin/o2-tpc-reco-workflow ' + getDPL_global_options(bigshm=True) + ' --input-type clusters --output-type tracks,send-clusters-per-sector ' \ + putConfigValuesNew(["GPU_global","TPCGasParam", "TPCCorrMap", "GPU_rec_tpc", "trackTuneParams"], {"GPU_proc.ompThreads":NWORKERS_TF} | tpcLocalCFreco) + ('',' --disable-mc')[args.no_mc_labels] \ + tpc_corr_scaling_options + tpc_corr_options_mc \ - + option_if_available('o2-tpc-reco-workflow', '--tpc-mc-time-gain') + + option_if_available('o2-tpc-reco-workflow', '--tpc-mc-time-gain', envfile=tpc_envfile) workflow['stages'].append(TPCRECOtask) diff --git a/MC/run/ANCHOR/anchorMC.sh b/MC/run/ANCHOR/anchorMC.sh index 0935b6ffb..da9d48cb9 100755 --- a/MC/run/ANCHOR/anchorMC.sh +++ b/MC/run/ANCHOR/anchorMC.sh @@ -147,6 +147,18 @@ SEED=${ALIEN_PROC_ID:-${SEED:-1}} ONCVMFS=0 +# TODO: +# (a) detect if there was an O2DPG_OVERLOAD; because we need to handle this correctly during +# purging, reloading +# (b) apply "tpc-mc-time-gain" optionally to tpc reco --- but this is done; so we really need a replacement method +# and a 2-stage workflow production +if [ "${ALIEN_JDL_O2DPG_OVERWRITE}" ]; then + echo "Setting O2DPG_ROOT to 
overwritten path" + export O2DPG_ROOT=${ALIEN_JDL_O2DPG_OVERWRITE} +fi + +export > env_base.env + if ! declare -F module > /dev/null; then module() { eval "$(/usr/bin/modulecmd bash "$@")"; @@ -249,6 +261,10 @@ if [ "${ALIEN_JDL_O2DPG_ASYNC_RECO_TAG}" ]; then echo "Restoring initial environment" module --no-pager restore initial_modules.list module saverm initial_modules.list + if [ "${ALIEN_JDL_O2DPG_OVERWRITE}" ]; then + echo "Setting back O2DPG_ROOT to overwritten path ${ALIEN_JDL_O2DPG_OVERWRITE}" + export O2DPG_ROOT=${ALIEN_JDL_O2DPG_OVERWRITE} + fi fi #<----- END OF part that should run under a clean alternative software environment if this was given ------