From 717f14f4bb961096de4c026b732774e535a6f1a8 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Wed, 9 Oct 2024 15:12:11 +0200 Subject: [PATCH 01/34] Add all used scripts --- machine_learning_hep/check_parquet.py | 46 +++++++++++++++++++++++ machine_learning_hep/gather-inputs-fdd.sh | 16 ++++++++ machine_learning_hep/run-fdd-batch.sh | 42 +++++++++++++++++++++ machine_learning_hep/run-fdd-precise.sh | 1 + machine_learning_hep/run-lc.sh | 14 +++++++ machine_learning_hep/run-prob-batch.sh | 31 +++++++++++++++ 6 files changed, 150 insertions(+) create mode 100644 machine_learning_hep/check_parquet.py create mode 100755 machine_learning_hep/gather-inputs-fdd.sh create mode 100755 machine_learning_hep/run-fdd-batch.sh create mode 120000 machine_learning_hep/run-fdd-precise.sh create mode 100755 machine_learning_hep/run-lc.sh create mode 100755 machine_learning_hep/run-prob-batch.sh diff --git a/machine_learning_hep/check_parquet.py b/machine_learning_hep/check_parquet.py new file mode 100644 index 0000000000..72cfcdd22e --- /dev/null +++ b/machine_learning_hep/check_parquet.py @@ -0,0 +1,46 @@ +import argparse +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +def plot_parquet(df): + print(df["fY"]) + print(df["fY"][~np.isinf(df["fY"])]) + + ds_fin = df["fY"][~np.isinf(df["fY"])] + + fig = plt.figure(figsize=(20, 15)) + ax = plt.subplot(1, 1, 1) + #ax.set_xlim([0, (df["fY"].mean()*2)]) + plt.hist(ds_fin.values, bins=50) + ax.set_xlabel("fY", fontsize=30) + ax.set_ylabel("Entries", fontsize=30) + fig.savefig("fY.png", bbox_inches='tight') + plt.close(fig) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("infile", help="file to process") + args = parser.parse_args() + + df = pd.read_parquet(args.infile) + print(f"df columns: {df.columns}") + print(f"full df:\n{df}") + + print(f"df mean\n{df.mean()}") + + print(f"df[0]\n{df.iloc[0]}") + + df_sel = df[df["y_test_probxgboostbkg"] <= 0.02] + print(f"sel 
df:\n{df_sel}") + #df_sel = df_sel[df_sel["y_test_probxgboostnon_prompt"] <= 0.08] + #print(f"sel df non-prompt:\n{df_sel}") + + #print(f'ML columns:\n{df["fMlBkgScore"]}\n{df["fMlPromptScore"]}\n{df["fMlNonPromptScore"]}') + #df_sel = df[df["fMlBkgScore"] >= 0.02] + #df_sel = df[df["fMlNonPromptScore"] < 0.15] + #print(f'df sel ML columns:\n{df_sel["fMlBkgScore"]}\n{df_sel["fMlNonPromptScore"]}') + + +if __name__ == '__main__': + main() diff --git a/machine_learning_hep/gather-inputs-fdd.sh b/machine_learning_hep/gather-inputs-fdd.sh new file mode 100755 index 0000000000..96b332c4b4 --- /dev/null +++ b/machine_learning_hep/gather-inputs-fdd.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +MLHEP_DIR="/data8/majak/MLHEP" +OUTPUT_DIR="${MLHEP_DIR}/input-fd-10092024" + +RESDIR_PATTERN="${MLHEP_DIR}/results-2308-hyp-ml_fd_precise_" + +for dir in ${RESDIR_PATTERN}1224_split* ; do + suffix=${dir##${RESDIR_PATTERN}} + echo $suffix + + cp "${dir}/LHC22pp_mc/Results/prod_LHC24d3b/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + "${OUTPUT_DIR}/efficienciesLcpKpiRun3analysis_${suffix}.root" + #cp "${dir}/LHC22pp/Results/resultsdatatot/Yields_LcpKpi_Run3analysis.root" \ + # "${OUTPUT_DIR}/yieldsLcpKpiRun3analysis-${suffix}.root" +done diff --git a/machine_learning_hep/run-fdd-batch.sh b/machine_learning_hep/run-fdd-batch.sh new file mode 100755 index 0000000000..9b750a0585 --- /dev/null +++ b/machine_learning_hep/run-fdd-batch.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" + +WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" +DATABASE="${WORKDIR}/data/data_run3/database_ml_parameters_LcToPKPi_multiclass_fdd" +DATABASE_EXT="${DATABASE}.yml" +RESDIR_PATTERN="results-2308-hyp-ml_1224_split_widerange_" + +BKG_1216=0.60 +BKG_1624=0.60 + +for fd in $(seq 0.00 0.01 0.00) ; do + echo "bkg ${BKG_1216} ${BKG_1624} fd ${fd}" + + RESDIR="${RESDIR_PATTERN}bkg_${BKG_1216}_${BKG_1624}_fd_${fd}" + RESPATH="/data8/majak/MLHEP/${RESDIR}/" + + rm 
-rf "${RESPATH}" + + CUR_DB="${DATABASE}_edit_fd${fd}_bkg_${BKG_1216}_${BKG_1624}.yml" + cp "${DATABASE_EXT}" "${CUR_DB}" || ErrExit "Could not copy database" + + sed -i "s/%resdir%/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1216%/${BKG_1216}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1624%/${BKG_1624}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd12%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd23%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd34%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd45%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd56%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd68%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd812%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd1216%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd1624%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + + mlhep --log-file "logfile_fd${fd}_bkg_${BKG_1216}_${BKG_1624}.log" \ + --run-config submission/default_complete.yml \ + --database-analysis ${CUR_DB} \ + --delete \ + > "debug_fd${fd}_bkg_${BKG_1216}_${BKG_1624}.txt" 2>&1 || ErrExit "Analysis failed" +done diff --git a/machine_learning_hep/run-fdd-precise.sh b/machine_learning_hep/run-fdd-precise.sh new file mode 120000 index 0000000000..9c226abd43 --- /dev/null +++ b/machine_learning_hep/run-fdd-precise.sh @@ -0,0 +1 @@ +/home/maja/CERN-useful-scripts/run-fdd-precise.sh \ No newline at end of file diff --git a/machine_learning_hep/run-lc.sh b/machine_learning_hep/run-lc.sh new file mode 100755 index 0000000000..aec6f6ead0 --- /dev/null +++ b/machine_learning_hep/run-lc.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +if [ "$#" -ne 2 ]; then + echo "Wrong number of parameters" + exit 1 +fi + +DB=$1 +LOGFILE=$2 + +mlhep --log-file 
${LOGFILE} \ + --run-config submission/default_complete.yml \ + --database-analysis ${DB} \ + --delete diff --git a/machine_learning_hep/run-prob-batch.sh b/machine_learning_hep/run-prob-batch.sh new file mode 100755 index 0000000000..fe447d668b --- /dev/null +++ b/machine_learning_hep/run-prob-batch.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" + +WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" +DATABASE="${WORKDIR}/data/data_run3/database_ml_parameters_LcToPKPi_multiclass_ana_hyp_ml" +DATABASE_EXT="${DATABASE}.yml" + +BKG_812=0.25 +BKG_1224=0.30 +#for bkg in $(seq 0.20 0.05 0.5) ; do + echo "bkg ${BKG_812} ${BKG_1224}" + + RESDIR_PATTERN="results-2207-hyp-ml_bkg" + RESDIR="${RESDIR_PATTERN}_${BKG_812}_${BKG_1224}\/" + RESPATH="/data8/majak/MLHEP/${RESDIR}" + + rm -rf "${RESPATH}" + + CUR_DB="${DATABASE}_edit_bkg${bkg}.yml" + cp "${DATABASE_EXT}" "${CUR_DB}" || ErrExit "Could not copy database" + + sed -i "s/${RESDIR_PATTERN}.*/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg812%/${BKG_812}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1224%/${BKG_1224}/g" "${CUR_DB}" || ErrExit "Could not edit database" + + mlhep --log-file "logfile_bkg${bkg}.log" \ + --run-config submission/default_complete.yml \ + --database-analysis ${CUR_DB} \ + --delete +#done From dde7ac68369add07ab3e4d1f6e235af091f4101e Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Mon, 28 Oct 2024 17:58:18 +0100 Subject: [PATCH 02/34] Shellcheck for scripts --- machine_learning_hep/gather-inputs-fdd.sh | 6 +++--- machine_learning_hep/run-fdd-batch.sh | 2 +- machine_learning_hep/run-prob-batch.sh | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/machine_learning_hep/gather-inputs-fdd.sh b/machine_learning_hep/gather-inputs-fdd.sh index 96b332c4b4..d4690834a3 100755 --- a/machine_learning_hep/gather-inputs-fdd.sh +++ 
b/machine_learning_hep/gather-inputs-fdd.sh @@ -5,9 +5,9 @@ OUTPUT_DIR="${MLHEP_DIR}/input-fd-10092024" RESDIR_PATTERN="${MLHEP_DIR}/results-2308-hyp-ml_fd_precise_" -for dir in ${RESDIR_PATTERN}1224_split* ; do - suffix=${dir##${RESDIR_PATTERN}} - echo $suffix +for dir in "${RESDIR_PATTERN}"1224_split* ; do + suffix=${dir##"${RESDIR_PATTERN}"} + echo "$suffix" cp "${dir}/LHC22pp_mc/Results/prod_LHC24d3b/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ "${OUTPUT_DIR}/efficienciesLcpKpiRun3analysis_${suffix}.root" diff --git a/machine_learning_hep/run-fdd-batch.sh b/machine_learning_hep/run-fdd-batch.sh index 9b750a0585..99e549c8ad 100755 --- a/machine_learning_hep/run-fdd-batch.sh +++ b/machine_learning_hep/run-fdd-batch.sh @@ -36,7 +36,7 @@ for fd in $(seq 0.00 0.01 0.00) ; do mlhep --log-file "logfile_fd${fd}_bkg_${BKG_1216}_${BKG_1624}.log" \ --run-config submission/default_complete.yml \ - --database-analysis ${CUR_DB} \ + --database-analysis "${CUR_DB}" \ --delete \ > "debug_fd${fd}_bkg_${BKG_1216}_${BKG_1624}.txt" 2>&1 || ErrExit "Analysis failed" done diff --git a/machine_learning_hep/run-prob-batch.sh b/machine_learning_hep/run-prob-batch.sh index fe447d668b..3cccfd0ee8 100755 --- a/machine_learning_hep/run-prob-batch.sh +++ b/machine_learning_hep/run-prob-batch.sh @@ -8,7 +8,7 @@ DATABASE_EXT="${DATABASE}.yml" BKG_812=0.25 BKG_1224=0.30 -#for bkg in $(seq 0.20 0.05 0.5) ; do +for bkg in $(seq 0.20 0.05 0.5) ; do echo "bkg ${BKG_812} ${BKG_1224}" RESDIR_PATTERN="results-2207-hyp-ml_bkg" @@ -26,6 +26,6 @@ BKG_1224=0.30 mlhep --log-file "logfile_bkg${bkg}.log" \ --run-config submission/default_complete.yml \ - --database-analysis ${CUR_DB} \ + --database-analysis "${CUR_DB}" \ --delete -#done +done From 31c19c1f9f2cc37d74d4fdf5f02a1ac8db7bff14 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 15 Jul 2025 14:39:25 +0200 Subject: [PATCH 03/34] Update scripts to pass6 --- machine_learning_hep/check_parquet.py | 43 +++++++++----- 
machine_learning_hep/gather-inputs-fdd.sh | 13 +++-- machine_learning_hep/run-fdd-batch.sh | 68 ++++++++++++++--------- machine_learning_hep/run-lc.sh | 11 ++-- machine_learning_hep/run-prob-batch.sh | 31 ----------- 5 files changed, 85 insertions(+), 81 deletions(-) delete mode 100755 machine_learning_hep/run-prob-batch.sh diff --git a/machine_learning_hep/check_parquet.py b/machine_learning_hep/check_parquet.py index 72cfcdd22e..c2510128ca 100644 --- a/machine_learning_hep/check_parquet.py +++ b/machine_learning_hep/check_parquet.py @@ -25,20 +25,37 @@ def main(): df = pd.read_parquet(args.infile) print(f"df columns: {df.columns}") - print(f"full df:\n{df}") - - print(f"df mean\n{df.mean()}") - - print(f"df[0]\n{df.iloc[0]}") - - df_sel = df[df["y_test_probxgboostbkg"] <= 0.02] - print(f"sel df:\n{df_sel}") - #df_sel = df_sel[df_sel["y_test_probxgboostnon_prompt"] <= 0.08] + #print(f"full df:\n{df}") + print(df.size) + + # 1-2: 36715937 + # sum from data: 1615501228 + # 2-3: 45167231 + # 3-4: 71973551 + # 4-5: 34874429 + # 5-6: + # 6-7: + # 7-8: + # 8-10: + # 10-12: + # 12-24: + + #print(f"df mean\n{df.mean()}") + + #print(f"df[0]\n{df.iloc[0]}") + + #df_sel = df[df["y_test_probxgboostbkg"] > 1.0] + #print(f"sel df bkg:\n{df_sel}") + #df_sel = df[df["y_test_probxgboostnon_prompt"] < 0.00] #print(f"sel df non-prompt:\n{df_sel}") - - #print(f'ML columns:\n{df["fMlBkgScore"]}\n{df["fMlPromptScore"]}\n{df["fMlNonPromptScore"]}') - #df_sel = df[df["fMlBkgScore"] >= 0.02] - #df_sel = df[df["fMlNonPromptScore"] < 0.15] + #df_sel = df[df["y_test_probxgboostprompt"] < 0.00] + #print(f"sel df prompt:\n{df_sel}") + + print(f'ML columns:\n{df["fMlBkgScore"]}\n{df["fMlPromptScore"]}\n{df["fMlNonPromptScore"]}') + df_sel = df[df["fMlBkgScore"] > 1.0] + print(f'df sel ML bkg:\n{df_sel["fMlBkgScore"]}') + df_sel = df[df["fMlNonPromptScore"] < 0.0] + print(f'df sel ML non-prompt:\n{df_sel["fMlNonPromptScore"]}') #print(f'df sel ML 
columns:\n{df_sel["fMlBkgScore"]}\n{df_sel["fMlNonPromptScore"]}') diff --git a/machine_learning_hep/gather-inputs-fdd.sh b/machine_learning_hep/gather-inputs-fdd.sh index d4690834a3..a9aedc0e56 100755 --- a/machine_learning_hep/gather-inputs-fdd.sh +++ b/machine_learning_hep/gather-inputs-fdd.sh @@ -1,16 +1,17 @@ #!/bin/bash MLHEP_DIR="/data8/majak/MLHEP" -OUTPUT_DIR="${MLHEP_DIR}/input-fd-10092024" +OUTPUT_DIR="${MLHEP_DIR}/input-fd-012025" -RESDIR_PATTERN="${MLHEP_DIR}/results-2308-hyp-ml_fd_precise_" +RESDIR_PATTERN="${MLHEP_DIR}/results-24012025-hyp-ml-luigi-cuts_" +PERM_PATTERN="fd_" -for dir in "${RESDIR_PATTERN}"1224_split* ; do +for dir in "${RESDIR_PATTERN}${PERM_PATTERN}"0.[0-9][0-9][0-9]* ; do suffix=${dir##"${RESDIR_PATTERN}"} echo "$suffix" - cp "${dir}/LHC22pp_mc/Results/prod_LHC24d3b/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + cp "${dir}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ "${OUTPUT_DIR}/efficienciesLcpKpiRun3analysis_${suffix}.root" - #cp "${dir}/LHC22pp/Results/resultsdatatot/Yields_LcpKpi_Run3analysis.root" \ - # "${OUTPUT_DIR}/yieldsLcpKpiRun3analysis-${suffix}.root" + #cp "${dir}/LHC23pp_pass4/Results/resultsdatatot/yields_LcpKpi_Run3analysis.root" \ + # "${OUTPUT_DIR}/yieldsLcpKpiRun3analysis-${suffix}-fixed-sigma.root" done diff --git a/machine_learning_hep/run-fdd-batch.sh b/machine_learning_hep/run-fdd-batch.sh index 99e549c8ad..b7e9e88846 100755 --- a/machine_learning_hep/run-fdd-batch.sh +++ b/machine_learning_hep/run-fdd-batch.sh @@ -3,40 +3,56 @@ source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" -DATABASE="${WORKDIR}/data/data_run3/database_ml_parameters_LcToPKPi_multiclass_fdd" +DATABASE="database_ml_parameters_LcToPKPi_multiclass_fdd" DATABASE_EXT="${DATABASE}.yml" -RESDIR_PATTERN="results-2308-hyp-ml_1224_split_widerange_" +DATABASE_PATH="${WORKDIR}/data/data_run3/${DATABASE_EXT}" +#RESDIR_PATTERN="results-24022025-prompt" 
+RESDIR_PATTERN="results-24022025-newtrain-ptshape-prompt" -BKG_1216=0.60 -BKG_1624=0.60 +bkg=0.00 +for fd in $(seq 0.000 0.005 0.000) ; do + echo "fd ${fd}" -for fd in $(seq 0.00 0.01 0.00) ; do - echo "bkg ${BKG_1216} ${BKG_1624} fd ${fd}" - - RESDIR="${RESDIR_PATTERN}bkg_${BKG_1216}_${BKG_1624}_fd_${fd}" + #suffix="fd_${fd}" + suffix="" + RESDIR="${RESDIR_PATTERN}${suffix}" RESPATH="/data8/majak/MLHEP/${RESDIR}/" - rm -rf "${RESPATH}" + #rm -rf "${RESPATH}" - CUR_DB="${DATABASE}_edit_fd${fd}_bkg_${BKG_1216}_${BKG_1624}.yml" - cp "${DATABASE_EXT}" "${CUR_DB}" || ErrExit "Could not copy database" + CUR_DB="${DATABASE}_edit_fd${fd}.yml" + cp "${DATABASE_PATH}" "${CUR_DB}" || ErrExit "Could not copy database" sed -i "s/%resdir%/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg1216%/${BKG_1216}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg1624%/${BKG_1624}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd12%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd23%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd34%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd45%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd56%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd68%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd812%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd1216%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd1624%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - - mlhep --log-file "logfile_fd${fd}_bkg_${BKG_1216}_${BKG_1624}.log" \ - --run-config submission/default_complete.yml \ + sed -i "s/%bkg01%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg12%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg23%/${bkg}/g" "${CUR_DB}" || 
ErrExit "Could not edit database" + sed -i "s/%bkg34%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg45%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg56%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg67%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg78%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg810%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1012%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1216%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1624%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd01%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + + yes | mlhep --log-file "logfile_${suffix}.log" \ + -a Run3analysis \ + --run-config submission/analyzer.yml \ --database-analysis "${CUR_DB}" \ --delete \ - > "debug_fd${fd}_bkg_${BKG_1216}_${BKG_1624}.txt" 2>&1 || ErrExit "Analysis failed" + > "debug_${suffix}.txt" 2>&1 || ErrExit "Analysis failed" done diff --git 
a/machine_learning_hep/run-lc.sh b/machine_learning_hep/run-lc.sh index aec6f6ead0..f7c2e19e3e 100755 --- a/machine_learning_hep/run-lc.sh +++ b/machine_learning_hep/run-lc.sh @@ -1,14 +1,15 @@ #!/bin/bash -if [ "$#" -ne 2 ]; then +if [ "$#" -ne 3 ]; then echo "Wrong number of parameters" exit 1 fi DB=$1 -LOGFILE=$2 +CONFIG=$2 +LOGFILE=$3 mlhep --log-file ${LOGFILE} \ - --run-config submission/default_complete.yml \ - --database-analysis ${DB} \ - --delete + -a Run3analysis \ + --run-config ${CONFIG} \ + --database-analysis ${DB} diff --git a/machine_learning_hep/run-prob-batch.sh b/machine_learning_hep/run-prob-batch.sh deleted file mode 100755 index 3cccfd0ee8..0000000000 --- a/machine_learning_hep/run-prob-batch.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" - -WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" -DATABASE="${WORKDIR}/data/data_run3/database_ml_parameters_LcToPKPi_multiclass_ana_hyp_ml" -DATABASE_EXT="${DATABASE}.yml" - -BKG_812=0.25 -BKG_1224=0.30 -for bkg in $(seq 0.20 0.05 0.5) ; do - echo "bkg ${BKG_812} ${BKG_1224}" - - RESDIR_PATTERN="results-2207-hyp-ml_bkg" - RESDIR="${RESDIR_PATTERN}_${BKG_812}_${BKG_1224}\/" - RESPATH="/data8/majak/MLHEP/${RESDIR}" - - rm -rf "${RESPATH}" - - CUR_DB="${DATABASE}_edit_bkg${bkg}.yml" - cp "${DATABASE_EXT}" "${CUR_DB}" || ErrExit "Could not copy database" - - sed -i "s/${RESDIR_PATTERN}.*/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg812%/${BKG_812}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg1224%/${BKG_1224}/g" "${CUR_DB}" || ErrExit "Could not edit database" - - mlhep --log-file "logfile_bkg${bkg}.log" \ - --run-config submission/default_complete.yml \ - --database-analysis "${CUR_DB}" \ - --delete -done From 6aae0d3bae46b525e1991a2f08bee2b456fc3b73 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 09:03:55 +0200 Subject: [PATCH 04/34] Move scripts 
to scripts/ directory --- .../{ => scripts-dhadrons}/check_parquet.py | 0 .../gather-inputs-fdd.sh | 0 .../{ => scripts-dhadrons}/run-fdd-batch.sh | 0 .../{ => scripts-dhadrons}/run-fdd-precise.sh | 0 .../{ => scripts-dhadrons}/run-lc.sh | 0 machine_learning_hep/scripts/check_parquet.py | 63 +++++++++++++++++++ .../scripts/gather-inputs-fdd.sh | 17 +++++ machine_learning_hep/scripts/run-fdd-batch.sh | 58 +++++++++++++++++ .../scripts/run-fdd-precise.sh | 1 + machine_learning_hep/scripts/run-lc.sh | 15 +++++ 10 files changed, 154 insertions(+) rename machine_learning_hep/{ => scripts-dhadrons}/check_parquet.py (100%) rename machine_learning_hep/{ => scripts-dhadrons}/gather-inputs-fdd.sh (100%) rename machine_learning_hep/{ => scripts-dhadrons}/run-fdd-batch.sh (100%) rename machine_learning_hep/{ => scripts-dhadrons}/run-fdd-precise.sh (100%) rename machine_learning_hep/{ => scripts-dhadrons}/run-lc.sh (100%) create mode 100644 machine_learning_hep/scripts/check_parquet.py create mode 100755 machine_learning_hep/scripts/gather-inputs-fdd.sh create mode 100755 machine_learning_hep/scripts/run-fdd-batch.sh create mode 120000 machine_learning_hep/scripts/run-fdd-precise.sh create mode 100755 machine_learning_hep/scripts/run-lc.sh diff --git a/machine_learning_hep/check_parquet.py b/machine_learning_hep/scripts-dhadrons/check_parquet.py similarity index 100% rename from machine_learning_hep/check_parquet.py rename to machine_learning_hep/scripts-dhadrons/check_parquet.py diff --git a/machine_learning_hep/gather-inputs-fdd.sh b/machine_learning_hep/scripts-dhadrons/gather-inputs-fdd.sh similarity index 100% rename from machine_learning_hep/gather-inputs-fdd.sh rename to machine_learning_hep/scripts-dhadrons/gather-inputs-fdd.sh diff --git a/machine_learning_hep/run-fdd-batch.sh b/machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh similarity index 100% rename from machine_learning_hep/run-fdd-batch.sh rename to machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh 
diff --git a/machine_learning_hep/run-fdd-precise.sh b/machine_learning_hep/scripts-dhadrons/run-fdd-precise.sh similarity index 100% rename from machine_learning_hep/run-fdd-precise.sh rename to machine_learning_hep/scripts-dhadrons/run-fdd-precise.sh diff --git a/machine_learning_hep/run-lc.sh b/machine_learning_hep/scripts-dhadrons/run-lc.sh similarity index 100% rename from machine_learning_hep/run-lc.sh rename to machine_learning_hep/scripts-dhadrons/run-lc.sh diff --git a/machine_learning_hep/scripts/check_parquet.py b/machine_learning_hep/scripts/check_parquet.py new file mode 100644 index 0000000000..c2510128ca --- /dev/null +++ b/machine_learning_hep/scripts/check_parquet.py @@ -0,0 +1,63 @@ +import argparse +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +def plot_parquet(df): + print(df["fY"]) + print(df["fY"][~np.isinf(df["fY"])]) + + ds_fin = df["fY"][~np.isinf(df["fY"])] + + fig = plt.figure(figsize=(20, 15)) + ax = plt.subplot(1, 1, 1) + #ax.set_xlim([0, (df["fY"].mean()*2)]) + plt.hist(ds_fin.values, bins=50) + ax.set_xlabel("fY", fontsize=30) + ax.set_ylabel("Entries", fontsize=30) + fig.savefig("fY.png", bbox_inches='tight') + plt.close(fig) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("infile", help="file to process") + args = parser.parse_args() + + df = pd.read_parquet(args.infile) + print(f"df columns: {df.columns}") + #print(f"full df:\n{df}") + print(df.size) + + # 1-2: 36715937 + # sum from data: 1615501228 + # 2-3: 45167231 + # 3-4: 71973551 + # 4-5: 34874429 + # 5-6: + # 6-7: + # 7-8: + # 8-10: + # 10-12: + # 12-24: + + #print(f"df mean\n{df.mean()}") + + #print(f"df[0]\n{df.iloc[0]}") + + #df_sel = df[df["y_test_probxgboostbkg"] > 1.0] + #print(f"sel df bkg:\n{df_sel}") + #df_sel = df[df["y_test_probxgboostnon_prompt"] < 0.00] + #print(f"sel df non-prompt:\n{df_sel}") + #df_sel = df[df["y_test_probxgboostprompt"] < 0.00] + #print(f"sel df prompt:\n{df_sel}") + + print(f'ML 
columns:\n{df["fMlBkgScore"]}\n{df["fMlPromptScore"]}\n{df["fMlNonPromptScore"]}') + df_sel = df[df["fMlBkgScore"] > 1.0] + print(f'df sel ML bkg:\n{df_sel["fMlBkgScore"]}') + df_sel = df[df["fMlNonPromptScore"] < 0.0] + print(f'df sel ML non-prompt:\n{df_sel["fMlNonPromptScore"]}') + #print(f'df sel ML columns:\n{df_sel["fMlBkgScore"]}\n{df_sel["fMlNonPromptScore"]}') + + +if __name__ == '__main__': + main() diff --git a/machine_learning_hep/scripts/gather-inputs-fdd.sh b/machine_learning_hep/scripts/gather-inputs-fdd.sh new file mode 100755 index 0000000000..a9aedc0e56 --- /dev/null +++ b/machine_learning_hep/scripts/gather-inputs-fdd.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +MLHEP_DIR="/data8/majak/MLHEP" +OUTPUT_DIR="${MLHEP_DIR}/input-fd-012025" + +RESDIR_PATTERN="${MLHEP_DIR}/results-24012025-hyp-ml-luigi-cuts_" +PERM_PATTERN="fd_" + +for dir in "${RESDIR_PATTERN}${PERM_PATTERN}"0.[0-9][0-9][0-9]* ; do + suffix=${dir##"${RESDIR_PATTERN}"} + echo "$suffix" + + cp "${dir}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + "${OUTPUT_DIR}/efficienciesLcpKpiRun3analysis_${suffix}.root" + #cp "${dir}/LHC23pp_pass4/Results/resultsdatatot/yields_LcpKpi_Run3analysis.root" \ + # "${OUTPUT_DIR}/yieldsLcpKpiRun3analysis-${suffix}-fixed-sigma.root" +done diff --git a/machine_learning_hep/scripts/run-fdd-batch.sh b/machine_learning_hep/scripts/run-fdd-batch.sh new file mode 100755 index 0000000000..b7e9e88846 --- /dev/null +++ b/machine_learning_hep/scripts/run-fdd-batch.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" + +WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" +DATABASE="database_ml_parameters_LcToPKPi_multiclass_fdd" +DATABASE_EXT="${DATABASE}.yml" +DATABASE_PATH="${WORKDIR}/data/data_run3/${DATABASE_EXT}" +#RESDIR_PATTERN="results-24022025-prompt" +RESDIR_PATTERN="results-24022025-newtrain-ptshape-prompt" + +bkg=0.00 +for fd in $(seq 0.000 0.005 0.000) ; do + echo "fd ${fd}" + + 
#suffix="fd_${fd}" + suffix="" + RESDIR="${RESDIR_PATTERN}${suffix}" + RESPATH="/data8/majak/MLHEP/${RESDIR}/" + + #rm -rf "${RESPATH}" + + CUR_DB="${DATABASE}_edit_fd${fd}.yml" + cp "${DATABASE_PATH}" "${CUR_DB}" || ErrExit "Could not copy database" + + sed -i "s/%resdir%/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg01%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg12%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg23%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg34%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg45%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg56%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg67%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg78%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg810%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1012%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1216%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%bkg1624%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd01%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" 
"${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + + yes | mlhep --log-file "logfile_${suffix}.log" \ + -a Run3analysis \ + --run-config submission/analyzer.yml \ + --database-analysis "${CUR_DB}" \ + --delete \ + > "debug_${suffix}.txt" 2>&1 || ErrExit "Analysis failed" +done diff --git a/machine_learning_hep/scripts/run-fdd-precise.sh b/machine_learning_hep/scripts/run-fdd-precise.sh new file mode 120000 index 0000000000..9c226abd43 --- /dev/null +++ b/machine_learning_hep/scripts/run-fdd-precise.sh @@ -0,0 +1 @@ +/home/maja/CERN-useful-scripts/run-fdd-precise.sh \ No newline at end of file diff --git a/machine_learning_hep/scripts/run-lc.sh b/machine_learning_hep/scripts/run-lc.sh new file mode 100755 index 0000000000..f7c2e19e3e --- /dev/null +++ b/machine_learning_hep/scripts/run-lc.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +if [ "$#" -ne 3 ]; then + echo "Wrong number of parameters" + exit 1 +fi + +DB=$1 +CONFIG=$2 +LOGFILE=$3 + +mlhep --log-file ${LOGFILE} \ + -a Run3analysis \ + --run-config ${CONFIG} \ + --database-analysis ${DB} From 7c1ea4aa6db3f2423ddc33e86d347a284c28d278 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 10:12:08 +0200 Subject: [PATCH 05/34] Rename the scripts dir --- .../scripts-dhadrons/DrawCutVarFit.C | 298 ++++++++++ .../scripts-dhadrons/add_pt_bins.py | 75 +++ .../scripts-dhadrons/compare_fractions.py | 547 ++++++++++++++++++ .../config_Lc_Fit_pp13.6TeV_Data.yml | 27 + .../merge-fdd-inputs-sept-approvals.sh | 67 +++ .../scripts-dhadrons/merge-histos.sh | 16 + .../scripts-dhadrons/merge-mlhep.sh | 70 +++ .../scripts-dhadrons/merge-yields.sh | 15 + .../scripts-dhadrons/merge_fractions.py | 68 +++ .../scripts-dhadrons/merge_histomass.py | 42 ++ .../scripts-dhadrons/merge_histos.py | 55 ++ .../scripts-dhadrons/modify_input_run2.py | 92 +++ 
.../scripts-dhadrons/modify_input_run3.py | 70 +++ .../scripts-dhadrons/plot_graph.py | 99 ++++ .../plot_invmass_fit_dzero_dplus_lambdac.py | 413 +++++++++++++ .../scripts-dhadrons/remove_high_pt.py | 58 ++ machine_learning_hep/scripts/check_parquet.py | 63 -- .../scripts/gather-inputs-fdd.sh | 17 - machine_learning_hep/scripts/run-fdd-batch.sh | 58 -- .../scripts/run-fdd-precise.sh | 1 - machine_learning_hep/scripts/run-lc.sh | 15 - 21 files changed, 2012 insertions(+), 154 deletions(-) create mode 100644 machine_learning_hep/scripts-dhadrons/DrawCutVarFit.C create mode 100644 machine_learning_hep/scripts-dhadrons/add_pt_bins.py create mode 100644 machine_learning_hep/scripts-dhadrons/compare_fractions.py create mode 100644 machine_learning_hep/scripts-dhadrons/config_Lc_Fit_pp13.6TeV_Data.yml create mode 100755 machine_learning_hep/scripts-dhadrons/merge-fdd-inputs-sept-approvals.sh create mode 100755 machine_learning_hep/scripts-dhadrons/merge-histos.sh create mode 100755 machine_learning_hep/scripts-dhadrons/merge-mlhep.sh create mode 100755 machine_learning_hep/scripts-dhadrons/merge-yields.sh create mode 100644 machine_learning_hep/scripts-dhadrons/merge_fractions.py create mode 100644 machine_learning_hep/scripts-dhadrons/merge_histomass.py create mode 100644 machine_learning_hep/scripts-dhadrons/merge_histos.py create mode 100644 machine_learning_hep/scripts-dhadrons/modify_input_run2.py create mode 100644 machine_learning_hep/scripts-dhadrons/modify_input_run3.py create mode 100644 machine_learning_hep/scripts-dhadrons/plot_graph.py create mode 100644 machine_learning_hep/scripts-dhadrons/plot_invmass_fit_dzero_dplus_lambdac.py create mode 100644 machine_learning_hep/scripts-dhadrons/remove_high_pt.py delete mode 100644 machine_learning_hep/scripts/check_parquet.py delete mode 100755 machine_learning_hep/scripts/gather-inputs-fdd.sh delete mode 100755 machine_learning_hep/scripts/run-fdd-batch.sh delete mode 120000 
machine_learning_hep/scripts/run-fdd-precise.sh delete mode 100755 machine_learning_hep/scripts/run-lc.sh diff --git a/machine_learning_hep/scripts-dhadrons/DrawCutVarFit.C b/machine_learning_hep/scripts-dhadrons/DrawCutVarFit.C new file mode 100644 index 0000000000..9f595d44ce --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/DrawCutVarFit.C @@ -0,0 +1,298 @@ +#include "TCanvas.h" +#include "TFile.h" +#include "TGaxis.h" +#include "TGraphAsymmErrors.h" +#include "TH1.h" +#include "TLatex.h" +#include "TLegend.h" +#include "TPad.h" +#include "TStyle.h" +#include + +using namespace std; + +void SetStyle(); +void SetStyleHisto(TH1D *h); +void SetStyleHisto(TH1F *h); +void NormaliseHist1d(TH1 *h); + +//const Int_t colors[] = {kGreen + 2, kBlue - 4, kRed, kOrange + 7}; +//const Int_t markers[] = {20, 21, 33, 34}; +//const Int_t npoints[] = {5, 3, 4, 4, 4, 4, 4}; +//const Int_t nPtBins = 11; +//const Double_t ptlimsmiddle[11] = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5, +// 7.5, 9, 11, 14, 20}; +//const Int_t nPtBinsCoarse = 11; +//Double_t ptlimsCoarse[nPtBinsCoarse + 1] = {1., 2., 3., 4., 5., 6., +// 7., 8., 10., 12., 16., 24.}; +//Double_t ptbinwidthCoarse[nPtBinsCoarse] = {1., 1., 1., 1., 1., 1., +// 1., 2., 2., 4., 8.}; +//const Double_t ptlimsmiddlePrompt[21] = { +// 0.5, 1.25, 1.75, 2.25, 2.75, 3.25, 3.75, 4.25, 4.75, 5.25, 5.75, +// 6.25, 6.75, 7.25, 7.75, 8.5, 9.5, 11., 14., 20., 30.}; +//Double_t yvaluncPrompt[21] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., +// 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}; + +std::vector bdtScoreCuts_1_2 = {0.21, 0.24, 0.27, 0.30, 0.33, 0.35, 0.37, 0.39, 0.41, 0.44, 0.46, 0.48, 0.50, 0.52, 0.54, 0.55, 0.58}; +std::vector bdtScoreCuts_2_3 = {0.20, 0.22, 0.24, 0.26, 0.28, 0.30, 0.32, 0.34, 0.36, 0.38, 0.40, 0.42, 0.44, 0.46, 0.48, 0.50, 0.52}; +std::vector bdtScoreCuts_3_4 = {0.26, 0.28, 0.30, 0.32, 0.34, 0.36, 0.38, 0.40, 0.42, 0.44, 0.46, 0.48, 0.50, 0.52, 0.55, 0.58, 0.60}; +std::vector bdtScoreCuts_4_5 = {0.17, 0.19, 0.21, 0.23, 
0.25, 0.27, 0.29, 0.31, 0.33, 0.35, 0.38, 0.40, 0.43, 0.47, 0.50, 0.54, 0.58}; +std::vector bdtScoreCuts_5_6 = {0.10, 0.12, 0.14, 0.16, 0.18, 0.21, 0.24, 0.26, 0.28, 0.30, 0.33, 0.36, 0.39, 0.42, 0.45, 0.50, 0.52}; +std::vector bdtScoreCuts_6_8 = {0.15, 0.17, 0.19, 0.21, 0.23, 0.25, 0.27, 0.29, 0.31, 0.33, 0.36, 0.39, 0.41, 0.43, 0.46, 0.49, 0.52}; +std::vector bdtScoreCuts_8_12 = {0.08, 0.11, 0.14, 0.16, 0.18, 0.20, 0.22, 0.25, 0.28, 0.30, 0.33, 0.35, 0.38, 0.41, 0.43, 0.46, 0.49}; +//std::vector bdtScoreCuts = {0.29, 0.33, 0.37, 0.41, 0.45, 0.49, +// 0.53, 0.57, 0.61, 0.65, 0.69, 0.73, +// 0.77, 0.81, 0.85, 0.89, 0.93}; +std::vector bdtScoreCuts_toPlot = {0.29, 0.45, 0.61, 0.77, 0.93}; +std::vector bdtScoreCuts_toPlot_ind = {0, 4, 8, 12, 16}; + +const Int_t binMin = 4; +const Int_t binMax = 5; +std::vector bdtScoreCuts = bdtScoreCuts_4_5; + +bool DrawAllPoints = false; + +void DrawCutVarFit(bool isPreliminary = kTRUE) { + + //TGaxis::SetMaxDigits(1); + gStyle->SetOptTitle(0); + gStyle->SetOptStat(0); + + TFile *CutVarFile = nullptr; + + // D + TH1F *hRawYieldsVsCutPt = nullptr; + TH1F *hRawYieldPromptVsCut = nullptr; + TH1F *hRawYieldFDVsCut = nullptr; + TH1F *hRawYieldsVsCutReSum = nullptr; + + CutVarFile = + new TFile("/data8/majak/systematics/230824/CutVarLc_pp13TeV_LHC24d3_default.root", + "read"); + hRawYieldsVsCutPt = + (TH1F *)CutVarFile->Get(Form("hRawYieldVsCut_pt%d_%d", binMin, binMax)); + hRawYieldPromptVsCut = + (TH1F *)CutVarFile->Get(Form("hRawYieldPromptVsCut_pt%d_%d", binMin, binMax)); + hRawYieldFDVsCut = + (TH1F *)CutVarFile->Get(Form("hRawYieldNonPromptVsCut_pt%d_%d", binMin, binMax)); + hRawYieldsVsCutReSum = + (TH1F *)CutVarFile->Get(Form("hRawYieldSumVsCut_pt%d_%d", binMin, binMax)); + + SetStyleHisto(hRawYieldsVsCutPt); + SetStyleHisto(hRawYieldPromptVsCut); + SetStyleHisto(hRawYieldFDVsCut); + SetStyleHisto(hRawYieldsVsCutReSum); + + hRawYieldsVsCutPt->SetMarkerStyle(20); + hRawYieldsVsCutPt->SetMarkerSize(1); + 
hRawYieldsVsCutPt->SetMarkerColor(kBlack); + hRawYieldsVsCutPt->SetLineColor(kBlack); + + hRawYieldPromptVsCut->SetMarkerStyle(33); + hRawYieldPromptVsCut->SetMarkerSize(1); + hRawYieldPromptVsCut->SetMarkerColor(kRed + 1); + hRawYieldPromptVsCut->SetLineColor(kRed + 1); + + hRawYieldFDVsCut->SetMarkerStyle(33); + hRawYieldFDVsCut->SetMarkerSize(1); + hRawYieldFDVsCut->SetMarkerColor(kAzure + 4); + hRawYieldFDVsCut->SetLineColor(kAzure + 4); + + hRawYieldsVsCutReSum->SetMarkerStyle(33); + hRawYieldsVsCutReSum->SetMarkerSize(1); + hRawYieldsVsCutReSum->SetMarkerColor(kGreen + 2); + hRawYieldsVsCutReSum->SetLineColor(kGreen + 2); + + hRawYieldsVsCutPt->GetYaxis()->SetTitle("Raw yield"); + hRawYieldsVsCutPt->GetYaxis()->SetTitleSize(0.05); + hRawYieldsVsCutPt->GetYaxis()->SetMaxDigits(3); + hRawYieldsVsCutPt->GetXaxis()->SetTitle("Minimum BDT score for non-prompt#Lambda_{c}^{#plus}"); + hRawYieldsVsCutPt->GetXaxis()->SetTitleSize(0.05); + hRawYieldsVsCutPt->SetMinimum(0.1); + hRawYieldsVsCutPt->SetMaximum(35000); + hRawYieldsVsCutPt->SetLineWidth(2); + hRawYieldsVsCutPt->GetYaxis()->SetTitleOffset(1.1); + // Set custom labels + for (size_t i = 0; i < bdtScoreCuts.size(); ++i) { + hRawYieldsVsCutPt->GetXaxis()->SetBinLabel(i + 1, Form("")); + for (size_t j = 0; j < bdtScoreCuts_toPlot_ind.size(); ++j) + //if (bdtScoreCuts[i] == bdtScoreCuts_toPlot[j]) { + if (i == bdtScoreCuts_toPlot_ind[j]) { + std::cout << "bdtScoreCuts[i] " << bdtScoreCuts[i] << " bdtScoreCuts_toPlot " << bdtScoreCuts_toPlot_ind[j] << std::endl; + hRawYieldsVsCutPt->GetXaxis()->SetBinLabel(i + 1, Form("%.2f",bdtScoreCuts[i])); + } + } + + TCanvas *c1 = new TCanvas("c1", "c1", 0, 0, 750, 750); + gStyle->SetOptStat(0); + c1->SetTickx(); + c1->SetTicky(); + c1->SetBottomMargin(0.13); + c1->SetLeftMargin(0.17); + c1->SetTopMargin(0.06); + c1->SetRightMargin(0.06); + c1->cd(); + + hRawYieldsVsCutPt->Draw(); + hRawYieldPromptVsCut->Draw("HISTsame"); + hRawYieldPromptVsCut->SetFillStyle(3154); + 
hRawYieldPromptVsCut->SetFillColor(kRed + 1); + hRawYieldFDVsCut->Draw("HISTsame"); + hRawYieldFDVsCut->SetFillStyle(3145); + hRawYieldFDVsCut->SetFillColor(kAzure + 4); + hRawYieldsVsCutReSum->Draw("HISTsame"); + + TLatex info; + info.SetNDC(); + info.SetTextFont(43); + info.SetTextSize(40); + info.DrawLatex(0.21, 0.86, "ALICE Preliminary"); + + TLatex infos; + infos.SetNDC(); + infos.SetTextFont(43); + infos.SetTextSize(30); + infos.DrawLatex(0.21, 0.80, + "#Lambda_{c}^{#plus} and charge conj., pp, #sqrt{#it{s}} = 13.6 TeV"); + //infos.DrawLatex(0.21, 0.74, "|#it{y}| < 0.5"); + + TLatex infoPt; + infoPt.SetNDC(); + infoPt.SetTextFont(43); + infoPt.SetTextSize(30); + + infoPt.DrawLatex(0.62, 0.70, Form("%d < #it{p}_{T} < %d GeV/#it{c}", binMin, binMax)); + // TLatex info5; + // info5.SetNDC(); + // info5.SetTextFont(43); + // info5.SetTextSize(15); + // info5.DrawLatex(0.48, 0.66, "#it{f} (b #rightarrow B^{0}, b #rightarrow + // B^{+})_{LHCb}, BR (H_{b} #rightarrow D^{0}+X)_{PYTHIA 8}");//, + // info1.DrawLatex(0.5, 0.74-0.02, "average of"); + // info.DrawLatex(0.20, 0.70, "#Lambda_{c}^{+} #rightarrow pK^{0}_{S}"); + // if (isPreliminary){ + // info.DrawLatex(0.28, 0.85, "ALICE"); + // info.DrawLatex(0.28, 0.85, "ALICE"); + // info.DrawLatex(0.22, 0.2-0.06, "Preliminary"); + // } + + // TLatex info2; + // info2.SetNDC(); + // info2.SetTextFont(43); + // info2.SetTextSize(15); + // info2.DrawLatex(0.21, 0.17, "#pm 3.7% lumi. unc. not shown"); + // info2.DrawLatex(0.21, 0.22, "#pm 0.76% BR unc. 
not shown"); + + TLegend *leg = new TLegend(0.62, 0.48, 0.70, 0.68); + leg->SetFillColor(0); + leg->SetFillStyle(0); + leg->SetBorderSize(0); + leg->SetMargin(0.46); + leg->SetTextSize(28); + leg->SetTextFont(43); + leg->AddEntry(hRawYieldsVsCutPt, "Data", "p"); + leg->AddEntry(hRawYieldPromptVsCut, "Prompt", "F"); + leg->AddEntry(hRawYieldFDVsCut, "Non-prompt", "F"); + leg->AddEntry(hRawYieldsVsCutReSum, "Total", "l"); + leg->Draw(); + + c1->SaveAs(Form("./CutVarFitLcFD_%d-%d.pdf", binMin, binMax)); + c1->SaveAs(Form("./CutVarFitLcFD_%d-%d.png", binMin, binMax)); + c1->SaveAs(Form("./CutVarFitLcFD_%d-%d.eps", binMin, binMax)); +} + +void SetStyle() { + cout << "Setting style!" << endl; + + gStyle->Reset("Plain"); + gStyle->SetOptTitle(0); + gStyle->SetOptStat(0); + gStyle->SetPalette(1); + gStyle->SetCanvasColor(10); + gStyle->SetCanvasBorderMode(0); + gStyle->SetFrameLineWidth(1); + gStyle->SetFrameFillColor(kWhite); + gStyle->SetPadColor(10); + gStyle->SetPadTickX(1); + gStyle->SetPadTickY(1); + gStyle->SetPadBottomMargin(0.13); + gStyle->SetPadLeftMargin(0.13); + gStyle->SetPadTopMargin(0.07); + gStyle->SetPadRightMargin(0.07); + gStyle->SetHistLineWidth(1); + gStyle->SetHistLineColor(kRed); + gStyle->SetFuncWidth(2); + gStyle->SetFuncColor(kGreen); + gStyle->SetLineWidth(2); + gStyle->SetLabelSize(0.055, "xyz"); + gStyle->SetLabelOffset(0.01, "y"); + gStyle->SetLabelOffset(0.01, "x"); + gStyle->SetLabelColor(kBlack, "xyz"); + // gStyle->SetTitleSize(0.055,"xyz"); + // gStyle->SetTitleOffset(1.5,"y"); + // gStyle->SetTitleOffset(1.15,"x"); + gStyle->SetTitleFillColor(kWhite); + gStyle->SetTextSizePixels(30); + gStyle->SetTextFont(42); + gStyle->SetLegendBorderSize(0); + gStyle->SetLegendFillColor(kWhite); + gStyle->SetLegendFont(42); + gStyle->SetMarkerStyle(20); + gStyle->SetMarkerSize(0.7); + gStyle->SetMarkerColor(kBlack); +} + +void SetStyleHisto(TH1D *h) { + + h->SetLineColor(kBlack); + h->SetLineWidth(2); + h->GetYaxis()->SetLabelFont(42); + 
h->GetYaxis()->SetTitleFont(42); + h->GetYaxis()->SetTitleSize(0.06); + h->GetYaxis()->SetTitleOffset(1.7); + h->GetYaxis()->SetLabelSize(0.05); + h->GetYaxis()->SetDecimals(kTRUE); + // h->GetYaxis()->SetNdivisions(507); + h->GetXaxis()->SetTitleFont(42); + h->GetXaxis()->SetLabelFont(42); + h->GetXaxis()->SetTitleSize(0.06); + h->GetXaxis()->SetTitleOffset(1.2); + h->GetXaxis()->SetLabelSize(0.07); + h->GetXaxis()->SetNdivisions(510); +} + +void SetStyleHisto(TH1F *h) { + + h->SetLineColor(kBlack); + h->SetLineWidth(2); + h->GetYaxis()->SetLabelFont(42); + h->GetYaxis()->SetTitleFont(42); + h->GetYaxis()->SetTitleSize(0.06); + h->GetYaxis()->SetTitleOffset(1.7); + h->GetYaxis()->SetLabelSize(0.05); + h->GetYaxis()->SetDecimals(kTRUE); + // h->GetYaxis()->SetNdivisions(507); + h->GetXaxis()->SetTitleFont(42); + h->GetXaxis()->SetLabelFont(42); + h->GetXaxis()->SetTitleSize(0.06); + h->GetXaxis()->SetTitleOffset(1.3); + h->GetXaxis()->SetLabelSize(0.07); + h->GetXaxis()->SetLabelOffset(0.01); + // h->GetXaxis()->SetNdivisions(505); + // h->GetXaxis()->SetNdivisions(510); +} + +void NormaliseHist1d(TH1 *h) { + if (h) { + // dN/dpt + for (Int_t i = 1; i <= h->GetNbinsX(); i++) { + h->SetBinContent(i, + h->GetBinContent(i) / (h->GetXaxis()->GetBinWidth(i))); + // hnew->SetBinError(i,hnew->GetBinContent(i)/(hnew->GetBinWidth(i) + //* TMath::Sqrt(hnew->GetBinContent(i)))); // may need to look at again + h->SetBinError(i, h->GetBinError(i) / (h->GetXaxis()->GetBinWidth(i))); + } + } else { + cout << "can't normalise hist - not found" << endl; + } +} diff --git a/machine_learning_hep/scripts-dhadrons/add_pt_bins.py b/machine_learning_hep/scripts-dhadrons/add_pt_bins.py new file mode 100644 index 0000000000..3181d38712 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/add_pt_bins.py @@ -0,0 +1,75 @@ +# pylint: disable=missing-function-docstring +""" +file: add_pt_bins.py +brief: Add 0-1 and 24-25 dummy pT bins to extend x-range of input histogram. 
+usage: python3 add_pt_bins.py file.root my_histo file_out.root +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse +import math +from array import array + +from ROOT import ( # pylint: disable=import-error,no-name-in-module + gROOT, + TFile, + TH1F +) + + +def main(): + """ + Main function. + """ + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("filename", help="input file with histogram") + parser.add_argument("histname", help="histogram name") + parser.add_argument("outname", help="output file for the new histogram") + args = parser.parse_args() + + with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout: + hist = fin.Get(args.histname) + hist.SetDirectory(0) + first_bin = 1 + #last_bin = hist.GetXaxis().FindBin(12.0) + last_bin = hist.GetNbinsX() + bins = [0.0] + #bins = [] + empty_bins = len(bins) + for binn in range(first_bin, last_bin + 1): + bins.append(hist.GetBinLowEdge(binn)) + #last_bins = [24.0, 25.0] + last_bins = [24.0] + bins += last_bins + print(f"Hist bins {bins}") + hist2 = TH1F(args.histname, "", len(bins) - 1, array('d', bins)) + for binn in range(empty_bins, last_bin + 1): + hist2.SetBinContent(binn + 1, hist.GetBinContent(binn + 1 - empty_bins)) + hist2.SetBinError(binn + 1, hist.GetBinError(binn + 1 - empty_bins)) + print(f"Setting bin {binn + 1} low edge {hist2.GetBinLowEdge(binn + 1)} up edge {hist2.GetXaxis().GetBinUpEdge(binn + 1)} content to content from bin {binn + 1 - empty_bins}: {hist2.GetBinContent(binn + 1)}") + #last_bin = hist2.GetNbinsX() + #width_combined = hist.GetBinWidth(hist.GetNbinsX() -1) + hist.GetBinWidth(hist.GetNbinsX()) + #hist2.SetBinContent(last_bin, + # ((hist.GetBinContent(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) +\ + # hist.GetBinContent(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX())) /\ + # width_combined)) + #hist2.SetBinError(last_bin, + # 
math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) /\ + # width_combined) **2 +\ + # (hist.GetBinError(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX()) /\ + # width_combined) ** 2)) + #print(f"Setting bin {last_bin} low edge {hist2.GetBinLowEdge(last_bin)} up edge {hist2.GetXaxis().GetBinUpEdge(last_bin)} content to content from bins {hist.GetNbinsX()-1}, {hist.GetNbinsX()}: {hist2.GetBinContent(last_bin)}") + hist2.SetMarkerSize(hist.GetMarkerSize()) + hist2.SetMarkerColor(hist.GetMarkerColor()) + hist2.SetMarkerStyle(hist.GetMarkerStyle()) + hist2.SetLineWidth(hist.GetLineWidth()) + hist2.SetLineColor(hist.GetLineColor()) + hist2.SetLineStyle(hist.GetLineStyle()) + fout.cd() + hist2.Write() + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/compare_fractions.py b/machine_learning_hep/scripts-dhadrons/compare_fractions.py new file mode 100644 index 0000000000..0b2560c972 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/compare_fractions.py @@ -0,0 +1,547 @@ +# pylint: disable=missing-function-docstring, too-many-locals +""" +file: compare_fractions.py +brief: Compare non-prompt corrected fractions for the systematic uncertainties analysis. 
+usage: python3 compare_fractions.py config_compare_fractions.json +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse +import json +import math +import os +from array import array + +from ROOT import ( # pylint: disable=import-error,no-name-in-module + MakeNullPointer, + TCanvas, + TFile, + TGraphAsymmErrors, + TH1F, + TLegend, + TObject, + TPaveText, + TLine, + gROOT, + gStyle, + kAzure, + kBlack, + kBlue, + kCyan, + kDashed, + kGray, + kGreen, + kMagenta, + kOrange, + kRed, + kTeal, + kYellow +) + +COLORS=[kBlack, kRed-3, kAzure-7, kGreen+2, kOrange-3, kBlue, kMagenta+2, + kTeal+3, kGreen, kAzure+8, + kYellow+3, kOrange-5, kMagenta+2, kBlue-6, kCyan+1, kGreen-6] +MODELS_COLORS=[kGray+1, kOrange-3, kCyan-2, kRed-9, kAzure-9, kBlue-6, kGreen-6, kOrange-5] +MODELS_STYLES=[3001, 3004, 3245, 3250, 3244, 3254, 3209, 3245, 3250, 3244, 3254, 3209] + + +def get_alice_text(cfg): + if "alice_text" not in cfg: + return None + + alice_text = TPaveText(0.17, 0.62, 0.50, 0.86, "brNDC") + alice_text.SetTextFont(42) + alice_text.SetTextSize(0.04) + alice_text.SetBorderSize(0) + alice_text.SetFillStyle(0) + alice_text.SetTextAlign(11) + + alice_text_config = cfg["alice_text"] + alice_text.AddText("#scale[1.35]{ALICE Preliminary}") + alice_text.AddText("#scale[1.05]{pp,#kern[-0.05]{ #sqrt{#it{s}} = 13.6 TeV, |#it{y}| < 0.5}}") + alice_text.AddText(f"#scale[1.20]{{{alice_text_config}}}") + + alice_text.Draw("same") + + return alice_text + + +def get_legend(x_1, y_1, x_2, y_2, num_hists, header=None): + leg = TLegend(x_1, y_1, x_2, y_2) + if num_hists > 4: + leg.SetNColumns(2) + if header: + leg.SetHeader(header) + leg.SetTextAlign(12) + leg.SetTextSize(0.04) + leg.SetMargin(0.3) + leg.SetBorderSize(0) + leg.SetFillStyle(0) + return leg + +def prepare_canvas(cname): + canv = TCanvas(cname, "") + canv.SetCanvasSize(900, 600) + #canv.SetTickx() + #canv.SetTicky() + canv.SetLeftMargin(0.15) + canv.SetBottomMargin(0.15) + return canv + + +def 
save_canvas(canv, cfg, filename): + for ext in ("png", "pdf"): + canv.SaveAs(os.path.join(cfg["output"]["outdir"], f"{filename}.{ext}")) + + +def combine_syst_errors(syst_errors, value): + err = 0.0 + err_perc = 0.0 + for syst in syst_errors: + err += syst * syst + err_perc += (100 * syst) * (100 * syst) + err_perc = math.sqrt(err_perc) + print(f"Combined percentage error: {err_perc:0.0f}") + return math.sqrt(err) * value + + +def get_hist_limits(hist, graph_syst = None, miny = 0.0, maxy = 0.0): + for binn in range(0, hist.GetNbinsX()): + print(f"bin {binn + 1} [{hist.GetXaxis().GetBinLowEdge(binn + 1)}, "\ + f"{hist.GetXaxis().GetBinLowEdge(binn + 2)}) val {hist.GetBinContent(binn + 1)} "\ + f"err {hist.GetBinError(binn + 1)}") + maxval = hist.GetBinContent(binn + 1) + hist.GetBinError(binn + 1) + minval = hist.GetBinContent(binn + 1) - hist.GetBinError(binn + 1) + if graph_syst: + maxval = max(maxval, hist.GetBinContent(binn + 1) + graph_syst.GetErrorY(binn)) + minval = min(minval, hist.GetBinContent(binn + 1) - graph_syst.GetErrorY(binn)) + maxy = max(maxval, maxy) + miny = min(minval, miny) + return miny, maxy + + +def merge_fractions(inputdir, histname, filenames): + with TFile.Open(os.path.join(inputdir, filenames[0])) as fin: + reshist = fin.Get(histname).Clone() + reshist.SetDirectory(0) + + for ind, file in enumerate(filenames[1:]): + ind += 1 + with TFile.Open(os.path.join(inputdir, file)) as fin: + hist = fin.Get(histname) + reshist.SetBinContent(ind + 1, hist.GetBinContent(ind + 1)) + reshist.SetBinError(ind + 1, hist.GetBinError(ind + 1)) + + return reshist + + +def set_hist_style(hist, color, y_axis, style=None): + for axis in (hist.GetXaxis(), hist.GetYaxis()): + axis.SetLabelFont(42) + axis.SetLabelSize(0.05) + axis.SetLabelOffset(0.02) + axis.SetTitleFont(42) + axis.SetTitleSize(0.06) + axis.SetTitleOffset(1.3) + hist.GetXaxis().SetTitle("#it{p}_{T}(GeV/#it{c})") + hist.GetYaxis().SetTitle(y_axis) + hist.GetYaxis().SetTitleSize(0.05) + 
hist.GetXaxis().SetTitleOffset(1.1) + + hist.SetLineColor(color) + hist.SetLineWidth(2) + if style: + hist.SetFillColor(color) + hist.SetFillStyle(style) + #hist.SetTitle("") + else: + hist.SetMarkerColor(color) + hist.SetMarkerSize(1) + hist.SetMarkerStyle(21) + + +def get_hist_for_label(label, color, cfg): + if len(cfg["hists"][label]["file"]) == 1: + with TFile.Open(os.path.join(cfg["inputdir"], cfg["hists"][label]["file"][0])) as fin: + hist = fin.Get(cfg["histoname"]) + hist.SetDirectory(0) + else: + print(f"Merging histograms for {label}") + hist = merge_fractions(cfg["inputdir"], cfg["histoname"], cfg["hists"][label]["file"]) + + set_hist_style(hist, color, cfg["y_axis"]) + return hist + + +def get_graph_systematics(hist, label, color, cfg): + if isinstance(cfg["hists"][label]["systematics"][0], str): + with TFile.Open(os.path.join(cfg["inputdir"], \ + cfg["hists"][label]["systematics"][0])) as fin: + graph_syst = fin.Get(cfg["hists"][label]["systematics"][1]) + else: + graph_syst = TGraphAsymmErrors() + graph_syst.SetName(f"graph_{label}_syst") + for binn in range(hist.GetNbinsX()): + syst_err = combine_syst_errors(cfg["hists"][label]["systematics"][binn], + hist.GetBinContent(binn + 1)) + print(f"Syst error {label} bin {binn + 1} {syst_err}") + x_point = hist.GetBinCenter(binn + 1) + y_point = hist.GetBinContent(binn + 1) + x_width = hist.GetBinWidth(binn + 1) / 4.0 # We want syst boxes to be of half-bin width + if y_point != 0: + graph_syst.SetPoint(binn, x_point, y_point) + graph_syst.SetPointError(binn, x_width, x_width, syst_err, syst_err) + set_hist_style(graph_syst, color, cfg["y_axis"]) + graph_syst.SetFillStyle(0) + return graph_syst + + +def get_hist_model(label, color, style, cfg): + with TFile.Open(os.path.join(cfg["inputdir"], cfg["models"][label]["file"])) as fin: + hist = fin.Get(cfg["models"][label]["histoname"]) + hist.SetDirectory(0) + + set_hist_style(hist, color, cfg["y_axis"], style) + #hist.SetTitle("") + + return hist + + +def 
plot_models(cfg, canv): + maxy = 0. + miny = 1000000. + if cfg.get("models", None): + hists_models = {} + leg_models = get_legend(*cfg["legend_models"], len(cfg["models"])) + leg_models.SetMargin(0.9) + for ind, (label, color, style) in \ + enumerate(zip(cfg["models"], MODELS_COLORS, MODELS_STYLES)): + hist = get_hist_model(label, color, style, cfg) + print(f"hist model for {label}: {hist.GetName()}") + miny, maxy = get_hist_limits(hist, None, miny, maxy) + + canv.cd() + draw_opt = "sameE3" if ind != 0 else "E3" + hist.Draw(draw_opt) + leg_models.AddEntry(hist, label, "f") + + hists_models[label] = hist + else: + leg_models = None + hists_models = None + + return canv, hists_models, leg_models, miny, maxy + + +def set_figs_limits(miny, maxy, hists, graphs, hists_models): + margin = 0.1 + #k = 1.0 - 2 * margin + #rangey = maxy - miny + #miny = miny - margin / k * rangey + #maxy = maxy + margin / k * rangey + print(f"Hist maxy: {maxy} miny: {miny}") + #miny = min(miny - margin * miny, 0) + miny = miny - margin * miny + if miny <= 0: + miny = 0.006 + print(f"Recalculated hist maxy: {maxy + margin * maxy} miny: {miny}") + if hists_models: + for _, hist in hists_models.items(): + hist.GetYaxis().SetRangeUser(miny, maxy + margin * maxy) + for _, hist in hists.items(): + hist.GetYaxis().SetRangeUser(miny, maxy + margin * maxy) + if graphs: + for graph_syst in graphs: + graph_syst.GetYaxis().SetRangeUser(miny, maxy + margin * maxy) + return hists, graphs, hists_models + +def plot_compare(cfg): + canv = prepare_canvas(f'c_{cfg["histoname"]}') + if cfg.get("log_scale", False): + canv.SetLogy() + + canv, hists_models, leg_models, miny, maxy = plot_models(cfg, canv) + leg = get_legend(*cfg["legend"], len(cfg["hists"])) + + hists = {} + central_graph = None + graphs_syst = [] + for ind, (label, color) in enumerate(zip(cfg["hists"], COLORS)): + hist = get_hist_for_label(label, color, cfg) + print(label) + miny, maxy = get_hist_limits(hist, None, miny, maxy) + + canv.cd() + 
draw_opt = "sameE" if ind != 0 or hists_models else "E" + hist.Draw(draw_opt) + leg.AddEntry(hist, label, "p") + + hists[label] = hist + + if cfg["hists"][label].get("systematics", None): + print("Plotting systematic") + graph_syst = get_graph_systematics(hist, label, color, cfg) + miny, maxy = get_hist_limits(hist, graph_syst, miny, maxy) + graph_syst.Draw("sameE2") + graphs_syst.append(graph_syst) + if label == cfg["default"]: + central_graph = graph_syst + + hists, graphs_syst, hists_models = set_figs_limits(miny, maxy, hists, graphs_syst, hists_models) + + leg.Draw() + if leg_models: + leg_models.Draw() + + alice_text = get_alice_text(cfg) + + return canv, hists, graphs_syst, hists_models, leg, leg_models, alice_text, central_graph + + +def get_average(hist, graph_syst): + width_combined = hist.GetBinWidth(hist.GetNbinsX() -1) + hist.GetBinWidth(hist.GetNbinsX()) + val = ((hist.GetBinContent(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) +\ + hist.GetBinContent(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX())) /\ + width_combined) + err = math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) *\ + hist.GetBinWidth(hist.GetNbinsX() - 1) /\ + width_combined) **2 +\ + (hist.GetBinError(hist.GetNbinsX()) *\ + hist.GetBinWidth(hist.GetNbinsX()) /\ + width_combined) ** 2) + syst_err = math.sqrt((graph_syst.GetErrorYlow(hist.GetNbinsX() - 2) *\ + hist.GetBinWidth(hist.GetNbinsX() - 1) /\ + width_combined) **2 +\ + (graph_syst.GetErrorYlow(hist.GetNbinsX() - 1) *\ + hist.GetBinWidth(hist.GetNbinsX()) /\ + width_combined) ** 2) + return val, err, syst_err + + +def hist_for_ratio(hist, graph, central_hist): + hist2 = TH1F(hist.GetName(), "", central_hist.GetNbinsX(), + array('d', central_hist.GetXaxis().GetXbins())) + graph2 = TGraphAsymmErrors() + for binn in range(central_hist.GetNbinsX() - 1): + hist2.SetBinContent(binn + 1, hist.GetBinContent(binn + 1)) + hist2.SetBinError(binn + 1, hist.GetBinError(binn + 1)) + graph2.SetPoint(binn, 
graph.GetPointX(binn), graph.GetPointY(binn)) + graph2.SetPointError(binn, graph.GetErrorX(binn), graph.GetErrorY(binn)) + val, err, syst_err = get_average(hist, graph) + hist2.SetBinContent(hist2.GetNbinsX(), val) + hist2.SetBinError(hist2.GetNbinsX(), err) + graph2.SetPoint(hist2.GetNbinsX() - 1, + hist2.GetBinCenter(hist2.GetNbinsX()), val) + graph2.SetPointError(hist2.GetNbinsX() - 1, + hist2.GetBinWidth(hist2.GetNbinsX()) / 4.0, + hist2.GetBinWidth(hist2.GetNbinsX()) / 4.0, + syst_err, syst_err) + return hist2, graph2 + + +def divide_syst_error(val, val1, val2, err1, err2): + return val * math.sqrt((err1 / val1) **2 + (err2 / val2) **2) + + +def get_figs_ratio(central_graph, central_hist, hist_ratio, graph_ratio, label): + histr = hist_ratio.Clone() + histr.SetName(f"h_ratio_{label}") + histr.Divide(hist_ratio, central_hist, 1., 1., "B") + histr.GetXaxis().SetTitleOffset(1.10) + for binn in range(1, histr.GetNbinsX() + 1): + print(f"Ratio {binn}: {histr.GetBinContent(binn)}") + + graphr = None + if central_graph: + graphr = central_graph.Clone() + graphr.SetName(f"g_ratio_{label}") + for binn in range(1, central_hist.GetNbinsX() + 1): + x_err = histr.GetBinWidth(binn) / 4.0 + y_low = divide_syst_error(histr.GetBinContent(binn), + central_hist.GetBinContent(binn), + hist_ratio.GetBinContent(binn), + central_graph.GetErrorYlow(binn - 1), + graph_ratio.GetErrorYlow(binn - 1)) + y_high = divide_syst_error(histr.GetBinContent(binn), + central_hist.GetBinContent(binn), + hist_ratio.GetBinContent(binn), + central_graph.GetErrorYhigh(binn - 1), + graph_ratio.GetErrorYhigh(binn - 1)) + graphr.SetPoint(binn - 1, histr.GetBinCenter(binn), histr.GetBinContent(binn)) + graphr.SetPointError(binn - 1, x_err, x_err, y_low, y_high) + print(f"Central graph bin {binn-1} low {central_graph.GetErrorYlow(binn-1)} "\ + f"{label} low: {graph_ratio.GetErrorYlow(binn-1)} "\ + f"up {central_graph.GetErrorYhigh(binn-1)} "\ + f"{label} up: {graph_ratio.GetErrorYhigh(binn-1)}") + return 
histr, graphr + + +def plot_ratio_histos(canvr, legr, hists, graphs, central_hist, + central_label, central_graph, styles, y_axis): + maxx = 0.0 + miny = 0.05 + maxy = 300 + histsr = [] + graphsr = [] + + for ind, (label, color, style) in enumerate(zip(hists, COLORS, styles)): + print(f"central hist bins: {central_hist.GetNbinsX()} "\ + f"{label} bins: {hists[label].GetNbinsX()}") + if label != central_label and hists[label].GetNbinsX() == central_hist.GetNbinsX(): + graph = graphs[ind] if graphs else None + #hist_ratio, graph_ratio = hist_for_ratio(hists[label], graph, central_hist) + hist_ratio = hists[label] + graph_ratio = graph + + histr, graphr = get_figs_ratio(central_graph, central_hist, + hist_ratio, graph_ratio, label) + #set_hist_style(histr, color, "Ratio to INEL > 0") + histr.GetYaxis().SetTitle(y_axis) + + if style: + set_hist_style(histr, color, y_axis, style) + draw_opt = "sameE3" if ind != 0 else "E3" + else: + draw_opt = "sameE" + histr.SetMaximum(maxy) + histr.SetMinimum(miny) + canvr.cd() + histr.Draw(draw_opt) + if style: + histr2 = histr.Clone() + histr2.SetFillStyle(0) + histr2.SetFillColor(0) + histr2.SetMarkerStyle(0) + histr2.Draw("hist same L") + histr2.SetFillStyle(style) + histsr.append(histr2) + histsr.append(histr) + if graphr: + set_hist_style(graphr, color, y_axis) + graphr.Draw("sameE2") + graphsr.append(graphr) + if style and ind == 1: + entry = legr.AddEntry(MakeNullPointer(TObject), "PYTHIA 8.243 Monash", "f") + entry.SetFillColor(kBlack) + entry.SetFillStyle(style) + elif not style: + legr.AddEntry(histr, label, "p") + maxx = max(maxx, histr.GetBinLowEdge(histr.GetNbinsX() + 1)) + return canvr, legr, histsr, graphsr, maxx + + +def plot_ratio(cfg, hists, graphs_syst, central_graph, hists_models): + canvr = prepare_canvas(f'c_ratio_{cfg["histoname"]}') + canvr.SetLogy() + + if hists_models: + leg_models = get_legend(*cfg["legend_ratio_models"], len(cfg["models"])) + leg_models.SetMargin(0.5) + central_hist = 
hists_models[cfg["model_default"]] + canvr, leg_models, histsr_models, _, maxx =\ + plot_ratio_histos(canvr, leg_models, hists_models, None, + central_hist, cfg["model_default"], None, + [3001] * len(cfg["models"]), cfg["y_axis"]) + leg_models.Draw() + else: + histsr_models = [] + leg_models = None + + legr = get_legend(*cfg["legend_ratio"], len(cfg["hists"]), + ":") + central_hist = hists[cfg["default"]] + canvr, legr, histsr, graphsr, maxx =\ + plot_ratio_histos(canvr, legr, hists, graphs_syst, + central_hist, cfg["default"], central_graph, + [None] * len(cfg["hists"]), cfg["y_axis"]) + + legr.Draw() + + line = TLine(histsr[0].GetBinLowEdge(1), 1.0, maxx, 1.0) + line.SetLineColor(COLORS[len(histsr)]) + line.SetLineWidth(3) + line.SetLineStyle(kDashed) + line.Draw() + + alice_text = get_alice_text(cfg) + + return canvr, histsr, graphsr, histsr_models, legr, line, alice_text, leg_models + + +def calc_systematics(cfg, hists): + syst_errors = [] + central_hist = hists[cfg["default"]] + + for binn in range(central_hist.GetNbinsX()): + syst_err_bin = 0.00 + count = 0 + for label in hists: + if label != cfg["default"] and hists[label].GetNbinsX() == central_hist.GetNbinsX(): + syst_err = float("inf") if central_hist.GetBinContent(binn + 1) == 0 else \ + (hists[label].GetBinContent(binn + 1) - \ + central_hist.GetBinContent(binn + 1)) / \ + central_hist.GetBinContent(binn + 1) + syst_err_bin += syst_err * syst_err + count += 1 + if count == 0: + return + syst_err_bin = 100 * (math.sqrt(syst_err_bin / count)) + syst_errors.append(syst_err_bin) + + str_err = "Systematic errors:" + for err in syst_errors: + str_err = f"{str_err} {err:0.2f}" + print(str_err) + + +def main(): + """ + Main function. 
+ """ + gROOT.SetBatch(True) + + gStyle.SetOptStat(0) + gStyle.SetOptTitle(0) + gStyle.SetFrameLineWidth(2) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("config", help="JSON config file") + args = parser.parse_args() + + with open(args.config, encoding="utf8") as fil: + cfg = json.load(fil) + + with TFile(os.path.join(cfg["output"]["outdir"], + f'{cfg["output"]["file"]}.root'), "recreate") as output: + + (canv, hists, graphs_syst, hists_models, + leg, leg_models, alice_text, central_graph) = plot_compare(cfg) # pylint: disable=unused-variable + output.cd() + canv.Write() + save_canvas(canv, cfg, cfg["output"]["file"]) + for _, hist in hists.items(): + hist.Write() + if graphs_syst: + for graph in graphs_syst: + graph.Write() + if hists_models: + for _, hist in hists_models.items(): + hist.Write() + + canvr, histr, graphr, histr_models, legr, line, alice_text, leg_models =\ + plot_ratio(cfg, hists, graphs_syst, central_graph, hists_models) # pylint: disable=unused-variable + output.cd() + canvr.Write() + save_canvas(canvr, cfg, f'{cfg["output"]["file"]}_ratio') + for hist in histr: + hist.Write() + for graph in graphr: + graph.Write() + for hist in histr_models: + hist.Write() + + calc_systematics(cfg, hists) + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/config_Lc_Fit_pp13.6TeV_Data.yml b/machine_learning_hep/scripts-dhadrons/config_Lc_Fit_pp13.6TeV_Data.yml new file mode 100644 index 0000000000..ba413ae8c1 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_Lc_Fit_pp13.6TeV_Data.yml @@ -0,0 +1,27 @@ +_pp13.6TeVFD: + Particle: 'LAMBDAC_TO_PKPI' + PtMin: [1.] + PtMax: [2.] + MassMin: [2.21] + MassMax: [2.356] + Rebin: [2] + Mult: [null] + MultLatex: ["Minimum Bias"] +__pp13.6TeVFD: + Particle: 'LAMBDAC_TO_PKPI' + PtMin: [4.] + PtMax: [5.] 
#!/bin/bash

# Merge per-pt-range yield and efficiency files into one file per FD cut set
# by calling merge_histos.py once per matching 12-24 GeV/c input file.
# For every pattern index k (0: yields, 1: efficiencies) and every FD cut
# index i, the script combines:
#   - one "12" input (first -i),
#   - one "212" input, passed six times,
#   - one "1224" input, passed twice.
# NOTE(review): merge_histos.py appears to take bin N from the N-th -i file,
# which would explain the repeated arguments -- confirm against that script.

# FD cut values; FD_12_OLD differs from FD_12 only in the 17th entry
# (0.55 instead of 0.56). FD_12 names the "12" inputs, FD_12_OLD is used
# as the prefix of the "212" and "1224" file names.
FD_12=(0.00 0.21 0.24 0.27 0.30 0.33 0.35 0.37 0.39 0.41 0.44 0.46 0.48 0.50 0.52 0.54 0.56 0.58)
FD_12_OLD=(0.00 0.21 0.24 0.27 0.30 0.33 0.35 0.37 0.39 0.41 0.44 0.46 0.48 0.50 0.52 0.54 0.55 0.58)

# Each PTRN_*/SUFFIX_* array has two entries: index 0 for yields, index 1 for efficiencies.
DIR_12="/data8/majak/MLHEP/input-fd-23082024"
PTRN_12=("${DIR_12}/yields-bkg_0.20_0.60_fd_" "${DIR_12}/efficienciesLcpKpiRun3analysis_pt-weight_bkg_0.20_0.60_fd_")
SUFFIX_12=("-rebin-1-fixed-sigma.root" ".root")
DIR_212="/data8/majak/MLHEP/input-fd-23082024"
PTRN_212=("${DIR_212}/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_" "${DIR_212}/efficienciesLcpKpiRun3analysis_fd_precise_rebin4_bkg_0.20_0.60_fd_")
SUFFIX_212=("-fixed-sigma.root" ".root")
DIR_1224="/data8/majak/MLHEP/input-fd-10092024"
PTRN_1224=("${DIR_1224}/yields-fd_precise_1224_split_bkg_0.60_0.60_fd_" "${DIR_1224}/efficienciesLcpKpiRun3analysis_1224_split_bkg_0.60_0.60_fd_")
SUFFIX_1224=("-fixed-sigma.root" ".root")

# Output file name prefixes, same indexing as the PTRN_* arrays.
OUTFILE_PTRN=("merged_yields_fdd_approvals_fd_" "merged_eff_fdd_approvals_fd_")

# Outer loop: k = 0 (yields), 1 (efficiencies).
for k in "${!PTRN_12[@]}" ; do
  echo "k ${k}"
  # ${PTRN_12} without an index expands to element 0 only (debug output).
  echo "PTRN_12: ${PTRN_12}"
  echo "PTRN_12[k]: ${PTRN_12[k]}"
  echo "PTRN_212[k]: ${PTRN_212[k]}"
  echo "PTRN_1224[k]: ${PTRN_1224[k]}"

  # Inner loop over the FD cut index.
  for i in "${!FD_12[@]}" ; do
    INPUT_12=${PTRN_12[k]}${FD_12[i]}${SUFFIX_12[k]}
    # Stored as an unexpanded glob; it is expanded only where ${INPUT_212}
    # is used unquoted below, so those uses must stay unquoted.
    INPUT_212=${PTRN_212[k]}${FD_12_OLD[i]}*[0-9][0-9]${SUFFIX_212[k]}

    # dummy loop to get shell expansion in INPUT_1224
    # One merge is run per file matching the 1224 glob. If nothing matches,
    # the loop runs once with the literal pattern (default bash globbing).
    for f in ${PTRN_1224[k]}${FD_12_OLD[i]}*[0-9][0-9]${SUFFIX_1224[k]} ; do
      INPUT_1224=${f}
      # INPUT_1224 is a scalar, so [0] refers to the same value. Strip the
      # path prefix and the file suffix to keep only the FD-cut string.
      suffix=${INPUT_1224[0]##${PTRN_1224[k]}}
      suffix=${suffix%%${SUFFIX_1224[k]}}
      OUTFILE=${OUTFILE_PTRN[k]}${suffix}.root

      echo "i ${i} k ${k}"
      echo "INPUT_12: ${INPUT_12}"
      echo "INPUT_212: " ${INPUT_212}
      echo "INPUT_1224: " ${INPUT_1224}
      echo "suffix: " ${suffix}
      echo "outfile: " ${OUTFILE}

      # NOTE(review): if the INPUT_212 glob matches more than one file, each
      # "-i ${INPUT_212}" expands to several words -- verify that exactly one
      # file matches per FD cut.
      python merge_histos.py -o /data8/majak/crosssec/${OUTFILE} \
        -i ${INPUT_12} \
        -i ${INPUT_212} \
        -i ${INPUT_212} \
        -i ${INPUT_212} \
        -i ${INPUT_212} \
        -i ${INPUT_212} \
        -i ${INPUT_212} \
        -i ${INPUT_1224} \
        -i ${INPUT_1224}
    done
  done
done

# Merge yields and efficiencies for repeating September cut variation
# (kept disabled; note that the first "-i" line has no trailing backslash)
#python merge_histos.py -o /data8/majak/crosssec/merged_yields_fdd_approvals_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.17_0.09.root \
#  -i /data8/majak/MLHEP/input-fd-23082024/yields-bkg_0.20_0.60_fd_0.21-rebin-1-fixed-sigma.root
#  -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \
#  -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \
#  -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \
#  -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \
#  -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \
#  -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \
#  -i /data8/majak/MLHEP/input-fd-10092024/yields-fd_precise_1224_split_bkg_0.60_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.17_0.09-fixed-sigma.root \
#  -i /data8/majak/MLHEP/input-fd-10092024/yields-fd_precise_1224_split_bkg_0.60_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.17_0.09-fixed-sigma.root
#!/bin/bash

# Merge MLHEP outputs produced with different FD cuts into one file per cut set.
# For every cut-set index i, one FD cut is taken from each of the 11 FD_* arrays
# below and two merges are run:
#   - merge_histomass.py: mass histograms from the data results (LHC23pp_pass4)
#   - merge_histos.py:    "eff" and "eff_fd" from the MC results (LHC24pp_mc)
# The inputs are passed in the order FD_12, FD_23, ..., FD_1624.

MLHEP_DIR="/data8/majak/MLHEP"
OUTPUT_DIR="/data8/majak/MLHEP/input-d2h-fitter-012025"
OUTPUT_DIR_EFF="/data8/majak/MLHEP/input-fd-012025"

# Result directories are named ${RESDIR_PATTERN}<fd cut>.
RESDIR_PATTERN="${MLHEP_DIR}/results-24012025-hyp-ml-luigi-cuts_fd_"

# Parallel arrays: entry i of every array belongs to cut set i.
FD_12=(0.000 0.200 0.250 0.300 0.350 0.380 0.415 0.430 0.470 0.500 0.520 0.550 0.570 0.590 0.610 0.630 0.650 0.670 0.690)
FD_23=(0.000 0.290 0.320 0.350 0.380 0.410 0.430 0.450 0.470 0.490 0.510 0.530 0.550 0.570 0.590 0.610 0.630 0.650 0.670)
FD_34=(0.000 0.290 0.320 0.350 0.370 0.390 0.410 0.425 0.450 0.470 0.490 0.510 0.530 0.550 0.570 0.590 0.610 0.630 0.650)
FD_45=(0.000 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.320 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490)
FD_56=(0.000 0.110 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.310 0.330 0.350 0.370 0.390 0.410 0.430 0.450)
FD_67=(0.000 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.320 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490)
FD_78=(0.000 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.320 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490)
FD_810=(0.000 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.320 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490)
FD_1012=(0.000 0.210 0.230 0.250 0.270 0.290 0.310 0.330 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490 0.510 0.530 0.550)
FD_1216=(0.000 0.210 0.230 0.250 0.270 0.290 0.310 0.330 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490 0.510 0.530 0.550)
FD_1624=(0.000 0.090 0.110 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.310 0.330 0.350 0.370 0.390 0.410 0.430)

# A shorter array would silently produce an empty cut and a path ending in
# "_fd_/...", so refuse to run unless all arrays have the same length.
NCUTS=${#FD_12[@]}
for len in "${#FD_23[@]}" "${#FD_34[@]}" "${#FD_45[@]}" "${#FD_56[@]}" "${#FD_67[@]}" \
           "${#FD_78[@]}" "${#FD_810[@]}" "${#FD_1012[@]}" "${#FD_1216[@]}" "${#FD_1624[@]}" ; do
  if [[ "${len}" -ne "${NCUTS}" ]] ; then
    echo "FD arrays must all have ${NCUTS} entries, found one with ${len}" >&2
    exit 1
  fi
done

for i in "${!FD_12[@]}" ; do
  # FD cuts of cut set i, in input order.
  cuts=("${FD_12[i]}" "${FD_23[i]}" "${FD_34[i]}" "${FD_45[i]}" "${FD_56[i]}" "${FD_67[i]}" \
        "${FD_78[i]}" "${FD_810[i]}" "${FD_1012[i]}" "${FD_1216[i]}" "${FD_1624[i]}")
  echo "${i} fd ${cuts[*]}"

  # Output file tag: the cuts joined with "_".
  printf -v tag '%s_' "${cuts[@]}"
  tag=${tag%_}

  # One "-i <file>" pair per cut, for the data and the MC merge respectively.
  mass_inputs=()
  eff_inputs=()
  for fd in "${cuts[@]}" ; do
    mass_inputs+=(-i "${RESDIR_PATTERN}${fd}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root")
    eff_inputs+=(-i "${RESDIR_PATTERN}${fd}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root")
  done

  RESPATH="${OUTPUT_DIR}/projections_fd_precise_${tag}.root"

  python merge_histomass.py \
    -n hmassfPt \
    -o "${RESPATH}" \
    "${mass_inputs[@]}"

  RESPATH="${OUTPUT_DIR_EFF}/eff_fd_precise_${tag}.root"

  python merge_histos.py \
    -n eff \
    -n eff_fd \
    -o "${RESPATH}" \
    "${eff_inputs[@]}"
done
"/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" diff --git a/machine_learning_hep/scripts-dhadrons/merge_fractions.py b/machine_learning_hep/scripts-dhadrons/merge_fractions.py new file mode 100644 index 0000000000..fe5b4527c1 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merge_fractions.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +""" +file: merge_fractions.py +brief: Merge points from non-prompt fraction plots from different cutvar scans into a single plot +usage: ./merge_fractions.py my-plot.png file1.root file2.root file3.root +author: Maja Karwowska , CERN / Warsaw University of Technology +""" + +import argparse + +from ROOT import ( # pylint: disable=import-error,no-name-in-module + TCanvas, + TFile, + TH1, + gROOT, + kOrange +) + +HISTNAME = "hCorrFracNonPrompt" + +def main(): + """ + Main function. + """ + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("outname", help="output filename") + parser.add_argument("oldname", help="old results filename") + parser.add_argument("files", nargs='+', help="input ROOT files") + args = parser.parse_args() + + canv = TCanvas(f"c_{HISTNAME}", "") + canv.SetCanvasSize(800, 600) + + rfile = TFile(args.files[0]) + hist = rfile.Get(HISTNAME) + reshist = hist.Clone() + + for ind, file in enumerate(args.files): + rfile2 = TFile(file) + hist = rfile2.Get(HISTNAME) + print(f"{ind + 1} bin content {hist.GetBinContent(ind + 1)}") + reshist.SetBinContent(ind + 1, hist.GetBinContent(ind + 1)) + reshist.SetBinError(ind + 1, hist.GetBinError(ind + 1)) + for ind in range(2): + reshist.SetBinContent(ind + 1 + len(args.files), 0.0) + reshist.SetBinError(ind + 1 + len(args.files), 0.0) + + reshist.SetMaximum(0.25) + reshist.SetMinimum(0.0) + reshist.Draw() + + oldfile = TFile(args.oldname) + oldhistt = oldfile.Get(HISTNAME) + oldhist = oldhistt.Clone() + oldhist.SetMarkerColor(kOrange) + oldhist.SetLineColor(kOrange) + for ind in 
def main():
    """
    Copy one mass histogram per input file into a single output ROOT file.

    Command-line options
    --------------------
    -n/--histname : substring selecting histogram keys (repeatable)
    -o/--outfile  : output ROOT file, recreated on each run (exactly one)
    -i/--infile   : input ROOT files, in pt-bin order (repeatable)

    For the input file at position ``ind``, all keys whose name contains the
    requested substring are listed and the ``ind``-th match is written to the
    output file.

    Raises
    ------
    ValueError : if more than one output file is given
    IndexError : if an input file has fewer matching keys than its position needs
    """

    gROOT.SetBatch(True)

    parser = argparse.ArgumentParser(description=__doc__)
    # All three options are required: with action="append" a missing option
    # leaves the attribute at None, which previously ended in a TypeError.
    parser.add_argument("-n", "--histname", action="append", type=str, required=True,
                        help="name of histograms to merge")
    parser.add_argument("-o", "--outfile", action="append", type=str, required=True,
                        help="Output file")
    parser.add_argument("-i", "--infile", action="append", type=str, required=True,
                        help="Input file")
    args = parser.parse_args()

    if len(args.outfile) != 1:
        raise ValueError("Provide exactly 1 output file")

    print(f"infile {args.infile}")

    with TFile(args.outfile[0], "RECREATE") as fout:
        for name in args.histname:
            for ind, filename in enumerate(args.infile):
                # Close each input as soon as its histogram has been written.
                with TFile(filename) as fin:
                    list_hists = [key.GetName() for key in fin.GetListOfKeys()
                                  if name in key.GetName()]
                    if ind >= len(list_hists):
                        raise IndexError(
                            f"File {filename} has {len(list_hists)} histogram(s) matching "
                            f"'{name}', cannot select entry {ind}")
                    print(f"File {filename} hist list {list_hists} selected {list_hists[ind]}")
                    hist = fin.Get(list_hists[ind])
                    # Opening fin made it the current directory; switch back before writing.
                    fout.cd()
                    hist.Write()
def main():
    """
    Build merged histograms by taking bin N from the N-th input file.

    Command-line options
    --------------------
    -n/--histname : names of the histograms to merge (repeatable); if omitted,
                    every key of the first input file is used
    -o/--outfile  : output ROOT file, recreated on each run (exactly one)
    -i/--infile   : input ROOT files, one per bin, in bin order (repeatable)

    For each name, the object from the last input file is cloned and the
    content and error of bin ``ind + 1`` are overwritten with the values from
    input file ``ind``. Only objects whose class name contains "TH1" are merged.

    Raises
    ------
    ValueError : if more than one output file is given
    """
    gROOT.SetBatch(True)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-n", "--histname", action="append", type=str,
                        help="name of histograms to merge")
    # Required: with action="append" a missing option leaves the attribute at
    # None, which previously ended in a TypeError further down.
    parser.add_argument("-o", "--outfile", action="append", type=str, required=True,
                        help="Output file")
    parser.add_argument("-i", "--infile", action="append", type=str, required=True,
                        help="Input file")
    args = parser.parse_args()

    if len(args.outfile) != 1:
        raise ValueError("Provide exactly 1 output file")

    print(f"filenames {args.infile}")

    with TFile(args.outfile[0], "RECREATE") as fout:
        fins = [TFile(filename) for filename in args.infile]
        try:
            histnames = args.histname
            if histnames is None:
                histnames = [key.GetName() for key in fins[0].GetListOfKeys()]

            print(f"histnames {histnames}")

            for name in histnames:
                hist_list = []
                for fin in fins:
                    # Keep the input file as current directory while reading,
                    # as the original helper did.
                    fin.cd()
                    hist_list.append(fin.Get(name))
                print(f"{name} hist list length: {len(hist_list)}")

                # The merge uses GetBinContent/SetBinContent/GetBinLowEdge, which
                # exist on histograms only; TGraph objects used to pass the class
                # filter and then fail on these calls.
                class_name = hist_list[0].ClassName()
                if "TH1" not in class_name:
                    print(f"Skipping {name}: class {class_name} is not a TH1")
                    continue

                hist = hist_list[-1].Clone()
                for ind, hist_tmp in enumerate(hist_list):
                    ibin = ind + 1
                    print(f"hist {name} bin {ibin} pt [{hist.GetBinLowEdge(ibin)}, "
                          f"{hist.GetBinLowEdge(ibin + 1)}) "
                          f"content {hist_tmp.GetBinContent(ibin)}")
                    hist.SetBinContent(ibin, hist_tmp.GetBinContent(ibin))
                    hist.SetBinError(ibin, hist_tmp.GetBinError(ibin))
                fout.cd()
                hist.Write()
        finally:
            # Input files were previously left open until interpreter exit.
            for fin in fins:
                fin.Close()
def _merge_two_bins(hist, bin1, bin2):
    """
    Return (content, error) of the bin-width-weighted average of two bins of hist.

    The error is the quadratic sum of the two width-weighted bin errors, each
    divided by the summed width. Intermediate values are printed for debugging.
    """
    width1 = hist.GetBinWidth(bin1)
    width2 = hist.GetBinWidth(bin2)
    weight_sum = width1 + width2
    term1 = hist.GetBinContent(bin1) * width1
    term2 = hist.GetBinContent(bin2) * width2
    weighted_sum = term1 + term2
    print(f"bin {bin1} width {width1} bin2 {bin2} width {width2}")
    # The second term used to be printed as content + width, which did not
    # match the product that enters the sum.
    print(f"weight sum: {weight_sum} average: {term1} + "
          f"{term2} average: {weighted_sum}")

    err1 = hist.GetBinError(bin1)
    err2 = hist.GetBinError(bin2)
    scaled1 = width1 * err1
    scaled2 = width2 * err2
    divided1 = scaled1 / weight_sum
    divided2 = scaled2 / weight_sum
    power1 = divided1**2.
    power2 = divided2**2.
    power_sum = power1 + power2
    error = math.sqrt(power_sum)
    print(f"bin {bin1} error {err1} bin2 {err2}\n"
          f"scaled: {scaled1}, "
          f"{scaled2}\n"
          f"divided: {divided1}, "
          f"{divided2}\n"
          f"power: {power1}, "
          f"{power2}\n"
          f"sum: {power_sum}\n"
          f"sqrt: {error}\n")
    return weighted_sum / weight_sum, error


def main():
    """
    Rescale an input histogram by 1/BR and copy it onto the OUTPUT_BINS binning.

    Positional command-line arguments: input file, input histogram name,
    output histogram name, output file.

    Bins below ``merge_bins[0]`` are copied one to one. Each entry of
    ``merge_bins`` names the first of two neighbouring input bins that are
    merged into one output bin; later output bins are read from the input
    shifted by the number of merges done so far. With the current dummy value
    (20) no bins are merged. Marker and line attributes are copied from the
    input histogram.
    """
    gROOT.SetBatch(True)

    parser = argparse.ArgumentParser(description="Arguments to pass")
    parser.add_argument("filename", help="input file with histogram")
    parser.add_argument("histname", help="histogram name")
    parser.add_argument("outhistname", help="outhistogram name")
    parser.add_argument("outname", help="output file for the new histogram")
    args = parser.parse_args()

    with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout:
        hist = fin.Get(args.histname)
        hist.SetDirectory(0)
        #hist.Scale(0.000000001)
        hist.Scale(1./BR)
        hist2 = TH1F(args.outhistname, "", len(OUTPUT_BINS) - 1, array('d', OUTPUT_BINS))
        merge_bins = [20] # dummy number so as not to merge [7, 9]
        ind = 0  # number of merges performed so far
        for binn in range(1, hist.GetNbinsX() + 1):
            print(f"Old hist bin {binn} low edge {hist.GetBinLowEdge(binn)} "
                  f"up edge {hist.GetXaxis().GetBinUpEdge(binn)} "
                  f"content: {hist.GetBinContent(binn)} +/- {hist.GetBinError(binn)}")
        for binn in range(1, hist2.GetNbinsX() + 1):
            if binn < merge_bins[0]:
                # Before the first merged bin: same bin index in both histograms.
                hist2.SetBinContent(binn, hist.GetBinContent(binn))
                hist2.SetBinError(binn, hist.GetBinError(binn))
            elif ind >= len(merge_bins) or binn > merge_bins[0] + len(merge_bins) / 2:
                # Past the merged bins: input index is shifted by the merges done.
                hist2.SetBinContent(binn, hist.GetBinContent(binn + ind))
                hist2.SetBinError(binn, hist.GetBinError(binn + ind))
            else:
                content, error = _merge_two_bins(hist, merge_bins[ind], merge_bins[ind] + 1)
                hist2.SetBinContent(binn, content)
                hist2.SetBinError(binn, error)
                ind += 1
            print(f"New bin {binn} low edge {hist2.GetBinLowEdge(binn)} "
                  f"up edge {hist2.GetXaxis().GetBinUpEdge(binn)} "
                  f"content: {hist2.GetBinContent(binn)} +/- {hist2.GetBinError(binn)} ind {ind}")
        hist2.SetMarkerSize(hist.GetMarkerSize())
        hist2.SetMarkerColor(hist.GetMarkerColor())
        hist2.SetMarkerStyle(hist.GetMarkerStyle())
        hist2.SetLineWidth(hist.GetLineWidth())
        hist2.SetLineColor(hist.GetLineColor())
        hist2.SetLineStyle(hist.GetLineStyle())
        fout.cd()
        hist2.Write()
def get_hist_limits(hist, miny=0.0, maxy=0.0):
    """
    Extend a running y range so that it covers every point of a graph.

    For each of the ``hist.GetN()`` points, the value plus its upper error and
    the value minus its lower error are compared with the current limits.
    Each point is also printed.

    Parameters
    ----------
    - hist: object providing GetN, GetPointX, GetPointY, GetErrorYlow and GetErrorYhigh
    - miny (float): lower limit found so far
    - maxy (float): upper limit found so far

    Returns
    ----------
    - (miny, maxy): the updated limits
    """
    for point in range(hist.GetN()):
        y_val = hist.GetPointY(point)
        err_low = hist.GetErrorYlow(point)
        err_high = hist.GetErrorYhigh(point)
        print(f"bin {point} [{hist.GetPointX(point)}, "
              f"val {y_val} "
              f"err {err_low}, {err_high}")
        maxy = max(y_val + err_high, maxy)
        miny = min(y_val - err_low, miny)
    return miny, maxy
def get_name_infile(particle, suffix):
    """
    Helper method to get the names of the input files according to the particle

    Parameters
    ----------
    - particle (int): particle ID
    - suffix (string): inserted into the prompt-enhanced file name for LAMBDAC_TO_PKPI

    Returns
    ----------
    - (string, string): names of the prompt-enhanced and FD-enhanced input files;
      both are empty for any particle other than D0 and LAMBDAC_TO_PKPI
    """

    if particle == D0:
        return ("../RawYieldResult/CentralValue/RawYieldsData_D0_pPb5TeV_FD_pos00.root",
                "../RawYieldResult/CentralValue/RawYieldsData_D0_pPb5TeV_FD_pos13.root")
    if particle == LAMBDAC_TO_PKPI:
        return f"/data8/majak/invmass-plots/massesmasshisto{suffix}.root", "fits_non_prompt.root"
    # DPLUS and LAMBDAC_TO_PK0S only ever set a third name that was never returned
    # ("../Results/Dplus/rawYield_Dplus_nonprompt_enhanced.root" for DPLUS, "" for
    # LAMBDAC_TO_PK0S), so they yield two empty names like any unknown ID.
    return "", ""


def get_title_xaxis(particle):
    """
    Helper method to get the title of x axis according to the particle

    Parameters
    ----------
    - particle (int): particle ID

    Returns
    ----------
    - title_xaxis (string): title of x axis, empty for an unknown particle ID
    """

    titles = (
        (D0, "#it{M}(K#pi) (GeV/#it{c}^{2})"),
        (DPLUS, "#it{M}(#piK#pi) (GeV/#it{c}^{2})"),
        (LAMBDAC_TO_PKPI, "#it{M}(pK#pi) (GeV/#it{c}^{2})"),
        (LAMBDAC_TO_PK0S, "#it{M}(pK^{0}_{S}) (GeV/#it{c}^{2})"),
    )
    for particle_id, title_xaxis in titles:
        if particle == particle_id:
            return title_xaxis
    return ""


def get_h_value_err(h, i_bin, convert_to_mev=False):
    """
    Helper method to get bin content and error of an histogram

    Parameters
    ----------
    - h (TH1): histogram
    - i_bin (int): bin number
    - convert_to_mev (bool): multiply both numbers by GEV2MEV

    Returns
    ----------
    - value (float): bin content of h
    - error (float): bin error of h
    """

    content, uncertainty = h.GetBinContent(i_bin), h.GetBinError(i_bin)

    print(f"i_bin: {i_bin} value {content} error {uncertainty} first bin value: {h.GetBinContent(1)}")

    if not convert_to_mev:
        return content, uncertainty
    return content * GEV2MEV, uncertainty * GEV2MEV


def draw_info(lat_label, particle):
    """
    Helper method to draw particle-dependent information on canvas

    Only the decay-channel label is drawn; the non-prompt fraction text is
    looked up but its DrawLatex call is disabled.

    Parameters
    ----------
    - lat_label (TLatex): TLatex instance
    - particle (int): particle ID
    """

    # (particle ID, decay-channel label, raw non-prompt fraction label)
    labels = (
        (D0,
         "D^{0} #rightarrow K^{#font[122]{-}}#pi^{+} and charge conj.",
         ""),
        (DPLUS,
         "D^{+} #rightarrow #pi^{+}K^{#font[122]{-}}#pi^{+} and charge conj.",
         "#it{f}_{ non-prompt}^{ raw} = 0.787 #pm 0.022 (stat.) #pm 0.016 (syst.)"),
        (LAMBDAC_TO_PKPI,
         "#Lambda_{c}^{+} #rightarrow pK^{#font[122]{-}}#pi^{+} and charge conj.",
         "#it{f}_{ non-prompt}^{ raw} = 0.630 #pm 0.056 (stat.) #pm 0.050 (syst.)"),
        (LAMBDAC_TO_PK0S,
         "#Lambda_{c}^{+} #rightarrow pK^{0}_{S} and charge conj.",
         "#it{f}_{ non-prompt}^{ raw} = 0.549 #pm 0.138 (stat.) #pm 0.055 (syst.)"),
    )
    info = ""
    fnonprompt = ""
    for particle_id, channel, fraction in labels:
        if particle == particle_id:
            info, fnonprompt = channel, fraction
            break

    lat_label.DrawLatex(0.19, 0.85, info)
    #lat_label.DrawLatex(0.19, 0.16, fnonprompt)


def save_canvas(canvas, particle, pt_mins, pt_maxs, i_pt, mult):
    """
    Helper method to save canvas as pdf, png and eps according to particle

    Parameters
    ----------
    - canvas (TCanvas): a canvas
    - particle (int): particle ID
    - pt_mins (list): lower pT bin edges
    - pt_maxs (list): upper pT bin edges
    - i_pt (int): index of the pT bin used in the file name
    - mult (list): per-bin multiplicity labels; a falsy entry adds nothing to the name
    """

    out_dir = "/data8/majak/invmass-plots/"
    names = (
        (D0, "Dzero"),
        (DPLUS, "Dplus"),
        (LAMBDAC_TO_PKPI, "LambdacToPKPi"),
        (LAMBDAC_TO_PK0S, "LambdacToPKzeroShort"),
    )
    name = ""
    for particle_id, particle_name in names:
        if particle == particle_id:
            name = particle_name
            break

    mult_tag = f"{mult[i_pt]}_" if mult[i_pt] else ""
    for ext in ("pdf", "png", "eps"):
        canvas.SaveAs(f"{out_dir}InvMassFit{name}_{mult_tag}Pt_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}.{ext}")
pt_maxs = cfg["pp13.6TeVFD"]["PtMax"] + mass_mins = cfg["pp13.6TeVFD"]["MassMin"] + mass_maxs = cfg["pp13.6TeVFD"]["MassMax"] + rebin = cfg["pp13.6TeVFD"]["Rebin"] + mult = cfg["pp13.6TeVFD"]["Mult"] + mult_latex = cfg["pp13.6TeVFD"]["MultLatex"] + + print(f"Plotting for {pt_mins[i_pt]}-{pt_maxs[i_pt]}") + + name_infile_promptEnhanced, name_infile_FDEnhanced = get_name_infile(particle, f"{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}") + + file_promptEnhanced = TFile.Open(name_infile_promptEnhanced) + #file_FDEnhanced = TFile.Open(name_infile_FDEnhanced) + + hmean_promptEnhanced = file_promptEnhanced.Get("hist_means_lc") + hsigma_promptEnhanced = file_promptEnhanced.Get("hist_sigmas_lc") + + #hmean_FDEnhanced = file_FDEnhanced.Get("hRawYieldsMean") + #hsigma_FDEnhanced = file_FDEnhanced.Get("hRawYieldsSigma") + + hsignal_promptEnhanced = file_promptEnhanced.Get("hist_rawyields_lc") + #hsignal_FDEnhanced = file_FDEnhanced.Get("hRawYields") + + mult_suffix = f"_{mult[i_pt]}" if mult[i_pt] else "" + name_hmass = f"hmass{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}{mult_suffix}" + print(f"file {name_infile_promptEnhanced} hist {name_hmass}") + hmass_promptEnhanced = file_promptEnhanced.Get(name_hmass) + #hmass_FDEnhanced = file_FDEnhanced.Get(name_hmass) + hmass_promptEnhanced.Rebin(rebin[i_pt]) + #hmass_FDEnhanced.Rebin(rebin[i_pt]) + + title_xaxis = get_title_xaxis(particle) + width_bin = hmass_promptEnhanced.GetBinWidth(i_pt+1) + bin_max = hmass_promptEnhanced.GetMaximumBin() + bin_min = hmass_promptEnhanced.GetMinimumBin() + + ymax_promptEnhanced = 1.2*(hmass_promptEnhanced.GetMaximum() + hmass_promptEnhanced.GetBinError(bin_max)) + ymin_promptEnhanced = 0.8*(hmass_promptEnhanced.GetMinimum() - hmass_promptEnhanced.GetBinError(bin_min)) + #ymin_FDEnhanced, ymax_FDEnhanced = 0., 1.2*(hmass_FDEnhanced.GetMaximum() + hmass_FDEnhanced.GetBinError(bin_max)) + + title = f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}};{title_xaxis};" \ + f"Counts per 
{width_bin*GEV2MEV:.0f} MeV/#it{{c}}^{{2}}" + + #fit_tot_promptEnhanced = file_promptEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + fit_tot_promptEnhanced = file_promptEnhanced.Get(f"total_func_lc_pt{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + #fit_bkg_promptEnhanced = file_promptEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + fit_bkg_promptEnhanced = file_promptEnhanced.Get(f"bkg_0_lc_pt{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + #fit_refl_promptEnhanced = file_promptEnhanced.Get(f"freflect;13") + + + + #fit_tot_FDEnhanced = file_FDEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") + #fit_bkg_FDEnhanced = file_FDEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") + #fit_refl_FDEnhanced = file_FDEnhanced.Get(f"freflect;13") + + print("Calculating mean") + mean_promptEnhanced, err_mean_promptEnhanced = get_h_value_err(hmean_promptEnhanced, i_pt + 1, True) + #mean_FDEnhanced, err_mean_FDEnhanced = get_h_value_err(hmean_FDEnhanced, 13, True) + print("Calculating sigma") + sigma_promptEnhanced, _ = get_h_value_err(hsigma_promptEnhanced, i_pt + 1, True) + #sigma_FDEnhanced, _ = get_h_value_err(hsigma_FDEnhanced, 13, True) + print("Calculating yield") + signal_promptEnhanced, err_signal_promptEnhanced = get_h_value_err(hsignal_promptEnhanced, i_pt + 1) + #signal_FDEnhanced, err_signal_FDEnhanced = get_h_value_err(hsignal_FDEnhanced, 13) + + lat_alice = TLatex() + lat_alice.SetNDC() + lat_alice.SetTextSize(SIZE_TEXT_LAT_ALICE) + lat_alice.SetTextFont(43) + lat_alice.SetTextColor(kBlack) + + lat_label = TLatex() + lat_label.SetNDC() + lat_label.SetTextFont(43) + lat_label.SetTextColor(kBlack) + + # lat_label = TLatex() + # lat_label.SetNDC() + # lat_label.SetTextFont(43) + # lat_label.SetTextColor(kBlack) + + # str_mu = f"#it{{#mu}} = ({mean:.0f} #pm {err_mean:.0f}) MeV/#it{{c}}^{{2}}" + # str_sigma = f"#it{{#sigma}} = {sigma:.0f} MeV/#it{{c}}^{{2}}" + str_sig_promptEnhanced = f'#it{{S}} = 
{signal_promptEnhanced:.0f} #pm {err_signal_promptEnhanced:.0f}' + #str_sig_FDEnhanced = f'#it{{S}} = {signal_FDEnhanced:.0f} #pm {err_signal_FDEnhanced:.0f}' + + if particle == D0: + legend = TLegend(0.6, 0.54, 0.87, 0.75) + else: + legend = TLegend(0.62, 0.58, 0.85, 0.72) + legend.SetBorderSize(0) + legend.SetFillStyle(0) + legend.SetTextFont(43) + legend.SetTextSize(SIZE_TEXT_LEGEND) + legend.AddEntry(fit_tot_promptEnhanced, 'Total fit function', 'l') + legend.AddEntry(fit_bkg_promptEnhanced, '#splitline{Combinatorial}{background}', 'l') + if particle == D0: + legend.AddEntry(fit_refl_promptEnhanced, 'K#minus#pi reflected', 'l') + + c = TCanvas("c", "", WIDTH, HEIGHT) + # Create the first pad + pad1 = TPad("promptEnhanced", "Prompt Enhanced", 0., 0., 1., 1.) + if not pad1: + raise RuntimeError("Failed to create pad1") + pad1.Draw() + pad1.cd() # Switch to pad1 + frame_promptEnhanced = pad1.DrawFrame(mass_mins[i_pt], ymin_promptEnhanced, mass_maxs[i_pt], ymax_promptEnhanced, title) + frame_promptEnhanced.GetYaxis().SetDecimals() + + #c.cd() + # Create the second pad + #pad2 = TPad("NonPromptEnhanced", "Non-prompt enhanced", 0.5, 0., 1., 1.) 
+ #if not pad2: + # raise RuntimeError("Failed to create pad2") + #pad2.Draw() + #pad2.cd() # Switch to pad2 + #frame_FDEnhanced = pad2.DrawFrame(mass_mins[i_pt], ymin_FDEnhanced, mass_maxs[i_pt], ymax_FDEnhanced, title) + #frame_FDEnhanced.GetYaxis().SetDecimals() + + + + #c.cd() + #pad1.cd() + set_object_style(hmass_promptEnhanced, linewidth=3, linecolor=kBlack, markersize=0.5) + set_object_style(fit_tot_promptEnhanced, linewidth=3, linecolor=kBlue) + set_object_style(fit_bkg_promptEnhanced, linewidth=3, linecolor=kRed, linestyle=2) + #set_object_style(fit_refl_promptEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) + + hmass_promptEnhanced.Draw("sameE") + fit_bkg_promptEnhanced.Draw("same") + fit_tot_promptEnhanced.Draw("same") + #fit_refl_promptEnhanced.Draw("same") + + lat_alice.DrawLatex(0.19, 0.85, 'ALICE Preliminary') + lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL_FOR_COLL_SYSTEM) + lat_label.DrawLatex(0.19, 0.79, 'pp,#kern[-0.08]{ #sqrt{#it{s}} = 13.6 TeV,}#kern[-0.08]{ #it{L}_{int} = 5 pb^{#minus1}}') + lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL) + #draw_info(lat_label, particle) + lat_label.DrawLatex(0.19, 0.73, f'{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}}') + #lat_label.DrawLatex(0.19, 0.3, 'Prompt enhanced') + #lat_label.DrawLatex(0.7, 0.85, '|#it{y}| < 0.5') + #fnonprompt_promptEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.246 #pm 0.007 (stat.)" # (4, 5) GeV + #fnonprompt_promptEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.30 #pm 0.02 (stat.)" # (0, 1) GeV + #lat_label.DrawLatex(0.19, 0.18, fnonprompt_promptEnhanced) + + # lat_label.DrawLatex(0.19, 0.64, str_mu) + # lat_label.DrawLatex(0.19, 0.58, str_sigma) + #lat_label.DrawLatex(0.19, 0.24, str_sig_promptEnhanced) + if mult_latex[i_pt]: + lat_label.DrawLatex(0.19, 0.24, mult_latex[i_pt]) + lat_label.DrawLatex(0.19, 0.18, "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus} and charge conj.") + #lat_label.DrawLatex(0.19, 0.16, "#it{L}_{int} = 5 pb^{-1}") + + 
legend.Draw() + + #c.cd() + #pad2.cd() + #set_object_style(hmass_FDEnhanced, linewidth=3, linecolor=kBlack) + #set_object_style(fit_tot_FDEnhanced, linewidth=3, linecolor=kBlue) + #set_object_style(fit_bkg_FDEnhanced, linewidth=3, linecolor=kRed, linestyle=2) + #set_object_style(fit_refl_FDEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) + #hmass_FDEnhanced.Draw("same") + #fit_bkg_FDEnhanced.Draw("same") + #fit_tot_FDEnhanced.Draw("same") + #fit_refl_FDEnhanced.Draw("same") + + #lat_alice.DrawLatex(0.19, 0.85, 'ALICE Preliminary') + #lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL_FOR_COLL_SYSTEM) + #lat_label.DrawLatex(0.19, 0.79, 'pp, #sqrt{#it{s}} = 13.6 TeV') + #lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL) + #draw_info(lat_label, particle) + #lat_label.DrawLatex(0.19, 0.3, 'Non-prompt enhanced') + #lat_label.DrawLatex(0.7, 0.85, '|#it{y}| < 0.5') + #fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.690 #pm 0.008 (stat.)" # (4, 5) GeV + #fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.70 #pm 0.02 (stat.)" # (0, 1) GeV + #lat_label.DrawLatex(0.19, 0.18, fnonprompt_FDEnhanced) + + # lat_label.DrawLatex(0.19, 0.64, str_mu) + # lat_label.DrawLatex(0.19, 0.58, str_sigma) + #lat_label.DrawLatex(0.19, 0.24, str_sig_FDEnhanced) + + #legend.Draw() + + #c.Update() + c.cd() + + save_canvas(c, particle, pt_mins, pt_maxs, i_pt, mult) + + if not batch: + input("Press enter to exit") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Arguments") + parser.add_argument("config", metavar="text", default="config.yml", help="config file name for ml") + parser.add_argument("--batch", help="suppress video output", action="store_true") + args = parser.parse_args() + + print("Loading analysis configuration: ...", end="\r") + with open(args.config, "r", encoding="utf-8") as yml_cfg: + configuration = yaml.load(yml_cfg, yaml.FullLoader) + print("Loading analysis configuration: Done!") + + for i_pt in 
def main():
    """
    Copy histograms into a new file, keeping only the bins below a maximum x value.

    Command-line arguments:
    - filename: input ROOT file with the histograms
    - histname: substring selecting the keys to process
    - outname: output ROOT file (recreated)
    - maxval: x value; the bin containing it and all bins above it are dropped

    Every selected histogram is rebuilt as a TH1F with the same bin edges,
    contents, errors and marker/line style, restricted to the kept bins.
    """
    gROOT.SetBatch(True)

    parser = argparse.ArgumentParser(description="Arguments to pass")
    parser.add_argument("filename", help="input file with histogram")
    parser.add_argument("histname", help="histogram name pattern")
    parser.add_argument("outname", help="output file for the new histogram")
    parser.add_argument("maxval", type=float, help="maxval in histogram")
    args = parser.parse_args()

    with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout:
        objnames = fin.GetListOfKeys()
        print(f"objnames : {objnames}")
        histnames = [key.GetName() for key in objnames if args.histname in key.GetName()]
        print(f"histnames: {histnames}")
        for histname in histnames:
            hist = fin.Get(histname)
            hist.SetDirectory(0)
            last_bin = hist.GetXaxis().FindBin(args.maxval)
            # Low edges of bins 1..last_bin: the low edge of last_bin is the upper
            # edge of the last kept bin, so these are the edges of last_bin - 1 bins.
            bins = [hist.GetBinLowEdge(binn) for binn in range(1, last_bin + 1)]
            print(f"Hist bins {bins}")
            n_bins = len(bins) - 1
            if n_bins < 1:
                # maxval lies in or below the first bin: a TH1F cannot have 0 bins.
                print(f"Skipping {histname}: no complete bin below {args.maxval}")
                continue
            hist2 = TH1F(histname, "", n_bins, array('d', bins))
            # Bin i of the new histogram has the same edges as bin i of the input.
            # The previous loop copied bins 2..last_bin + 1, which left bin 1 empty
            # and put the first dropped bin into the overflow of the new histogram.
            for binn in range(1, n_bins + 1):
                hist2.SetBinContent(binn, hist.GetBinContent(binn))
                hist2.SetBinError(binn, hist.GetBinError(binn))
            hist2.SetMarkerSize(hist.GetMarkerSize())
            hist2.SetMarkerColor(hist.GetMarkerColor())
            hist2.SetMarkerStyle(hist.GetMarkerStyle())
            hist2.SetLineWidth(hist.GetLineWidth())
            hist2.SetLineColor(hist.GetLineColor())
            hist2.SetLineStyle(hist.GetLineStyle())
            fout.cd()
            hist2.Write()
columns:\n{df["fMlBkgScore"]}\n{df["fMlPromptScore"]}\n{df["fMlNonPromptScore"]}') - df_sel = df[df["fMlBkgScore"] > 1.0] - print(f'df sel ML bkg:\n{df_sel["fMlBkgScore"]}') - df_sel = df[df["fMlNonPromptScore"] < 0.0] - print(f'df sel ML non-prompt:\n{df_sel["fMlNonPromptScore"]}') - #print(f'df sel ML columns:\n{df_sel["fMlBkgScore"]}\n{df_sel["fMlNonPromptScore"]}') - - -if __name__ == '__main__': - main() diff --git a/machine_learning_hep/scripts/gather-inputs-fdd.sh b/machine_learning_hep/scripts/gather-inputs-fdd.sh deleted file mode 100755 index a9aedc0e56..0000000000 --- a/machine_learning_hep/scripts/gather-inputs-fdd.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -MLHEP_DIR="/data8/majak/MLHEP" -OUTPUT_DIR="${MLHEP_DIR}/input-fd-012025" - -RESDIR_PATTERN="${MLHEP_DIR}/results-24012025-hyp-ml-luigi-cuts_" -PERM_PATTERN="fd_" - -for dir in "${RESDIR_PATTERN}${PERM_PATTERN}"0.[0-9][0-9][0-9]* ; do - suffix=${dir##"${RESDIR_PATTERN}"} - echo "$suffix" - - cp "${dir}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ - "${OUTPUT_DIR}/efficienciesLcpKpiRun3analysis_${suffix}.root" - #cp "${dir}/LHC23pp_pass4/Results/resultsdatatot/yields_LcpKpi_Run3analysis.root" \ - # "${OUTPUT_DIR}/yieldsLcpKpiRun3analysis-${suffix}-fixed-sigma.root" -done diff --git a/machine_learning_hep/scripts/run-fdd-batch.sh b/machine_learning_hep/scripts/run-fdd-batch.sh deleted file mode 100755 index b7e9e88846..0000000000 --- a/machine_learning_hep/scripts/run-fdd-batch.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" - -WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" -DATABASE="database_ml_parameters_LcToPKPi_multiclass_fdd" -DATABASE_EXT="${DATABASE}.yml" -DATABASE_PATH="${WORKDIR}/data/data_run3/${DATABASE_EXT}" -#RESDIR_PATTERN="results-24022025-prompt" -RESDIR_PATTERN="results-24022025-newtrain-ptshape-prompt" - -bkg=0.00 -for fd in $(seq 0.000 0.005 0.000) ; do - echo "fd ${fd}" - - 
#suffix="fd_${fd}" - suffix="" - RESDIR="${RESDIR_PATTERN}${suffix}" - RESPATH="/data8/majak/MLHEP/${RESDIR}/" - - #rm -rf "${RESPATH}" - - CUR_DB="${DATABASE}_edit_fd${fd}.yml" - cp "${DATABASE_PATH}" "${CUR_DB}" || ErrExit "Could not copy database" - - sed -i "s/%resdir%/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg01%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg12%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg23%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg34%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg45%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg56%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg67%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg78%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg810%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg1012%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg1216%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%bkg1624%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd01%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" 
"${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - - yes | mlhep --log-file "logfile_${suffix}.log" \ - -a Run3analysis \ - --run-config submission/analyzer.yml \ - --database-analysis "${CUR_DB}" \ - --delete \ - > "debug_${suffix}.txt" 2>&1 || ErrExit "Analysis failed" -done diff --git a/machine_learning_hep/scripts/run-fdd-precise.sh b/machine_learning_hep/scripts/run-fdd-precise.sh deleted file mode 120000 index 9c226abd43..0000000000 --- a/machine_learning_hep/scripts/run-fdd-precise.sh +++ /dev/null @@ -1 +0,0 @@ -/home/maja/CERN-useful-scripts/run-fdd-precise.sh \ No newline at end of file diff --git a/machine_learning_hep/scripts/run-lc.sh b/machine_learning_hep/scripts/run-lc.sh deleted file mode 100755 index f7c2e19e3e..0000000000 --- a/machine_learning_hep/scripts/run-lc.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -if [ "$#" -ne 3 ]; then - echo "Wrong number of parameters" - exit 1 -fi - -DB=$1 -CONFIG=$2 -LOGFILE=$3 - -mlhep --log-file ${LOGFILE} \ - -a Run3analysis \ - --run-config ${CONFIG} \ - --database-analysis ${DB} From b913579c57fd446c2c7753f20d802cf9494c17e6 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 10:14:41 +0200 Subject: [PATCH 06/34] Add JSON configs to git --- .gitignore | 1 - .../compare_prompt_fraction.py | 62 +++++++ .../scripts-dhadrons/config_bdt_cuts.json | 55 ++++++ .../config_compare_fractions.json | 85 +++++++++ .../config_crosssec_run2_run3.json | 45 +++++ .../scripts-dhadrons/config_fitting.json | 112 ++++++++++++ .../scripts-dhadrons/config_fractions.json | 11 ++ .../config_graph_frac_run3.json | 29 +++ .../scripts-dhadrons/config_mult_ratios.json | 168 ++++++++++++++++++ .../scripts-dhadrons/config_pt_weight.json | 26 +++ .../scripts-dhadrons/config_run2.json | 25 +++ .../scripts-dhadrons/config_run3.json | 48 +++++ 
.../scripts-dhadrons/config_run3_run2.json | 56 ++++++ .../scripts-dhadrons/config_run3d0.json | 64 +++++++ .../scripts-dhadrons/config_spans_cutvar.json | 26 +++ .../scripts-dhadrons/config_track_tuner.json | 56 ++++++ 16 files changed, 868 insertions(+), 1 deletion(-) create mode 100644 machine_learning_hep/scripts-dhadrons/compare_prompt_fraction.py create mode 100644 machine_learning_hep/scripts-dhadrons/config_bdt_cuts.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_compare_fractions.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_crosssec_run2_run3.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_fitting.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_fractions.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_graph_frac_run3.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_mult_ratios.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_pt_weight.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_run2.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_run3.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_run3_run2.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_run3d0.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_spans_cutvar.json create mode 100644 machine_learning_hep/scripts-dhadrons/config_track_tuner.json diff --git a/.gitignore b/.gitignore index 74e08e3562..49a17f54a4 100644 --- a/.gitignore +++ b/.gitignore @@ -40,7 +40,6 @@ machine_learning_hep/LckINT7HighMultwithJets dataframes_* plots_* output_* -*.json *.h5 *.png *.log diff --git a/machine_learning_hep/scripts-dhadrons/compare_prompt_fraction.py b/machine_learning_hep/scripts-dhadrons/compare_prompt_fraction.py new file mode 100644 index 0000000000..3f4962e71f --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/compare_prompt_fraction.py @@ -0,0 +1,62 @@ 
def get_fractions(cfg):
    """
    Collect the fraction values and their errors per pT bin from all input files.

    Parameters
    ----------
    - cfg (dict): configuration with the keys
        "file_pattern" (glob pattern of the input ROOT files),
        "dir_pattern" (regex matched against each file path; the part of the
        match after the last underscore is used as the cut label),
        "histoname" (name of the object read from each file),
        "pt_bins_min", "pt_bins_max" (pT bin edges)

    Returns
    -------
    - fractions (dict): "<ptmin>_<ptmax>" -> list of values, one per input file
    - fractions_err (dict): same layout, with the y errors
    - fd_cuts (list): cut labels, one per input file, in sorted file order

    Raises
    ------
    - ValueError: if a file path matched by "file_pattern" does not match "dir_pattern"
    """
    pt_labels = [f"{pt_bin_min}_{pt_bin_max}"
                 for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"])]
    fractions = {label: [] for label in pt_labels}
    fractions_err = {label: [] for label in pt_labels}
    fd_cuts = []
    for filename in sorted(glob.glob(cfg["file_pattern"])):
        match = re.search(cfg["dir_pattern"], filename)
        if match is None:
            # Previously this surfaced as an AttributeError on None.group.
            raise ValueError(f'Path {filename} does not match dir_pattern {cfg["dir_pattern"]}')
        fd_cuts.append(match.group(0).split("_")[-1])
        with TFile.Open(filename) as fin:
            hist = fin.Get(cfg["histoname"])
            for ind, label in enumerate(pt_labels):
                # NOTE(review): point ind + 1 is read for pT bin ind, so point 0 is
                # never used -- confirm that the graph carries a leading extra point.
                fractions[label].append(hist.GetPointY(ind + 1))
                fractions_err[label].append(hist.GetErrorY(ind + 1))
    print(f"final fractions:\n{fractions}\nfd_cuts:\n{fd_cuts}\nfractions error:\n{fractions_err}")
    return fractions, fractions_err, fd_cuts
plt.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_{pt_bin_min}_{pt_bin_max}.png') + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/config_bdt_cuts.json b/machine_learning_hep/scripts-dhadrons/config_bdt_cuts.json new file mode 100644 index 0000000000..bbac88dbdd --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_bdt_cuts.json @@ -0,0 +1,55 @@ +{ + "inputdir": "/data8/majak/systematics/032025/bdt", + "histoname": "hCorrFracNonPrompt", + "default": "Default", + "hists": { + "Narrow left": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_narrow_left.root" + ] + }, + "Narrow right": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_narrow_right.root" + ] + }, + "Narrow both": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_narrow_both.root" + ] + }, + "Wide left": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_wide.root" + ] + }, + "Wide right": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_wide_right.root" + ] + }, + "Wide both": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_wide_both.root" + ] + }, + "Default": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_default.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,7,8,10,12,16], + "bin_max": [2,3,4,5,6,7,8,10,12,16,24], + "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_p": [0.20, 0.18, 0.60, 0.38], + "legend_np": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "legend_ratio_p": [0.20, 0.20, 0.60, 0.40], + "legend_ratio_np": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/032025/bdt", + "file": "NP_Frac_pp13TeV_bdt_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_compare_fractions.json b/machine_learning_hep/scripts-dhadrons/config_compare_fractions.json new file mode 100644 index 0000000000..e4c2d12371 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_compare_fractions.json @@ -0,0 +1,85 @@ +{ + "inputdir": 
"/data8/majak/systematics/230824/fitting", + "histoname": "hCorrFracNonPrompt", + "default": "Default", + "hists": { + "Poly3 bkg": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_poly3_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_8-12_12.root" + ] + }, + "Double gauss signal": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_double_gauss_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_2-3_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_8-12_12.root" + ] + }, + "Rebin +1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_rebin+1_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_8-12_12.root" + ] + }, + "Range wider by 0.04": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_wide4_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_8-12_12.root" + ] + }, + "Rebin -1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_rebin+1_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_6-8_12.root", + 
"CutVarLc_pp13TeV_LHC24d3_rebin+1_8-12_12.root" + ] + }, + "Range wider by 0.02": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_wide2_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_8-12_12.root" + ] + }, + "Default": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_12.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12], + "bin_max": [2,3,4,5,6,8,12,24], + "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "output": { + "outdir": "/data8/majak/systematics/230824/fitting", + "file": "NP_Frac_pp13TeV_fitting" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_crosssec_run2_run3.json b/machine_learning_hep/scripts-dhadrons/config_crosssec_run2_run3.json new file mode 100644 index 0000000000..7bd4b99da3 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_crosssec_run2_run3.json @@ -0,0 +1,45 @@ +{ + "inputdir": "/data8/majak/crosssec/032025", + "histoname": "hptspectrum", + "default": "#sqrt{#it{s}} = 13 TeV", + "hists": { + "#sqrt{#it{s}} = 13 TeV": { + "file": [ + "LcpKpi_generated_Run2.root" + ], + "systematics": [ + "LcpKpi_generated_Run2.root", + "graph_syst" + ] + }, + "#sqrt{#it{s}} = 13.6 TeV": { + "file": [ + "finalcrossLcpKpiRun3analysis_0-1_scaled_6.root" + ], + "systematics": [ + [0.0], + [0.19], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.24] + ] + } + }, + "y_axis": "d^{2}#it{#sigma}/(d#it{y}d#it{p}_{T}) (#mub GeV^{-1}#it{c})", + "alice_text": "pp, |#kern[0.06]{#it{y}| < 0.5}", + "_alice_text": "#splitline{#Lambda_{c}^{#plus} (and charge conj.)}{|#it{y}| < 0.5}", + "legend": [0.52, 0.50, 0.89, 0.77], + "legend_ratio": [0.40, 0.60, 0.90, 0.90], + "log_scale": true, + "output": { + "outdir": "/data8/majak/crosssec/032025", + "file": 
"crosssec_Lc_run2_run3_review" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_fitting.json b/machine_learning_hep/scripts-dhadrons/config_fitting.json new file mode 100644 index 0000000000..b2ddb200c5 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_fitting.json @@ -0,0 +1,112 @@ +{ + "inputdir": "/data8/majak/systematics/230824/fitting", + "histoname": "hCorrFracNonPrompt", + "default": "Default", + "hists": { + "Poly3 bkg": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_poly3_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_16-24_0-24.root" + ] + }, + "Double gauss signal": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_double_gauss_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_2-3_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_16-24_0-24.root" + ] + }, + "Rebin +1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_rebin+1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_6-8_0-24.root", + 
"CutVarLc_pp13TeV_LHC24d3_rebin+1_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_16-24_0-24.root" + ] + }, + "Range changed by 0.04": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_wide4_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_narrow4_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_narrow4_16-24_0-24.root" + ] + }, + "Rebin -1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_rebin-1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_16-24_0-24.root" + ] + }, + "Range changed by 0.02": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_wide2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_narrow2_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_narrow2_16-24_0-24.root" + ] + }, + "Default": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + 
"CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-24.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "output": { + "outdir": "/data8/majak/systematics/230824/fitting", + "file": "NP_Frac_pp13TeV_fitting_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_fractions.json b/machine_learning_hep/scripts-dhadrons/config_fractions.json new file mode 100644 index 0000000000..cb16754988 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_fractions.json @@ -0,0 +1,11 @@ +{ + "file_pattern": "/data8/majak/MLHEP/results-24022025-newtrain_fd_0.[0-9][0-9]0/LHC23pp_pass4/Results/resultsdatatot/finalcrossLcpKpiRun3analysis.root", + "dir_pattern": "results-24022025-newtrain_fd_0.[0-9][0-9]0", + "histoname": "gfraction", + "pt_bins_min": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16], + "pt_bins_max": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24], + "x_axis": "non-prompt cut", + "y_axis": "#Lambda_{c} prompt fraction", + "outdir": ".", + "outfile": "fraction" +} diff --git a/machine_learning_hep/scripts-dhadrons/config_graph_frac_run3.json b/machine_learning_hep/scripts-dhadrons/config_graph_frac_run3.json new file mode 100644 index 0000000000..bf1b708e9a --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_graph_frac_run3.json @@ -0,0 +1,29 @@ +{ + "inputdir": "/data8/majak/crosssec/202502/", + "histoname": "gfraction", + "default": "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV", + "hists": { + "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { + "file": [ + 
"finalcrossLcpKpiRun3analysis_dd.root" + ] + }, + "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV, wide span": { + "file": [ + "finalcrossLcpKpiRun3analysis_dd_wide.root" + ] + }, + "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV, Nb method": { + "file": [ + "finalcrossLcpKpiRun3analysis_roofit.root" + ] + } + }, + "y_axis": "#Lambda_{c} prompt fraction", + "legend": [0.25, 0.18, 0.70, 0.38], + "legend_ratio": [0.40, 0.10, 0.90, 0.35], + "output": { + "outdir": "/data8/majak/crosssec/202502/", + "file": "graph_frac_Lc_run3_Nb" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_mult_ratios.json b/machine_learning_hep/scripts-dhadrons/config_mult_ratios.json new file mode 100644 index 0000000000..eaaec0b238 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_mult_ratios.json @@ -0,0 +1,168 @@ +{ + "inputdir": "/data8/majak/mult-ratios", + "histoname": "hptspectrum", + "default": "MB", + "model_default": "Monash MB", + "hists": { + "MB": { + "file": [ + "spectrum_MB_scaled.root" + ], + "systematics": [ + [0.07], + [0.06], + [0.06], + [0.05], + [0.05], + [0.05], + [0.05], + [0.06], + [0.12], + [0.20] + ] + }, + "4.34": { + "file": [ + "spectrum_2_scaled.root" + ], + "systematics": [ + [0.10], + [0.05], + [0.05], + [0.05], + [0.06], + [0.07], + [0.10], + [0.10], + [0.16], + [0.20] + ] + }, + "5.58": { + "file": [ + "spectrum_3_scaled.root" + ], + "systematics": [ + [0.09], + [0.05], + [0.05], + [0.05], + [0.05], + [0.06], + [0.07], + [0.07], + [0.11], + [0.20] + ] + }, + "7.93": { + "file": [ + "spectrum_4_scaled.root" + ], + "systematics": [ + [0.10], + [0.05], + [0.05], + [0.05], + [0.05], + [0.05], + [0.06], + [0.10], + [0.10], + [0.20] + ] + }, + "11.42": { + "file": [ + "spectrum_5_scaled.root" + ], + "systematics": [ + [0.10], + [0.06], + [0.06], + [0.06], + [0.06], + [0.06], + [0.06], + [0.09], + [0.10], + [0.12] + ] + }, + "15.94": { + "file": [ + 
"spectrum_6_scaled.root" + ], + "systematics": [ + [0.15], + [0.10], + [0.06], + [0.06], + [0.06], + [0.06], + [0.06], + [0.09], + [0.10], + [0.12] + ] + }, + "20.07": { + "file": [ + "spectrum_7_scaled.root" + ], + "systematics": [ + [0.22], + [0.12], + [0.11], + [0.06], + [0.06], + [0.10], + [0.10], + [0.10], + [0.14], + [0.20] + ] + } + }, + "models": { + "Monash MB": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt" + }, + "Monash 7085": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_7085" + }, + "Monash 5070": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_5070" + }, + "Monash 3050": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_3050" + }, + "Monash 1030": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_1030" + }, + "Monash 110": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_110" + }, + "Monash 0-1": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_01" + } + }, + "y_axis": "d^{2}#it{N}/(d#it{y}d#it{p}_{T})#left|_{mult.} / d^{2}#it{N}/(d#it{y}d#it{p}_{T})#right|_{INEL > 0}", + "alice_text": "Prompt#kern[-0.3]{ #Lambda_{c}^{#plus}}", + "legend": [0.50, 0.65, 0.90, 0.93], + "legend_models": [0.50, 0.65, 0.90, 0.93], + "legend_ratio": [0.60, 0.63, 0.90, 0.88], + "legend_ratio_models": [0.18, 0.18, 0.40, 0.23], + "log_scale": true, + "output": { + "outdir": "/data8/majak/mult-ratios", + "file": "mult_ratios_review" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_pt_weight.json b/machine_learning_hep/scripts-dhadrons/config_pt_weight.json new file mode 100644 index 0000000000..967b72a65b --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_pt_weight.json @@ -0,0 +1,26 @@ +{ + "inputdir": "/data8/majak/systematics/032025/pt-weight", + "histoname": "hptspectrum", + "default": "Default", + "hists": { + "Weighted #it{p}_{T}": { + "file": [ + "finalcrossLcpKpiRun3analysis_ptshape.root" + 
] + }, + "Default": { + "file": [ + "finalcrossLcpKpiRun3analysis.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,7,8,10,12,16], + "bin_max": [2,3,4,5,6,7,8,10,12,16,24], + "y_axis": "#Lambda_{c} cross section (pb)", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/032025/pt-weight", + "file": "crosssec_pt_weight_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_run2.json b/machine_learning_hep/scripts-dhadrons/config_run2.json new file mode 100644 index 0000000000..a3a0944879 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_run2.json @@ -0,0 +1,25 @@ +{ + "inputdir": "/data8/majak/systematics/220724/run2-run3d0", + "histoname": "hCorrFracNonPrompt", + "default": "Run 2 #Lambda_{c}^{#plus} #rightarrow pK_{S}^{0}", + "hists": { + "Run 2 #Lambda_{c}^{#plus} #rightarrow pK_{S}^{0}": { + "file": [ + "CutVarLc2pK0s_pp13TeV.root" + ], + "systematics": [ + [0.04, 0.09, 0.09, 0.04, 0.03, 0.016, 0.05], + [0.04, 0.09, 0.05, 0.04, 0.01, 0.016, 0.05], + [0.06, 0.09, 0.08, 0.04, 0.01, 0.016, 0.05], + [0.06, 0.09, 0.08, 0.04, 0.01, 0.016, 0.05] + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12], + "bin_max": [2,3,4,5,6,8,12,24], + "y_axis": "Non-prompt #Lambda_{c} fraction", + "output": { + "outdir": "/data8/majak/systematics/220724/run2-run3d0", + "file": "NP_Frac_pp13TeV_run2_only" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_run3.json b/machine_learning_hep/scripts-dhadrons/config_run3.json new file mode 100644 index 0000000000..c53d281141 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_run3.json @@ -0,0 +1,48 @@ +{ + "inputdir": "/data8/majak/systematics/230824/run3", + "histoname": "hCorrFracNonPrompt", + "default": "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV}", + "hists": { + "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 
TeV}": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root" + ], + "systematics": [ + [0.0], + [0.17], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.18], + [0.20], + [0.24], + [0.0] + ] + } + }, + "models": { + "Monash": { + "file": "NonPromptLcFraction_Monash_1B.root", + "histoname": "h_nonprompt_fraction" + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "#it{f}_{non-prompt}", + "alice_text": "#Lambda_{c}^{#plus} baryon, |#it{y}| < 0.5", + "output": { + "outdir": "/data8/majak/systematics/230824/run3", + "file": "NP_Frac_pp13TeV_run3_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_run3_run2.json b/machine_learning_hep/scripts-dhadrons/config_run3_run2.json new file mode 100644 index 0000000000..5c946cd99b --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_run3_run2.json @@ -0,0 +1,56 @@ +{ + "inputdir": "/data8/majak/systematics/230824/run3-run2", + "histoname": "hCorrFracNonPrompt", + "default": "#Lambda_{c}^{#plus}#rightarrowpK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV", + "hists": { + "#Lambda_{c}^{#plus}#rightarrowpK^{#minus}#pi^{#plus}, pK_{S}^{0}, pp, #sqrt{#it{s}} = 13 TeV": { + "file": [ + "CutVarLcMerged_pp13TeV_0-25.root" + ], + "systematics": [ + [0.0], + [0.08], + [0.08], + [0.08], + [0.08], + [0.13], + [0.0] + ] + }, + "#Lambda_{c}^{#plus}#rightarrowpK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + 
"CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root" + ], + "systematics": [ + [0.0], + [0.17], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.18], + [0.20], + [0.24], + [0.0] + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "#it{f}_{non-prompt}", + "alice_text": "#Lambda_{c}^{#plus} and charge conj., |#it{y}| < 0.5", + "output": { + "outdir": "/data8/majak/systematics/230824/run3-run2", + "file": "NP_Frac_pp13TeV_run3_run2_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_run3d0.json b/machine_learning_hep/scripts-dhadrons/config_run3d0.json new file mode 100644 index 0000000000..f5c0a666c9 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_run3d0.json @@ -0,0 +1,64 @@ +{ + "inputdir": "/data8/majak/systematics/230824/run3d0", + "histoname": "hCorrFracNonPrompt", + "default": "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV", + "hists": { + "D^{0} #rightarrow K^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { + "file": [ + "CutVarD0_pp136TeV_final_0-25.root" + ], + "systematics": [ + [0.10, 0.04, 0.08, 0.08], + [0.08, 0.04, 0.03, 0.09], + [0.03, 0.04, 0.03, 0.08], + [0.04, 0.04, 0.03, 0.07], + [0.05, 0.03, 0.04, 0.06], + [0.07, 0.02, 0.06, 0.04], + [0.04, 0.01, 0.08, 0.05], + [0.05, 0.01, 0.09, 0.04], + [0.08, 0.01, 0.15, 0.08], + [0.08, 0.01, 0.15, 0.08], + [0.08, 0.01, 0.15, 0.08], + [0.08, 0.01, 0.15, 0.08], + [0.03, 0.01, 0.15, 0.08], + [0.03, 0.01, 0.15, 0.08], + [0.0] + ] + }, + "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { + "file": [ + 
"CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root" + ], + "systematics": [ + [0.0], + [0.17], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.18], + [0.20], + [0.24], + [0.0] + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "#it{f}_{non-prompt}", + "alice_text": "|#it{y}| < 0.5", + "output": { + "outdir": "/data8/majak/systematics/230824/run3d0", + "file": "NP_Frac_pp13TeV_run3d0_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_spans_cutvar.json b/machine_learning_hep/scripts-dhadrons/config_spans_cutvar.json new file mode 100644 index 0000000000..efe1c1fc92 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_spans_cutvar.json @@ -0,0 +1,26 @@ +{ + "inputdir": "/data8/majak/crosssec/202502/fractions", + "histoname": "hCorrFracNonPrompt", + "default": "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV}", + "hists": { + "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV}": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_default.root" + ] + }, + "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV, wide span}": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_wide.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,7,8,10,12,16], + "bin_max": [2,3,4,5,6,7,8,10,12,16,24], + "y_axis": "#it{f}_{non-prompt}", + "legend": [0.50, 0.15, 0.90, 0.45], + "legend_ratio": [0.40, 0.10, 0.90, 0.35], + "output": { + "outdir": 
"/data8/majak/crosssec/202502/fractions", + "file": "NP_Frac_pp13.6TeV_run3_spans" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/config_track_tuner.json b/machine_learning_hep/scripts-dhadrons/config_track_tuner.json new file mode 100644 index 0000000000..ef9e65e39e --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/config_track_tuner.json @@ -0,0 +1,56 @@ +{ + "inputdir": "/data8/majak/systematics/230824/track-tuner", + "histoname": "hCorrFracNonPrompt", + "default": "Default (with #it{p}_{T} smearing)", + "hists": { + "No #it{p}_{T} smearing": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_1-24_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_16-24_0-24.root" + ] + }, + "Reso p1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_1-24_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_16-24_0-24.root" + ] + }, + "Default (with #it{p}_{T} smearing)": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + 
"CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-24.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "output": { + "outdir": "/data8/majak/systematics/230824/track-tuner", + "file": "NP_Frac_pp13TeV_track_tuner_1-24" + } +} From 2a7aee6f73f5183426cc7c1a07739ad3067fd708 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 15 Jul 2025 11:59:58 +0200 Subject: [PATCH 07/34] Add multitrial scripts --- machine_learning_hep/multitrial.py | 188 ++++++++++++++++++ .../run-mlhep-fitter-multitrial.py | 158 +++++++++++++++ .../run-mlhep-fitter-multitrial.sh | 47 +++++ 3 files changed, 393 insertions(+) create mode 100644 machine_learning_hep/multitrial.py create mode 100644 machine_learning_hep/run-mlhep-fitter-multitrial.py create mode 100644 machine_learning_hep/run-mlhep-fitter-multitrial.sh diff --git a/machine_learning_hep/multitrial.py b/machine_learning_hep/multitrial.py new file mode 100644 index 0000000000..38119c95fa --- /dev/null +++ b/machine_learning_hep/multitrial.py @@ -0,0 +1,188 @@ +# pylint: disable=missing-function-docstring, invalid-name +""" +file: multitrial.py +brief: Plot multitrial systematics based on multiple fit trials, one file per trial. 
+usage: python3 multitrial.py config_multitrial.json +author: Maja Karwowska , Warsaw University of Technology +""" +import argparse +import glob +import json +import re +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.ticker import MultipleLocator, AutoMinorLocator + +from ROOT import ( # pylint: disable=import-error,no-name-in-module + TFile, + gROOT, +) + + +def plot_text_box(ax, text): + ax.text(0.98, 0.97, text, + horizontalalignment="right", verticalalignment="top", + fontsize=40, va="top", transform=ax.transAxes, + bbox={"edgecolor": "black", "fill": False}) + + +def get_yields(cfg): + filenames = sorted(glob.glob(cfg["file_pattern"]), + key=lambda filename: re.split("/", filename)[-2]) + yields = {} + yields_err = {} + trials = {} + chis = {} + for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): + yields[f"{pt_bin_min}_{pt_bin_max}"] = [] + yields_err[f"{pt_bin_min}_{pt_bin_max}"] = [] + trials[f"{pt_bin_min}_{pt_bin_max}"] = [] + chis[f"{pt_bin_min}_{pt_bin_max}"] = [] + for filename in filenames: + print(f"Reading {filename}") + with TFile.Open(filename) as fin: + hist = fin.Get(cfg["histoname"]) + hist_sel = fin.Get(cfg["sel_histoname"]) + if hist.ClassName() != "TH1F": + print(f"No hist in {filename}") + if hist_sel.ClassName() != "TH1F": + print(f"No hist sel in {filename}") + dirname = re.split("/", filename)[4] # [-2] for D2H fitter + trial_name = dirname.replace(cfg["dir_pattern"], "") + for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"], + cfg["pt_bins_max"])): + if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \ + and hist.GetBinContent(ind + 1) > 1.0 : + yields[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinContent(ind + 1)) + yields_err[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinError(ind + 1)) + trials[f"{pt_bin_min}_{pt_bin_max}"].append(trial_name) + chis[f"{pt_bin_min}_{pt_bin_max}"].append(hist_sel.GetBinContent(ind + 1)) + else: + print(f"Rejected: 
{hist_sel.GetBinContent(ind + 1)} {trial_name} "\ + f"pt: {pt_bin_min}, {pt_bin_max}") + if hist.GetBinContent(ind + 1) < 1.0: + print("Yield 0") + return yields, yields_err, trials, chis + + +def prepare_figure(cfg, y_label, ticks): + fig = plt.figure(figsize=(20, 15)) + ax = plt.subplot(1, 1, 1) + ax.set_xlabel(cfg["x_axis"], fontsize=20) + ax.set_ylabel(y_label, fontsize=20) + ax.tick_params(which="both", width=2.5, direction="in") + ax.tick_params(which="major", labelsize=20, length=15) + ax.tick_params(which="minor", length=7) + ax.xaxis.set_major_locator(MultipleLocator(ticks)) + ax.xaxis.set_minor_locator(AutoMinorLocator(5)) + ax.yaxis.set_minor_locator(AutoMinorLocator(5)) + return fig, ax + + +def set_ax_limits(ax, pt_string, values, errs): + ax.margins(0.01, 0.2) + np_values = np.array(values, dtype="float32") + np_errs = np.array(errs, dtype="float32") + if ax.get_ylim()[1] - ax.get_ylim()[0] > 30.0 * np.std(np_values): + ax.set_ylim(np.mean(np_values) - 10.0 * np.std(np_values), + np.mean(np_values) + 10.0 * np.std(np_values)) + print(f"{pt_string} narrowing down the axis to {ax.get_ylim()}") + + +def plot_trial_line(ax, central_trial_ind): + axis_lim = ax.get_ylim() + y_axis = np.linspace(*axis_lim, 100) + ax.plot([central_trial_ind] * len(y_axis), y_axis, c="m", ls="--", linewidth=4.0) + ax.set_ylim(*axis_lim) + + +def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_string, + central_trial_ind, central_yield): + fig, ax = prepare_figure(cfg, cfg["y_axis"], 100) + x_axis = range(len(trials)) + ax.errorbar(x_axis, yields, yerr=yields_err, + fmt="o", c="b", elinewidth=2.5, linewidth=4.0) + set_ax_limits(ax, pt_string, yields, yields_err) + central_line = np.array([central_yield] * len(x_axis), dtype="float32") + ax.plot(x_axis, central_line, c="orange", ls="--", linewidth=4.0) + central_err = np.array([yields_err[central_trial_ind]] * len(x_axis), dtype="float32") + ax.fill_between(x_axis, central_line - central_err, central_line 
+ central_err, + facecolor="orange", edgecolor="none", alpha=0.3) + plot_trial_line(ax, central_trial_ind) + plot_text_box(ax, plot_pt_string) + fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_yields_trials_{pt_string}.png', + bbox_inches='tight') + plt.close() + + +def plot_chis(chis, cfg, pt_string, plot_pt_string): + fig, ax = prepare_figure(cfg, "Chi2/ndf", 100) + x_axis = range(len(chis)) + ax.scatter(x_axis, chis, c="b", marker="o") + set_ax_limits(ax, pt_string, chis, [0.0] * len(chis)) + plot_text_box(ax, plot_pt_string) + fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png', + bbox_inches='tight') + plt.close() + + +def plot_yields_distr(yields, cfg, pt_string, plot_pt_string, central_trial_ind, central_yield): + plt.figure(figsize=(20, 15)) + ax = plt.subplot(1, 1, 1) + ax.set_xlabel("Ratio", fontsize=20) + ax.tick_params(labelsize=20, length=7, width=2.5) + ratios = [yield_ / central_yield for ind, yield_ in enumerate(yields) \ + if ind != central_trial_ind] + ax.hist(ratios, color="b", linewidth=4.0) + mean = np.mean(yields) + std_dev = np.std(yields) + diffs = [(yield_ - central_yield) / central_yield \ + for yield_ in yields[:central_trial_ind]] + diffs.extend([(yield_ - central_yield) / central_yield \ + for yield_ in yields[central_trial_ind+1:]]) + rmse = np.sqrt(np.mean(np.array(diffs, dtype="float32")**2)) + plot_text_box(ax, f"{plot_pt_string}\n"\ + f"mean: {mean:.0f}\n"\ + f"std dev: {std_dev:.2f}\n"\ + f"RMSE: {rmse:.2f}\n"\ + f"#trials: {len(yields)}") + plt.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_distr_{pt_string}.png', bbox_inches='tight') + plt.close() + + +def main(): + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("config", help="JSON config file") + args = parser.parse_args() + + with open(args.config, encoding="utf8") as fil: + cfg = json.load(fil) + + yields, yields_err, trials, chis = get_yields(cfg) + + for pt_bin_min, pt_bin_max in 
zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): + plot_pt_string = f"${pt_bin_min} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {pt_bin_max}$" + pt_string = f"{pt_bin_min}_{pt_bin_max}" + + try: + central_trial_ind = trials[pt_string].index(cfg["central_trial"]) + central_yield = yields[pt_string][central_trial_ind] + + plot_yields_trials(yields[pt_string], yields_err[pt_string], trials[pt_string], cfg, + pt_string, plot_pt_string, central_trial_ind, central_yield) + plot_yields_distr(yields[pt_string], cfg, pt_string, plot_pt_string, + central_trial_ind, central_yield) + plot_chis(chis[pt_string], cfg, pt_string, plot_pt_string) + except: + pass + + with open(f'{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt', + "w", encoding="utf-8") as ftext: + for trial in trials[pt_string]: + ftext.write(f"{trial}\n") + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/run-mlhep-fitter-multitrial.py b/machine_learning_hep/run-mlhep-fitter-multitrial.py new file mode 100644 index 0000000000..c5a06a1836 --- /dev/null +++ b/machine_learning_hep/run-mlhep-fitter-multitrial.py @@ -0,0 +1,158 @@ +# pylint: disable=missing-function-docstring, invalid-name +""" +file: run-mlhep-fitter-multitrial.py +brief: Prepare MLHEP database files for different fit configurations for multitrial systematics. 
+usage: python3 run-mlhep-fitter-multitrial.py +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse +import re +import shutil +import yaml + +SIGMA02="0.007, 0.007, 0.013" +SIGMA23="0.007, 0.007, 0.013" +SIGMA34="0.007, 0.007, 0.012" +SIGMA45="0.008, 0.008, 0.016" +SIGMA56="0.010, 0.010, 0.016" +SIGMA67="0.008, 0.008, 0.017" +SIGMA78="0.012, 0.012, 0.018" +SIGMA810="0.015, 0.012, 0.018" +SIGMA1012="0.010, 0.010, 0.022" +SIGMA1216="0.016, 0.016, 0.029" +SIGMA1624="0.016, 0.016, 0.029" +FREE_SIGMAS=[SIGMA02, SIGMA23, SIGMA34, SIGMA45, SIGMA56, SIGMA67, SIGMA78, + SIGMA810, SIGMA1012, SIGMA1216, SIGMA1624] + +CENTRAL_TRIAL="" + +BASE_TRIALS = ( + ["alpha-15%", "alpha+15%"], + ["n-15%", "n+15%"], + ["rebin-1", "rebin+1"], + ["free-sigma"], + ["poly3"], + ["narrow", "narrow2", "wide", "wide2"] +) + +def generate_trials(trial_classes): + combinations = [""] + for trial_class in trial_classes: + class_comb = [] + for cur_comb in combinations: + for trial in trial_class: + class_comb.append(cur_comb + "_" + trial) + #print(f"{cur_comb}_{trial}") + combinations.extend(class_comb) + return combinations + +def replace_with_reval(var, in_str, frac): + pattern = fr"{var}\[([0-9.]*), .*?\]" + values = re.findall(pattern, in_str) + new_val = round(float(values[0]) * frac, 3) + return re.sub(pattern, f"{var}[{new_val}, {new_val}]", in_str) + +def process_trial(trial, ana_cfg, data_cfg, mc_cfg): + fit_cfg = ana_cfg["mass_roofit"] + if "alpha-15%" in trial: + print("Processing alpha-15%") + for pt_cfg in mc_cfg: + sig_fn = pt_cfg["components"]["sig"]["fn"] + pt_cfg["components"]["sig"]["fn"] = replace_with_reval("alpha1", sig_fn, 0.85) + elif "alpha+15%" in trial: + print("Processing alpha+15%") + for pt_cfg in mc_cfg: + sig_fn = pt_cfg["components"]["sig"]["fn"] + pt_cfg["components"]["sig"]["fn"] = replace_with_reval("alpha1", sig_fn, 1.15) + elif "n-15%" in trial: + print("Processing n-15%") + for pt_cfg in mc_cfg: + sig_fn = 
pt_cfg["components"]["sig"]["fn"] + pt_cfg["components"]["sig"]["fn"] = replace_with_reval("n1", sig_fn, 0.85) + elif "n+15%" in trial: + print("Processing n+15%") + for pt_cfg in mc_cfg: + sig_fn = pt_cfg["components"]["sig"]["fn"] + pt_cfg["components"]["sig"]["fn"] = replace_with_reval("n1", sig_fn, 1.15) + elif "rebin-1" in trial: + print("Processing rebin-1") + ana_cfg["n_rebin"] = [rebin - 1 for rebin in ana_cfg["n_rebin"]] + elif "rebin+1" in trial: + print("Processing rebin+1") + ana_cfg["n_rebin"] = [rebin + 1 for rebin in ana_cfg["n_rebin"]] + elif "free-sigma" in trial: + print("Processing free-sigma") + for pt_cfg, free_sigma in zip(mc_cfg, FREE_SIGMAS): + sig_fn = pt_cfg["components"]["sig"]["fn"] + pt_cfg["components"]["sig"]["fn"] = re.sub(r"sigma_g1\[(.*?)\]", + f"sigma_g1[{free_sigma}]", sig_fn) + elif "poly3" in trial: + print("Processing poly3") + for pt_cfg in data_cfg: + bkg_fn = pt_cfg["components"]["bkg"]["fn"] + pt_cfg["components"]["bkg"]["fn"] = re.sub(r"a2\[(.*?)\]", + r"a2[\1], a3[-1e8, 1e8]", bkg_fn) + elif "narrow2" in trial: + print("Processing narrow2") + for pt_cfg in fit_cfg: + pt_cfg["range"] = [pt_cfg["range"][0] + 0.02, pt_cfg["range"][1] - 0.02] + elif "narrow" in trial: + print("Processing narrow") + for pt_cfg in fit_cfg: + pt_cfg["range"] = [pt_cfg["range"][0] + 0.01, pt_cfg["range"][1] - 0.01] + elif "wide2" in trial: + print("Processing wide2") + for pt_cfg in fit_cfg: + pt_cfg["range"] = [max(2.10, pt_cfg["range"][0] - 0.02), + min(2.47, pt_cfg["range"][1] + 0.02)] + elif "wide" in trial: + print("Processing wide") + for pt_cfg in fit_cfg: + pt_cfg["range"] = [max(2.10, pt_cfg["range"][0] - 0.01), + min(2.47, pt_cfg["range"][1] + 0.01)] + + +def main(db, db_dir, out_db_dir, resdir_pattern): + db_ext=f"{db}.yml" + db_path=f"{db_dir}/{db_ext}" + combinations = generate_trials(BASE_TRIALS) + + for comb in combinations: + print(comb) + + cur_cfg = f"{out_db_dir}/{db}{comb}.yml" + shutil.copy2(db_path, cur_cfg) + + with 
open(cur_cfg, encoding="utf-8") as stream: + cfg = yaml.safe_load(stream) + + ana_cfg = cfg["LcpKpi"]["analysis"]["Run3analysis"] + fit_cfg = ana_cfg["mass_roofit"] + mc_cfg = [fit_params for fit_params in fit_cfg \ + if "level" in fit_params and fit_params["level"] == "mc"] + data_cfg = [fit_params for fit_params in fit_cfg if not "level" in fit_params] + + resdir = f"{resdir_pattern}{comb}" + respath = f"/data8/majak/MLHEP/{resdir}/" + ana_cfg["data"]["prefix_dir_res"] = respath + ana_cfg["mc"]["prefix_dir_res"] = respath + + trials = comb.split("_") + + for trial in trials: + process_trial(trial, ana_cfg, data_cfg, mc_cfg) + + with open(cur_cfg, "w", encoding="utf-8") as stream: + yaml.dump(cfg, stream, sort_keys=False, width=10000, default_flow_style=None) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("db", help="MLHEP database without extension") + parser.add_argument("db_dir", help="path to directory with MLHEP database") + parser.add_argument("out_db_dir", help="path to output directory for generated MLHEP databases") + parser.add_argument("resdir", help="MLHEP resdir pattern") + args = parser.parse_args() + + main(args.db, args.db_dir, args.out_db_dir, args.resdir) diff --git a/machine_learning_hep/run-mlhep-fitter-multitrial.sh b/machine_learning_hep/run-mlhep-fitter-multitrial.sh new file mode 100644 index 0000000000..26f25972af --- /dev/null +++ b/machine_learning_hep/run-mlhep-fitter-multitrial.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +DB_PATTERN="database_ml_parameters_LcToPKPi_multiclass_fdd" # Original database to be used as template +DB_DIR="data/data_run3" +OUT_DB_DIR="multitrial-db" # Directory to store multitrial databases only +ext=".yml" + +DIR_PATH="/data8/majak/MLHEP" +DIR_PATTERN="results-24022025-newtrain-multitrial-prompt" # Prefix of output directory for fit results + +# Paths to masshistos to fit +BASE_DIR="/data8/majak/MLHEP/results-24022025-newtrain-ptshape-prompt" 
+DATA_HIST="LHC23pp/Results/resultsdatatot/masshisto.root" +MC_HIST="LHC24pp_mc/Results/resultsmctot/masshisto.root" + +# Run this only once to generate databases +# Then, you can comment this out if you don't change the *.py file +# The output analysis dir is set in databases to DIR_PATTERN + suffix with trial name +python run-mlhep-fitter-multitrial.py "${DB_PATTERN}" "${DB_DIR}" "${OUT_DB_DIR}" "${DIR_PATTERN}" || exit 1 + +for db in ${OUT_DB_DIR}/*.yml ; do + db_basename=`basename ${db}` + db_basename_no_ext=${db_basename%%${ext}} + echo ${db_basename_no_ext} + suffix=${db_basename_no_ext##${DB_PATTERN}} + echo "suffix: ${suffix}" + RESPATH="${DIR_PATH}/${DIR_PATTERN}${suffix}" + echo "respath: ${RESPATH}" + + # Copy base masshistos so as to skip the masshisto step + # Only the fit step needs to be activated in analyzer.yml + # You need first to create the directory trees + cp "${BASE_DIR}/${DATA_HIST}" "${RESPATH}/${DATA_HIST}" + cp "${BASE_DIR}/${MC_HIST}" "${RESPATH}/${MC_HIST}" + + mlhep logfile_${db_basename}.log \ + -a Run3analysis \ + --run-config submission/analyzer.yml \ + --database-analysis ${db} + + # Copy the plots from MachineLearningHEP/machine_learning_hep/fig/ to RESPATH + # It's not compulsory, it's just for you to see the fits + # It might be obsolete if you changed the default output fig/ location in MLHEP + rm -rf ${RESPATH}/fig/ + mv fig/ ${RESPATH}/fig/ +done + From 2b3170cfbe6e5c8be7a64273f0167b27abad1acd Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 10:22:01 +0200 Subject: [PATCH 08/34] Move the multitrial dir to the scripts dir --- machine_learning_hep/{ => scripts-dhadrons}/multitrial/README.md | 0 .../{ => scripts-dhadrons}/multitrial/config_multitrial.json | 0 .../{ => scripts-dhadrons}/multitrial/multitrial.py | 0 .../multitrial/run-mlhep-fitter-multitrial.py | 0 .../multitrial/run-mlhep-fitter-multitrial.sh | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename machine_learning_hep/{ => 
scripts-dhadrons}/multitrial/README.md (100%) rename machine_learning_hep/{ => scripts-dhadrons}/multitrial/config_multitrial.json (100%) rename machine_learning_hep/{ => scripts-dhadrons}/multitrial/multitrial.py (100%) rename machine_learning_hep/{ => scripts-dhadrons}/multitrial/run-mlhep-fitter-multitrial.py (100%) rename machine_learning_hep/{ => scripts-dhadrons}/multitrial/run-mlhep-fitter-multitrial.sh (100%) diff --git a/machine_learning_hep/multitrial/README.md b/machine_learning_hep/scripts-dhadrons/multitrial/README.md similarity index 100% rename from machine_learning_hep/multitrial/README.md rename to machine_learning_hep/scripts-dhadrons/multitrial/README.md diff --git a/machine_learning_hep/multitrial/config_multitrial.json b/machine_learning_hep/scripts-dhadrons/multitrial/config_multitrial.json similarity index 100% rename from machine_learning_hep/multitrial/config_multitrial.json rename to machine_learning_hep/scripts-dhadrons/multitrial/config_multitrial.json diff --git a/machine_learning_hep/multitrial/multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py similarity index 100% rename from machine_learning_hep/multitrial/multitrial.py rename to machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py diff --git a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py similarity index 100% rename from machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py rename to machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py diff --git a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh b/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.sh similarity index 100% rename from machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh rename to machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.sh From 
ca01ab87cb745fec2cefa80dc6e0e5b9f803a6cd Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 14:34:01 +0200 Subject: [PATCH 09/34] Remove redundant script --- machine_learning_hep/scripts-dhadrons/run-fdd-precise.sh | 1 - 1 file changed, 1 deletion(-) delete mode 120000 machine_learning_hep/scripts-dhadrons/run-fdd-precise.sh diff --git a/machine_learning_hep/scripts-dhadrons/run-fdd-precise.sh b/machine_learning_hep/scripts-dhadrons/run-fdd-precise.sh deleted file mode 120000 index 9c226abd43..0000000000 --- a/machine_learning_hep/scripts-dhadrons/run-fdd-precise.sh +++ /dev/null @@ -1 +0,0 @@ -/home/maja/CERN-useful-scripts/run-fdd-precise.sh \ No newline at end of file From bf3628b385d7cbe36a0fb44ebdcccf344d9b2726 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 15:06:57 +0200 Subject: [PATCH 10/34] Factor out and describe the scripts for preliminary plots --- .../config_Lc_Fit_pp13.6TeV_Data.yml | 27 -------------- .../{ => preliminary-plots}/DrawCutVarFit.C | 0 .../preliminary-plots/README.md | 35 +++++++++++++++++++ .../config_invmass_preliminary.yml | 14 ++++++++ .../plot_invmass_fit_dzero_dplus_lambdac.py | 14 ++------ 5 files changed, 52 insertions(+), 38 deletions(-) delete mode 100644 machine_learning_hep/scripts-dhadrons/config_Lc_Fit_pp13.6TeV_Data.yml rename machine_learning_hep/scripts-dhadrons/{ => preliminary-plots}/DrawCutVarFit.C (100%) create mode 100644 machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md create mode 100644 machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml rename machine_learning_hep/scripts-dhadrons/{ => preliminary-plots}/plot_invmass_fit_dzero_dplus_lambdac.py (98%) diff --git a/machine_learning_hep/scripts-dhadrons/config_Lc_Fit_pp13.6TeV_Data.yml b/machine_learning_hep/scripts-dhadrons/config_Lc_Fit_pp13.6TeV_Data.yml deleted file mode 100644 index ba413ae8c1..0000000000 --- 
a/machine_learning_hep/scripts-dhadrons/config_Lc_Fit_pp13.6TeV_Data.yml +++ /dev/null @@ -1,27 +0,0 @@ -_pp13.6TeVFD: - Particle: 'LAMBDAC_TO_PKPI' - PtMin: [1.] - PtMax: [2.] - MassMin: [2.21] - MassMax: [2.356] - Rebin: [2] - Mult: [null] - MultLatex: ["Minimum Bias"] -__pp13.6TeVFD: - Particle: 'LAMBDAC_TO_PKPI' - PtMin: [4.] - PtMax: [5.] - MassMin: [2.19] - MassMax: [2.38] - Rebin: [2] - Mult: ["01"] - MultLatex: [" = 20.07"] -pp13.6TeVFD: - Particle: 'LAMBDAC_TO_PKPI' - PtMin: [8.] - PtMax: [10.] - MassMin: [2.1] - MassMax: [2.456] - Rebin: [4] - Mult: [7085] - MultLatex: [" = 4.34"] diff --git a/machine_learning_hep/scripts-dhadrons/DrawCutVarFit.C b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C similarity index 100% rename from machine_learning_hep/scripts-dhadrons/DrawCutVarFit.C rename to machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md new file mode 100644 index 0000000000..e0a7eabdd1 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md @@ -0,0 +1,35 @@ +# Scripts for preliminary plots + +## Invariant mass fits + +File: `plot_invmass_fit_dzero_dplus_lambdac.py`
+Usage: `python plot_invmass_fit_dzero_dplus_lambdac.py config_invmass_preliminary.yml` + +Example config in `config_invmass_preliminary.yml`. It was used to draw the plots: +- https://alice-figure.web.cern.ch/node/34090 +- https://alice-figure.web.cern.ch/node/34089 +- https://alice-figure.web.cern.ch/node/34088 + +Different versions of this script circulate among the D2H people. This version contains a few of my improvements, e.g., a configurable multiplicity label.
+I also commented out lines related to non-prompt particles as we had results only for the prompt case. + +You still need to adjust the script in several places: +- comment/uncomment the lines related to prompt/non-prompt particles +- adjust the output directory in line 177 +- input filename in `get_name_infile()` +- histogram names in `main()` + +## Cut variation results + +File: `DrawCutVarFit.C`
+Usage: `root -x DrawCutVarFit.C` in the ROOT / O2 shell + +Used to draw the plot https://alice-figure.web.cern.ch/node/31345. + +Adjust the script: +- set the `bdtScoreCuts_...` variables to your final BDT cuts +- set `binMin` and `binMax` to the pT bin you want to plot +- set `bdtScoreCuts` to the proper `bdtScoreCuts_...` variable +- adjust `bdtScoreCuts_toPlot` and the corresponding indices in `bdtScoreCuts_toPlot_ind`; they are the cuts to label on the x-axis +- adjust the input file name and histogram names in `DrawCutVarFit()` +- adjust x-axis title, if needed diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml b/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml new file mode 100644 index 0000000000..ebf2dddf8a --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml @@ -0,0 +1,14 @@ +_pp13.6TeVFD: + Particle: 'LAMBDAC_TO_PKPI' + _Particle: 'D0, DPLUS, LAMBDAC_TO_PKPI, LAMBDAC_TO_PK0S' + PtMin: [1., 4., 8.] + PtMax: [2., 5., 10.] 
+ MassMin: [2.21, 2.19, 2.1] + _MassMin: 'min masses to display' + MassMax: [2.356, 2.38, 2.456] + _MassMax: 'max masses to display' + Rebin: [2, 2, 4] + Mult: [null, "01", "7085"] + _Mult: 'multiplicity label contained in the output file name' + MultLatex: ["Minimum Bias", " = 20.07", " = 4.34"] + _MultLatex: 'TLatex text describing multiplicity on the plot' diff --git a/machine_learning_hep/scripts-dhadrons/plot_invmass_fit_dzero_dplus_lambdac.py b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py similarity index 98% rename from machine_learning_hep/scripts-dhadrons/plot_invmass_fit_dzero_dplus_lambdac.py rename to machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py index e5c3a9b0e6..bfd2ae1b4b 100644 --- a/machine_learning_hep/scripts-dhadrons/plot_invmass_fit_dzero_dplus_lambdac.py +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py @@ -240,7 +240,7 @@ def main(particle, i_pt, cfg, batch): width_bin = hmass_promptEnhanced.GetBinWidth(i_pt+1) bin_max = hmass_promptEnhanced.GetMaximumBin() bin_min = hmass_promptEnhanced.GetMinimumBin() - + ymax_promptEnhanced = 1.2*(hmass_promptEnhanced.GetMaximum() + hmass_promptEnhanced.GetBinError(bin_max)) ymin_promptEnhanced = 0.8*(hmass_promptEnhanced.GetMinimum() - hmass_promptEnhanced.GetBinError(bin_min)) #ymin_FDEnhanced, ymax_FDEnhanced = 0., 1.2*(hmass_FDEnhanced.GetMaximum() + hmass_FDEnhanced.GetBinError(bin_max)) @@ -281,11 +281,6 @@ def main(particle, i_pt, cfg, batch): lat_label.SetTextFont(43) lat_label.SetTextColor(kBlack) - # lat_label = TLatex() - # lat_label.SetNDC() - # lat_label.SetTextFont(43) - # lat_label.SetTextColor(kBlack) - # str_mu = f"#it{{#mu}} = ({mean:.0f} #pm {err_mean:.0f}) MeV/#it{{c}}^{{2}}" # str_sigma = f"#it{{#sigma}} = {sigma:.0f} MeV/#it{{c}}^{{2}}" str_sig_promptEnhanced = f'#it{{S}} = {signal_promptEnhanced:.0f} #pm 
{err_signal_promptEnhanced:.0f}' @@ -324,15 +319,13 @@ def main(particle, i_pt, cfg, batch): #frame_FDEnhanced = pad2.DrawFrame(mass_mins[i_pt], ymin_FDEnhanced, mass_maxs[i_pt], ymax_FDEnhanced, title) #frame_FDEnhanced.GetYaxis().SetDecimals() - - #c.cd() #pad1.cd() set_object_style(hmass_promptEnhanced, linewidth=3, linecolor=kBlack, markersize=0.5) set_object_style(fit_tot_promptEnhanced, linewidth=3, linecolor=kBlue) set_object_style(fit_bkg_promptEnhanced, linewidth=3, linecolor=kRed, linestyle=2) #set_object_style(fit_refl_promptEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) - + hmass_promptEnhanced.Draw("sameE") fit_bkg_promptEnhanced.Draw("same") fit_tot_promptEnhanced.Draw("same") @@ -381,7 +374,7 @@ def main(particle, i_pt, cfg, batch): #fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.690 #pm 0.008 (stat.)" # (4, 5) GeV #fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.70 #pm 0.02 (stat.)" # (0, 1) GeV #lat_label.DrawLatex(0.19, 0.18, fnonprompt_FDEnhanced) - + # lat_label.DrawLatex(0.19, 0.64, str_mu) # lat_label.DrawLatex(0.19, 0.58, str_sigma) #lat_label.DrawLatex(0.19, 0.24, str_sig_FDEnhanced) @@ -410,4 +403,3 @@ def main(particle, i_pt, cfg, batch): for i_pt in range(len(configuration["pp13.6TeVFD"]["PtMin"])): main(particle=LAMBDAC_TO_PKPI, i_pt=i_pt, cfg=configuration, batch=args.batch) - # main(particle=DPLUS, i_pt=3, cfg=configuration, batch=args.batch) From e65da00b9753527adb8568ccd8ccf8a93d210cbd Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 15:20:32 +0200 Subject: [PATCH 11/34] Split scripts into directories --- .../scripts-dhadrons/{ => debugging}/check_parquet.py | 8 ++++++++ .../{ => debugging}/compare_prompt_fraction.py | 0 .../{ => debugging}/config_fractions.json | 0 .../{ => debugging}/config_graph_frac_run3.json | 0 .../scripts-dhadrons/{ => debugging}/plot_graph.py | 0 .../{gather-inputs-fdd.sh => gather-inputs-cutvar.sh} | 0 .../{ => 
merging}/merge-fdd-inputs-sept-approvals.sh | 0 .../scripts-dhadrons/{ => merging}/merge-histos.sh | 0 .../scripts-dhadrons/{ => merging}/merge-mlhep.sh | 0 .../scripts-dhadrons/{ => merging}/merge-yields.sh | 0 .../scripts-dhadrons/{ => merging}/merge_fractions.py | 0 .../scripts-dhadrons/{ => merging}/merge_histomass.py | 0 .../scripts-dhadrons/{ => merging}/merge_histos.py | 0 .../scripts-dhadrons/{ => preprocessing}/add_pt_bins.py | 1 + .../{ => preprocessing}/modify_input_run2.py | 0 .../{ => preprocessing}/modify_input_run3.py | 0 .../{ => preprocessing}/remove_high_pt.py | 0 .../{ => systematics}/compare_fractions.py | 0 .../{ => systematics}/config_bdt_cuts.json | 0 .../{ => systematics}/config_compare_fractions.json | 0 .../{ => systematics}/config_crosssec_run2_run3.json | 0 .../{ => systematics}/config_fitting.json | 0 .../{ => systematics}/config_mult_ratios.json | 0 .../{ => systematics}/config_pt_weight.json | 0 .../scripts-dhadrons/{ => systematics}/config_run2.json | 0 .../scripts-dhadrons/{ => systematics}/config_run3.json | 0 .../{ => systematics}/config_run3_run2.json | 0 .../scripts-dhadrons/{ => systematics}/config_run3d0.json | 0 .../{ => systematics}/config_spans_cutvar.json | 0 .../{ => systematics}/config_track_tuner.json | 0 30 files changed, 9 insertions(+) rename machine_learning_hep/scripts-dhadrons/{ => debugging}/check_parquet.py (86%) rename machine_learning_hep/scripts-dhadrons/{ => debugging}/compare_prompt_fraction.py (100%) rename machine_learning_hep/scripts-dhadrons/{ => debugging}/config_fractions.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => debugging}/config_graph_frac_run3.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => debugging}/plot_graph.py (100%) rename machine_learning_hep/scripts-dhadrons/{gather-inputs-fdd.sh => gather-inputs-cutvar.sh} (100%) rename machine_learning_hep/scripts-dhadrons/{ => merging}/merge-fdd-inputs-sept-approvals.sh (100%) rename 
machine_learning_hep/scripts-dhadrons/{ => merging}/merge-histos.sh (100%) rename machine_learning_hep/scripts-dhadrons/{ => merging}/merge-mlhep.sh (100%) rename machine_learning_hep/scripts-dhadrons/{ => merging}/merge-yields.sh (100%) rename machine_learning_hep/scripts-dhadrons/{ => merging}/merge_fractions.py (100%) rename machine_learning_hep/scripts-dhadrons/{ => merging}/merge_histomass.py (100%) rename machine_learning_hep/scripts-dhadrons/{ => merging}/merge_histos.py (100%) rename machine_learning_hep/scripts-dhadrons/{ => preprocessing}/add_pt_bins.py (97%) rename machine_learning_hep/scripts-dhadrons/{ => preprocessing}/modify_input_run2.py (100%) rename machine_learning_hep/scripts-dhadrons/{ => preprocessing}/modify_input_run3.py (100%) rename machine_learning_hep/scripts-dhadrons/{ => preprocessing}/remove_high_pt.py (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/compare_fractions.py (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_bdt_cuts.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_compare_fractions.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_crosssec_run2_run3.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_fitting.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_mult_ratios.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_pt_weight.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_run2.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_run3.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_run3_run2.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_run3d0.json (100%) rename machine_learning_hep/scripts-dhadrons/{ => systematics}/config_spans_cutvar.json (100%) rename 
machine_learning_hep/scripts-dhadrons/{ => systematics}/config_track_tuner.json (100%) diff --git a/machine_learning_hep/scripts-dhadrons/check_parquet.py b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py similarity index 86% rename from machine_learning_hep/scripts-dhadrons/check_parquet.py rename to machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py index c2510128ca..4bac36278d 100644 --- a/machine_learning_hep/scripts-dhadrons/check_parquet.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py @@ -3,6 +3,14 @@ import pandas as pd import matplotlib.pyplot as plt +""" + +file: check_parquet.py +brief: Examples of different checks on any parquet file produced by the MLHEP preprocessing steps. +usage: python check_parquet.py AnalysisResultsReco_fPt1_2.parquet +author: Maja Karwowska , Warsaw University of Technology +""" + def plot_parquet(df): print(df["fY"]) print(df["fY"][~np.isinf(df["fY"])]) diff --git a/machine_learning_hep/scripts-dhadrons/compare_prompt_fraction.py b/machine_learning_hep/scripts-dhadrons/debugging/compare_prompt_fraction.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/compare_prompt_fraction.py rename to machine_learning_hep/scripts-dhadrons/debugging/compare_prompt_fraction.py diff --git a/machine_learning_hep/scripts-dhadrons/config_fractions.json b/machine_learning_hep/scripts-dhadrons/debugging/config_fractions.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_fractions.json rename to machine_learning_hep/scripts-dhadrons/debugging/config_fractions.json diff --git a/machine_learning_hep/scripts-dhadrons/config_graph_frac_run3.json b/machine_learning_hep/scripts-dhadrons/debugging/config_graph_frac_run3.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_graph_frac_run3.json rename to machine_learning_hep/scripts-dhadrons/debugging/config_graph_frac_run3.json diff --git 
a/machine_learning_hep/scripts-dhadrons/plot_graph.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_graph.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/plot_graph.py rename to machine_learning_hep/scripts-dhadrons/debugging/plot_graph.py diff --git a/machine_learning_hep/scripts-dhadrons/gather-inputs-fdd.sh b/machine_learning_hep/scripts-dhadrons/gather-inputs-cutvar.sh similarity index 100% rename from machine_learning_hep/scripts-dhadrons/gather-inputs-fdd.sh rename to machine_learning_hep/scripts-dhadrons/gather-inputs-cutvar.sh diff --git a/machine_learning_hep/scripts-dhadrons/merge-fdd-inputs-sept-approvals.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-fdd-inputs-sept-approvals.sh similarity index 100% rename from machine_learning_hep/scripts-dhadrons/merge-fdd-inputs-sept-approvals.sh rename to machine_learning_hep/scripts-dhadrons/merging/merge-fdd-inputs-sept-approvals.sh diff --git a/machine_learning_hep/scripts-dhadrons/merge-histos.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-histos.sh similarity index 100% rename from machine_learning_hep/scripts-dhadrons/merge-histos.sh rename to machine_learning_hep/scripts-dhadrons/merging/merge-histos.sh diff --git a/machine_learning_hep/scripts-dhadrons/merge-mlhep.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh similarity index 100% rename from machine_learning_hep/scripts-dhadrons/merge-mlhep.sh rename to machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh diff --git a/machine_learning_hep/scripts-dhadrons/merge-yields.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-yields.sh similarity index 100% rename from machine_learning_hep/scripts-dhadrons/merge-yields.sh rename to machine_learning_hep/scripts-dhadrons/merging/merge-yields.sh diff --git a/machine_learning_hep/scripts-dhadrons/merge_fractions.py b/machine_learning_hep/scripts-dhadrons/merging/merge_fractions.py similarity index 100% rename from 
machine_learning_hep/scripts-dhadrons/merge_fractions.py rename to machine_learning_hep/scripts-dhadrons/merging/merge_fractions.py diff --git a/machine_learning_hep/scripts-dhadrons/merge_histomass.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/merge_histomass.py rename to machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py diff --git a/machine_learning_hep/scripts-dhadrons/merge_histos.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/merge_histos.py rename to machine_learning_hep/scripts-dhadrons/merging/merge_histos.py diff --git a/machine_learning_hep/scripts-dhadrons/add_pt_bins.py b/machine_learning_hep/scripts-dhadrons/preprocessing/add_pt_bins.py similarity index 97% rename from machine_learning_hep/scripts-dhadrons/add_pt_bins.py rename to machine_learning_hep/scripts-dhadrons/preprocessing/add_pt_bins.py index 3181d38712..d380d02ced 100644 --- a/machine_learning_hep/scripts-dhadrons/add_pt_bins.py +++ b/machine_learning_hep/scripts-dhadrons/preprocessing/add_pt_bins.py @@ -49,6 +49,7 @@ def main(): hist2.SetBinContent(binn + 1, hist.GetBinContent(binn + 1 - empty_bins)) hist2.SetBinError(binn + 1, hist.GetBinError(binn + 1 - empty_bins)) print(f"Setting bin {binn + 1} low edge {hist2.GetBinLowEdge(binn + 1)} up edge {hist2.GetXaxis().GetBinUpEdge(binn + 1)} content to content from bin {binn + 1 - empty_bins}: {hist2.GetBinContent(binn + 1)}") + # Formula for merging 2 bins. For example, to compare with less granular Run 2 results. 
#last_bin = hist2.GetNbinsX() #width_combined = hist.GetBinWidth(hist.GetNbinsX() -1) + hist.GetBinWidth(hist.GetNbinsX()) #hist2.SetBinContent(last_bin, diff --git a/machine_learning_hep/scripts-dhadrons/modify_input_run2.py b/machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run2.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/modify_input_run2.py rename to machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run2.py diff --git a/machine_learning_hep/scripts-dhadrons/modify_input_run3.py b/machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run3.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/modify_input_run3.py rename to machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run3.py diff --git a/machine_learning_hep/scripts-dhadrons/remove_high_pt.py b/machine_learning_hep/scripts-dhadrons/preprocessing/remove_high_pt.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/remove_high_pt.py rename to machine_learning_hep/scripts-dhadrons/preprocessing/remove_high_pt.py diff --git a/machine_learning_hep/scripts-dhadrons/compare_fractions.py b/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/compare_fractions.py rename to machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py diff --git a/machine_learning_hep/scripts-dhadrons/config_bdt_cuts.json b/machine_learning_hep/scripts-dhadrons/systematics/config_bdt_cuts.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_bdt_cuts.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_bdt_cuts.json diff --git a/machine_learning_hep/scripts-dhadrons/config_compare_fractions.json b/machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json similarity index 100% rename from 
machine_learning_hep/scripts-dhadrons/config_compare_fractions.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json diff --git a/machine_learning_hep/scripts-dhadrons/config_crosssec_run2_run3.json b/machine_learning_hep/scripts-dhadrons/systematics/config_crosssec_run2_run3.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_crosssec_run2_run3.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_crosssec_run2_run3.json diff --git a/machine_learning_hep/scripts-dhadrons/config_fitting.json b/machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_fitting.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json diff --git a/machine_learning_hep/scripts-dhadrons/config_mult_ratios.json b/machine_learning_hep/scripts-dhadrons/systematics/config_mult_ratios.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_mult_ratios.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_mult_ratios.json diff --git a/machine_learning_hep/scripts-dhadrons/config_pt_weight.json b/machine_learning_hep/scripts-dhadrons/systematics/config_pt_weight.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_pt_weight.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_pt_weight.json diff --git a/machine_learning_hep/scripts-dhadrons/config_run2.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run2.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_run2.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_run2.json diff --git a/machine_learning_hep/scripts-dhadrons/config_run3.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3.json similarity index 100% rename from 
machine_learning_hep/scripts-dhadrons/config_run3.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_run3.json diff --git a/machine_learning_hep/scripts-dhadrons/config_run3_run2.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_run3_run2.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json diff --git a/machine_learning_hep/scripts-dhadrons/config_run3d0.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_run3d0.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json diff --git a/machine_learning_hep/scripts-dhadrons/config_spans_cutvar.json b/machine_learning_hep/scripts-dhadrons/systematics/config_spans_cutvar.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_spans_cutvar.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_spans_cutvar.json diff --git a/machine_learning_hep/scripts-dhadrons/config_track_tuner.json b/machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json similarity index 100% rename from machine_learning_hep/scripts-dhadrons/config_track_tuner.json rename to machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json From 5b0dfc7eb103743e6197dcb0fa6cd5563d719981 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 15:29:36 +0200 Subject: [PATCH 12/34] Cleanup and descriptions in the preprocessing dir --- .../{modify_input_run2.py => modify_crosssec_run2.py} | 7 ++++--- .../{modify_input_run3.py => modify_crosssec_run3.py} | 4 ++-- .../scripts-dhadrons/preprocessing/remove_high_pt.py | 4 ++++ 3 files changed, 10 insertions(+), 5 deletions(-) rename machine_learning_hep/scripts-dhadrons/preprocessing/{modify_input_run2.py => 
modify_crosssec_run2.py} (94%) rename machine_learning_hep/scripts-dhadrons/preprocessing/{modify_input_run3.py => modify_crosssec_run3.py} (92%) diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run2.py b/machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run2.py similarity index 94% rename from machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run2.py rename to machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run2.py index 8e2214ab4c..e742263049 100644 --- a/machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run2.py +++ b/machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run2.py @@ -1,8 +1,8 @@ # pylint: disable=missing-function-docstring """ file: modify_input.py -brief: Perform adjustments on input histogram. -usage: python3 modify_input.py file.root my_histo file_out.root +brief: Perform adjustments on the input Run 2 cross section histogram. +usage: python3 modify_crosssec_run2.py file.root my_histo out_histo file_out.root author: Maja Karwowska , Warsaw University of Technology """ @@ -38,7 +38,8 @@ def main(): #hist.Scale(0.000000001) hist.Scale(1./BR) hist2 = TH1F(args.outhistname, "", len(OUTPUT_BINS) - 1, array('d', OUTPUT_BINS)) - merge_bins = [20] # dummy number so as not to merge [7, 9] + merge_bins = [20] # dummy large number so as not to merge + # merge_bins = [7, 9] # indices of bins to merge ind = 0 for binn in range(1, hist.GetNbinsX() + 1): print(f"Old hist bin {binn} low edge {hist.GetBinLowEdge(binn)} "\ diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run3.py b/machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run3.py similarity index 92% rename from machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run3.py rename to machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run3.py index c4fedac595..b4d2f2fe15 100644 --- 
a/machine_learning_hep/scripts-dhadrons/preprocessing/modify_input_run3.py +++ b/machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run3.py @@ -1,8 +1,8 @@ # pylint: disable=missing-function-docstring """ file: modify_input.py -brief: Perform adjustments on input histogram. -usage: python3 modify_input.py file.root my_histo file_out.root +brief: Perform adjustments on the input Run 3 cross section histogram. +usage: python3 modify_crosssec_run3.py file.root my_histo out_histo file_out.root author: Maja Karwowska , Warsaw University of Technology """ diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/remove_high_pt.py b/machine_learning_hep/scripts-dhadrons/preprocessing/remove_high_pt.py index 3a71c36f40..e195f78df9 100644 --- a/machine_learning_hep/scripts-dhadrons/preprocessing/remove_high_pt.py +++ b/machine_learning_hep/scripts-dhadrons/preprocessing/remove_high_pt.py @@ -1,5 +1,9 @@ # pylint: disable=missing-function-docstring """ +file: remove_high_pt.py +brief: Remove bins with pT > max_pt in all histograms matching my_histos_pattern in the input file.root. 
+usage: python3 remove_high_pt.py file.root my_histos_pattern file_out.root max_pt +author: Maja Karwowska , Warsaw University of Technology """ import argparse From 4b6b3f4f1e524f68ac7f708be241e8b6f4baedb5 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 15:46:30 +0200 Subject: [PATCH 13/34] Cleanup and descriptions in the debugging dir --- ...un3.json => config_fraction_vs_crosssec_configs.json} | 9 ++++++++- ...ig_fractions.json => config_fraction_vs_fd_cuts.json} | 7 ++++++- ...ph.py => plot_prompt_fraction_vs_crosssec_configs.py} | 7 +++++++ ...pt_fraction.py => plot_prompt_fraction_vs_fd_cuts.py} | 7 +++++++ 4 files changed, 28 insertions(+), 2 deletions(-) rename machine_learning_hep/scripts-dhadrons/debugging/{config_graph_frac_run3.json => config_fraction_vs_crosssec_configs.json} (70%) rename machine_learning_hep/scripts-dhadrons/debugging/{config_fractions.json => config_fraction_vs_fd_cuts.json} (60%) rename machine_learning_hep/scripts-dhadrons/debugging/{plot_graph.py => plot_prompt_fraction_vs_crosssec_configs.py} (89%) rename machine_learning_hep/scripts-dhadrons/debugging/{compare_prompt_fraction.py => plot_prompt_fraction_vs_fd_cuts.py} (88%) diff --git a/machine_learning_hep/scripts-dhadrons/debugging/config_graph_frac_run3.json b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json similarity index 70% rename from machine_learning_hep/scripts-dhadrons/debugging/config_graph_frac_run3.json rename to machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json index bf1b708e9a..df0ea17a11 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/config_graph_frac_run3.json +++ b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json @@ -1,7 +1,10 @@ { "inputdir": "/data8/majak/crosssec/202502/", + "_inputdir": "directory with input files", "histoname": "gfraction", + "_histoname": "the prompt fraction histogram 
name", "default": "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV", + "_default": "label of the baseline case", "hists": { "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { "file": [ @@ -19,11 +22,15 @@ ] } }, + "_hists": "dictionary of input files and the corresponding labels of histograms to plot", "y_axis": "#Lambda_{c} prompt fraction", "legend": [0.25, 0.18, 0.70, 0.38], + "_legend": "position of the legend on the main plot", "legend_ratio": [0.40, 0.10, 0.90, 0.35], + "_legend_ratio": "position of the legend on the ratio plot", "output": { "outdir": "/data8/majak/crosssec/202502/", "file": "graph_frac_Lc_run3_Nb" - } + }, + "_output": "output directory and file name pattern" } diff --git a/machine_learning_hep/scripts-dhadrons/debugging/config_fractions.json b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_fd_cuts.json similarity index 60% rename from machine_learning_hep/scripts-dhadrons/debugging/config_fractions.json rename to machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_fd_cuts.json index cb16754988..df4c768e0d 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/config_fractions.json +++ b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_fd_cuts.json @@ -1,11 +1,16 @@ { "file_pattern": "/data8/majak/MLHEP/results-24022025-newtrain_fd_0.[0-9][0-9]0/LHC23pp_pass4/Results/resultsdatatot/finalcrossLcpKpiRun3analysis.root", + "_file_pattern": "glob pattern to all files with different non-prompt cuts", "dir_pattern": "results-24022025-newtrain_fd_0.[0-9][0-9]0", + "_dir_pattern": "the base directory prefix from the file pattern above", "histoname": "gfraction", + "_histoname": "the prompt fraction histogram name", "pt_bins_min": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16], "pt_bins_max": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24], "x_axis": "non-prompt cut", "y_axis": "#Lambda_{c} prompt fraction", "outdir": ".", - "outfile": 
"fraction" + "_outdir": "output directory", + "outfile": "fraction", + "_outfile": "output file pattern" } diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_graph.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py similarity index 89% rename from machine_learning_hep/scripts-dhadrons/debugging/plot_graph.py rename to machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py index 72c142d330..a976506964 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/plot_graph.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py @@ -1,3 +1,10 @@ +""" +file: plot_prompt_fraction_vs_crosssec_configs.py +brief: Plot prompt fraction for diffent cross section calculations (e.g., different methods or inputs). +usage: python3 plot_prompt_fraction_vs_crosssec_configs.py config_fraction_vs_crosssec_configs.json +author: Maja Karwowska , Warsaw University of Technology +""" + import argparse import json import os diff --git a/machine_learning_hep/scripts-dhadrons/debugging/compare_prompt_fraction.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_fd_cuts.py similarity index 88% rename from machine_learning_hep/scripts-dhadrons/debugging/compare_prompt_fraction.py rename to machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_fd_cuts.py index 3f4962e71f..56871886f8 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/compare_prompt_fraction.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_fd_cuts.py @@ -1,3 +1,10 @@ +""" +file: plot_prompt_fraction_vs_fd_cuts.py +brief: Plot prompt fraction from cross section calculations for different non-prompt cuts +usage: python3 plot_prompt_fraction_vs_fd_cuts.py config_fraction_vs_fd_cuts.json +author: Maja Karwowska , Warsaw University of Technology +""" + import argparse import glob import json From 
a61863ebef1f04afa9b735a09af7e99871771d14 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 17 Jul 2025 15:58:37 +0200 Subject: [PATCH 14/34] Described the run- and gather- scripts --- .../scripts-dhadrons/gather-inputs-cutvar.sh | 7 +++-- .../scripts-dhadrons/run-fdd-batch.sh | 31 ++++++++++++++----- .../{run-lc.sh => run-mlhep.sh} | 3 ++ 3 files changed, 32 insertions(+), 9 deletions(-) rename machine_learning_hep/scripts-dhadrons/{run-lc.sh => run-mlhep.sh} (68%) diff --git a/machine_learning_hep/scripts-dhadrons/gather-inputs-cutvar.sh b/machine_learning_hep/scripts-dhadrons/gather-inputs-cutvar.sh index a9aedc0e56..249d5b3286 100755 --- a/machine_learning_hep/scripts-dhadrons/gather-inputs-cutvar.sh +++ b/machine_learning_hep/scripts-dhadrons/gather-inputs-cutvar.sh @@ -1,5 +1,8 @@ #!/bin/bash +# Gather MLHEP efficiencies and mass fits for all chosen non-prompt cuts into a single directory. +# The cut variation script requires files from a single directory. + MLHEP_DIR="/data8/majak/MLHEP" OUTPUT_DIR="${MLHEP_DIR}/input-fd-012025" @@ -12,6 +15,6 @@ for dir in "${RESDIR_PATTERN}${PERM_PATTERN}"0.[0-9][0-9][0-9]* ; do cp "${dir}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ "${OUTPUT_DIR}/efficienciesLcpKpiRun3analysis_${suffix}.root" - #cp "${dir}/LHC23pp_pass4/Results/resultsdatatot/yields_LcpKpi_Run3analysis.root" \ - # "${OUTPUT_DIR}/yieldsLcpKpiRun3analysis-${suffix}-fixed-sigma.root" + cp "${dir}/LHC23pp_pass4/Results/resultsdatatot/yields_LcpKpi_Run3analysis.root" \ + "${OUTPUT_DIR}/yieldsLcpKpiRun3analysis-${suffix}" done diff --git a/machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh b/machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh index b7e9e88846..97b7a68c6d 100755 --- a/machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh +++ b/machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh @@ -1,29 +1,42 @@ #!/bin/bash -source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" +# Run MLHEP in batch 
for various non-prompt cuts +# You need a MLHEP database with %resdir%, %bkg...%, and %fd...% placeholders. +source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" + +# Base database. DATABASE="database_ml_parameters_LcToPKPi_multiclass_fdd" DATABASE_EXT="${DATABASE}.yml" DATABASE_PATH="${WORKDIR}/data/data_run3/${DATABASE_EXT}" + +# Output base directory to store all output subdirectories. +RESDIR="/data8/majak/MLHEP" + +# Prefix of the output directories names. #RESDIR_PATTERN="results-24022025-prompt" RESDIR_PATTERN="results-24022025-newtrain-ptshape-prompt" +# Bkg cut. You can rewrite this to have different cuts in different pT bins. bkg=0.00 + +# Loop over all non-prompt cuts. for fd in $(seq 0.000 0.005 0.000) ; do echo "fd ${fd}" - #suffix="fd_${fd}" - suffix="" - RESDIR="${RESDIR_PATTERN}${suffix}" - RESPATH="/data8/majak/MLHEP/${RESDIR}/" + # Variable suffix to append to the output directory name. + suffix="fd_${fd}" - #rm -rf "${RESPATH}" + RESPATH="${RESDIR}/${RESDIR_PATTERN}${suffix}" CUR_DB="${DATABASE}_edit_fd${fd}.yml" cp "${DATABASE_PATH}" "${CUR_DB}" || ErrExit "Could not copy database" + # Adjust the output directory sed -i "s/%resdir%/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" + + # Set bkg BDT cuts sed -i "s/%bkg01%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" sed -i "s/%bkg12%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" sed -i "s/%bkg23%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" @@ -36,7 +49,9 @@ for fd in $(seq 0.000 0.005 0.000) ; do sed -i "s/%bkg1012%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" sed -i "s/%bkg1216%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" sed -i "s/%bkg1624%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd01%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + + # Set non-prompt BDT cuts + sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || 
ErrExit "Could not edit database" sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" @@ -49,6 +64,8 @@ for fd in $(seq 0.000 0.005 0.000) ; do sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" + # `yes` is a program that says `y` to all interactive console prompts. + # In this way, we skip all MLHEP questions about deleting old results. yes | mlhep --log-file "logfile_${suffix}.log" \ -a Run3analysis \ --run-config submission/analyzer.yml \ diff --git a/machine_learning_hep/scripts-dhadrons/run-lc.sh b/machine_learning_hep/scripts-dhadrons/run-mlhep.sh similarity index 68% rename from machine_learning_hep/scripts-dhadrons/run-lc.sh rename to machine_learning_hep/scripts-dhadrons/run-mlhep.sh index f7c2e19e3e..f083791c62 100755 --- a/machine_learning_hep/scripts-dhadrons/run-lc.sh +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep.sh @@ -1,5 +1,8 @@ #!/bin/bash +# Shortcut to run MLHEP +# Usage: ./run-mlhep.sh database_Lc.yml submission/analysis.yml logfile.log + if [ "$#" -ne 3 ]; then echo "Wrong number of parameters" exit 1 From 3fdf235c90051c8915729025337998630938df43 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 12:40:00 +0200 Subject: [PATCH 15/34] Described the debugging scripts --- .../scripts-dhadrons/debugging/README.md | 27 ++++++++++++ .../debugging/check_parquet.py | 42 +++++++------------ ....json => config_fraction_vs_bdt_cuts.json} | 4 +- .../config_fraction_vs_crosssec_configs.json | 2 +- ...py => plot_prompt_fraction_vs_bdt_cuts.py} | 0 5 files changed, 45 insertions(+), 30 deletions(-) create mode 100644 machine_learning_hep/scripts-dhadrons/debugging/README.md rename machine_learning_hep/scripts-dhadrons/debugging/{config_fraction_vs_fd_cuts.json 
=> config_fraction_vs_bdt_cuts.json} (81%) rename machine_learning_hep/scripts-dhadrons/debugging/{plot_prompt_fraction_vs_fd_cuts.py => plot_prompt_fraction_vs_bdt_cuts.py} (100%) diff --git a/machine_learning_hep/scripts-dhadrons/debugging/README.md b/machine_learning_hep/scripts-dhadrons/debugging/README.md new file mode 100644 index 0000000000..219203625b --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/debugging/README.md @@ -0,0 +1,27 @@ +# Debugging some issues + +## Check MLHEP output data files + +File: `check_parquet.py`
+Usage: `python check_parquet.py in_file.parquet` + +The Python script contains some examples of how to read from a parquet file, print some useful information, and plot histograms. + +It can be used to check MLHEP skimming, training, and application outputs by testing individual parquet files. + +## Compare prompt fractions calculated with different inputs or methods + +Files: `plot_prompt_fraction_vs_crosssec_configs.py`, `config_fraction_vs_crosssec_configs.json`
+Usage: `python plot_prompt_fraction_vs_crosssec_configs.py config_fraction_vs_crosssec_configs.json` + +Adjust the JSON config. You can provide as many histogram files in the `hists` dictionary as you want. +By adjusting `histoname`, you can also plot the non-prompt fraction. + +## Plot prompt fraction vs different BDT cuts + +Files: `plot_prompt_fraction_vs_bdt_cuts.py`, `config_fraction_vs_bdt_cuts.json`
+Usage: `python plot_prompt_fraction_vs_bdt_cuts.py config_fraction_vs_bdt_cuts.json` + +Adjust the JSON config. Here, you provide a glob pattern to all files of interest. +By adjusting `histoname`, you can also plot the non-prompt fraction. + diff --git a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py index 4bac36278d..ed8256eb75 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py @@ -19,7 +19,6 @@ def plot_parquet(df): fig = plt.figure(figsize=(20, 15)) ax = plt.subplot(1, 1, 1) - #ax.set_xlim([0, (df["fY"].mean()*2)]) plt.hist(ds_fin.values, bins=50) ax.set_xlabel("fY", fontsize=30) ax.set_ylabel("Entries", fontsize=30) @@ -33,38 +32,27 @@ def main(): df = pd.read_parquet(args.infile) print(f"df columns: {df.columns}") - #print(f"full df:\n{df}") print(df.size) - # 1-2: 36715937 - # sum from data: 1615501228 - # 2-3: 45167231 - # 3-4: 71973551 - # 4-5: 34874429 - # 5-6: - # 6-7: - # 7-8: - # 8-10: - # 10-12: - # 12-24: + print(f"df mean\n{df.mean()}") - #print(f"df mean\n{df.mean()}") + print(f"df[0]\n{df.iloc[0]}") - #print(f"df[0]\n{df.iloc[0]}") + plot_parquet(df) - #df_sel = df[df["y_test_probxgboostbkg"] > 1.0] - #print(f"sel df bkg:\n{df_sel}") - #df_sel = df[df["y_test_probxgboostnon_prompt"] < 0.00] - #print(f"sel df non-prompt:\n{df_sel}") - #df_sel = df[df["y_test_probxgboostprompt"] < 0.00] - #print(f"sel df prompt:\n{df_sel}") + df_sel = df[df["y_test_probxgboostbkg"] > 1.0] + print(f"sel df bkg:\n{df_sel}") + df_sel = df[df["y_test_probxgboostnon_prompt"] < 0.00] + print(f"sel df non-prompt:\n{df_sel}") + df_sel = df[df["y_test_probxgboostprompt"] < 0.00] + print(f"sel df prompt:\n{df_sel}") - print(f'ML columns:\n{df["fMlBkgScore"]}\n{df["fMlPromptScore"]}\n{df["fMlNonPromptScore"]}') - df_sel = df[df["fMlBkgScore"] > 1.0] - print(f'df sel ML
bkg:\n{df_sel["fMlBkgScore"]}') - df_sel = df[df["fMlNonPromptScore"] < 0.0] - print(f'df sel ML non-prompt:\n{df_sel["fMlNonPromptScore"]}') - #print(f'df sel ML columns:\n{df_sel["fMlBkgScore"]}\n{df_sel["fMlNonPromptScore"]}') + # Valid only for data with saved results of ML application on Hyperloop + #print(f'ML columns:\n{df["fMlBkgScore"]}\n{df["fMlPromptScore"]}\n{df["fMlNonPromptScore"]}') + #df_sel = df[df["fMlBkgScore"] > 1.0] + #print(f'df sel ML bkg:\n{df_sel["fMlBkgScore"]}') + #df_sel = df[df["fMlNonPromptScore"] < 0.0] + #print(f'df sel ML non-prompt:\n{df_sel["fMlNonPromptScore"]}') if __name__ == '__main__': diff --git a/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_fd_cuts.json b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json similarity index 81% rename from machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_fd_cuts.json rename to machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json index df4c768e0d..d2895b7fab 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_fd_cuts.json +++ b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json @@ -1,6 +1,6 @@ { "file_pattern": "/data8/majak/MLHEP/results-24022025-newtrain_fd_0.[0-9][0-9]0/LHC23pp_pass4/Results/resultsdatatot/finalcrossLcpKpiRun3analysis.root", - "_file_pattern": "glob pattern to all files with different non-prompt cuts", + "_file_pattern": "glob pattern to all files with different BDT cuts", "dir_pattern": "results-24022025-newtrain_fd_0.[0-9][0-9]0", "_dir_pattern": "the base directory prefix from the file pattern above", "histoname": "gfraction", @@ -12,5 +12,5 @@ "outdir": ".", "_outdir": "output directory", "outfile": "fraction", - "_outfile": "output file pattern" + "_outfile": "output file pattern; pdf/png/root suffixes are appended" } diff --git 
a/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json index df0ea17a11..ee7e1e5071 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json +++ b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json @@ -32,5 +32,5 @@ "outdir": "/data8/majak/crosssec/202502/", "file": "graph_frac_Lc_run3_Nb" }, - "_output": "output directory and file name pattern" + "_output": "output directory and file name pattern; pdf/png/root suffixes are appended" } diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_fd_cuts.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_fd_cuts.py rename to machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py From c7a1b78299af8b31e2ee6b6ec219e2aaea696935 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 14:47:38 +0200 Subject: [PATCH 16/34] Described merge scripts --- .../scripts-dhadrons/merging/README.md | 32 +++++++++ .../{merge-histos.sh => merge-cutvar.sh} | 0 .../merge-fdd-inputs-sept-approvals.sh | 67 ------------------ .../merging/merge_fractions.py | 68 ------------------- .../merging/merge_histomass.py | 6 +- .../scripts-dhadrons/merging/merge_histos.py | 6 +- 6 files changed, 38 insertions(+), 141 deletions(-) create mode 100644 machine_learning_hep/scripts-dhadrons/merging/README.md rename machine_learning_hep/scripts-dhadrons/merging/{merge-histos.sh => merge-cutvar.sh} (100%) delete mode 100755 machine_learning_hep/scripts-dhadrons/merging/merge-fdd-inputs-sept-approvals.sh delete mode 100644 machine_learning_hep/scripts-dhadrons/merging/merge_fractions.py diff --git 
a/machine_learning_hep/scripts-dhadrons/merging/README.md b/machine_learning_hep/scripts-dhadrons/merging/README.md new file mode 100644 index 0000000000..5497939c9c --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merging/README.md @@ -0,0 +1,32 @@ +# Merging histograms and files + +## Merge multiple histograms from multiple input files + +Files: `merge_histos.py`, `merge-cutvar.sh`, `merge-yields.sh` +Usage: `python merge_histos.py -o out_file.root -n histName1 -n histName2 -i in_file1.root -i in_file2.root` + +You can provide as many histogram names as you want. All histograms should be 1-dimensional and have the same x-axis. If no histogram name is provided, the script will merge all 1-dimensional histograms from the input files. + +Provide one input file per x-axis bin. File names can be repeated. + +Merge histograms `histName1` and `histName2` from the input files and save them in the output file. For each histogram name provided, e.g., `histName1`, "merging" means creation of a single output histogram with bin 1 content set to the content of bin 1 in `histName1` in `in_file1.root`, bin 2 content set to the content of bin 2 in `histName1` in `in_file2.root`, and so on. In particular, the x-axis can represent pT, and the script can be used to merge results obtained for different pT bins. + +The bash files `merge-cutvar.sh` and `merge-yields.sh` provide examples of using this Python script for merging cut variation results and O2Physics D2H fitter results, respectively. + +## Merge the outputs of the MLHEP histomass step + +Files: `merge_histomass.py`, `merge-mlhep.sh` +Usage: `python merge_histomass.py -o out_file.root -n histName1 -n histName2 -i in_file1.root -i in_file2.root` + +This script is different from the previous one as it is adjusted to the layout of MLHEP `masshisto.root` files, which contain 1 invariant mass histogram per pT bin. + +Histogram names `histName1`, `histName2` are treated as patterns (substrings) of histograms to merge.
For `masshisto.root` files, the pattern can be `hmassfPt`, which matches all histograms like `hmassfPt0_1_0.010.000.000`, `hmassfPt1_2_0.020.400.000`, and so on. + +You can provide as many histogram name patterns as you want. +Provide one input file per pT bin. Each file should contain one matching histogram per pT bin. File names can be repeated. + +The merging creates a single output file with histogram for the 1st pT bin from `in_file1.root`, histogram for the 2nd pT bin from `in_file2.root`, and so on. + +`merge-mlhep.sh` is an example that uses `merge_histomass.py` to obtain a single invariant mass file for the O2Physics D2H mass fitter. The script makes also use of `merge_histos.py` to get a single efficiencies file to be used in the cut variation macro. + + diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge-histos.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-cutvar.sh similarity index 100% rename from machine_learning_hep/scripts-dhadrons/merging/merge-histos.sh rename to machine_learning_hep/scripts-dhadrons/merging/merge-cutvar.sh diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge-fdd-inputs-sept-approvals.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-fdd-inputs-sept-approvals.sh deleted file mode 100755 index b8c9d191a7..0000000000 --- a/machine_learning_hep/scripts-dhadrons/merging/merge-fdd-inputs-sept-approvals.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash - -FD_12=(0.00 0.21 0.24 0.27 0.30 0.33 0.35 0.37 0.39 0.41 0.44 0.46 0.48 0.50 0.52 0.54 0.56 0.58) -FD_12_OLD=(0.00 0.21 0.24 0.27 0.30 0.33 0.35 0.37 0.39 0.41 0.44 0.46 0.48 0.50 0.52 0.54 0.55 0.58) - -DIR_12="/data8/majak/MLHEP/input-fd-23082024" -PTRN_12=("${DIR_12}/yields-bkg_0.20_0.60_fd_" "${DIR_12}/efficienciesLcpKpiRun3analysis_pt-weight_bkg_0.20_0.60_fd_") -SUFFIX_12=("-rebin-1-fixed-sigma.root" ".root") -DIR_212="/data8/majak/MLHEP/input-fd-23082024" -PTRN_212=("${DIR_212}/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_"
"${DIR_212}/efficienciesLcpKpiRun3analysis_fd_precise_rebin4_bkg_0.20_0.60_fd_") -SUFFIX_212=("-fixed-sigma.root" ".root") -DIR_1224="/data8/majak/MLHEP/input-fd-10092024" -PTRN_1224=("${DIR_1224}/yields-fd_precise_1224_split_bkg_0.60_0.60_fd_" "${DIR_1224}/efficienciesLcpKpiRun3analysis_1224_split_bkg_0.60_0.60_fd_") -SUFFIX_1224=("-fixed-sigma.root" ".root") - -OUTFILE_PTRN=("merged_yields_fdd_approvals_fd_" "merged_eff_fdd_approvals_fd_") - -for k in "${!PTRN_12[@]}" ; do - echo "k ${k}" - echo "PTRN_12: ${PTRN_12}" - echo "PTRN_12[k]: ${PTRN_12[k]}" - echo "PTRN_212[k]: ${PTRN_212[k]}" - echo "PTRN_1224[k]: ${PTRN_1224[k]}" - - for i in "${!FD_12[@]}" ; do - INPUT_12=${PTRN_12[k]}${FD_12[i]}${SUFFIX_12[k]} - INPUT_212=${PTRN_212[k]}${FD_12_OLD[i]}*[0-9][0-9]${SUFFIX_212[k]} - - # dummy loop to get shell expansion in INPUT_1224 - for f in ${PTRN_1224[k]}${FD_12_OLD[i]}*[0-9][0-9]${SUFFIX_1224[k]} ; do - INPUT_1224=${f} - suffix=${INPUT_1224[0]##${PTRN_1224[k]}} - suffix=${suffix%%${SUFFIX_1224[k]}} - OUTFILE=${OUTFILE_PTRN[k]}${suffix}.root - - echo "i ${i} k ${k}" - echo "INPUT_12: ${INPUT_12}" - echo "INPUT_212: " ${INPUT_212} - echo "INPUT_1224: " ${INPUT_1224} - echo "suffix: " ${suffix} - echo "outfile: " ${OUTFILE} - - python merge_histos.py -o /data8/majak/crosssec/${OUTFILE} \ - -i ${INPUT_12} \ - -i ${INPUT_212} \ - -i ${INPUT_212} \ - -i ${INPUT_212} \ - -i ${INPUT_212} \ - -i ${INPUT_212} \ - -i ${INPUT_212} \ - -i ${INPUT_1224} \ - -i ${INPUT_1224} - done - done -done - -# Merge yields and efficiencies for repeating September cut variation -#python merge_histos.py -o /data8/majak/crosssec/merged_yields_fdd_approvals_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.17_0.09.root \ -# -i /data8/majak/MLHEP/input-fd-23082024/yields-bkg_0.20_0.60_fd_0.21-rebin-1-fixed-sigma.root -# -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \ -# -i 
/data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \ -# -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \ -# -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \ -# -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \ -# -i /data8/majak/MLHEP/input-fd-23082024/yields-fd_precise_rebin4_bkg_0.20_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.08-fixed-sigma.root \ -# -i /data8/majak/MLHEP/input-fd-10092024/yields-fd_precise_1224_split_bkg_0.60_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.17_0.09-fixed-sigma.root \ -# -i /data8/majak/MLHEP/input-fd-10092024/yields-fd_precise_1224_split_bkg_0.60_0.60_fd_0.21_0.20_0.26_0.17_0.10_0.15_0.08_0.17_0.09-fixed-sigma.root diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_fractions.py b/machine_learning_hep/scripts-dhadrons/merging/merge_fractions.py deleted file mode 100644 index fe5b4527c1..0000000000 --- a/machine_learning_hep/scripts-dhadrons/merging/merge_fractions.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python -""" -file: merge_fractions.py -brief: Merge points from non-prompt fraction plots from different cutvar scans into a single plot -usage: ./merge_fractions.py my-plot.png file1.root file2.root file3.root -author: Maja Karwowska , CERN / Warsaw University of Technology -""" - -import argparse - -from ROOT import ( # pylint: disable=import-error,no-name-in-module - TCanvas, - TFile, - TH1, - gROOT, - kOrange -) - -HISTNAME = "hCorrFracNonPrompt" - -def main(): - """ - Main function. 
- """ - gROOT.SetBatch(True) - - parser = argparse.ArgumentParser(description="Arguments to pass") - parser.add_argument("outname", help="output filename") - parser.add_argument("oldname", help="old results filename") - parser.add_argument("files", nargs='+', help="input ROOT files") - args = parser.parse_args() - - canv = TCanvas(f"c_{HISTNAME}", "") - canv.SetCanvasSize(800, 600) - - rfile = TFile(args.files[0]) - hist = rfile.Get(HISTNAME) - reshist = hist.Clone() - - for ind, file in enumerate(args.files): - rfile2 = TFile(file) - hist = rfile2.Get(HISTNAME) - print(f"{ind + 1} bin content {hist.GetBinContent(ind + 1)}") - reshist.SetBinContent(ind + 1, hist.GetBinContent(ind + 1)) - reshist.SetBinError(ind + 1, hist.GetBinError(ind + 1)) - for ind in range(2): - reshist.SetBinContent(ind + 1 + len(args.files), 0.0) - reshist.SetBinError(ind + 1 + len(args.files), 0.0) - - reshist.SetMaximum(0.25) - reshist.SetMinimum(0.0) - reshist.Draw() - - oldfile = TFile(args.oldname) - oldhistt = oldfile.Get(HISTNAME) - oldhist = oldhistt.Clone() - oldhist.SetMarkerColor(kOrange) - oldhist.SetLineColor(kOrange) - for ind in range(2): - oldhist.SetBinContent(oldhist.GetNbinsX() - ind, 0.0) - oldhist.SetBinError(oldhist.GetNbinsX() - ind, 0.0) - oldhist.Draw("same") - - canv.SaveAs(args.outname) - - -if __name__ == "__main__": - main() diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py index b8513cb62a..a29569f46c 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py +++ b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py @@ -1,6 +1,6 @@ """ -Merge MLHEP histomass root files for the PWGHF mass fitter. One file per pt bin. -One histogram per pt bin. +Merge MLHEP histomass root files for the PWGHF mass fitter. +One file per pt bin. Each file contains one histogram per pt bin. 
""" import argparse @@ -16,7 +16,7 @@ def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("-n", "--histname", action="append", type=str, - help="name of histograms to merge") + help="Name pattern of histograms to merge") parser.add_argument("-o", "--outfile", action="append", type=str, help="Output file") parser.add_argument("-i", "--infile", action="append", type=str, help="Input file") args = parser.parse_args() diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py index aaac9574ea..4488d4e48b 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py +++ b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py @@ -1,6 +1,6 @@ """ -Merge histograms from different ROOT files. One file per pt bin. -A single histogram contains all pt bins. +Merge histograms from different ROOT files. One file per x-axis bin. +A single histogram contains all x-axis bins. 
""" import argparse @@ -15,7 +15,7 @@ def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("-n", "--histname", action="append", type=str, - help="name of histograms to merge") + help="Name of histograms to merge") parser.add_argument("-o", "--outfile", action="append", type=str, help="Output file") parser.add_argument("-i", "--infile", action="append", type=str, help="Input file") args = parser.parse_args() From fa746f4504fc11b607a7d023262e8f10dcb75ffe Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 15:07:02 +0200 Subject: [PATCH 17/34] Describe preprocessing scripts --- .../scripts-dhadrons/preprocessing/README.md | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 machine_learning_hep/scripts-dhadrons/preprocessing/README.md diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/README.md b/machine_learning_hep/scripts-dhadrons/preprocessing/README.md new file mode 100644 index 0000000000..3e309a4e14 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/preprocessing/README.md @@ -0,0 +1,34 @@ +# Scripts to fix histograms for comparison with Run 2 results + +## Add pT bins to extend the x-axis range on the plots + +File: `add_pt_bins.py` +Usage: `python add_pt_bins.py in_file.root histname out_file.root` + +ROOT does not make it easy to draw a histogram on a plot whose x-axis is wider than the range between the histogram's minimum and maximum bins. + +This script takes the `histname` histogram from `in_file.root`, and creates a new histogram with added bins [0.0, `histname`'s minimum) and [`histname`'s maximum, 24.0). `0` and `24.0` can be changed in the Python code. The new histogram has marker and line styles copied from the old one, and is saved in `out_file.root`. + +You can uncomment lines 53-64 to get a formula for merging 2 bins. You need to adjust the indices of bins to merge. This is useful if you want to compare `histname` against less granular results from elsewhere.
+ +## Restrict the maximum of the x-axis + +File: `remove_high_pt.py` +Usage: `python remove_high_pt.py in_file.root histname out_file.root maxval` + +This script does the opposite of the previous one. +Here, `out_file.root` will contain histograms whose last x-axis bin contains `maxval`. Higher bins are removed.
+`histname` is a pattern (substring) of histogram names. + +## Rescale and merge cross section results + +Files: `modify_crosssec_run2.py`, `modify_crosssec_run3.py` +Usage: `python modify_crosssec_run2.py in_file.root histname out_histname out_file.root` + +The Run 2 script scales `histname` from `in_file.root` by 1./BR and merges bins, whose indices are provided in the script. The output is saved under name `out_histname` in `out_file.root`. + +The Run 3 script only rescales the input histogram and saves the result in `out_histname` in `out_file.root`. + +The lines commented out provide more examples of rescaling. + +For Lc prompt cross section obtained during March 2025 approvals, only the uncommented lines in both files were used. From 3a51b9eeaa4842fe4d52546d2d37e5236f68a287 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 15:07:43 +0200 Subject: [PATCH 18/34] Rename the preprocessing folder to adjusting-run2-run3 --- .../{preprocessing => adjusting-run2-run3}/README.md | 0 .../{preprocessing => adjusting-run2-run3}/add_pt_bins.py | 0 .../modify_crosssec_run2.py | 0 .../modify_crosssec_run3.py | 0 .../{preprocessing => adjusting-run2-run3}/remove_high_pt.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename machine_learning_hep/scripts-dhadrons/{preprocessing => adjusting-run2-run3}/README.md (100%) rename machine_learning_hep/scripts-dhadrons/{preprocessing => adjusting-run2-run3}/add_pt_bins.py (100%) rename machine_learning_hep/scripts-dhadrons/{preprocessing => adjusting-run2-run3}/modify_crosssec_run2.py (100%) rename machine_learning_hep/scripts-dhadrons/{preprocessing => adjusting-run2-run3}/modify_crosssec_run3.py (100%) rename machine_learning_hep/scripts-dhadrons/{preprocessing => adjusting-run2-run3}/remove_high_pt.py (100%) diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/README.md b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md similarity index 100% rename from 
machine_learning_hep/scripts-dhadrons/preprocessing/README.md rename to machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/add_pt_bins.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/preprocessing/add_pt_bins.py rename to machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run2.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run2.py rename to machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run3.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/preprocessing/modify_crosssec_run3.py rename to machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py diff --git a/machine_learning_hep/scripts-dhadrons/preprocessing/remove_high_pt.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py similarity index 100% rename from machine_learning_hep/scripts-dhadrons/preprocessing/remove_high_pt.py rename to machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py From 9698a655be782e338c460b469c75c6fb928cbf1a Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 16:05:56 +0200 Subject: [PATCH 19/34] Describe the compare script --- .../scripts-dhadrons/systematics/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 machine_learning_hep/scripts-dhadrons/systematics/README.md diff --git 
a/machine_learning_hep/scripts-dhadrons/systematics/README.md b/machine_learning_hep/scripts-dhadrons/systematics/README.md new file mode 100644 index 0000000000..0f4269cf9f --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/README.md @@ -0,0 +1,16 @@ +# Obtain various comparison plots, esp. for systematics and final analysis results + +File: `compare_fractions.py` +Usage: `python compare_fractions.py config.json` + +All JSON files in this directory provide various configuration examples for different use cases. + +The script can read both model histograms and analysis result histograms, as specified in the JSON configuration. It plots all histograms on a single plot with different colours, and calculates the systematic errors if none are provided. They are printed in the console. The systematic errors can be provided in the JSON config, and then they are drawn as boxes around the central points. + +The script also draws a separate plot with the ratios of other histograms to the central histogram. The central histogram is the one specified as "default" in the JSON. + +The histogram labels in legend are taken from the dictionary labels in the JSON, which can be specified with the TLatex syntax. + +It is also possible to specify the `y_axis` title and an additional description under the "ALICE Preliminary" header (`alice_text` variable in the config). The header itself and its position can be adjusted in the `get_alice_text` function in the Python script. + +Colors and markers can be adjusted at the beginning of the script.
From 75d3224f9a453231bc20894c8e91c059e0073949 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 16:06:43 +0200 Subject: [PATCH 20/34] Add the JSON config for plotting acc x eff --- .../systematics/config_efficiency.json | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 machine_learning_hep/scripts-dhadrons/systematics/config_efficiency.json diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_efficiency.json b/machine_learning_hep/scripts-dhadrons/systematics/config_efficiency.json new file mode 100644 index 0000000000..ccb6495275 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_efficiency.json @@ -0,0 +1,28 @@ +{ + "inputdir": "/data8/majak/crosssec/032025", + "histoname": "eff", + "default": "Prompt", + "hists": { + "Prompt": { + "file": [ + "eff.root" + ] + }, + "Non-prompt": { + "file": [ + "eff_fd.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,7,8,10,12,16], + "bin_max": [2,3,4,5,6,7,8,10,12,16,24], + "y_axis": "Acceptance x efficiency", + "alice_text": "#Lambda_{c}^{#plus} #kern[-0.05]{#rightarrow pK^{#minus}#pi^{#plus} (and charge conj.})", + "legend": [0.45, 0.20, 0.65, 0.30], + "legend_ratio": [0.40, 0.60, 0.90, 0.90], + "log_scale": true, + "output": { + "outdir": "/data8/majak/crosssec/032025", + "file": "efficiencies" + } +} From 6925599b36b43e615b7b04b88b21a71c21a0e564 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 16:19:00 +0200 Subject: [PATCH 21/34] Move the gather script to the merging folder --- .../scripts-dhadrons/merging/README.md | 14 ++++++++++++++ .../{ => merging}/gather-inputs-cutvar.sh | 0 2 files changed, 14 insertions(+) rename machine_learning_hep/scripts-dhadrons/{ => merging}/gather-inputs-cutvar.sh (100%) diff --git a/machine_learning_hep/scripts-dhadrons/merging/README.md b/machine_learning_hep/scripts-dhadrons/merging/README.md index 5497939c9c..d4a7654f9b 100644 --- 
a/machine_learning_hep/scripts-dhadrons/merging/README.md +++ b/machine_learning_hep/scripts-dhadrons/merging/README.md @@ -29,4 +29,18 @@ The merging creates a single output file with histogram for the 1st pT bin from `merge-mlhep.sh` is an example that uses `merge_histomass.py` to obtain a single invariant mass file for the O2Physics D2H mass fitter. The script makes also use of `merge_histos.py` to get a single efficiencies file to be used in the cut variation macro. +## Gather MLHEP efficiencies and mass fits for cut variation +File: `gather-inputs-cutvar.sh` +Usage: `./gather-inputs-cutvar.sh` + +To get MLHEP results for different non-prompt cuts, different output directories must be set. Otherwise, the results get overwritten. However, the cut variation script requires the input efficiency and mass fit files to be in a single directory. + +This script takes all `efficienciesLcpKpiRun3analysis.root` and `yields_LcpKpi_Run3analysis.root` MLHEP output files from the directories that match `RESDIR_PATTERN`, and puts them in the `OUTPUT_DIR`. To differentiate the files, the suffix made of the corresponding directory name with `RESDIR_PATTERN` removed is appended to a file name.
+`PERM_PATTERN` is also used to match directories, but it is not removed from the suffix. + +For example, given `RESDIR_PATTERN`: `/data/MLHEP/results-today_`, `PERM_PATTERN`: `non-prompt_`, and directories like: `/data/MLHEP/results-today_non-prompt_0.1`, `/data/MLHEP/results-today_non-prompt_0.2`, the resulting efficiency file names are: `efficienciesLcpKpiRun3analysis_non-prompt_0.1.root`, `efficienciesLcpKpiRun3analysis_non-prompt_0.2.root`. + +Adjust `MLHEP_DIR`, `OUTPUT_DIR`, `RESDIR_PATTERN` and `PERM_PATTERN` in the script. + +You might also need to adjust the regular expression in line 12 and file paths in the for loop. diff --git a/machine_learning_hep/scripts-dhadrons/gather-inputs-cutvar.sh b/machine_learning_hep/scripts-dhadrons/merging/gather-inputs-cutvar.sh similarity index 100% rename from machine_learning_hep/scripts-dhadrons/gather-inputs-cutvar.sh rename to machine_learning_hep/scripts-dhadrons/merging/gather-inputs-cutvar.sh From 260c2837c58d77eecaaf92dbef2fefa243f91a1b Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 17:41:03 +0200 Subject: [PATCH 22/34] Described run scripts --- .../scripts-dhadrons/run-mlhep/README.md | 30 +++++++++++++++++++ .../{ => run-mlhep}/run-fdd-batch.sh | 15 ++-------- .../{ => run-mlhep}/run-mlhep.sh | 0 3 files changed, 32 insertions(+), 13 deletions(-) create mode 100644 machine_learning_hep/scripts-dhadrons/run-mlhep/README.md rename machine_learning_hep/scripts-dhadrons/{ => run-mlhep}/run-fdd-batch.sh (72%) rename machine_learning_hep/scripts-dhadrons/{ => run-mlhep}/run-mlhep.sh (100%) diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md b/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md new file mode 100644 index 0000000000..b7cf9da9db --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md @@ -0,0 +1,30 @@ +# Run MLHEP + +## A simple shortcut with default running options + +File: `run-mlhep.sh`
+Usage: `./run-mlhep.sh my_database.yml my_run_config.yml logfile.log` + +It calls: +``` +mlhep --log-file logfile.log \ + -a Run3analysis \ + --run-config my_run_config.yml \ + --database-analysis my_database.yml +``` + +## Run MLHEP in batch for various BDT cuts + +File: `run-mlhep-batch.sh`
+Usage: `./run-mlhep-batch.sh` + +The script requires an MLHEP database with %resdir%, %bkg...%, and %fd% placeholders. +You can see examples of the placeholders in the `data/data_run3/database_ml_parameters_LcToPKPi_multiclass_fdd.yml` database. + +The script loops over different cuts defined with `seq`. By default, they are non-prompt cuts. To scan prompt cuts instead, simply put the %fd% placeholders in place of the prompt cuts in the `probcutoptimal` variables in the database. Background cuts are set to the `bkg` value. + +For each non-prompt cut, the MLHEP workflow is launched with the %resdir% output directory set to `${RESDIR}/${RESDIR_PATTERN}${suffix}`, where `RESDIR` is the main MLHEP output directory, `RESDIR_PATTERN` is the prefix of the output directory name, and `suffix` is `fd_${fd}`, where `${fd}` is the current cut value. + +The MLHEP workflow is defined by the `submission/analyzer.yml` file. Usually, you would enable the `histomass` and `efficiency` steps for data and MC, and the `fit` and `efficiency` steps in the "Inclusive hadrons" section. + +Adjust the script variables and the `submission/analyzer.yml` file to your needs. diff --git a/machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh similarity index 72% rename from machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh rename to machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh index 97b7a68c6d..f51ce6cb9e 100755 --- a/machine_learning_hep/scripts-dhadrons/run-fdd-batch.sh +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh @@ -1,7 +1,7 @@ #!/bin/bash -# Run MLHEP in batch for various non-prompt cuts -# You need a MLHEP database with %resdir%, %bkg...%, and %fd...% placeholders. +# Run MLHEP in batch for various BDT cuts. +# You need an MLHEP database with %resdir%, %bkg...%, and %fd% placeholders. 
source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" @@ -52,17 +52,6 @@ for fd in $(seq 0.000 0.005 0.000) ; do # Set non-prompt BDT cuts sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" - sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database" # `yes` is a program that says `y` to all interactive console prompts. # In this way, we skip all MLHEP questions about deleting old results. diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep.sh b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh similarity index 100% rename from machine_learning_hep/scripts-dhadrons/run-mlhep.sh rename to machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh From dd9e148469acccf31bdfe0d978ed6d4567a3b4a5 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 17:42:11 +0200 Subject: [PATCH 23/34] Add missing
in READMEs --- .../scripts-dhadrons/adjusting-run2-run3/README.md | 6 +++--- machine_learning_hep/scripts-dhadrons/merging/README.md | 6 +++--- machine_learning_hep/scripts-dhadrons/systematics/README.md | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md index 3e309a4e14..20df4fae7a 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md @@ -2,7 +2,7 @@ ## Add pT bins to extend the x-axis range on the plots -File: `add_pt_bins.py` +File: `add_pt_bins.py`
Usage: `python add_pt_bins.py in_file.root histname out_file.root` ROOT does not allow nicely to plot a histogram on a plot with x-axis wider than histogram minimum and maximum bins. @@ -13,7 +13,7 @@ You can uncomment lines 53-64 to get a formula for merging 2 bins. You need to a ## Restrict the maximum of x-axis -File: `remove_high_pt.py` +File: `remove_high_pt.py`
Usage: `python remove_high_pt.py in_file.root histname out_file.root maxval` This is a contrary script to the previous one. @@ -22,7 +22,7 @@ Here, `out_file.root` will contain histograms, where the last x-axis bin contain ## Rescale and merge cross section results -Files: `modify_crosssec_run2.py`, `modify_crosssec_run3.py` +Files: `modify_crosssec_run2.py`, `modify_crosssec_run3.py`
Usage: `python modify_crosssec_run2.py in_file.root histname out_histname out_file.root` The Run 2 script scales `histname` from `in_file.root` by 1./BR and merges bins, whose indices are provided in the script. The output is saved under name `out_histname` in `out_file.root`. diff --git a/machine_learning_hep/scripts-dhadrons/merging/README.md b/machine_learning_hep/scripts-dhadrons/merging/README.md index d4a7654f9b..0c3ddc421a 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/README.md +++ b/machine_learning_hep/scripts-dhadrons/merging/README.md @@ -2,7 +2,7 @@ ## Merge multiple histograms from multiple input files -Files: `merge_histos.py`, `merge-cutvar.sh`, `merge-yields.sh` +Files: `merge_histos.py`, `merge-cutvar.sh`, `merge-yields.sh`
Usage: `python merge_histos.py -o out_file.root -n histName1 -n histName2 -i in_file1.root -i in_file2.root` You can provide as many histogram names as you want. All histograms should be 1-dimensional and have the same x-axis. If no histogram name is provided, the script will merge all 1-dimensional histograms from the input files. @@ -15,7 +15,7 @@ The bash files `merge-cutvar.sh` and `merge-yields.sh` provide examples of using ## Merge the outputs of the MLHEP histomass step -Files: `merge_histomass.py`, `merge-mlhep.sh` +Files: `merge_histomass.py`, `merge-mlhep.sh`
Usage: `python merge_histomass.py -o out_file.root -n histName1 -n histName2 -i in_file1.root -i -in_file2.root` This script is different from the previous one as it is adjusted to the layout of MLHEP `masshisto.root` files, which contain 1 invariant mass histogram per pT bin. @@ -31,7 +31,7 @@ The merging creates a single output file with histogram for the 1st pT bin from ## Gather MLHEP efficiencies and mass fits for cut variation -File: `gather-inputs-cutvar.sh` +File: `gather-inputs-cutvar.sh`
Usage: `./gather-inputs-cutvar.sh` To get MLHEP results for different non-prompt cuts, different output directories must be set. Otherwise, the results get overwritten. However, the cut variation script requires the input efficiency and mass fit files to be in a single directory. diff --git a/machine_learning_hep/scripts-dhadrons/systematics/README.md b/machine_learning_hep/scripts-dhadrons/systematics/README.md index 0f4269cf9f..04ded7039c 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/README.md +++ b/machine_learning_hep/scripts-dhadrons/systematics/README.md @@ -1,6 +1,6 @@ # Obtain various comparison plots, esp. for systematics and final analysis results -File: `compare_fractions.py` +File: `compare_fractions.py`
Usage: `python compare_fractions.py config.json` All JSON files in this directory provide various configuration examples for different use cases. From c719bcfa600d1c37cbd397049e1c0253b1842772 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 17:49:10 +0200 Subject: [PATCH 24/34] Add the general README. Fix titles of other READMEs --- machine_learning_hep/scripts-dhadrons/README.md | 11 +++++++++++ .../scripts-dhadrons/debugging/README.md | 2 +- .../scripts-dhadrons/merging/README.md | 2 +- .../scripts-dhadrons/multitrial/README.md | 2 +- .../scripts-dhadrons/preliminary-plots/README.md | 2 +- .../scripts-dhadrons/run-mlhep/README.md | 2 +- 6 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 machine_learning_hep/scripts-dhadrons/README.md diff --git a/machine_learning_hep/scripts-dhadrons/README.md b/machine_learning_hep/scripts-dhadrons/README.md new file mode 100644 index 0000000000..ecdd99d8fc --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/README.md @@ -0,0 +1,11 @@ +# Helper scripts for inclusive hadron analysis + +- adjusting-run2-run3: fix Run 3 plots for comparison with Run 2 results +- debugging: verify different stages of MLHEP processing +- merging: merge results from different MLHEP and cut variation runs +- multitrial: a workflow to perform the multitrial (raw yield) systematics with MLHEP +- preliminary-plots: scripts to plot invariant mass fits and cut variation results for ALICE preliminaries +- run-mlhep: automate MLHEP running +- systematics: obtain various comparison plots, esp. for systematics and final analysis results + +See README files in each subfolder. 
diff --git a/machine_learning_hep/scripts-dhadrons/debugging/README.md b/machine_learning_hep/scripts-dhadrons/debugging/README.md index 219203625b..b91c508ab5 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/README.md +++ b/machine_learning_hep/scripts-dhadrons/debugging/README.md @@ -1,4 +1,4 @@ -# Debugging some issues +# Verify different stages of MLHEP processing ## Check MLHEP output data files diff --git a/machine_learning_hep/scripts-dhadrons/merging/README.md b/machine_learning_hep/scripts-dhadrons/merging/README.md index 0c3ddc421a..49f8929c96 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/README.md +++ b/machine_learning_hep/scripts-dhadrons/merging/README.md @@ -1,4 +1,4 @@ -# Merging histograms and files +# Merge results from different MLHEP and cut variation runs ## Merge multiple histograms from multiple input files diff --git a/machine_learning_hep/scripts-dhadrons/multitrial/README.md b/machine_learning_hep/scripts-dhadrons/multitrial/README.md index 8437924336..56ebef2b75 100644 --- a/machine_learning_hep/scripts-dhadrons/multitrial/README.md +++ b/machine_learning_hep/scripts-dhadrons/multitrial/README.md @@ -1,4 +1,4 @@ -# Multitrial systematics with MLHEP +# Multitrial (raw yield) systematics with MLHEP ## Generate configurations (MLHEP yml databases) for each trial diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md index e0a7eabdd1..822c3c11ab 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md @@ -1,4 +1,4 @@ -# Scripts for preliminary plots +# Scripts for ALICE preliminary plots ## Invariant mass fits diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md b/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md index b7cf9da9db..3105ee0d74 100644 --- 
a/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md @@ -1,4 +1,4 @@ -# Run MLHEP +# Automate MLHEP running ## A simple shortcut with default running options From 28dd24e8b84902d4d9e379e473817dd492d03adb Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 17:53:09 +0200 Subject: [PATCH 25/34] Delete spurious multitrial script --- machine_learning_hep/multitrial.py | 188 ------------------ .../run-mlhep-fitter-multitrial.py | 158 --------------- .../run-mlhep-fitter-multitrial.sh | 47 ----- 3 files changed, 393 deletions(-) delete mode 100644 machine_learning_hep/multitrial.py delete mode 100644 machine_learning_hep/run-mlhep-fitter-multitrial.py delete mode 100644 machine_learning_hep/run-mlhep-fitter-multitrial.sh diff --git a/machine_learning_hep/multitrial.py b/machine_learning_hep/multitrial.py deleted file mode 100644 index 38119c95fa..0000000000 --- a/machine_learning_hep/multitrial.py +++ /dev/null @@ -1,188 +0,0 @@ -# pylint: disable=missing-function-docstring, invalid-name -""" -file: multitrial.py -brief: Plot multitrial systematics based on multiple fit trials, one file per trial. 
-usage: python3 multitrial.py config_multitrial.json -author: Maja Karwowska , Warsaw University of Technology -""" -import argparse -import glob -import json -import re -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.ticker import MultipleLocator, AutoMinorLocator - -from ROOT import ( # pylint: disable=import-error,no-name-in-module - TFile, - gROOT, -) - - -def plot_text_box(ax, text): - ax.text(0.98, 0.97, text, - horizontalalignment="right", verticalalignment="top", - fontsize=40, va="top", transform=ax.transAxes, - bbox={"edgecolor": "black", "fill": False}) - - -def get_yields(cfg): - filenames = sorted(glob.glob(cfg["file_pattern"]), - key=lambda filename: re.split("/", filename)[-2]) - yields = {} - yields_err = {} - trials = {} - chis = {} - for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): - yields[f"{pt_bin_min}_{pt_bin_max}"] = [] - yields_err[f"{pt_bin_min}_{pt_bin_max}"] = [] - trials[f"{pt_bin_min}_{pt_bin_max}"] = [] - chis[f"{pt_bin_min}_{pt_bin_max}"] = [] - for filename in filenames: - print(f"Reading {filename}") - with TFile.Open(filename) as fin: - hist = fin.Get(cfg["histoname"]) - hist_sel = fin.Get(cfg["sel_histoname"]) - if hist.ClassName() != "TH1F": - print(f"No hist in {filename}") - if hist_sel.ClassName() != "TH1F": - print(f"No hist sel in {filename}") - dirname = re.split("/", filename)[4] # [-2] for D2H fitter - trial_name = dirname.replace(cfg["dir_pattern"], "") - for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"], - cfg["pt_bins_max"])): - if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \ - and hist.GetBinContent(ind + 1) > 1.0 : - yields[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinContent(ind + 1)) - yields_err[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinError(ind + 1)) - trials[f"{pt_bin_min}_{pt_bin_max}"].append(trial_name) - chis[f"{pt_bin_min}_{pt_bin_max}"].append(hist_sel.GetBinContent(ind + 1)) - else: - print(f"Rejected: 
{hist_sel.GetBinContent(ind + 1)} {trial_name} "\ - f"pt: {pt_bin_min}, {pt_bin_max}") - if hist.GetBinContent(ind + 1) < 1.0: - print("Yield 0") - return yields, yields_err, trials, chis - - -def prepare_figure(cfg, y_label, ticks): - fig = plt.figure(figsize=(20, 15)) - ax = plt.subplot(1, 1, 1) - ax.set_xlabel(cfg["x_axis"], fontsize=20) - ax.set_ylabel(y_label, fontsize=20) - ax.tick_params(which="both", width=2.5, direction="in") - ax.tick_params(which="major", labelsize=20, length=15) - ax.tick_params(which="minor", length=7) - ax.xaxis.set_major_locator(MultipleLocator(ticks)) - ax.xaxis.set_minor_locator(AutoMinorLocator(5)) - ax.yaxis.set_minor_locator(AutoMinorLocator(5)) - return fig, ax - - -def set_ax_limits(ax, pt_string, values, errs): - ax.margins(0.01, 0.2) - np_values = np.array(values, dtype="float32") - np_errs = np.array(errs, dtype="float32") - if ax.get_ylim()[1] - ax.get_ylim()[0] > 30.0 * np.std(np_values): - ax.set_ylim(np.mean(np_values) - 10.0 * np.std(np_values), - np.mean(np_values) + 10.0 * np.std(np_values)) - print(f"{pt_string} narrowing down the axis to {ax.get_ylim()}") - - -def plot_trial_line(ax, central_trial_ind): - axis_lim = ax.get_ylim() - y_axis = np.linspace(*axis_lim, 100) - ax.plot([central_trial_ind] * len(y_axis), y_axis, c="m", ls="--", linewidth=4.0) - ax.set_ylim(*axis_lim) - - -def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_string, - central_trial_ind, central_yield): - fig, ax = prepare_figure(cfg, cfg["y_axis"], 100) - x_axis = range(len(trials)) - ax.errorbar(x_axis, yields, yerr=yields_err, - fmt="o", c="b", elinewidth=2.5, linewidth=4.0) - set_ax_limits(ax, pt_string, yields, yields_err) - central_line = np.array([central_yield] * len(x_axis), dtype="float32") - ax.plot(x_axis, central_line, c="orange", ls="--", linewidth=4.0) - central_err = np.array([yields_err[central_trial_ind]] * len(x_axis), dtype="float32") - ax.fill_between(x_axis, central_line - central_err, central_line 
+ central_err, - facecolor="orange", edgecolor="none", alpha=0.3) - plot_trial_line(ax, central_trial_ind) - plot_text_box(ax, plot_pt_string) - fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_yields_trials_{pt_string}.png', - bbox_inches='tight') - plt.close() - - -def plot_chis(chis, cfg, pt_string, plot_pt_string): - fig, ax = prepare_figure(cfg, "Chi2/ndf", 100) - x_axis = range(len(chis)) - ax.scatter(x_axis, chis, c="b", marker="o") - set_ax_limits(ax, pt_string, chis, [0.0] * len(chis)) - plot_text_box(ax, plot_pt_string) - fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png', - bbox_inches='tight') - plt.close() - - -def plot_yields_distr(yields, cfg, pt_string, plot_pt_string, central_trial_ind, central_yield): - plt.figure(figsize=(20, 15)) - ax = plt.subplot(1, 1, 1) - ax.set_xlabel("Ratio", fontsize=20) - ax.tick_params(labelsize=20, length=7, width=2.5) - ratios = [yield_ / central_yield for ind, yield_ in enumerate(yields) \ - if ind != central_trial_ind] - ax.hist(ratios, color="b", linewidth=4.0) - mean = np.mean(yields) - std_dev = np.std(yields) - diffs = [(yield_ - central_yield) / central_yield \ - for yield_ in yields[:central_trial_ind]] - diffs.extend([(yield_ - central_yield) / central_yield \ - for yield_ in yields[central_trial_ind+1:]]) - rmse = np.sqrt(np.mean(np.array(diffs, dtype="float32")**2)) - plot_text_box(ax, f"{plot_pt_string}\n"\ - f"mean: {mean:.0f}\n"\ - f"std dev: {std_dev:.2f}\n"\ - f"RMSE: {rmse:.2f}\n"\ - f"#trials: {len(yields)}") - plt.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_distr_{pt_string}.png', bbox_inches='tight') - plt.close() - - -def main(): - gROOT.SetBatch(True) - - parser = argparse.ArgumentParser(description="Arguments to pass") - parser.add_argument("config", help="JSON config file") - args = parser.parse_args() - - with open(args.config, encoding="utf8") as fil: - cfg = json.load(fil) - - yields, yields_err, trials, chis = get_yields(cfg) - - for pt_bin_min, pt_bin_max in 
zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): - plot_pt_string = f"${pt_bin_min} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {pt_bin_max}$" - pt_string = f"{pt_bin_min}_{pt_bin_max}" - - try: - central_trial_ind = trials[pt_string].index(cfg["central_trial"]) - central_yield = yields[pt_string][central_trial_ind] - - plot_yields_trials(yields[pt_string], yields_err[pt_string], trials[pt_string], cfg, - pt_string, plot_pt_string, central_trial_ind, central_yield) - plot_yields_distr(yields[pt_string], cfg, pt_string, plot_pt_string, - central_trial_ind, central_yield) - plot_chis(chis[pt_string], cfg, pt_string, plot_pt_string) - except: - pass - - with open(f'{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt', - "w", encoding="utf-8") as ftext: - for trial in trials[pt_string]: - ftext.write(f"{trial}\n") - - -if __name__ == "__main__": - main() diff --git a/machine_learning_hep/run-mlhep-fitter-multitrial.py b/machine_learning_hep/run-mlhep-fitter-multitrial.py deleted file mode 100644 index c5a06a1836..0000000000 --- a/machine_learning_hep/run-mlhep-fitter-multitrial.py +++ /dev/null @@ -1,158 +0,0 @@ -# pylint: disable=missing-function-docstring, invalid-name -""" -file: run-mlhep-fitter-multitrial.py -brief: Prepare MLHEP database files for different fit configurations for multitrial systematics. 
-usage: python3 run-mlhep-fitter-multitrial.py -author: Maja Karwowska , Warsaw University of Technology -""" - -import argparse -import re -import shutil -import yaml - -SIGMA02="0.007, 0.007, 0.013" -SIGMA23="0.007, 0.007, 0.013" -SIGMA34="0.007, 0.007, 0.012" -SIGMA45="0.008, 0.008, 0.016" -SIGMA56="0.010, 0.010, 0.016" -SIGMA67="0.008, 0.008, 0.017" -SIGMA78="0.012, 0.012, 0.018" -SIGMA810="0.015, 0.012, 0.018" -SIGMA1012="0.010, 0.010, 0.022" -SIGMA1216="0.016, 0.016, 0.029" -SIGMA1624="0.016, 0.016, 0.029" -FREE_SIGMAS=[SIGMA02, SIGMA23, SIGMA34, SIGMA45, SIGMA56, SIGMA67, SIGMA78, - SIGMA810, SIGMA1012, SIGMA1216, SIGMA1624] - -CENTRAL_TRIAL="" - -BASE_TRIALS = ( - ["alpha-15%", "alpha+15%"], - ["n-15%", "n+15%"], - ["rebin-1", "rebin+1"], - ["free-sigma"], - ["poly3"], - ["narrow", "narrow2", "wide", "wide2"] -) - -def generate_trials(trial_classes): - combinations = [""] - for trial_class in trial_classes: - class_comb = [] - for cur_comb in combinations: - for trial in trial_class: - class_comb.append(cur_comb + "_" + trial) - #print(f"{cur_comb}_{trial}") - combinations.extend(class_comb) - return combinations - -def replace_with_reval(var, in_str, frac): - pattern = fr"{var}\[([0-9.]*), .*?\]" - values = re.findall(pattern, in_str) - new_val = round(float(values[0]) * frac, 3) - return re.sub(pattern, f"{var}[{new_val}, {new_val}]", in_str) - -def process_trial(trial, ana_cfg, data_cfg, mc_cfg): - fit_cfg = ana_cfg["mass_roofit"] - if "alpha-15%" in trial: - print("Processing alpha-15%") - for pt_cfg in mc_cfg: - sig_fn = pt_cfg["components"]["sig"]["fn"] - pt_cfg["components"]["sig"]["fn"] = replace_with_reval("alpha1", sig_fn, 0.85) - elif "alpha+15%" in trial: - print("Processing alpha+15%") - for pt_cfg in mc_cfg: - sig_fn = pt_cfg["components"]["sig"]["fn"] - pt_cfg["components"]["sig"]["fn"] = replace_with_reval("alpha1", sig_fn, 1.15) - elif "n-15%" in trial: - print("Processing n-15%") - for pt_cfg in mc_cfg: - sig_fn = 
pt_cfg["components"]["sig"]["fn"] - pt_cfg["components"]["sig"]["fn"] = replace_with_reval("n1", sig_fn, 0.85) - elif "n+15%" in trial: - print("Processing n+15%") - for pt_cfg in mc_cfg: - sig_fn = pt_cfg["components"]["sig"]["fn"] - pt_cfg["components"]["sig"]["fn"] = replace_with_reval("n1", sig_fn, 1.15) - elif "rebin-1" in trial: - print("Processing rebin-1") - ana_cfg["n_rebin"] = [rebin - 1 for rebin in ana_cfg["n_rebin"]] - elif "rebin+1" in trial: - print("Processing rebin+1") - ana_cfg["n_rebin"] = [rebin + 1 for rebin in ana_cfg["n_rebin"]] - elif "free-sigma" in trial: - print("Processing free-sigma") - for pt_cfg, free_sigma in zip(mc_cfg, FREE_SIGMAS): - sig_fn = pt_cfg["components"]["sig"]["fn"] - pt_cfg["components"]["sig"]["fn"] = re.sub(r"sigma_g1\[(.*?)\]", - f"sigma_g1[{free_sigma}]", sig_fn) - elif "poly3" in trial: - print("Processing poly3") - for pt_cfg in data_cfg: - bkg_fn = pt_cfg["components"]["bkg"]["fn"] - pt_cfg["components"]["bkg"]["fn"] = re.sub(r"a2\[(.*?)\]", - r"a2[\1], a3[-1e8, 1e8]", bkg_fn) - elif "narrow2" in trial: - print("Processing narrow2") - for pt_cfg in fit_cfg: - pt_cfg["range"] = [pt_cfg["range"][0] + 0.02, pt_cfg["range"][1] - 0.02] - elif "narrow" in trial: - print("Processing narrow") - for pt_cfg in fit_cfg: - pt_cfg["range"] = [pt_cfg["range"][0] + 0.01, pt_cfg["range"][1] - 0.01] - elif "wide2" in trial: - print("Processing wide2") - for pt_cfg in fit_cfg: - pt_cfg["range"] = [max(2.10, pt_cfg["range"][0] - 0.02), - min(2.47, pt_cfg["range"][1] + 0.02)] - elif "wide" in trial: - print("Processing wide") - for pt_cfg in fit_cfg: - pt_cfg["range"] = [max(2.10, pt_cfg["range"][0] - 0.01), - min(2.47, pt_cfg["range"][1] + 0.01)] - - -def main(db, db_dir, out_db_dir, resdir_pattern): - db_ext=f"{db}.yml" - db_path=f"{db_dir}/{db_ext}" - combinations = generate_trials(BASE_TRIALS) - - for comb in combinations: - print(comb) - - cur_cfg = f"{out_db_dir}/{db}{comb}.yml" - shutil.copy2(db_path, cur_cfg) - - with 
open(cur_cfg, encoding="utf-8") as stream: - cfg = yaml.safe_load(stream) - - ana_cfg = cfg["LcpKpi"]["analysis"]["Run3analysis"] - fit_cfg = ana_cfg["mass_roofit"] - mc_cfg = [fit_params for fit_params in fit_cfg \ - if "level" in fit_params and fit_params["level"] == "mc"] - data_cfg = [fit_params for fit_params in fit_cfg if not "level" in fit_params] - - resdir = f"{resdir_pattern}{comb}" - respath = f"/data8/majak/MLHEP/{resdir}/" - ana_cfg["data"]["prefix_dir_res"] = respath - ana_cfg["mc"]["prefix_dir_res"] = respath - - trials = comb.split("_") - - for trial in trials: - process_trial(trial, ana_cfg, data_cfg, mc_cfg) - - with open(cur_cfg, "w", encoding="utf-8") as stream: - yaml.dump(cfg, stream, sort_keys=False, width=10000, default_flow_style=None) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Arguments to pass") - parser.add_argument("db", help="MLHEP database without extension") - parser.add_argument("db_dir", help="path to directory with MLHEP database") - parser.add_argument("out_db_dir", help="path to output directory for generated MLHEP databases") - parser.add_argument("resdir", help="MLHEP resdir pattern") - args = parser.parse_args() - - main(args.db, args.db_dir, args.out_db_dir, args.resdir) diff --git a/machine_learning_hep/run-mlhep-fitter-multitrial.sh b/machine_learning_hep/run-mlhep-fitter-multitrial.sh deleted file mode 100644 index 26f25972af..0000000000 --- a/machine_learning_hep/run-mlhep-fitter-multitrial.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -DB_PATTERN="database_ml_parameters_LcToPKPi_multiclass_fdd" # Original database to be used as template -DB_DIR="data/data_run3" -OUT_DB_DIR="multitrial-db" # Directory to store multitrial databases only -ext=".yml" - -DIR_PATH="/data8/majak/MLHEP" -DIR_PATTERN="results-24022025-newtrain-multitrial-prompt" # Prefix of output directory for fit results - -# Paths to masshistos to fit 
-BASE_DIR="/data8/majak/MLHEP/results-24022025-newtrain-ptshape-prompt" -DATA_HIST="LHC23pp/Results/resultsdatatot/masshisto.root" -MC_HIST="LHC24pp_mc/Results/resultsmctot/masshisto.root" - -# Run this only once to generate databases -# Then, you can comment this out if you don't change the *.py file -# The output analysis dir is set in databases to DIR_PATTERN + suffix with trial name -python run-mlhep-fitter-multitrial.py "${DB_PATTERN}" "${DB_DIR}" "${OUT_DB_DIR}" "${DIR_PATTERN}" || exit 1 - -for db in ${OUT_DB_DIR}/*.yml ; do - db_basename=`basename ${db}` - db_basename_no_ext=${db_basename%%${ext}} - echo ${db_basename_no_ext} - suffix=${db_basename_no_ext##${DB_PATTERN}} - echo "suffix: ${suffix}" - RESPATH="${DIR_PATH}/${DIR_PATTERN}${suffix}" - echo "respath: ${RESPATH}" - - # Copy base masshistos so as to skip the masshisto step - # Only the fit step needs to be activated in analyzer.yml - # You need first to create the directory trees - cp "${BASE_DIR}/${DATA_HIST}" "${RESPATH}/${DATA_HIST}" - cp "${BASE_DIR}/${MC_HIST}" "${RESPATH}/${MC_HIST}" - - mlhep logfile_${db_basename}.log \ - -a Run3analysis \ - --run-config submission/analyzer.yml \ - --database-analysis ${db} - - # Copy the plots from MachineLearningHEP/machine_learning_hep/fig/ to RESPATH - # It's not compulsory, it's just for you to see the fits - # It might be obsolete if you changed the default output fig/ location in MLHEP - rm -rf ${RESPATH}/fig/ - mv fig/ ${RESPATH}/fig/ -done - From 03c7db7389f60a4b32df685b1ec249bd9f7b8557 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 17:54:26 +0200 Subject: [PATCH 26/34] Restore proper gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 49a17f54a4..74e08e3562 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ machine_learning_hep/LckINT7HighMultwithJets dataframes_* plots_* output_* +*.json *.h5 *.png *.log From be00cdf7fc07dfd50886f5dc83da6024c3f8a49c 
Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 17:55:53 +0200 Subject: [PATCH 27/34] Remove whitespaces from READMEs --- machine_learning_hep/scripts-dhadrons/README.md | 4 ++-- .../scripts-dhadrons/adjusting-run2-run3/README.md | 4 ++-- machine_learning_hep/scripts-dhadrons/debugging/README.md | 1 - machine_learning_hep/scripts-dhadrons/merging/README.md | 2 +- machine_learning_hep/scripts-dhadrons/multitrial/README.md | 2 +- .../scripts-dhadrons/preliminary-plots/README.md | 4 ++-- machine_learning_hep/scripts-dhadrons/systematics/README.md | 2 +- 7 files changed, 9 insertions(+), 10 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/README.md b/machine_learning_hep/scripts-dhadrons/README.md index ecdd99d8fc..53b40fb84f 100644 --- a/machine_learning_hep/scripts-dhadrons/README.md +++ b/machine_learning_hep/scripts-dhadrons/README.md @@ -1,8 +1,8 @@ # Helper scripts for inclusive hadron analysis - adjusting-run2-run3: fix Run 3 plots for comparison with Run 2 results -- debugging: verify different stages of MLHEP processing -- merging: merge results from different MLHEP and cut variation runs +- debugging: verify different stages of MLHEP processing +- merging: merge results from different MLHEP and cut variation runs - multitrial: a workflow to perform the multitrial (raw yield) systematics with MLHEP - preliminary-plots: scripts to plot invariant mass fits and cut variation results for ALICE preliminaries - run-mlhep: automate MLHEP running diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md index 20df4fae7a..daf5e02ddb 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md @@ -3,7 +3,7 @@ ## Add pT bins to extend the x-axis range on the plots File: `add_pt_bins.py`
-Usage: `python add_pt_bins.py in_file.root histname out_file.root` +Usage: `python add_pt_bins.py in_file.root histname out_file.root` ROOT does not allow nicely to plot a histogram on a plot with x-axis wider than histogram minimum and maximum bins. @@ -31,4 +31,4 @@ The Run 3 script only rescales the input histogram and saves the result in `out_ The lines commented out provide more examples of rescaling. -For Lc prompt cross section obtained during March 2025 approvals, only the uncommented lines in both files were used. +For Lc prompt cross section obtained during March 2025 approvals, only the uncommented lines in both files were used. diff --git a/machine_learning_hep/scripts-dhadrons/debugging/README.md b/machine_learning_hep/scripts-dhadrons/debugging/README.md index b91c508ab5..d8a887e311 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/README.md +++ b/machine_learning_hep/scripts-dhadrons/debugging/README.md @@ -24,4 +24,3 @@ Usage: `python plot_prompt_fraction_vs_bdt_cuts.py config_fraction_vs_bdt_cuts.j Adjust the JSON config. Here, you provide a glob pattern to all files of interest. By adjusting `histoname`, you can plot also the non-prompt fraction. - diff --git a/machine_learning_hep/scripts-dhadrons/merging/README.md b/machine_learning_hep/scripts-dhadrons/merging/README.md index 49f8929c96..c5da64c0ed 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/README.md +++ b/machine_learning_hep/scripts-dhadrons/merging/README.md @@ -43,4 +43,4 @@ For example, given `RESDIR_PATTERN`: `/data/MLHEP/results-today_`, `PERM_PATTERN Adjust `MLHEP_DIR`, `OUTPUT_DIR`, `RESDIR_PATTERN` and `PERM_PATTERN` in the script. -You might also need to adjust the regular expression in line 12 and file paths in the for loop. +You might also need to adjust the regular expression in line 12 and file paths in the for loop. 
diff --git a/machine_learning_hep/scripts-dhadrons/multitrial/README.md b/machine_learning_hep/scripts-dhadrons/multitrial/README.md index 56ebef2b75..54d84c7299 100644 --- a/machine_learning_hep/scripts-dhadrons/multitrial/README.md +++ b/machine_learning_hep/scripts-dhadrons/multitrial/README.md @@ -9,7 +9,7 @@ Arguments: - `database_file`: filename of the template database without the .yml extension, e.g., `database_ml_parameters_LcToPKPi` - `in_db_dir`: path to the directory containing the database, e.g., `data/data_run3` - `out_db_dir`: path to the directory for output multitrial databases, e.g., `multitrial_db` -- `mlhep_results_dir_pattern`: prefix of output directory name for fit results; for each trial, the trial name is appended to the directory name, and the resulting directory name is written under `Run3analysis/{data,mc}/prefix_dir_res` in the database file +- `mlhep_results_dir_pattern`: prefix of output directory name for fit results; for each trial, the trial name is appended to the directory name, and the resulting directory name is written under `Run3analysis/{data,mc}/prefix_dir_res` in the database file Adjust `DIR_PATH` in the script. It is the path to the base directory where you store directories with MLHEP results. diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md index 822c3c11ab..2e0d74a607 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md @@ -8,7 +8,7 @@ Usage: `python plot_invmass_fit_dzero_dplus_lambdac.py config_invmass_preliminar Example config in `config_invmass_preliminary.yml`. 
It was used to draw the plots: - https://alice-figure.web.cern.ch/node/34090 - https://alice-figure.web.cern.ch/node/34089 -- https://alice-figure.web.cern.ch/node/34088 +- https://alice-figure.web.cern.ch/node/34088 The script is passed in different versions around the D2H people. Here, it contains my few improvements, e.g., configurable multiplicity label.
I also commented out lines related to non-prompt particles as we had results only for the prompt case. @@ -32,4 +32,4 @@ Adjust the script: - set `bdtScoreCuts` to the proper `bdtScoreCuts_...` variable - adjust `bdtScoreCuts_toPlot` and the corresponding indices in `bdtScoreCuts_toPlot_ind`; they are the cuts to label on the x-axis - adjust the input file name and histogram names in `DrawCutVarFit()` -- adjust x-axis title, if needed +- adjust x-axis title, if needed diff --git a/machine_learning_hep/scripts-dhadrons/systematics/README.md b/machine_learning_hep/scripts-dhadrons/systematics/README.md index 04ded7039c..cb2124b10c 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/README.md +++ b/machine_learning_hep/scripts-dhadrons/systematics/README.md @@ -13,4 +13,4 @@ The histogram labels in legend are taken from the dictionary labels in the JSON, It is also possible to specify the `y_axis` title and an additional description under the "ALICE Preliminary" header (`alice_text` variable in the config). The header itself and its position can be adjusted in the `get_alice_text` function in the Python script. -Colors and markers can be adjusted at the beginning of the script. +Colors and markers can be adjusted at the beginning of the script. 
From d4f3a2eae0d9615cbfa30ea381dc531e0dfd7d2c Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 23 Sep 2025 17:59:07 +0200 Subject: [PATCH 28/34] Remove whitespaces from the scripts --- .../adjusting-run2-run3/modify_crosssec_run2.py | 2 +- .../scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json | 2 +- .../debugging/plot_prompt_fraction_vs_bdt_cuts.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py index e742263049..3952503a8d 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py @@ -64,7 +64,7 @@ def main(): (hist.GetBinContent(bin1) * hist.GetBinWidth(bin1) +\ hist.GetBinContent(bin2) * hist.GetBinWidth(bin2)) /\ weight_sum) - print(f"bin {bin1} error {hist.GetBinError(bin1)} bin2 {hist.GetBinError(bin2)}\n" + print(f"bin {bin1} error {hist.GetBinError(bin1)} bin2 {hist.GetBinError(bin2)}\n"\ f"scaled: {hist.GetBinWidth(bin1) * hist.GetBinError(bin1)}, "\ f"{hist.GetBinWidth(bin2) * hist.GetBinError(bin2)}\n"\ f"divided: {(hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum}, "\ diff --git a/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json index d2895b7fab..5d9a224404 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json +++ b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json @@ -1,7 +1,7 @@ { "file_pattern": "/data8/majak/MLHEP/results-24022025-newtrain_fd_0.[0-9][0-9]0/LHC23pp_pass4/Results/resultsdatatot/finalcrossLcpKpiRun3analysis.root", "_file_pattern": "glob pattern to all files with different BDT cuts", - "dir_pattern": 
"results-24022025-newtrain_fd_0.[0-9][0-9]0", + "dir_pattern": "results-24022025-newtrain_fd_0.[0-9][0-9]0", "_dir_pattern": "the base directory prefix from the file pattern above", "histoname": "gfraction", "_histoname": "the prompt fraction histogram name", diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py index 56871886f8..688d309912 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py @@ -1,6 +1,6 @@ """ file: plot_prompt_fraction_vs_fd_cuts.py -brief: Plot prompt fraction from cross section calculations for different non-prompt cuts +brief: Plot prompt fraction from cross section calculations for different non-prompt cuts usage: python3 plot_prompt_fraction_vs_fd_cuts.py config_fraction_vs_fd_cuts.json author: Maja Karwowska , Warsaw University of Technology """ From c596b4391f74a13053e2bf526045e0683b607477 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Wed, 1 Oct 2025 16:49:50 +0200 Subject: [PATCH 29/34] MegaLinter autofix --- .../adjusting-run2-run3/add_pt_bins.py | 7 +- .../modify_crosssec_run2.py | 8 +- .../modify_crosssec_run3.py | 2 +- .../adjusting-run2-run3/remove_high_pt.py | 7 +- .../debugging/check_parquet.py | 3 +- .../plot_prompt_fraction_vs_bdt_cuts.py | 8 +- ...lot_prompt_fraction_vs_crosssec_configs.py | 12 +- .../scripts-dhadrons/merging/merge-mlhep.sh | 8 +- .../merging/merge_histomass.py | 3 +- .../scripts-dhadrons/merging/merge_histos.py | 3 +- .../scripts-dhadrons/multitrial/multitrial.py | 12 +- .../multitrial/run-mlhep-fitter-multitrial.py | 5 +- .../preliminary-plots/DrawCutVarFit.C | 109 +++++++++--------- .../preliminary-plots/README.md | 8 +- .../plot_invmass_fit_dzero_dplus_lambdac.py | 18 ++- .../run-mlhep/run-fdd-batch.sh | 10 +- 
.../scripts-dhadrons/run-mlhep/run-mlhep.sh | 6 +- .../systematics/compare_fractions.py | 12 +- 18 files changed, 121 insertions(+), 120 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py index d380d02ced..fc8ff547cc 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py @@ -7,14 +7,9 @@ """ import argparse -import math from array import array -from ROOT import ( # pylint: disable=import-error,no-name-in-module - gROOT, - TFile, - TH1F -) +from ROOT import TH1F, TFile, gROOT # pylint: disable=import-error,no-name-in-module def main(): diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py index 3952503a8d..a5af444eee 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py @@ -7,14 +7,10 @@ """ import argparse -from array import array import math +from array import array -from ROOT import ( # pylint: disable=import-error,no-name-in-module - gROOT, - TFile, - TH1F -) +from ROOT import TH1F, TFile, gROOT # pylint: disable=import-error,no-name-in-module OUTPUT_BINS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 24] BR = 0.0623 diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py index b4d2f2fe15..442707dca6 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py @@ -9,8 +9,8 @@ import argparse from ROOT import ( # pylint: 
disable=import-error,no-name-in-module - gROOT, TFile, + gROOT, ) # 2024 values for LHC22o diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py index e195f78df9..507020b505 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py @@ -7,14 +7,9 @@ """ import argparse -import math from array import array -from ROOT import ( # pylint: disable=import-error,no-name-in-module - gROOT, - TFile, - TH1F -) +from ROOT import TH1F, TFile, gROOT # pylint: disable=import-error,no-name-in-module def main(): diff --git a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py index ed8256eb75..823df874c7 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py @@ -1,7 +1,8 @@ import argparse + +import matplotlib.pyplot as plt import numpy as np import pandas as pd -import matplotlib.pyplot as plt """ diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py index 688d309912..30875d54fa 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py @@ -9,8 +9,8 @@ import glob import json import re -import matplotlib.pyplot as plt +import matplotlib.pyplot as plt from ROOT import ( # pylint: disable=import-error,no-name-in-module TFile, gROOT, @@ -22,7 +22,7 @@ def get_fractions(cfg): fractions = {} fractions_err = {} fd_cuts = [] - for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): + for pt_bin_min, pt_bin_max in 
zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False): fractions[f"{pt_bin_min}_{pt_bin_max}"] = [] fractions_err[f"{pt_bin_min}_{pt_bin_max}"] = [] for filename in filenames: @@ -31,7 +31,7 @@ def get_fractions(cfg): dirname = re.search(cfg["dir_pattern"], filename).group(0) fd_cut = re.split("_", dirname)[-1] fd_cuts.append(fd_cut) - for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"], cfg["pt_bins_max"])): + for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False)): fractions[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetPointY(ind + 1)) fractions_err[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetErrorY(ind + 1)) print(f"final fractions:\n{fractions}\nfd_cuts:\n{fd_cuts}\nfractions error:\n{fractions_err}") @@ -50,7 +50,7 @@ def main(): fractions, fractions_err, fd_cuts = get_fractions(cfg) - for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): + for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False): plt.figure(figsize=(20, 15)) ax = plt.subplot(1, 1, 1) ax.set_xlabel(cfg["x_axis"]) diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py index a976506964..adaf064924 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py @@ -9,29 +9,23 @@ import json import os +from compare_fractions import get_legend, prepare_canvas, save_canvas, set_hist_style from ROOT import ( # pylint: disable=import-error,no-name-in-module - TCanvas, TFile, - TLegend, - TLine, gROOT, gStyle, kAzure, kBlack, kBlue, kCyan, - kDashed, - kGray, kGreen, kMagenta, kOrange, kRed, kTeal, - kYellow + kYellow, ) -from compare_fractions import get_legend, prepare_canvas, save_canvas, 
set_hist_style - COLORS=[kBlack, kRed-3, kAzure-7, kMagenta+1, kGreen+2, kOrange-3, kBlue, kTeal+3, kGreen, kAzure+8, kYellow+3, kOrange-5, kMagenta+2, kBlue-6, kCyan+1, kGreen-6] @@ -70,7 +64,7 @@ def main(): maxy = 0. miny = 1. hists = [] - for ind, (label, color) in enumerate(zip(cfg["hists"], COLORS)): + for ind, (label, color) in enumerate(zip(cfg["hists"], COLORS, strict=False)): with TFile.Open(os.path.join(cfg["inputdir"], cfg["hists"][label]["file"][0])) as fin: hist = fin.Get(cfg["histoname"]) print(f'hist {cfg["histoname"]}: {hist}') diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh index f76a3c2b33..ef0df01089 100755 --- a/machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh +++ b/machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh @@ -33,11 +33,11 @@ for i in "${!FD_12[@]}" ; do fd1624=${FD_1624[i]} echo "${i} fd ${fd12} ${fd23} ${fd34} ${fd45} ${fd56} ${fd67} ${fd78} ${fd810} ${fd1012} ${fd1216} ${fd1624}" - RESPATH="${OUTPUT_DIR}/projections_fd_precise_${fd12}_${fd23}_${fd34}_${fd45}_${fd56}_${fd67}_${fd78}_${fd810}_${fd1012}_${fd1216}_${fd1624}.root" + RESPATH="${OUTPUT_DIR}/projections_${PERM_PATTERN}${fd12}_${fd23}_${fd34}_${fd45}_${fd56}_${fd67}_${fd78}_${fd810}_${fd1012}_${fd1216}_${fd1624}.root" python merge_histomass.py \ -n hmassfPt \ - -o ${RESPATH} \ + -o "${RESPATH}" \ -i "${RESDIR_PATTERN}${fd12}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ -i "${RESDIR_PATTERN}${fd23}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ -i "${RESDIR_PATTERN}${fd34}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ @@ -50,12 +50,12 @@ for i in "${!FD_12[@]}" ; do -i "${RESDIR_PATTERN}${fd1216}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ -i "${RESDIR_PATTERN}${fd1624}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" - 
RESPATH="${OUTPUT_DIR_EFF}/eff_fd_precise_${fd12}_${fd23}_${fd34}_${fd45}_${fd56}_${fd67}_${fd78}_${fd810}_${fd1012}_${fd1216}_${fd1624}.root" + RESPATH="${OUTPUT_DIR_EFF}/eff_${PERM_PATTERN}${fd12}_${fd23}_${fd34}_${fd45}_${fd56}_${fd67}_${fd78}_${fd810}_${fd1012}_${fd1216}_${fd1624}.root" python merge_histos.py \ -n eff \ -n eff_fd \ - -o ${RESPATH} \ + -o "${RESPATH}" \ -i "${RESDIR_PATTERN}${fd12}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ -i "${RESDIR_PATTERN}${fd23}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ -i "${RESDIR_PATTERN}${fd34}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py index a29569f46c..5e6f31fcfe 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py +++ b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py @@ -5,7 +5,8 @@ import argparse -from ROOT import TFile, gROOT # pylint: disable=import-error +from ROOT import TFile, gROOT # pylint: disable=import-error + def main(): """ diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py index 4488d4e48b..c2812f210b 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py +++ b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py @@ -5,7 +5,8 @@ import argparse -from ROOT import TFile, gROOT # pylint: disable=import-error +from ROOT import TFile, gROOT # pylint: disable=import-error + def main(): """ diff --git a/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py index 593a99dd31..141e6dfc97 100644 --- a/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py +++ b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py @@ -9,10 +9,10 @@ import 
glob import json import re -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.ticker import MultipleLocator, AutoMinorLocator +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.ticker import AutoMinorLocator, MultipleLocator from ROOT import ( # pylint: disable=import-error,no-name-in-module TFile, gROOT, @@ -33,7 +33,7 @@ def get_yields(cfg): yields_err = {} trials = {} chis = {} - for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): + for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False): yields[f"{pt_bin_min}_{pt_bin_max}"] = [] yields_err[f"{pt_bin_min}_{pt_bin_max}"] = [] trials[f"{pt_bin_min}_{pt_bin_max}"] = [] @@ -50,7 +50,7 @@ def get_yields(cfg): dirname = re.split("/", filename)[4] # [-2] for D2H fitter trial_name = dirname.replace(cfg["dir_pattern"], "") for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"], - cfg["pt_bins_max"])): + cfg["pt_bins_max"], strict=False)): if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \ and hist.GetBinContent(ind + 1) > 1.0 : # pylint: disable=eval-used yields[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinContent(ind + 1)) @@ -161,7 +161,7 @@ def main(): yields, yields_err, trials, chis = get_yields(cfg) - for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): + for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False): plot_pt_string = f"${pt_bin_min} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {pt_bin_max}$" pt_string = f"{pt_bin_min}_{pt_bin_max}" diff --git a/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py index d8307def4e..b547900b95 100644 --- a/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py +++ b/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py @@ -9,6 +9,7 @@ import 
argparse import re import shutil + import yaml SIGMA02="0.007, 0.007, 0.013" @@ -85,7 +86,7 @@ def process_trial(trial, ana_cfg, data_cfg, mc_cfg): ana_cfg["n_rebin"] = [rebin + 1 for rebin in ana_cfg["n_rebin"]] elif "free-sigma" in trial: print("Processing free-sigma") - for pt_cfg, free_sigma in zip(mc_cfg, FREE_SIGMAS): + for pt_cfg, free_sigma in zip(mc_cfg, FREE_SIGMAS, strict=False): sig_fn = pt_cfg["components"]["sig"]["fn"] pt_cfg["components"]["sig"]["fn"] = re.sub(r"sigma_g1\[(.*?)\]", f"sigma_g1[{free_sigma}]", sig_fn) @@ -133,7 +134,7 @@ def main(db, db_dir, out_db_dir, resdir_pattern): fit_cfg = ana_cfg["mass_roofit"] mc_cfg = [fit_params for fit_params in fit_cfg \ if "level" in fit_params and fit_params["level"] == "mc"] - data_cfg = [fit_params for fit_params in fit_cfg if not "level" in fit_params] + data_cfg = [fit_params for fit_params in fit_cfg if "level" not in fit_params] resdir = f"{resdir_pattern}{comb}" respath = f"{DIR_PATH}/{resdir}/" diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C index 9f595d44ce..ec910c78df 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C @@ -12,26 +12,26 @@ using namespace std; void SetStyle(); -void SetStyleHisto(TH1D *h); -void SetStyleHisto(TH1F *h); -void NormaliseHist1d(TH1 *h); - -//const Int_t colors[] = {kGreen + 2, kBlue - 4, kRed, kOrange + 7}; -//const Int_t markers[] = {20, 21, 33, 34}; -//const Int_t npoints[] = {5, 3, 4, 4, 4, 4, 4}; -//const Int_t nPtBins = 11; -//const Double_t ptlimsmiddle[11] = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5, -// 7.5, 9, 11, 14, 20}; -//const Int_t nPtBinsCoarse = 11; -//Double_t ptlimsCoarse[nPtBinsCoarse + 1] = {1., 2., 3., 4., 5., 6., -// 7., 8., 10., 12., 16., 24.}; -//Double_t ptbinwidthCoarse[nPtBinsCoarse] = {1., 1., 1., 1., 1., 1., -// 1., 2., 2., 4., 
8.}; -//const Double_t ptlimsmiddlePrompt[21] = { -// 0.5, 1.25, 1.75, 2.25, 2.75, 3.25, 3.75, 4.25, 4.75, 5.25, 5.75, -// 6.25, 6.75, 7.25, 7.75, 8.5, 9.5, 11., 14., 20., 30.}; -//Double_t yvaluncPrompt[21] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., -// 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}; +void SetStyleHisto(TH1D* h); +void SetStyleHisto(TH1F* h); +void NormaliseHist1d(TH1* h); + +// const Int_t colors[] = {kGreen + 2, kBlue - 4, kRed, kOrange + 7}; +// const Int_t markers[] = {20, 21, 33, 34}; +// const Int_t npoints[] = {5, 3, 4, 4, 4, 4, 4}; +// const Int_t nPtBins = 11; +// const Double_t ptlimsmiddle[11] = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5, +// 7.5, 9, 11, 14, 20}; +// const Int_t nPtBinsCoarse = 11; +// Double_t ptlimsCoarse[nPtBinsCoarse + 1] = {1., 2., 3., 4., 5., 6., +// 7., 8., 10., 12., 16., 24.}; +// Double_t ptbinwidthCoarse[nPtBinsCoarse] = {1., 1., 1., 1., 1., 1., +// 1., 2., 2., 4., 8.}; +// const Double_t ptlimsmiddlePrompt[21] = { +// 0.5, 1.25, 1.75, 2.25, 2.75, 3.25, 3.75, 4.25, 4.75, 5.25, 5.75, +// 6.25, 6.75, 7.25, 7.75, 8.5, 9.5, 11., 14., 20., 30.}; +// Double_t yvaluncPrompt[21] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., +// 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}; std::vector bdtScoreCuts_1_2 = {0.21, 0.24, 0.27, 0.30, 0.33, 0.35, 0.37, 0.39, 0.41, 0.44, 0.46, 0.48, 0.50, 0.52, 0.54, 0.55, 0.58}; std::vector bdtScoreCuts_2_3 = {0.20, 0.22, 0.24, 0.26, 0.28, 0.30, 0.32, 0.34, 0.36, 0.38, 0.40, 0.42, 0.44, 0.46, 0.48, 0.50, 0.52}; @@ -40,9 +40,9 @@ std::vector bdtScoreCuts_4_5 = {0.17, 0.19, 0.21, 0.23, 0.25, 0.27, 0. 
std::vector bdtScoreCuts_5_6 = {0.10, 0.12, 0.14, 0.16, 0.18, 0.21, 0.24, 0.26, 0.28, 0.30, 0.33, 0.36, 0.39, 0.42, 0.45, 0.50, 0.52}; std::vector bdtScoreCuts_6_8 = {0.15, 0.17, 0.19, 0.21, 0.23, 0.25, 0.27, 0.29, 0.31, 0.33, 0.36, 0.39, 0.41, 0.43, 0.46, 0.49, 0.52}; std::vector bdtScoreCuts_8_12 = {0.08, 0.11, 0.14, 0.16, 0.18, 0.20, 0.22, 0.25, 0.28, 0.30, 0.33, 0.35, 0.38, 0.41, 0.43, 0.46, 0.49}; -//std::vector bdtScoreCuts = {0.29, 0.33, 0.37, 0.41, 0.45, 0.49, -// 0.53, 0.57, 0.61, 0.65, 0.69, 0.73, -// 0.77, 0.81, 0.85, 0.89, 0.93}; +// std::vector bdtScoreCuts = {0.29, 0.33, 0.37, 0.41, 0.45, 0.49, +// 0.53, 0.57, 0.61, 0.65, 0.69, 0.73, +// 0.77, 0.81, 0.85, 0.89, 0.93}; std::vector bdtScoreCuts_toPlot = {0.29, 0.45, 0.61, 0.77, 0.93}; std::vector bdtScoreCuts_toPlot_ind = {0, 4, 8, 12, 16}; @@ -52,31 +52,32 @@ std::vector bdtScoreCuts = bdtScoreCuts_4_5; bool DrawAllPoints = false; -void DrawCutVarFit(bool isPreliminary = kTRUE) { +void DrawCutVarFit(bool isPreliminary = kTRUE) +{ - //TGaxis::SetMaxDigits(1); + // TGaxis::SetMaxDigits(1); gStyle->SetOptTitle(0); gStyle->SetOptStat(0); - TFile *CutVarFile = nullptr; + TFile* CutVarFile = nullptr; // D - TH1F *hRawYieldsVsCutPt = nullptr; - TH1F *hRawYieldPromptVsCut = nullptr; - TH1F *hRawYieldFDVsCut = nullptr; - TH1F *hRawYieldsVsCutReSum = nullptr; + TH1F* hRawYieldsVsCutPt = nullptr; + TH1F* hRawYieldPromptVsCut = nullptr; + TH1F* hRawYieldFDVsCut = nullptr; + TH1F* hRawYieldsVsCutReSum = nullptr; CutVarFile = - new TFile("/data8/majak/systematics/230824/CutVarLc_pp13TeV_LHC24d3_default.root", - "read"); + new TFile("/data8/majak/systematics/230824/CutVarLc_pp13TeV_LHC24d3_default.root", + "read"); hRawYieldsVsCutPt = - (TH1F *)CutVarFile->Get(Form("hRawYieldVsCut_pt%d_%d", binMin, binMax)); + (TH1F*)CutVarFile->Get(Form("hRawYieldVsCut_pt%d_%d", binMin, binMax)); hRawYieldPromptVsCut = - (TH1F *)CutVarFile->Get(Form("hRawYieldPromptVsCut_pt%d_%d", binMin, binMax)); + 
(TH1F*)CutVarFile->Get(Form("hRawYieldPromptVsCut_pt%d_%d", binMin, binMax)); hRawYieldFDVsCut = - (TH1F *)CutVarFile->Get(Form("hRawYieldNonPromptVsCut_pt%d_%d", binMin, binMax)); + (TH1F*)CutVarFile->Get(Form("hRawYieldNonPromptVsCut_pt%d_%d", binMin, binMax)); hRawYieldsVsCutReSum = - (TH1F *)CutVarFile->Get(Form("hRawYieldSumVsCut_pt%d_%d", binMin, binMax)); + (TH1F*)CutVarFile->Get(Form("hRawYieldSumVsCut_pt%d_%d", binMin, binMax)); SetStyleHisto(hRawYieldsVsCutPt); SetStyleHisto(hRawYieldPromptVsCut); @@ -113,17 +114,17 @@ void DrawCutVarFit(bool isPreliminary = kTRUE) { hRawYieldsVsCutPt->SetLineWidth(2); hRawYieldsVsCutPt->GetYaxis()->SetTitleOffset(1.1); // Set custom labels - for (size_t i = 0; i < bdtScoreCuts.size(); ++i) { - hRawYieldsVsCutPt->GetXaxis()->SetBinLabel(i + 1, Form("")); - for (size_t j = 0; j < bdtScoreCuts_toPlot_ind.size(); ++j) - //if (bdtScoreCuts[i] == bdtScoreCuts_toPlot[j]) { - if (i == bdtScoreCuts_toPlot_ind[j]) { - std::cout << "bdtScoreCuts[i] " << bdtScoreCuts[i] << " bdtScoreCuts_toPlot " << bdtScoreCuts_toPlot_ind[j] << std::endl; - hRawYieldsVsCutPt->GetXaxis()->SetBinLabel(i + 1, Form("%.2f",bdtScoreCuts[i])); - } - } + for (size_t i = 0; i < bdtScoreCuts.size(); ++i) { + hRawYieldsVsCutPt->GetXaxis()->SetBinLabel(i + 1, Form("")); + for (size_t j = 0; j < bdtScoreCuts_toPlot_ind.size(); ++j) + // if (bdtScoreCuts[i] == bdtScoreCuts_toPlot[j]) { + if (i == bdtScoreCuts_toPlot_ind[j]) { + std::cout << "bdtScoreCuts[i] " << bdtScoreCuts[i] << " bdtScoreCuts_toPlot " << bdtScoreCuts_toPlot_ind[j] << std::endl; + hRawYieldsVsCutPt->GetXaxis()->SetBinLabel(i + 1, Form("%.2f", bdtScoreCuts[i])); + } + } - TCanvas *c1 = new TCanvas("c1", "c1", 0, 0, 750, 750); + TCanvas* c1 = new TCanvas("c1", "c1", 0, 0, 750, 750); gStyle->SetOptStat(0); c1->SetTickx(); c1->SetTicky(); @@ -154,7 +155,7 @@ void DrawCutVarFit(bool isPreliminary = kTRUE) { infos.SetTextSize(30); infos.DrawLatex(0.21, 0.80, "#Lambda_{c}^{#plus} and charge conj., 
pp, #sqrt{#it{s}} = 13.6 TeV"); - //infos.DrawLatex(0.21, 0.74, "|#it{y}| < 0.5"); + // infos.DrawLatex(0.21, 0.74, "|#it{y}| < 0.5"); TLatex infoPt; infoPt.SetNDC(); @@ -183,7 +184,7 @@ void DrawCutVarFit(bool isPreliminary = kTRUE) { // info2.DrawLatex(0.21, 0.17, "#pm 3.7% lumi. unc. not shown"); // info2.DrawLatex(0.21, 0.22, "#pm 0.76% BR unc. not shown"); - TLegend *leg = new TLegend(0.62, 0.48, 0.70, 0.68); + TLegend* leg = new TLegend(0.62, 0.48, 0.70, 0.68); leg->SetFillColor(0); leg->SetFillStyle(0); leg->SetBorderSize(0); @@ -201,7 +202,8 @@ void DrawCutVarFit(bool isPreliminary = kTRUE) { c1->SaveAs(Form("./CutVarFitLcFD_%d-%d.eps", binMin, binMax)); } -void SetStyle() { +void SetStyle() +{ cout << "Setting style!" << endl; gStyle->Reset("Plain"); @@ -242,7 +244,8 @@ void SetStyle() { gStyle->SetMarkerColor(kBlack); } -void SetStyleHisto(TH1D *h) { +void SetStyleHisto(TH1D* h) +{ h->SetLineColor(kBlack); h->SetLineWidth(2); @@ -261,7 +264,8 @@ void SetStyleHisto(TH1D *h) { h->GetXaxis()->SetNdivisions(510); } -void SetStyleHisto(TH1F *h) { +void SetStyleHisto(TH1F* h) +{ h->SetLineColor(kBlack); h->SetLineWidth(2); @@ -282,7 +286,8 @@ void SetStyleHisto(TH1F *h) { // h->GetXaxis()->SetNdivisions(510); } -void NormaliseHist1d(TH1 *h) { +void NormaliseHist1d(TH1* h) +{ if (h) { // dN/dpt for (Int_t i = 1; i <= h->GetNbinsX(); i++) { diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md index 2e0d74a607..3696a7d539 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md @@ -6,9 +6,9 @@ File: `plot_invmass_fit_dzero_dplus_lambdac.py`
Usage: `python plot_invmass_fit_dzero_dplus_lambdac.py config_invmass_preliminary.yml` Example config in `config_invmass_preliminary.yml`. It was used to draw the plots: -- https://alice-figure.web.cern.ch/node/34090 -- https://alice-figure.web.cern.ch/node/34089 -- https://alice-figure.web.cern.ch/node/34088 +- <https://alice-figure.web.cern.ch/node/34090> +- <https://alice-figure.web.cern.ch/node/34089> +- <https://alice-figure.web.cern.ch/node/34088> The script is passed in different versions around the D2H people. Here, it contains my few improvements, e.g., configurable multiplicity label.
I also commented out lines related to non-prompt particles as we had results only for the prompt case. @@ -24,7 +24,7 @@ You still need to adjust the script in several places: File: `DrawCutVarFit.C`
Usage: `root -x DrawCutVarFit.C` in the ROOT / O2 shell -Used to draw the plot https://alice-figure.web.cern.ch/node/31345. +Used to draw the plot <https://alice-figure.web.cern.ch/node/31345>. Adjust the script: - set the `bdtScoreCuts_...` variables to your final BDT cuts diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py index bfd2ae1b4b..e7438cec4d 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py @@ -21,10 +21,18 @@ import argparse import yaml -import ROOT -from ROOT import (TF1, TCanvas, TDatabasePDG, TFile, TLatex, TLegend, TMath, - gROOT, kAzure, kBlack, kBlue, kGreen, kFullCircle, kRed, TPad) - +from ROOT import ( + TCanvas, + TFile, + TLatex, + TLegend, + TPad, + kAzure, + kBlack, + kBlue, + kGreen, + kRed, +) from style_formatter import set_global_style, set_object_style # enumerator @@ -397,7 +405,7 @@ def main(particle, i_pt, cfg, batch): args = parser.parse_args() print("Loading analysis configuration: ...", end="\r") - with open(args.config, "r", encoding="utf-8") as yml_cfg: + with open(args.config, encoding="utf-8") as yml_cfg: configuration = yaml.load(yml_cfg, yaml.FullLoader) print("Loading analysis configuration: Done!") diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh index f51ce6cb9e..cd5b6f24c1 100755 --- a/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh @@ -3,7 +3,11 @@ # Run MLHEP in batch for various BDT cuts. # You need an MLHEP database with %resdir%, %bkg...%, and %fd% placeholders. -source "${HOME}/Run3Analysisvalidation/exec/utilities.sh" +# Throw error and exit.
+function ErrExit { + MsgErr "Error: $*"; exit 1; +} + WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/" # Base database. @@ -28,13 +32,13 @@ for fd in $(seq 0.000 0.005 0.000) ; do # Variable suffix to append to the output directory name. suffix="fd_${fd}" - RESPATH="${RESDIR}/${RESDIR_PATTERN}${suffix}" + RESPATH="${RESDIR_PATTERN}${suffix}" CUR_DB="${DATABASE}_edit_fd${fd}.yml" cp "${DATABASE_PATH}" "${CUR_DB}" || ErrExit "Could not copy database" # Adjust the output directory - sed -i "s/%resdir%/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%resdir%/${RESPATH}/g" "${CUR_DB}" || ErrExit "Could not edit database" # Set bkg BDT cuts sed -i "s/%bkg01%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh index f083791c62..3bdcef5c10 100755 --- a/machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh @@ -12,7 +12,7 @@ DB=$1 CONFIG=$2 LOGFILE=$3 -mlhep --log-file ${LOGFILE} \ +mlhep --log-file "${LOGFILE}" \ -a Run3analysis \ - --run-config ${CONFIG} \ - --database-analysis ${DB} + --run-config "${CONFIG}" \ + --database-analysis "${DB}" diff --git a/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py b/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py index 0b2560c972..bee0489198 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py +++ b/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py @@ -13,15 +13,15 @@ from array import array from ROOT import ( # pylint: disable=import-error,no-name-in-module + TH1F, MakeNullPointer, TCanvas, TFile, TGraphAsymmErrors, - TH1F, TLegend, + TLine, TObject, TPaveText, - TLine, gROOT, gStyle, kAzure, @@ -35,7 +35,7 @@ kOrange, kRed, kTeal, - kYellow + kYellow, ) COLORS=[kBlack, kRed-3, kAzure-7, 
kGreen+2, kOrange-3, kBlue, kMagenta+2, @@ -215,7 +215,7 @@ def plot_models(cfg, canv): leg_models = get_legend(*cfg["legend_models"], len(cfg["models"])) leg_models.SetMargin(0.9) for ind, (label, color, style) in \ - enumerate(zip(cfg["models"], MODELS_COLORS, MODELS_STYLES)): + enumerate(zip(cfg["models"], MODELS_COLORS, MODELS_STYLES, strict=False)): hist = get_hist_model(label, color, style, cfg) print(f"hist model for {label}: {hist.GetName()}") miny, maxy = get_hist_limits(hist, None, miny, maxy) @@ -266,7 +266,7 @@ def plot_compare(cfg): hists = {} central_graph = None graphs_syst = [] - for ind, (label, color) in enumerate(zip(cfg["hists"], COLORS)): + for ind, (label, color) in enumerate(zip(cfg["hists"], COLORS, strict=False)): hist = get_hist_for_label(label, color, cfg) print(label) miny, maxy = get_hist_limits(hist, None, miny, maxy) @@ -384,7 +384,7 @@ def plot_ratio_histos(canvr, legr, hists, graphs, central_hist, histsr = [] graphsr = [] - for ind, (label, color, style) in enumerate(zip(hists, COLORS, styles)): + for ind, (label, color, style) in enumerate(zip(hists, COLORS, styles, strict=False)): print(f"central hist bins: {central_hist.GetNbinsX()} "\ f"{label} bins: {hists[label].GetNbinsX()}") if label != central_label and hists[label].GetNbinsX() == central_hist.GetNbinsX(): From c8e68c03def9f53e1105d0d349556fe75e68912d Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 2 Oct 2025 08:05:20 +0200 Subject: [PATCH 30/34] Manual MegaLinter fixes in md, yml, C, JSON files --- .../scripts-dhadrons/merging/README.md | 4 ++- .../preliminary-plots/DrawCutVarFit.C | 13 ++++----- .../config_invmass_preliminary.yml | 27 ++++++++++--------- .../plot_invmass_fit_dzero_dplus_lambdac.py | 4 +-- .../scripts-dhadrons/run-mlhep/README.md | 2 +- .../run-mlhep/run-fdd-batch.sh | 7 ++--- .../systematics/config_compare_fractions.json | 2 ++ .../systematics/config_fitting.json | 2 ++ .../systematics/config_run2.json | 2 ++ 
.../systematics/config_run3.json | 4 +++ .../systematics/config_run3_run2.json | 2 ++ .../systematics/config_run3d0.json | 2 ++ .../systematics/config_track_tuner.json | 2 ++ 13 files changed, 45 insertions(+), 28 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/merging/README.md b/machine_learning_hep/scripts-dhadrons/merging/README.md index c5da64c0ed..e7b4f55f96 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/README.md +++ b/machine_learning_hep/scripts-dhadrons/merging/README.md @@ -9,7 +9,9 @@ You can provide as many histogram names as you want. All histograms should be 1- Provide one input file per x-axis bin. File names can be repeated. -Merge histograms `histName1` and `histName2` from the input files and save them in the output file. For each histogram name provided, e.g., `histName1`, "merging" means creation of a single output histogram with bin 1 content set to the content of bin 1 in `histName1` in `in_file1.root`, bin 2 content set to the content of bin 2 in `histName1` in `in_file2.root`, and so on. Particularly, the x-axis can represent pT, and the script can be used to merge results obtained for different pT bins. +Merge histograms `histName1` and `histName2` from the input files and save them in the output file. +For each histogram name provided, e.g., `histName1`, "merging" means creation of a single output histogram with bin 1 content set to the content of bin 1 in `histName1` in `in_file1.root`, bin 2 content set to the content of bin 2 in `histName1` in `in_file2.root`, and so on. +Particularly, the x-axis can represent pT, and the script can be used to merge results obtained for different pT bins. The bash files `merge-cutvar.sh` and `merge-yields.sh` provide examples of using this Python script for merging cut variation results and O2Physics D2H fitter results, respectively. 
diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C index ec910c78df..f06f7771a6 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C @@ -8,6 +8,7 @@ #include "TPad.h" #include "TStyle.h" #include +#include using namespace std; @@ -71,13 +72,13 @@ void DrawCutVarFit(bool isPreliminary = kTRUE) new TFile("/data8/majak/systematics/230824/CutVarLc_pp13TeV_LHC24d3_default.root", "read"); hRawYieldsVsCutPt = - (TH1F*)CutVarFile->Get(Form("hRawYieldVsCut_pt%d_%d", binMin, binMax)); + std::reinterpret_castCutVarFile->Get(Form("hRawYieldVsCut_pt%d_%d", binMin, binMax)); hRawYieldPromptVsCut = - (TH1F*)CutVarFile->Get(Form("hRawYieldPromptVsCut_pt%d_%d", binMin, binMax)); + std::reinterpret_castCutVarFile->Get(Form("hRawYieldPromptVsCut_pt%d_%d", binMin, binMax)); hRawYieldFDVsCut = - (TH1F*)CutVarFile->Get(Form("hRawYieldNonPromptVsCut_pt%d_%d", binMin, binMax)); + std::reinterpret_castCutVarFile->Get(Form("hRawYieldNonPromptVsCut_pt%d_%d", binMin, binMax)); hRawYieldsVsCutReSum = - (TH1F*)CutVarFile->Get(Form("hRawYieldSumVsCut_pt%d_%d", binMin, binMax)); + std::reinterpret_castCutVarFile->Get(Form("hRawYieldSumVsCut_pt%d_%d", binMin, binMax)); SetStyleHisto(hRawYieldsVsCutPt); SetStyleHisto(hRawYieldPromptVsCut); @@ -293,8 +294,8 @@ void NormaliseHist1d(TH1* h) for (Int_t i = 1; i <= h->GetNbinsX(); i++) { h->SetBinContent(i, h->GetBinContent(i) / (h->GetXaxis()->GetBinWidth(i))); - // hnew->SetBinError(i,hnew->GetBinContent(i)/(hnew->GetBinWidth(i) - //* TMath::Sqrt(hnew->GetBinContent(i)))); // may need to look at again + // hnew->SetBinError(i,hnew->GetBinContent(i)/(hnew->GetBinWidth(i) + // * TMath::Sqrt(hnew->GetBinContent(i)))); // may need to look at again h->SetBinError(i, h->GetBinError(i) / (h->GetXaxis()->GetBinWidth(i))); } } else { diff --git 
a/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml b/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml index ebf2dddf8a..f4045e4d65 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml @@ -1,14 +1,15 @@ +--- _pp13.6TeVFD: - Particle: 'LAMBDAC_TO_PKPI' - _Particle: 'D0, DPLUS, LAMBDAC_TO_PKPI, LAMBDAC_TO_PK0S' - PtMin: [1., 4., 8.] - PtMax: [2., 5., 10.] - MassMin: [2.21, 2.19, 2.1] - _MassMin: 'min masses to display' - MassMax: [2.356, 2.38, 2.456] - _MassMin: 'max masses to display' - Rebin: [2, 2, 4] - Mult: [null, "01", "7085"] - _Mult: 'multiplicity label contained in the output file name' - MultLatex: ["Minimum Bias", " = 20.07", " = 4.34"] - _MultLatex: 'TLatex text describing multiplicity on the plot' + Particle: 'LAMBDAC_TO_PKPI' + _Particle: 'D0, DPLUS, LAMBDAC_TO_PKPI, LAMBDAC_TO_PK0S' + PtMin: [1., 4., 8.] + PtMax: [2., 5., 10.] 
+ MassMin: [2.21, 2.19, 2.1] + _MassMin: 'min masses to display' + MassMax: [2.356, 2.38, 2.456] + _MassMax: 'max masses to display' + Rebin: [2, 2, 4] + Mult: [null, "01", "7085"] + _Mult: 'multiplicity label contained in the output file name' + MultLatex: ["Minimum Bias", " = 20.07", " = 4.34"] + _MultLatex: 'TLatex text describing multiplicity on the plot' diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py index e7438cec4d..c5a6b33c0b 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py @@ -304,8 +304,8 @@ def main(particle, i_pt, cfg, batch): legend.SetTextSize(SIZE_TEXT_LEGEND) legend.AddEntry(fit_tot_promptEnhanced, 'Total fit function', 'l') legend.AddEntry(fit_bkg_promptEnhanced, '#splitline{Combinatorial}{background}', 'l') - if particle == D0: - legend.AddEntry(fit_refl_promptEnhanced, 'K#minus#pi reflected', 'l') + # if particle == D0: + # legend.AddEntry(fit_refl_promptEnhanced, 'K#minus#pi reflected', 'l') c = TCanvas("c", "", WIDTH, HEIGHT) # Create the first pad diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md b/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md index 3105ee0d74..41ad64955d 100644 --- a/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md @@ -6,7 +6,7 @@ File: `run-mlhep.sh`
Usage: `./run-mlhep.sh my_database.yml my_run_config.yml logfile.log` It calls: -``` +```bash mlhep --log-file logfile.log \ -a Run3analysis \ --run-config my_run_config.yml \ diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh index cd5b6f24c1..0f6eb05da9 100755 --- a/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh @@ -15,9 +15,6 @@ DATABASE="database_ml_parameters_LcToPKPi_multiclass_fdd" DATABASE_EXT="${DATABASE}.yml" DATABASE_PATH="${WORKDIR}/data/data_run3/${DATABASE_EXT}" -# Output base directory to store all output subdirectories. -RESDIR="/data8/majak/MLHEP" - # Prefix of the output directories names. #RESDIR_PATTERN="results-24022025-prompt" RESDIR_PATTERN="results-24022025-newtrain-ptshape-prompt" @@ -32,13 +29,13 @@ for fd in $(seq 0.000 0.005 0.000) ; do # Variable suffix to append to the output directory name. 
suffix="fd_${fd}" - RESPATH="${RESDIR_PATTERN}${suffix}" + RESDIR="${RESDIR_PATTERN}${suffix}" CUR_DB="${DATABASE}_edit_fd${fd}.yml" cp "${DATABASE_PATH}" "${CUR_DB}" || ErrExit "Could not copy database" # Adjust the output directory - sed -i "s/%resdir%/${RESPATH}/g" "${CUR_DB}" || ErrExit "Could not edit database" + sed -i "s/%resdir%/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database" # Set bkg BDT cuts sed -i "s/%bkg01%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database" diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json b/machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json index e4c2d12371..42bd8c562c 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json @@ -78,6 +78,8 @@ "bin_min": [1,2,3,4,5,6,8,12], "bin_max": [2,3,4,5,6,8,12,24], "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], "output": { "outdir": "/data8/majak/systematics/230824/fitting", "file": "NP_Frac_pp13TeV_fitting" diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json b/machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json index b2ddb200c5..2cbef9d998 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json @@ -105,6 +105,8 @@ "bin_min": [1,2,3,4,5,6,8,12,16], "bin_max": [2,3,4,5,6,8,12,16,24], "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], "output": { "outdir": "/data8/majak/systematics/230824/fitting", "file": "NP_Frac_pp13TeV_fitting_1-24" diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_run2.json 
b/machine_learning_hep/scripts-dhadrons/systematics/config_run2.json index a3a0944879..43ac089d78 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/config_run2.json +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_run2.json @@ -18,6 +18,8 @@ "bin_min": [1,2,3,4,5,6,8,12], "bin_max": [2,3,4,5,6,8,12,24], "y_axis": "Non-prompt #Lambda_{c} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], "output": { "outdir": "/data8/majak/systematics/220724/run2-run3d0", "file": "NP_Frac_pp13TeV_run2_only" diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_run3.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3.json index c53d281141..b695a82906 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/config_run3.json +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_run3.json @@ -41,6 +41,10 @@ "bin_max": [2,3,4,5,6,8,12,16,24], "y_axis": "#it{f}_{non-prompt}", "alice_text": "#Lambda_{c}^{#plus} baryon, |#it{y}| < 0.5", + "legend": [0.50, 0.65, 0.90, 0.93], + "legend_models": [0.50, 0.65, 0.90, 0.93], + "legend_ratio": [0.60, 0.63, 0.90, 0.88], + "legend_ratio_models": [0.18, 0.18, 0.40, 0.23], "output": { "outdir": "/data8/majak/systematics/230824/run3", "file": "NP_Frac_pp13TeV_run3_1-24" diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json index 5c946cd99b..d13a4c31b8 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json @@ -49,6 +49,8 @@ "bin_max": [2,3,4,5,6,8,12,16,24], "y_axis": "#it{f}_{non-prompt}", "alice_text": "#Lambda_{c}^{#plus} and charge conj., |#it{y}| < 0.5", + "legend": [0.50, 0.65, 0.90, 0.93], + "legend_ratio": [0.60, 0.63, 0.90, 0.88], "output": { "outdir": "/data8/majak/systematics/230824/run3-run2", "file": 
"NP_Frac_pp13TeV_run3_run2_1-24" diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json index f5c0a666c9..5f96cc77e4 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json @@ -57,6 +57,8 @@ "bin_max": [2,3,4,5,6,8,12,16,24], "y_axis": "#it{f}_{non-prompt}", "alice_text": "|#it{y}| < 0.5", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], "output": { "outdir": "/data8/majak/systematics/230824/run3d0", "file": "NP_Frac_pp13TeV_run3d0_1-24" diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json b/machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json index ef9e65e39e..c1f63fa343 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json @@ -49,6 +49,8 @@ "bin_min": [1,2,3,4,5,6,8,12,16], "bin_max": [2,3,4,5,6,8,12,16,24], "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], "output": { "outdir": "/data8/majak/systematics/230824/track-tuner", "file": "NP_Frac_pp13TeV_track_tuner_1-24" From 9a84e1af86174700ffe76ac8e30a0884bf1ffae5 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 2 Oct 2025 09:26:07 +0200 Subject: [PATCH 31/34] Manual fixes of py and C files --- .../adjusting-run2-run3/add_pt_bins.py | 36 ++-- .../modify_crosssec_run2.py | 15 +- .../adjusting-run2-run3/remove_high_pt.py | 11 +- ...lot_prompt_fraction_vs_crosssec_configs.py | 2 +- .../merging/merge_histomass.py | 1 - .../scripts-dhadrons/merging/merge_histos.py | 8 +- .../scripts-dhadrons/multitrial/multitrial.py | 12 +- .../preliminary-plots/DrawCutVarFit.C | 8 +- .../plot_invmass_fit_dzero_dplus_lambdac.py | 162 
+++++++++--------- .../systematics/compare_fractions.py | 10 +- 10 files changed, 126 insertions(+), 139 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py index fc8ff547cc..dbe5c0214a 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py @@ -28,14 +28,14 @@ def main(): hist = fin.Get(args.histname) hist.SetDirectory(0) first_bin = 1 - #last_bin = hist.GetXaxis().FindBin(12.0) + # last_bin = hist.GetXaxis().FindBin(12.0) last_bin = hist.GetNbinsX() bins = [0.0] - #bins = [] + # bins = [] empty_bins = len(bins) for binn in range(first_bin, last_bin + 1): bins.append(hist.GetBinLowEdge(binn)) - #last_bins = [24.0, 25.0] + # last_bins = [24.0, 25.0] last_bins = [24.0] bins += last_bins print(f"Hist bins {bins}") @@ -43,20 +43,24 @@ def main(): for binn in range(empty_bins, last_bin + 1): hist2.SetBinContent(binn + 1, hist.GetBinContent(binn + 1 - empty_bins)) hist2.SetBinError(binn + 1, hist.GetBinError(binn + 1 - empty_bins)) - print(f"Setting bin {binn + 1} low edge {hist2.GetBinLowEdge(binn + 1)} up edge {hist2.GetXaxis().GetBinUpEdge(binn + 1)} content to content from bin {binn + 1 - empty_bins}: {hist2.GetBinContent(binn + 1)}") + print(f"Setting bin {binn + 1} low edge {hist2.GetBinLowEdge(binn + 1)} " \ + f"up edge {hist2.GetXaxis().GetBinUpEdge(binn + 1)} content to content " \ + f"from bin {binn + 1 - empty_bins}: {hist2.GetBinContent(binn + 1)}") # Formula for merging 2 bins. For example, to compare with less granular Run 2 results. 
- #last_bin = hist2.GetNbinsX() - #width_combined = hist.GetBinWidth(hist.GetNbinsX() -1) + hist.GetBinWidth(hist.GetNbinsX()) - #hist2.SetBinContent(last_bin, - # ((hist.GetBinContent(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) +\ - # hist.GetBinContent(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX())) /\ - # width_combined)) - #hist2.SetBinError(last_bin, - # math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) /\ - # width_combined) **2 +\ - # (hist.GetBinError(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX()) /\ - # width_combined) ** 2)) - #print(f"Setting bin {last_bin} low edge {hist2.GetBinLowEdge(last_bin)} up edge {hist2.GetXaxis().GetBinUpEdge(last_bin)} content to content from bins {hist.GetNbinsX()-1}, {hist.GetNbinsX()}: {hist2.GetBinContent(last_bin)}") + # last_bin = hist2.GetNbinsX() + # width_combined = hist.GetBinWidth(hist.GetNbinsX() -1) + hist.GetBinWidth(hist.GetNbinsX()) + # hist2.SetBinContent(last_bin, + # ((hist.GetBinContent(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) +\ + # hist.GetBinContent(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX())) /\ + # width_combined)) + # hist2.SetBinError(last_bin, + # math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) /\ + # width_combined) **2 +\ + # (hist.GetBinError(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX()) /\ + # width_combined) ** 2)) + # print(f"Setting bin {last_bin} low edge {hist2.GetBinLowEdge(last_bin)} " \ + # f"up edge {hist2.GetXaxis().GetBinUpEdge(last_bin)} content to content " \ + # f"from bins {hist.GetNbinsX()-1}, {hist.GetNbinsX()}: {hist2.GetBinContent(last_bin)}") hist2.SetMarkerSize(hist.GetMarkerSize()) hist2.SetMarkerColor(hist.GetMarkerColor()) hist2.SetMarkerStyle(hist.GetMarkerStyle()) diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py 
b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py index a5af444eee..b4aa8b5d19 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py @@ -52,23 +52,12 @@ def main(): bin1 = merge_bins[ind] bin2 = merge_bins[ind] + 1 weight_sum = hist.GetBinWidth(bin1) + hist.GetBinWidth(bin2) - average = hist.GetBinContent(bin1) * hist.GetBinWidth(bin1) + hist.GetBinContent(bin2) * hist.GetBinWidth(bin2) - print(f"bin {bin1} width {hist.GetBinWidth(bin1)} bin2 {bin2} width {hist.GetBinWidth(bin2)}") - print(f"weight sum: {weight_sum} average: {hist.GetBinContent(bin1) * hist.GetBinWidth(bin1)} + " - f"{hist.GetBinContent(bin2) + hist.GetBinWidth(bin2)} average: {average}") + average = hist.GetBinContent(bin1) * hist.GetBinWidth(bin1) +\ + hist.GetBinContent(bin2) * hist.GetBinWidth(bin2) hist2.SetBinContent(binn, (hist.GetBinContent(bin1) * hist.GetBinWidth(bin1) +\ hist.GetBinContent(bin2) * hist.GetBinWidth(bin2)) /\ weight_sum) - print(f"bin {bin1} error {hist.GetBinError(bin1)} bin2 {hist.GetBinError(bin2)}\n"\ - f"scaled: {hist.GetBinWidth(bin1) * hist.GetBinError(bin1)}, "\ - f"{hist.GetBinWidth(bin2) * hist.GetBinError(bin2)}\n"\ - f"divided: {(hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum}, "\ - f"{(hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum}\n"\ - f"power: {((hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum)**2.}, "\ - f"{((hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum)**2.}\n"\ - f"sum: {((hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum)**2. + ((hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum)**2.}\n"\ - f"sqrt: {math.sqrt(((hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum)**2. 
+ ((hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum)**2.)}\n") hist2.SetBinError(binn, math.sqrt(((hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum) ** 2. +\ ((hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum) ** 2.)) ind += 1 diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py index 507020b505..e85a2bd7c5 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py @@ -27,9 +27,7 @@ def main(): with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout: objnames = fin.GetListOfKeys() - print(f"objnames : {objnames}") histnames = [key.GetName() for key in fin.GetListOfKeys() if args.histname in key.GetName()] - print(f"histnames: {histnames}") for histname in histnames: hist = fin.Get(histname) hist.SetDirectory(0) @@ -37,12 +35,13 @@ def main(): bins = [] for binn in range(1, last_bin + 1): bins.append(hist.GetBinLowEdge(binn)) - print(f"Hist bins {bins}") hist2 = TH1F(histname, "", len(bins) - 1, array('d', bins)) for binn in range(1, last_bin + 1): - hist2.SetBinContent(binn + 1, hist.GetBinContent(binn + 1)) - hist2.SetBinError(binn + 1, hist.GetBinError(binn + 1)) - #print(f"Setting bin {binn + 1} low edge {hist2.GetBinLowEdge(binn + 1)} up edge {hist2.GetXaxis().GetBinUpEdge(binn + 1)} content to content from bin {binn + 1}: {hist2.GetBinContent(binn + 1)}") + hist2.SetBinContent(binn, hist.GetBinContent(binn)) + hist2.SetBinError(binn, hist.GetBinError(binn)) + print(f"Setting bin {binn} low edge {hist2.GetBinLowEdge(binn)} " \ + f"up edge {hist2.GetXaxis().GetBinUpEdge(binn)} content to content " \ + f"from bin {binn}: {hist2.GetBinContent(binn)}") hist2.SetMarkerSize(hist.GetMarkerSize()) hist2.SetMarkerColor(hist.GetMarkerColor()) hist2.SetMarkerStyle(hist.GetMarkerStyle()) diff 
--git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py index adaf064924..08a810b6e1 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py @@ -79,7 +79,7 @@ def main(): hists.append(hist) - margin = 0.1 + # margin = 0.1 print(f"Hist maxy: {maxy} miny: {miny}") for hist in hists: #hist.GetYaxis().SetRangeUser(miny - margin * miny, maxy + margin * maxy) diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py index 5e6f31fcfe..eb172d1039 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py +++ b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py @@ -29,7 +29,6 @@ def main(): with TFile(args.outfile[0], "RECREATE") as fout: for name in args.histname: - hist_list = [] for ind, filename in enumerate(args.infile): fin = TFile(filename) list_hists = [key.GetName() for key in fin.GetListOfKeys() \ diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py index c2812f210b..d32b39e66a 100644 --- a/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py +++ b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py @@ -24,8 +24,6 @@ def main(): if len(args.outfile) != 1: raise ValueError("Provide exactly 1 output file") - print(f"filenames {args.infile}") - with TFile(args.outfile[0], "RECREATE") as fout: fins = [TFile(filename) for filename in args.infile] @@ -33,19 +31,17 @@ def main(): if args.histname is None: histname = [key.GetName() for key in fins[0].GetListOfKeys()] - print(f"histnames {histname}") - def get_hist(fin, histname): fin.cd() return fin.Get(histname) for name 
in histname: hist_list = [get_hist(fin, name) for fin in fins] - print(f"{name} hist list length: {len(hist_list)}") if any(cls in hist_list[0].ClassName() for cls in ("TH1", "TGraph")): hist = hist_list[-1].Clone() for ind, hist_tmp in enumerate(hist_list): - print(f"hist {name} bin {ind+1} pt [{hist.GetBinLowEdge(ind + 1)}, {hist.GetBinLowEdge(ind + 2)}) " \ + print(f"hist {name} bin {ind+1} pt [{hist.GetBinLowEdge(ind + 1)}, " \ + f"{hist.GetBinLowEdge(ind + 2)}) " \ f"content {hist_tmp.GetBinContent(ind + 1)}") hist.SetBinContent(ind+1, hist_tmp.GetBinContent(ind+1)) hist.SetBinError(ind+1, hist_tmp.GetBinError(ind+1)) diff --git a/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py index 141e6dfc97..59bd46a75c 100644 --- a/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py +++ b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py @@ -109,8 +109,8 @@ def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_strin facecolor="orange", edgecolor="none", alpha=0.3) plot_trial_line(ax, central_trial_ind) plot_text_box(ax, plot_pt_string) - fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_yields_trials_{pt_string}.png', - bbox_inches='tight') + fig.savefig(f"{cfg["outdir"]}/{cfg["outfile"]}_yields_trials_{pt_string}.png", + bbox_inches="tight") plt.close() @@ -120,8 +120,8 @@ def plot_chis(chis, cfg, pt_string, plot_pt_string): ax.scatter(x_axis, chis, c="b", marker="o") set_ax_limits(ax, pt_string, chis) plot_text_box(ax, plot_pt_string) - fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png', - bbox_inches='tight') + fig.savefig(f"{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png", + bbox_inches="tight") plt.close() @@ -145,7 +145,7 @@ def plot_yields_distr(yields, cfg, pt_string, plot_pt_string, central_trial_ind, f"std dev: {std_dev:.2f}\n"\ f"RMSE: {rmse:.2f}\n"\ f"#trials: {len(yields)}") - 
plt.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_distr_{pt_string}.png', bbox_inches='tight') + plt.savefig(f"{cfg["outdir"]}/{cfg["outfile"]}_distr_{pt_string}.png", bbox_inches="tight") plt.close() @@ -177,7 +177,7 @@ def main(): except: # pylint: disable=bare-except pass - with open(f'{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt', + with open(f"{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt", "w", encoding="utf-8") as ftext: for trial in trials[pt_string]: ftext.write(f"{trial}\n") diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C index f06f7771a6..509f8caf1c 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C @@ -72,13 +72,13 @@ void DrawCutVarFit(bool isPreliminary = kTRUE) new TFile("/data8/majak/systematics/230824/CutVarLc_pp13TeV_LHC24d3_default.root", "read"); hRawYieldsVsCutPt = - std::reinterpret_castCutVarFile->Get(Form("hRawYieldVsCut_pt%d_%d", binMin, binMax)); + std::reinterpret_cast CutVarFile->Get(Form("hRawYieldVsCut_pt%d_%d", binMin, binMax)); hRawYieldPromptVsCut = - std::reinterpret_castCutVarFile->Get(Form("hRawYieldPromptVsCut_pt%d_%d", binMin, binMax)); + std::reinterpret_cast CutVarFile->Get(Form("hRawYieldPromptVsCut_pt%d_%d", binMin, binMax)); hRawYieldFDVsCut = - std::reinterpret_castCutVarFile->Get(Form("hRawYieldNonPromptVsCut_pt%d_%d", binMin, binMax)); + std::reinterpret_cast CutVarFile->Get(Form("hRawYieldNonPromptVsCut_pt%d_%d", binMin, binMax)); hRawYieldsVsCutReSum = - std::reinterpret_castCutVarFile->Get(Form("hRawYieldSumVsCut_pt%d_%d", binMin, binMax)); + std::reinterpret_cast CutVarFile->Get(Form("hRawYieldSumVsCut_pt%d_%d", binMin, binMax)); SetStyleHisto(hRawYieldsVsCutPt); SetStyleHisto(hRawYieldPromptVsCut); diff --git 
a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py index c5a6b33c0b..519c72b7d7 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py @@ -71,19 +71,18 @@ def get_name_infile(particle, suffix): - name_infile (string): name of the input file """ - name_infile = "" name_infile_promptEnhanced = "" name_infile_FDEnhanced = "" if particle == D0: name_infile_promptEnhanced = "../RawYieldResult/CentralValue/RawYieldsData_D0_pPb5TeV_FD_pos00.root" name_infile_FDEnhanced = "../RawYieldResult/CentralValue/RawYieldsData_D0_pPb5TeV_FD_pos13.root" elif particle == DPLUS: - name_infile = "../Results/Dplus/rawYield_Dplus_nonprompt_enhanced.root" + pass elif particle == LAMBDAC_TO_PKPI: name_infile_promptEnhanced = f"/data8/majak/invmass-plots/massesmasshisto{suffix}.root" name_infile_FDEnhanced = "fits_non_prompt.root" elif particle == LAMBDAC_TO_PK0S: - name_infile = "" + pass return name_infile_promptEnhanced, name_infile_FDEnhanced @@ -156,8 +155,8 @@ def draw_info(lat_label, particle): fnonprompt = "" if particle == D0: info = "D^{0} #rightarrow K^{#font[122]{-}}#pi^{+} and charge conj." - # fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.750 #pm 0.016 (stat.) #pm 0.008 (syst.)" - #fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.531" + fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.750 #pm 0.016 (stat.) #pm 0.008 (syst.)" + # fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.531" elif particle == DPLUS: info = "D^{+} #rightarrow #pi^{+}K^{#font[122]{-}}#pi^{+} and charge conj." fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.787 #pm 0.022 (stat.) #pm 0.016 (syst.)" @@ -169,7 +168,7 @@ def draw_info(lat_label, particle): fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.549 #pm 0.138 (stat.) 
#pm 0.055 (syst.)" lat_label.DrawLatex(0.19, 0.85, info) - #lat_label.DrawLatex(0.19, 0.16, fnonprompt) + lat_label.DrawLatex(0.19, 0.16, fnonprompt) def save_canvas(canvas, particle, pt_mins, pt_maxs, i_pt, mult): @@ -209,7 +208,8 @@ def main(particle, i_pt, cfg, batch): - i_pt (int): pT bin number """ - set_global_style(padtopmargin=0.07, padleftmargin=0.14, padbottommargin=0.125, titleoffsety=1.3, titleoffsetx=1., maxdigits=3) + set_global_style(padtopmargin=0.07, padleftmargin=0.14, padbottommargin=0.125, + titleoffsety=1.3, titleoffsetx=1., maxdigits=3) # import configurables pt_mins = cfg["pp13.6TeVFD"]["PtMin"] @@ -222,27 +222,28 @@ def main(particle, i_pt, cfg, batch): print(f"Plotting for {pt_mins[i_pt]}-{pt_maxs[i_pt]}") - name_infile_promptEnhanced, name_infile_FDEnhanced = get_name_infile(particle, f"{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}") + name_infile_promptEnhanced, name_infile_FDEnhanced = \ + get_name_infile(particle, f"{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}") file_promptEnhanced = TFile.Open(name_infile_promptEnhanced) - #file_FDEnhanced = TFile.Open(name_infile_FDEnhanced) + # file_FDEnhanced = TFile.Open(name_infile_FDEnhanced) hmean_promptEnhanced = file_promptEnhanced.Get("hist_means_lc") hsigma_promptEnhanced = file_promptEnhanced.Get("hist_sigmas_lc") - #hmean_FDEnhanced = file_FDEnhanced.Get("hRawYieldsMean") - #hsigma_FDEnhanced = file_FDEnhanced.Get("hRawYieldsSigma") + # hmean_FDEnhanced = file_FDEnhanced.Get("hRawYieldsMean") + # hsigma_FDEnhanced = file_FDEnhanced.Get("hRawYieldsSigma") hsignal_promptEnhanced = file_promptEnhanced.Get("hist_rawyields_lc") - #hsignal_FDEnhanced = file_FDEnhanced.Get("hRawYields") + # hsignal_FDEnhanced = file_FDEnhanced.Get("hRawYields") mult_suffix = f"_{mult[i_pt]}" if mult[i_pt] else "" name_hmass = f"hmass{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}{mult_suffix}" print(f"file {name_infile_promptEnhanced} hist {name_hmass}") hmass_promptEnhanced = file_promptEnhanced.Get(name_hmass) - #hmass_FDEnhanced = 
file_FDEnhanced.Get(name_hmass) + # hmass_FDEnhanced = file_FDEnhanced.Get(name_hmass) hmass_promptEnhanced.Rebin(rebin[i_pt]) - #hmass_FDEnhanced.Rebin(rebin[i_pt]) + # hmass_FDEnhanced.Rebin(rebin[i_pt]) title_xaxis = get_title_xaxis(particle) width_bin = hmass_promptEnhanced.GetBinWidth(i_pt+1) @@ -251,32 +252,32 @@ def main(particle, i_pt, cfg, batch): ymax_promptEnhanced = 1.2*(hmass_promptEnhanced.GetMaximum() + hmass_promptEnhanced.GetBinError(bin_max)) ymin_promptEnhanced = 0.8*(hmass_promptEnhanced.GetMinimum() - hmass_promptEnhanced.GetBinError(bin_min)) - #ymin_FDEnhanced, ymax_FDEnhanced = 0., 1.2*(hmass_FDEnhanced.GetMaximum() + hmass_FDEnhanced.GetBinError(bin_max)) + # ymin_FDEnhanced, ymax_FDEnhanced = 0., 1.2*(hmass_FDEnhanced.GetMaximum() + hmass_FDEnhanced.GetBinError(bin_max)) title = f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}};{title_xaxis};" \ f"Counts per {width_bin*GEV2MEV:.0f} MeV/#it{{c}}^{{2}}" - #fit_tot_promptEnhanced = file_promptEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + # fit_tot_promptEnhanced = file_promptEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") fit_tot_promptEnhanced = file_promptEnhanced.Get(f"total_func_lc_pt{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") - #fit_bkg_promptEnhanced = file_promptEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + # fit_bkg_promptEnhanced = file_promptEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") fit_bkg_promptEnhanced = file_promptEnhanced.Get(f"bkg_0_lc_pt{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") #fit_refl_promptEnhanced = file_promptEnhanced.Get(f"freflect;13") - #fit_tot_FDEnhanced = file_FDEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") - #fit_bkg_FDEnhanced = file_FDEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") - #fit_refl_FDEnhanced = file_FDEnhanced.Get(f"freflect;13") + # fit_tot_FDEnhanced = 
file_FDEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") + # fit_bkg_FDEnhanced = file_FDEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") + # fit_refl_FDEnhanced = file_FDEnhanced.Get(f"freflect;13") print("Calculating mean") mean_promptEnhanced, err_mean_promptEnhanced = get_h_value_err(hmean_promptEnhanced, i_pt + 1, True) - #mean_FDEnhanced, err_mean_FDEnhanced = get_h_value_err(hmean_FDEnhanced, 13, True) + # mean_FDEnhanced, err_mean_FDEnhanced = get_h_value_err(hmean_FDEnhanced, 13, True) print("Calculating sigma") sigma_promptEnhanced, _ = get_h_value_err(hsigma_promptEnhanced, i_pt + 1, True) - #sigma_FDEnhanced, _ = get_h_value_err(hsigma_FDEnhanced, 13, True) + # sigma_FDEnhanced, _ = get_h_value_err(hsigma_FDEnhanced, 13, True) print("Calculating yield") signal_promptEnhanced, err_signal_promptEnhanced = get_h_value_err(hsignal_promptEnhanced, i_pt + 1) - #signal_FDEnhanced, err_signal_FDEnhanced = get_h_value_err(hsignal_FDEnhanced, 13) + # signal_FDEnhanced, err_signal_FDEnhanced = get_h_value_err(hsignal_FDEnhanced, 13) lat_alice = TLatex() lat_alice.SetNDC() @@ -291,21 +292,18 @@ def main(particle, i_pt, cfg, batch): # str_mu = f"#it{{#mu}} = ({mean:.0f} #pm {err_mean:.0f}) MeV/#it{{c}}^{{2}}" # str_sigma = f"#it{{#sigma}} = {sigma:.0f} MeV/#it{{c}}^{{2}}" - str_sig_promptEnhanced = f'#it{{S}} = {signal_promptEnhanced:.0f} #pm {err_signal_promptEnhanced:.0f}' - #str_sig_FDEnhanced = f'#it{{S}} = {signal_FDEnhanced:.0f} #pm {err_signal_FDEnhanced:.0f}' + # str_sig_promptEnhanced = f'#it{{S}} = {signal_promptEnhanced:.0f} #pm {err_signal_promptEnhanced:.0f}' + # str_sig_FDEnhanced = f"#it{{S}} = {signal_FDEnhanced:.0f} #pm {err_signal_FDEnhanced:.0f}" - if particle == D0: - legend = TLegend(0.6, 0.54, 0.87, 0.75) - else: - legend = TLegend(0.62, 0.58, 0.85, 0.72) + legend = TLegend(0.6, 0.54, 0.87, 0.75) if particle == D0 else TLegend(0.62, 0.58, 0.85, 0.72) legend.SetBorderSize(0) legend.SetFillStyle(0) 
legend.SetTextFont(43) legend.SetTextSize(SIZE_TEXT_LEGEND) - legend.AddEntry(fit_tot_promptEnhanced, 'Total fit function', 'l') - legend.AddEntry(fit_bkg_promptEnhanced, '#splitline{Combinatorial}{background}', 'l') + legend.AddEntry(fit_tot_promptEnhanced, "Total fit function", "l") + legend.AddEntry(fit_bkg_promptEnhanced, "#splitline{Combinatorial}{background}", "l") # if particle == D0: - # legend.AddEntry(fit_refl_promptEnhanced, 'K#minus#pi reflected', 'l') + # legend.AddEntry(fit_refl_promptEnhanced, "K#minus#pi reflected", "l") c = TCanvas("c", "", WIDTH, HEIGHT) # Create the first pad @@ -314,82 +312,84 @@ def main(particle, i_pt, cfg, batch): raise RuntimeError("Failed to create pad1") pad1.Draw() pad1.cd() # Switch to pad1 - frame_promptEnhanced = pad1.DrawFrame(mass_mins[i_pt], ymin_promptEnhanced, mass_maxs[i_pt], ymax_promptEnhanced, title) + frame_promptEnhanced = pad1.DrawFrame(mass_mins[i_pt], ymin_promptEnhanced, + mass_maxs[i_pt], ymax_promptEnhanced, title) frame_promptEnhanced.GetYaxis().SetDecimals() - #c.cd() + # c.cd() # Create the second pad - #pad2 = TPad("NonPromptEnhanced", "Non-prompt enhanced", 0.5, 0., 1., 1.) - #if not pad2: - # raise RuntimeError("Failed to create pad2") - #pad2.Draw() - #pad2.cd() # Switch to pad2 - #frame_FDEnhanced = pad2.DrawFrame(mass_mins[i_pt], ymin_FDEnhanced, mass_maxs[i_pt], ymax_FDEnhanced, title) - #frame_FDEnhanced.GetYaxis().SetDecimals() - - #c.cd() - #pad1.cd() + # pad2 = TPad("NonPromptEnhanced", "Non-prompt enhanced", 0.5, 0., 1., 1.) 
+ # if not pad2: + # raise RuntimeError("Failed to create pad2") + # pad2.Draw() + # pad2.cd() # Switch to pad2 + # frame_FDEnhanced = pad2.DrawFrame(mass_mins[i_pt], ymin_FDEnhanced, mass_maxs[i_pt], ymax_FDEnhanced, title) + # frame_FDEnhanced.GetYaxis().SetDecimals() + + # c.cd() + # pad1.cd() set_object_style(hmass_promptEnhanced, linewidth=3, linecolor=kBlack, markersize=0.5) set_object_style(fit_tot_promptEnhanced, linewidth=3, linecolor=kBlue) set_object_style(fit_bkg_promptEnhanced, linewidth=3, linecolor=kRed, linestyle=2) - #set_object_style(fit_refl_promptEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) + # set_object_style(fit_refl_promptEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) hmass_promptEnhanced.Draw("sameE") fit_bkg_promptEnhanced.Draw("same") fit_tot_promptEnhanced.Draw("same") - #fit_refl_promptEnhanced.Draw("same") + # fit_refl_promptEnhanced.Draw("same") - lat_alice.DrawLatex(0.19, 0.85, 'ALICE Preliminary') + lat_alice.DrawLatex(0.19, 0.85, "ALICE Preliminary") lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL_FOR_COLL_SYSTEM) - lat_label.DrawLatex(0.19, 0.79, 'pp,#kern[-0.08]{ #sqrt{#it{s}} = 13.6 TeV,}#kern[-0.08]{ #it{L}_{int} = 5 pb^{#minus1}}') + lat_label.DrawLatex(0.19, 0.79, "pp,#kern[-0.08]{ #sqrt{#it{s}} = 13.6 TeV,}" \ + "#kern[-0.08]{ #it{L}_{int} = 5 pb^{#minus1}}") lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL) - #draw_info(lat_label, particle) - lat_label.DrawLatex(0.19, 0.73, f'{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}}') - #lat_label.DrawLatex(0.19, 0.3, 'Prompt enhanced') - #lat_label.DrawLatex(0.7, 0.85, '|#it{y}| < 0.5') - #fnonprompt_promptEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.246 #pm 0.007 (stat.)" # (4, 5) GeV - #fnonprompt_promptEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.30 #pm 0.02 (stat.)" # (0, 1) GeV - #lat_label.DrawLatex(0.19, 0.18, fnonprompt_promptEnhanced) + # draw_info(lat_label, particle) + lat_label.DrawLatex(0.19, 0.73, f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < 
{pt_maxs[i_pt]:.0f} GeV/#it{{c}}") + # lat_label.DrawLatex(0.19, 0.3, "Prompt enhanced") + # lat_label.DrawLatex(0.7, 0.85, "|#it{y}| < 0.5") + # fnonprompt_promptEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.246 #pm 0.007 (stat.)" # (4, 5) GeV + # fnonprompt_promptEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.30 #pm 0.02 (stat.)" # (0, 1) GeV + # lat_label.DrawLatex(0.19, 0.18, fnonprompt_promptEnhanced) # lat_label.DrawLatex(0.19, 0.64, str_mu) # lat_label.DrawLatex(0.19, 0.58, str_sigma) - #lat_label.DrawLatex(0.19, 0.24, str_sig_promptEnhanced) + # lat_label.DrawLatex(0.19, 0.24, str_sig_promptEnhanced) if mult_latex[i_pt]: lat_label.DrawLatex(0.19, 0.24, mult_latex[i_pt]) lat_label.DrawLatex(0.19, 0.18, "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus} and charge conj.") - #lat_label.DrawLatex(0.19, 0.16, "#it{L}_{int} = 5 pb^{-1}") + # lat_label.DrawLatex(0.19, 0.16, "#it{L}_{int} = 5 pb^{-1}") legend.Draw() - #c.cd() - #pad2.cd() - #set_object_style(hmass_FDEnhanced, linewidth=3, linecolor=kBlack) - #set_object_style(fit_tot_FDEnhanced, linewidth=3, linecolor=kBlue) - #set_object_style(fit_bkg_FDEnhanced, linewidth=3, linecolor=kRed, linestyle=2) - #set_object_style(fit_refl_FDEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) - #hmass_FDEnhanced.Draw("same") - #fit_bkg_FDEnhanced.Draw("same") - #fit_tot_FDEnhanced.Draw("same") - #fit_refl_FDEnhanced.Draw("same") - - #lat_alice.DrawLatex(0.19, 0.85, 'ALICE Preliminary') - #lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL_FOR_COLL_SYSTEM) - #lat_label.DrawLatex(0.19, 0.79, 'pp, #sqrt{#it{s}} = 13.6 TeV') - #lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL) - #draw_info(lat_label, particle) - #lat_label.DrawLatex(0.19, 0.3, 'Non-prompt enhanced') - #lat_label.DrawLatex(0.7, 0.85, '|#it{y}| < 0.5') - #fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.690 #pm 0.008 (stat.)" # (4, 5) GeV - #fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.70 #pm 0.02 (stat.)" # (0, 1) GeV - #lat_label.DrawLatex(0.19, 
0.18, fnonprompt_FDEnhanced) + # c.cd() + # pad2.cd() + # set_object_style(hmass_FDEnhanced, linewidth=3, linecolor=kBlack) + # set_object_style(fit_tot_FDEnhanced, linewidth=3, linecolor=kBlue) + # set_object_style(fit_bkg_FDEnhanced, linewidth=3, linecolor=kRed, linestyle=2) + # set_object_style(fit_refl_FDEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) + # hmass_FDEnhanced.Draw("same") + # fit_bkg_FDEnhanced.Draw("same") + # fit_tot_FDEnhanced.Draw("same") + # fit_refl_FDEnhanced.Draw("same") + + # lat_alice.DrawLatex(0.19, 0.85, "ALICE Preliminary") + # lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL_FOR_COLL_SYSTEM) + # lat_label.DrawLatex(0.19, 0.79, "pp, #sqrt{#it{s}} = 13.6 TeV") + # lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL) + # draw_info(lat_label, particle) + # lat_label.DrawLatex(0.19, 0.3, "Non-prompt enhanced") + # lat_label.DrawLatex(0.7, 0.85, "|#it{y}| < 0.5") + # fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.690 #pm 0.008 (stat.)" # (4, 5) GeV + # fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.70 #pm 0.02 (stat.)" # (0, 1) GeV + # lat_label.DrawLatex(0.19, 0.18, fnonprompt_FDEnhanced) # lat_label.DrawLatex(0.19, 0.64, str_mu) # lat_label.DrawLatex(0.19, 0.58, str_sigma) - #lat_label.DrawLatex(0.19, 0.24, str_sig_FDEnhanced) + # lat_label.DrawLatex(0.19, 0.24, str_sig_FDEnhanced) - #legend.Draw() + # legend.Draw() - #c.Update() + # c.Update() c.cd() save_canvas(c, particle, pt_mins, pt_maxs, i_pt, mult) diff --git a/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py b/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py index bee0489198..cd86bb3a51 100644 --- a/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py +++ b/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py @@ -43,7 +43,7 @@ kYellow+3, kOrange-5, kMagenta+2, kBlue-6, kCyan+1, kGreen-6] MODELS_COLORS=[kGray+1, kOrange-3, kCyan-2, kRed-9, kAzure-9, kBlue-6, kGreen-6, kOrange-5] 
MODELS_STYLES=[3001, 3004, 3245, 3250, 3244, 3254, 3209, 3245, 3250, 3244, 3254, 3209] - +LEGEND_PER_COLUMN=4 def get_alice_text(cfg): if "alice_text" not in cfg: @@ -68,7 +68,7 @@ def get_alice_text(cfg): def get_legend(x_1, y_1, x_2, y_2, num_hists, header=None): leg = TLegend(x_1, y_1, x_2, y_2) - if num_hists > 4: + if num_hists > LEGEND_PER_COLUMN: leg.SetNColumns(2) if header: leg.SetHeader(header) @@ -126,11 +126,11 @@ def merge_fractions(inputdir, histname, filenames): reshist.SetDirectory(0) for ind, file in enumerate(filenames[1:]): - ind += 1 + binn = ind + 1 with TFile.Open(os.path.join(inputdir, file)) as fin: hist = fin.Get(histname) - reshist.SetBinContent(ind + 1, hist.GetBinContent(ind + 1)) - reshist.SetBinError(ind + 1, hist.GetBinError(ind + 1)) + reshist.SetBinContent(binn + 1, hist.GetBinContent(binn + 1)) + reshist.SetBinError(binn + 1, hist.GetBinError(binn + 1)) return reshist From 36a966f0f34858149806006d145fef8b34d7973f Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 2 Oct 2025 10:37:44 +0200 Subject: [PATCH 32/34] More manual fixes in py files --- .../scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py | 4 ++-- .../adjusting-run2-run3/modify_crosssec_run2.py | 7 +++---- .../scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py | 1 - .../scripts-dhadrons/debugging/check_parquet.py | 1 - 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py index dbe5c0214a..b65b41ce57 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py @@ -54,8 +54,8 @@ def main(): # hist.GetBinContent(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX())) /\ # width_combined)) # hist2.SetBinError(last_bin, - # math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) * 
hist.GetBinWidth(hist.GetNbinsX() - 1) /\ - # width_combined) **2 +\ + # math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) \ + # / width_combined) **2 +\ # (hist.GetBinError(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX()) /\ # width_combined) ** 2)) # print(f"Setting bin {last_bin} low edge {hist2.GetBinLowEdge(last_bin)} " \ diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py index b4aa8b5d19..cac35b6ba4 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py @@ -52,14 +52,13 @@ def main(): bin1 = merge_bins[ind] bin2 = merge_bins[ind] + 1 weight_sum = hist.GetBinWidth(bin1) + hist.GetBinWidth(bin2) - average = hist.GetBinContent(bin1) * hist.GetBinWidth(bin1) +\ - hist.GetBinContent(bin2) * hist.GetBinWidth(bin2) hist2.SetBinContent(binn, (hist.GetBinContent(bin1) * hist.GetBinWidth(bin1) +\ hist.GetBinContent(bin2) * hist.GetBinWidth(bin2)) /\ weight_sum) - hist2.SetBinError(binn, math.sqrt(((hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum) ** 2. +\ - ((hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum) ** 2.)) + hist2.SetBinError(binn, + math.sqrt(((hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum) ** 2. 
+\ + ((hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum) ** 2.)) ind += 1 print(f"New bin {binn} low edge {hist2.GetBinLowEdge(binn)} "\ f"up edge {hist2.GetXaxis().GetBinUpEdge(binn)} "\ diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py index e85a2bd7c5..7539057c85 100644 --- a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py @@ -26,7 +26,6 @@ def main(): args = parser.parse_args() with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout: - objnames = fin.GetListOfKeys() histnames = [key.GetName() for key in fin.GetListOfKeys() if args.histname in key.GetName()] for histname in histnames: hist = fin.Get(histname) diff --git a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py index 823df874c7..997b321538 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py @@ -5,7 +5,6 @@ import pandas as pd """ - file: check_parquet.py brief: Examples of different checks on any parquet file produced by the MLHEP preprocessing steps. 
usage: python check_parquet.py AnalysisResultsReco_fPt1_2.parquet From e098af40d8bec7581b416968b5b3d6129dd9737a Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 2 Oct 2025 11:24:57 +0200 Subject: [PATCH 33/34] Fixed most of pylint errors --- .../debugging/check_parquet.py | 19 +++++++---- .../plot_prompt_fraction_vs_bdt_cuts.py | 8 ++++- ...lot_prompt_fraction_vs_crosssec_configs.py | 6 ++++ .../scripts-dhadrons/multitrial/multitrial.py | 4 +-- .../plot_invmass_fit_dzero_dplus_lambdac.py | 33 +++++++++---------- 5 files changed, 44 insertions(+), 26 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py index 997b321538..45729f1dc2 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py @@ -1,9 +1,3 @@ -import argparse - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd - """ file: check_parquet.py brief: Examples of different checks on any parquet file produced by the MLHEP preprocessing steps. @@ -11,7 +5,17 @@ author: Maja Karwowska , Warsaw University of Technology """ +import argparse + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + + def plot_parquet(df): + """ + An example of plotting a histogram from parquet. + """ print(df["fY"]) print(df["fY"][~np.isinf(df["fY"])]) @@ -26,6 +30,9 @@ def plot_parquet(df): plt.close(fig) def main(): + """ + The main function. 
+ """ parser = argparse.ArgumentParser() parser.add_argument("infile", help="file to process") args = parser.parse_args() diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py index 30875d54fa..6e2f73f724 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py @@ -10,7 +10,7 @@ import json import re -import matplotlib.pyplot as plt +import matplotlib.pyplot as plt # pylint: disable=import-error from ROOT import ( # pylint: disable=import-error,no-name-in-module TFile, gROOT, @@ -18,6 +18,9 @@ def get_fractions(cfg): + """ + Read the prompt fractions from files for different non-prompt cuts. + """ filenames = sorted(glob.glob(cfg["file_pattern"])) fractions = {} fractions_err = {} @@ -39,6 +42,9 @@ def get_fractions(cfg): def main(): + """ + The main function. + """ gROOT.SetBatch(True) parser = argparse.ArgumentParser(description="Arguments to pass") diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py index 08a810b6e1..bf219c9d41 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py @@ -31,6 +31,9 @@ def get_hist_limits(hist, miny = 0.0, maxy = 0.0): + """ + Find the minimum and maximum y-value of the histogram. + """ for binn in range(hist.GetN()): print(f"bin {binn} [{hist.GetPointX(binn)}, "\ f"val {hist.GetPointY(binn)} "\ @@ -43,6 +46,9 @@ def get_hist_limits(hist, miny = 0.0, maxy = 0.0): def main(): + """ + The main function. 
+ """ gROOT.SetBatch(True) gStyle.SetOptStat(0) diff --git a/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py index 59bd46a75c..b4f2aed962 100644 --- a/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py +++ b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py @@ -10,9 +10,9 @@ import json import re -import matplotlib.pyplot as plt +import matplotlib.pyplot as plt # pylint: disable=import-error import numpy as np -from matplotlib.ticker import AutoMinorLocator, MultipleLocator +from matplotlib.ticker import AutoMinorLocator, MultipleLocator # pylint: disable=import-error from ROOT import ( # pylint: disable=import-error,no-name-in-module TFile, gROOT, diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py index 519c72b7d7..c6246dd5c8 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py @@ -33,6 +33,8 @@ kGreen, kRed, ) + +# import or copy the file O2Physics/PWGHF/D2H/Macros/style_formatter.py from style_formatter import set_global_style, set_object_style # enumerator @@ -171,7 +173,7 @@ def draw_info(lat_label, particle): lat_label.DrawLatex(0.19, 0.16, fnonprompt) -def save_canvas(canvas, particle, pt_mins, pt_maxs, i_pt, mult): +def save_canvas(canvas, particle, pt_mins, pt_maxs, ipt, mult): """ Helper method to save canvas according to particle @@ -192,9 +194,9 @@ def save_canvas(canvas, particle, pt_mins, pt_maxs, i_pt, mult): elif particle == LAMBDAC_TO_PK0S: name = "LambdacToPKzeroShort" - mult = f"{mult[i_pt]}_" if mult[i_pt] else "" + mult = f"{mult[ipt]}_" if mult[ipt] else "" for ext in ["pdf", "png", "eps"]: - 
canvas.SaveAs(f"{out_dir}InvMassFit{name}_{mult}Pt_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}.{ext}") + canvas.SaveAs(f"{out_dir}InvMassFit{name}_{mult}Pt_{pt_mins[ipt]:.0f}_{pt_maxs[ipt]:.0f}.{ext}") # pylint: disable=too-many-locals,too-many-statements @@ -222,19 +224,19 @@ def main(particle, i_pt, cfg, batch): print(f"Plotting for {pt_mins[i_pt]}-{pt_maxs[i_pt]}") - name_infile_promptEnhanced, name_infile_FDEnhanced = \ - get_name_infile(particle, f"{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}") + # name_infile_promptEnhanced, name_infile_FDEnhanced = \ + # get_name_infile(particle, f"{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}") file_promptEnhanced = TFile.Open(name_infile_promptEnhanced) # file_FDEnhanced = TFile.Open(name_infile_FDEnhanced) - hmean_promptEnhanced = file_promptEnhanced.Get("hist_means_lc") - hsigma_promptEnhanced = file_promptEnhanced.Get("hist_sigmas_lc") + # hmean_promptEnhanced = file_promptEnhanced.Get("hist_means_lc") + # hsigma_promptEnhanced = file_promptEnhanced.Get("hist_sigmas_lc") # hmean_FDEnhanced = file_FDEnhanced.Get("hRawYieldsMean") # hsigma_FDEnhanced = file_FDEnhanced.Get("hRawYieldsSigma") - hsignal_promptEnhanced = file_promptEnhanced.Get("hist_rawyields_lc") + # hsignal_promptEnhanced = file_promptEnhanced.Get("hist_rawyields_lc") # hsignal_FDEnhanced = file_FDEnhanced.Get("hRawYields") mult_suffix = f"_{mult[i_pt]}" if mult[i_pt] else "" @@ -254,7 +256,7 @@ def main(particle, i_pt, cfg, batch): ymin_promptEnhanced = 0.8*(hmass_promptEnhanced.GetMinimum() - hmass_promptEnhanced.GetBinError(bin_min)) # ymin_FDEnhanced, ymax_FDEnhanced = 0., 1.2*(hmass_FDEnhanced.GetMaximum() + hmass_FDEnhanced.GetBinError(bin_max)) - title = f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}};{title_xaxis};" \ + title = f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}};{title_xaxis};" \ # pylint: disable=unnecessary-semicolon f"Counts per {width_bin*GEV2MEV:.0f} MeV/#it{{c}}^{{2}}" # fit_tot_promptEnhanced = 
file_promptEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") @@ -269,14 +271,11 @@ def main(particle, i_pt, cfg, batch): # fit_bkg_FDEnhanced = file_FDEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") # fit_refl_FDEnhanced = file_FDEnhanced.Get(f"freflect;13") - print("Calculating mean") - mean_promptEnhanced, err_mean_promptEnhanced = get_h_value_err(hmean_promptEnhanced, i_pt + 1, True) + # mean_promptEnhanced, err_mean_promptEnhanced = get_h_value_err(hmean_promptEnhanced, i_pt + 1, True) # mean_FDEnhanced, err_mean_FDEnhanced = get_h_value_err(hmean_FDEnhanced, 13, True) - print("Calculating sigma") - sigma_promptEnhanced, _ = get_h_value_err(hsigma_promptEnhanced, i_pt + 1, True) + # sigma_promptEnhanced, _ = get_h_value_err(hsigma_promptEnhanced, i_pt + 1, True) # sigma_FDEnhanced, _ = get_h_value_err(hsigma_FDEnhanced, 13, True) - print("Calculating yield") - signal_promptEnhanced, err_signal_promptEnhanced = get_h_value_err(hsignal_promptEnhanced, i_pt + 1) + # signal_promptEnhanced, err_signal_promptEnhanced = get_h_value_err(hsignal_promptEnhanced, i_pt + 1) # signal_FDEnhanced, err_signal_FDEnhanced = get_h_value_err(hsignal_FDEnhanced, 13) lat_alice = TLatex() @@ -409,5 +408,5 @@ def main(particle, i_pt, cfg, batch): configuration = yaml.load(yml_cfg, yaml.FullLoader) print("Loading analysis configuration: Done!") - for i_pt in range(len(configuration["pp13.6TeVFD"]["PtMin"])): - main(particle=LAMBDAC_TO_PKPI, i_pt=i_pt, cfg=configuration, batch=args.batch) + for ipt in range(len(configuration["pp13.6TeVFD"]["PtMin"])): + main(particle=LAMBDAC_TO_PKPI, i_pt=ipt, cfg=configuration, batch=args.batch) From b6194511587eb1ca2477620b4800c828c526815a Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 2 Oct 2025 16:46:53 +0200 Subject: [PATCH 34/34] Completed fixes --- .../scripts-dhadrons/debugging/check_parquet.py | 2 +- .../debugging/plot_prompt_fraction_vs_bdt_cuts.py | 2 +- 
.../scripts-dhadrons/multitrial/multitrial.py | 4 ++-- .../plot_invmass_fit_dzero_dplus_lambdac.py | 14 +++++++------- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py index 45729f1dc2..b0c3b4b1db 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py @@ -7,7 +7,7 @@ import argparse -import matplotlib.pyplot as plt +import matplotlib.pyplot as plt # pylint: disable=import-error import numpy as np import pandas as pd diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py index 6e2f73f724..45bc981009 100644 --- a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py @@ -10,7 +10,7 @@ import json import re -import matplotlib.pyplot as plt # pylint: disable=import-error +import matplotlib.pyplot as plt # pylint: disable=import-error from ROOT import ( # pylint: disable=import-error,no-name-in-module TFile, gROOT, diff --git a/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py index b4f2aed962..5f8fc0b684 100644 --- a/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py +++ b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py @@ -10,9 +10,9 @@ import json import re -import matplotlib.pyplot as plt # pylint: disable=import-error +import matplotlib.pyplot as plt # pylint: disable=import-error import numpy as np -from matplotlib.ticker import AutoMinorLocator, MultipleLocator # pylint: disable=import-error +from matplotlib.ticker import AutoMinorLocator, MultipleLocator # pylint: 
disable=import-error from ROOT import ( # pylint: disable=import-error,no-name-in-module TFile, gROOT, diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py index c6246dd5c8..936880d6d4 100644 --- a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py @@ -173,7 +173,7 @@ def draw_info(lat_label, particle): lat_label.DrawLatex(0.19, 0.16, fnonprompt) -def save_canvas(canvas, particle, pt_mins, pt_maxs, ipt, mult): +def save_canvas(canvas, particle, pt_mins, pt_maxs, ind_pt, mult): """ Helper method to save canvas according to particle @@ -194,9 +194,9 @@ def save_canvas(canvas, particle, pt_mins, pt_maxs, ipt, mult): elif particle == LAMBDAC_TO_PK0S: name = "LambdacToPKzeroShort" - mult = f"{mult[ipt]}_" if mult[ipt] else "" + mult = f"{mult[ind_pt]}_" if mult[ind_pt] else "" for ext in ["pdf", "png", "eps"]: - canvas.SaveAs(f"{out_dir}InvMassFit{name}_{mult}Pt_{pt_mins[ipt]:.0f}_{pt_maxs[ipt]:.0f}.{ext}") + canvas.SaveAs(f"{out_dir}InvMassFit{name}_{mult}Pt_{pt_mins[ind_pt]:.0f}_{pt_maxs[ind_pt]:.0f}.{ext}") # pylint: disable=too-many-locals,too-many-statements @@ -224,8 +224,8 @@ def main(particle, i_pt, cfg, batch): print(f"Plotting for {pt_mins[i_pt]}-{pt_maxs[i_pt]}") - # name_infile_promptEnhanced, name_infile_FDEnhanced = \ - # get_name_infile(particle, f"{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}") + name_infile_promptEnhanced, name_infile_FDEnhanced = \ + get_name_infile(particle, f"{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}") file_promptEnhanced = TFile.Open(name_infile_promptEnhanced) # file_FDEnhanced = TFile.Open(name_infile_FDEnhanced) @@ -256,8 +256,8 @@ def main(particle, i_pt, cfg, batch): ymin_promptEnhanced = 0.8*(hmass_promptEnhanced.GetMinimum() - 
hmass_promptEnhanced.GetBinError(bin_min)) # ymin_FDEnhanced, ymax_FDEnhanced = 0., 1.2*(hmass_FDEnhanced.GetMaximum() + hmass_FDEnhanced.GetBinError(bin_max)) - title = f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}};{title_xaxis};" \ # pylint: disable=unnecessary-semicolon - f"Counts per {width_bin*GEV2MEV:.0f} MeV/#it{{c}}^{{2}}" + title = f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}};{title_xaxis};" \ + f"Counts per {width_bin*GEV2MEV:.0f} MeV/#it{{c}}^{{2}}" # pylint: disable=unnecessary-semicolon # fit_tot_promptEnhanced = file_promptEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") fit_tot_promptEnhanced = file_promptEnhanced.Get(f"total_func_lc_pt{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}")