diff --git a/machine_learning_hep/multitrial/README.md b/machine_learning_hep/multitrial/README.md
new file mode 100644
index 0000000000..8437924336
--- /dev/null
+++ b/machine_learning_hep/multitrial/README.md
@@ -0,0 +1,38 @@
+# Multitrial systematics with MLHEP
+
+## Generate configurations (MLHEP yml databases) for each trial
+
+File: `run-mlhep-fitter-multitrial.py`
+Usage: `python run-mlhep-fitter-multitrial.py database_file in_db_dir out_db_dir mlhep_results_dir_pattern`
+
+Arguments:
+- `database_file`: filename of the template database without the .yml extension, e.g., `database_ml_parameters_LcToPKPi`
+- `in_db_dir`: path to the directory containing the database, e.g., `data/data_run3`
+- `out_db_dir`: path to the directory for output multitrial databases, e.g., `multitrial_db`
+- `mlhep_results_dir_pattern`: prefix of output directory name for fit results; for each trial, the trial name is appended to the directory name, and the resulting directory name is written under `Run3analysis/{data,mc}/prefix_dir_res` in the database file
+
+Adjust `DIR_PATH` in the script. It is the path to the base directory where you store directories with MLHEP results.
+
+This script needs to be run only once to generate databases.
+
+Currently, the trials are hardcoded in the Python script. To add or modify a trial, you need to adjust the `BASE_TRIALS` variable and the `process_trial` function.
+
+## Get mass fits for each trial
+
+File: `run-mlhep-fitter-multitrial.sh`
+Usage: `./run-mlhep-fitter-multitrial.sh`
+
+The `submission/analyzer.yml` config is used.
+The script automates running MLHEP for each trial. Mass histograms are copied before each MLHEP invocation, so that only the quick fit step needs to be activated in `submission/analyzer.yml`.
+
+Adjust the variables before the `for` loop.
+The script includes also a call to `run-mlhep-fitter-multitrial.py`, which can be commented out. In this case, make sure to pass the same `OUT_DB_DIR`, `DB_PATTERN`, `DIR_PATTERN` values to the two scripts.
+
+Before running, you need to create the directory structure for each MLHEP output. You can, for example, run the `.sh` script with the `cp` lines commented out. Then, MLHEP creates directories for each trial and fails quietly. Next, run the script with `cp` lines uncommented, and you will get the final output.
+
+## Plot multitrial results
+
+Files: `multitrial.py`, `config_multitrial.json`
+Usage: `python3 multitrial.py config_multitrial.json`
+
+Adjust the sample `config_multitrial.json` to your needs.
diff --git a/machine_learning_hep/multitrial/config_multitrial.json b/machine_learning_hep/multitrial/config_multitrial.json
new file mode 100644
index 0000000000..35aa25a32e
--- /dev/null
+++ b/machine_learning_hep/multitrial/config_multitrial.json
@@ -0,0 +1,20 @@
+{
+ "file_pattern": "/data8/majak/MLHEP/results-24022025-newtrain-multitrial-prompt*/LHC23pp_pass4/Results/resultsdatatot/yields_LcpKpi_Run3analysis.root",
+ "_file_pattern": "regex pattern for all multitrial fit files; note the asterisk to match all trial suffixes",
+ "dir_pattern": "results-24022025-newtrain-multitrial-prompt",
+ "_dir_pattern": "the base directory prefix from the file pattern above",
+ "histoname": "hyields0",
+ "_histoname": "histogram with mass fit",
+ "sel_histoname": "hchi0",
+ "_sel_histoname": "histogram for filtering the results",
+ "selection": "lambda x : x < 5.0",
+ "_selection": "filter to apply with sel_histoname, e.g., chi < 5",
+ "pt_bins_min": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16],
+ "pt_bins_max": [2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24],
+ "central_trial": "",
+ "_central_trial": "suffix of the directory with the central trial",
+ "outdir": "/data8/majak/multitrial",
+ "_outdir": "output directory",
+ "outfile": "result-prompt-chi5",
+ "_outfile": "output file pattern"
+}
diff --git a/machine_learning_hep/multitrial/multitrial.py b/machine_learning_hep/multitrial/multitrial.py
new file mode 100644
index 0000000000..593a99dd31
--- /dev/null
+++ b/machine_learning_hep/multitrial/multitrial.py
@@ -0,0 +1,187 @@
+# pylint: disable=missing-function-docstring, invalid-name
+"""
+file: multitrial.py
+brief: Plot multitrial systematics based on multiple fit trials, one file per trial.
+usage: python3 multitrial.py config_multitrial.json
+author: Maja Karwowska , Warsaw University of Technology
+"""
+import argparse
+import glob
+import json
+import re
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MultipleLocator, AutoMinorLocator
+
+from ROOT import ( # pylint: disable=import-error,no-name-in-module
+ TFile,
+ gROOT,
+)
+
+
def plot_text_box(ax, text):
    """Draw *text* in a black-edged, unfilled box anchored to the top-right of *ax*.

    Coordinates are axes-fraction units (transform=ax.transAxes).
    """
    # Fix: the original passed both verticalalignment="top" and its alias
    # va="top" -- the redundant alias is removed.
    ax.text(0.98, 0.97, text,
            horizontalalignment="right", verticalalignment="top",
            fontsize=40, transform=ax.transAxes,
            bbox={"edgecolor": "black", "fill": False})
+
+
def get_yields(cfg):
    """Collect yields, errors, trial names, and chi2 per pt bin from all trial files.

    Returns a tuple (yields, yields_err, trials, chis) of dicts keyed by
    "{pt_min}_{pt_max}". Trials failing the configured chi2 selection, or
    with a yield below 1.0, are rejected for that pt bin.
    """
    filenames = sorted(glob.glob(cfg["file_pattern"]),
                       key=lambda filename: re.split("/", filename)[-2])
    yields = {}
    yields_err = {}
    trials = {}
    chis = {}
    for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]):
        yields[f"{pt_bin_min}_{pt_bin_max}"] = []
        yields_err[f"{pt_bin_min}_{pt_bin_max}"] = []
        trials[f"{pt_bin_min}_{pt_bin_max}"] = []
        chis[f"{pt_bin_min}_{pt_bin_max}"] = []
    # The selection string comes from a local, trusted config file; eval on
    # untrusted input would be unsafe. Hoisted out of the loops so it is
    # compiled once instead of once per bin.
    selection = eval(cfg["selection"])  # pylint: disable=eval-used
    for filename in filenames:
        # Fix: the original f-strings had lost their {filename} placeholder.
        print(f"Reading {filename}")
        with TFile.Open(filename) as fin:
            hist = fin.Get(cfg["histoname"])
            hist_sel = fin.Get(cfg["sel_histoname"])
            if hist.ClassName() != "TH1F":
                print(f"No hist in {filename}")
            if hist_sel.ClassName() != "TH1F":
                print(f"No hist sel in {filename}")
            # Hard-coded path depth for the MLHEP directory layout;
            # use [-2] for the D2H fitter layout instead.
            dirname = re.split("/", filename)[4]
            trial_name = dirname.replace(cfg["dir_pattern"], "")
            for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"],
                                                               cfg["pt_bins_max"])):
                pt_string = f"{pt_bin_min}_{pt_bin_max}"
                if selection(hist_sel.GetBinContent(ind + 1)) \
                        and hist.GetBinContent(ind + 1) > 1.0:
                    yields[pt_string].append(hist.GetBinContent(ind + 1))
                    yields_err[pt_string].append(hist.GetBinError(ind + 1))
                    trials[pt_string].append(trial_name)
                    chis[pt_string].append(hist_sel.GetBinContent(ind + 1))
                else:
                    print(f"Rejected: {hist_sel.GetBinContent(ind + 1)} {trial_name} "\
                          f"pt: {pt_bin_min}, {pt_bin_max}")
                    if hist.GetBinContent(ind + 1) < 1.0:
                        print("Yield 0")
    return yields, yields_err, trials, chis
+
+
def prepare_figure(y_label, ticks):
    """Create a styled single-axes figure; return the (figure, axes) pair.

    ticks sets the major-tick spacing of the x axis (trial index).
    """
    fig = plt.figure(figsize=(20, 15))
    ax = plt.subplot(1, 1, 1)
    ax.set_xlabel("Trial #", fontsize=20)
    ax.set_ylabel(y_label, fontsize=20)
    # Shared tick styling first, then major/minor specifics.
    ax.tick_params(which="both", width=2.5, direction="in")
    ax.tick_params(which="major", labelsize=20, length=15)
    ax.tick_params(which="minor", length=7)
    ax.xaxis.set_major_locator(MultipleLocator(ticks))
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_minor_locator(AutoMinorLocator(5))
    return fig, ax
+
+
def set_ax_limits(ax, pt_string, values):
    """Clamp the y-range to mean +/- 10 sigma of *values* when auto-scaling is excessive."""
    ax.margins(0.01, 0.2)
    arr = np.array(values, dtype="float32")
    mean, std = np.mean(arr), np.std(arr)
    low, high = ax.get_ylim()
    # Narrow only when the automatic range spans more than 30 standard deviations.
    if high - low > 30.0 * std:
        ax.set_ylim(mean - 10.0 * std, mean + 10.0 * std)
        print(f"{pt_string} narrowing down the axis to {ax.get_ylim()}")
+
+
def plot_trial_line(ax, central_trial_ind):
    """Mark the central trial with a vertical dashed magenta line, preserving y-limits."""
    low, high = ax.get_ylim()
    ys = np.linspace(low, high, 100)
    ax.plot([central_trial_ind] * len(ys), ys, c="m", ls="--", linewidth=4.0)
    # Plotting may trigger autoscaling; restore the original limits.
    ax.set_ylim(low, high)
+
+
def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_string,
                       central_trial_ind, central_yield):
    """Plot per-trial raw yields with the central value line and its error band."""
    fig, ax = prepare_figure("Raw yield", 100)
    trial_inds = range(len(trials))
    ax.errorbar(trial_inds, yields, yerr=yields_err,
                fmt="o", c="b", elinewidth=2.5, linewidth=4.0)
    set_ax_limits(ax, pt_string, yields)
    # Horizontal reference at the central yield plus a +/- stat-error band.
    ref_line = np.array([central_yield] * len(trial_inds), dtype="float32")
    ref_err = np.array([yields_err[central_trial_ind]] * len(trial_inds), dtype="float32")
    ax.plot(trial_inds, ref_line, c="orange", ls="--", linewidth=4.0)
    ax.fill_between(trial_inds, ref_line - ref_err, ref_line + ref_err,
                    facecolor="orange", edgecolor="none", alpha=0.3)
    plot_trial_line(ax, central_trial_ind)
    plot_text_box(ax, plot_pt_string)
    fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_yields_trials_{pt_string}.png',
                bbox_inches='tight')
    plt.close()
+
+
def plot_chis(chis, cfg, pt_string, plot_pt_string):
    """Scatter the fit chi2/ndf of every trial for one pt bin and save the figure."""
    fig, ax = prepare_figure("Chi2/ndf", 100)
    ax.scatter(range(len(chis)), chis, c="b", marker="o")
    set_ax_limits(ax, pt_string, chis)
    plot_text_box(ax, plot_pt_string)
    fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png',
                bbox_inches='tight')
    plt.close()
+
+
def plot_yields_distr(yields, cfg, pt_string, plot_pt_string, central_trial_ind, central_yield):
    """Histogram trial/central yield ratios and annotate spread statistics.

    The central trial itself is excluded from the ratio histogram and the
    RMSE; mean and std dev are computed over all trials.
    """
    plt.figure(figsize=(20, 15))
    ax = plt.subplot(1, 1, 1)
    ax.set_xlabel("Ratio", fontsize=20)
    ax.tick_params(labelsize=20, length=7, width=2.5)
    others = [y for ind, y in enumerate(yields) if ind != central_trial_ind]
    ax.hist([y / central_yield for y in others], color="b", linewidth=4.0)
    mean = np.mean(yields)
    std_dev = np.std(yields)
    rel_diffs = np.array([(y - central_yield) / central_yield for y in others],
                         dtype="float32")
    rmse = np.sqrt(np.mean(rel_diffs**2))
    plot_text_box(ax, f"{plot_pt_string}\n"
                      f"mean: {mean:.0f}\n"
                      f"std dev: {std_dev:.2f}\n"
                      f"RMSE: {rmse:.2f}\n"
                      f"#trials: {len(yields)}")
    plt.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_distr_{pt_string}.png', bbox_inches='tight')
    plt.close()
+
+
def main():
    """Entry point: read the JSON config, collect yields, and produce all plots.

    For each pt bin, writes the accepted trial names to a text file; plots
    are produced only when the central trial survived the selection.
    """
    gROOT.SetBatch(True)

    parser = argparse.ArgumentParser(description="Arguments to pass")
    parser.add_argument("config", help="JSON config file")
    args = parser.parse_args()

    with open(args.config, encoding="utf8") as fil:
        cfg = json.load(fil)

    yields, yields_err, trials, chis = get_yields(cfg)

    for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]):
        plot_pt_string = f"${pt_bin_min} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {pt_bin_max}$"
        pt_string = f"{pt_bin_min}_{pt_bin_max}"

        try:
            # list.index raises ValueError when the central trial was
            # rejected for this pt bin.
            central_trial_ind = trials[pt_string].index(cfg["central_trial"])
        except ValueError:
            # Fix: the original bare "except: pass" silently swallowed every
            # error; report the expected failure mode and keep going.
            print(f"Central trial missing for {pt_string}, skipping plots")
        else:
            central_yield = yields[pt_string][central_trial_ind]
            try:
                plot_yields_trials(yields[pt_string], yields_err[pt_string],
                                   trials[pt_string], cfg, pt_string, plot_pt_string,
                                   central_trial_ind, central_yield)
                plot_yields_distr(yields[pt_string], cfg, pt_string, plot_pt_string,
                                  central_trial_ind, central_yield)
                plot_chis(chis[pt_string], cfg, pt_string, plot_pt_string)
            except Exception as exc:  # pylint: disable=broad-except
                # Best-effort plotting: log the failure instead of hiding it.
                print(f"Plotting failed for {pt_string}: {exc}")

        with open(f'{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt',
                  "w", encoding="utf-8") as ftext:
            for trial in trials[pt_string]:
                ftext.write(f"{trial}\n")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py
new file mode 100644
index 0000000000..d8307def4e
--- /dev/null
+++ b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py
@@ -0,0 +1,160 @@
+# pylint: disable=missing-function-docstring, invalid-name
+"""
+file: run-mlhep-fitter-multitrial.py
+brief: Prepare MLHEP database files for different fit configurations for multitrial systematics.
+usage: python run-mlhep-fitter-multitrial.py database_lc data/data_run3 trial_configs_dir mlhep_results_dir_pattern
+author: Maja Karwowska , Warsaw University of Technology
+"""
+
+import argparse
+import re
+import shutil
+import yaml
+
+SIGMA02="0.007, 0.007, 0.013"
+SIGMA23="0.007, 0.007, 0.013"
+SIGMA34="0.007, 0.007, 0.012"
+SIGMA45="0.008, 0.008, 0.016"
+SIGMA56="0.010, 0.010, 0.016"
+SIGMA67="0.008, 0.008, 0.017"
+SIGMA78="0.012, 0.012, 0.018"
+SIGMA810="0.015, 0.012, 0.018"
+SIGMA1012="0.010, 0.010, 0.022"
+SIGMA1216="0.016, 0.016, 0.029"
+SIGMA1624="0.016, 0.016, 0.029"
+FREE_SIGMAS=[SIGMA02, SIGMA23, SIGMA34, SIGMA45, SIGMA56, SIGMA67, SIGMA78,
+ SIGMA810, SIGMA1012, SIGMA1216, SIGMA1624]
+
+CENTRAL_TRIAL=""
+
+BASE_TRIALS = (
+ ["alpha-15%", "alpha+15%"],
+ ["n-15%", "n+15%"],
+ ["rebin-1", "rebin+1"],
+ ["free-sigma"],
+ ["poly3"],
+ ["narrow", "narrow2", "wide", "wide2"]
+)
+
+DIR_PATH = "/data8/majak/MLHEP"
+
def generate_trials(trial_classes):
    """Return all trial-name suffixes taking at most one trial per class.

    Starts from "" (the central trial). For each class, every existing
    combination is extended with "_<trial>" for each trial in that class,
    and the extensions are appended to the result.
    """
    suffixes = [""]
    for variations in trial_classes:
        extended = [f"{prefix}_{name}"
                    for prefix in suffixes for name in variations]
        suffixes += extended
    return suffixes
+
def replace_with_reval(var, in_str, frac):
    """Rescale the first numeric value of "var[...]" in *in_str* by *frac*.

    The matched specification is rewritten as "var[v, v]" with v rounded to
    three decimals, effectively fixing the parameter to the rescaled value.
    """
    pattern = fr"{var}\[([0-9.]*), .*?\]"
    first_value = re.findall(pattern, in_str)[0]
    scaled = round(float(first_value) * frac, 3)
    return re.sub(pattern, f"{var}[{scaled}, {scaled}]", in_str)
+
def process_trial(trial, ana_cfg, data_cfg, mc_cfg):
    """Apply a single trial variation in place to the fit configuration.

    :param trial: one trial name (an element of a BASE_TRIALS class).
        Matching is by substring, so the empty string matches no branch
        and is a no-op.
    :param ana_cfg: full Run3analysis config dict (holds n_rebin and mass_roofit).
    :param data_cfg: per-pt fit configs without a "level" key (data), mutated in place.
    :param mc_cfg: per-pt fit configs with level == "mc", mutated in place.

    NOTE: the branches are ordered so that "narrow2"/"wide2" are tested
    before "narrow"/"wide"; the substring check would otherwise route the
    "*2" trials to the wrong branch.
    """
    fit_cfg = ana_cfg["mass_roofit"]
    if "alpha-15%" in trial:
        # Rescale the alpha1 parameter of the MC signal function by -15%.
        print("Processing alpha-15%")
        for pt_cfg in mc_cfg:
            sig_fn = pt_cfg["components"]["sig"]["fn"]
            pt_cfg["components"]["sig"]["fn"] = replace_with_reval("alpha1", sig_fn, 0.85)
    elif "alpha+15%" in trial:
        print("Processing alpha+15%")
        for pt_cfg in mc_cfg:
            sig_fn = pt_cfg["components"]["sig"]["fn"]
            pt_cfg["components"]["sig"]["fn"] = replace_with_reval("alpha1", sig_fn, 1.15)
    elif "n-15%" in trial:
        # Same +/-15% variation for the n1 parameter of the signal function.
        print("Processing n-15%")
        for pt_cfg in mc_cfg:
            sig_fn = pt_cfg["components"]["sig"]["fn"]
            pt_cfg["components"]["sig"]["fn"] = replace_with_reval("n1", sig_fn, 0.85)
    elif "n+15%" in trial:
        print("Processing n+15%")
        for pt_cfg in mc_cfg:
            sig_fn = pt_cfg["components"]["sig"]["fn"]
            pt_cfg["components"]["sig"]["fn"] = replace_with_reval("n1", sig_fn, 1.15)
    elif "rebin-1" in trial:
        # Shift every pt bin's rebin factor down by one unit.
        print("Processing rebin-1")
        ana_cfg["n_rebin"] = [rebin - 1 for rebin in ana_cfg["n_rebin"]]
    elif "rebin+1" in trial:
        print("Processing rebin+1")
        ana_cfg["n_rebin"] = [rebin + 1 for rebin in ana_cfg["n_rebin"]]
    elif "free-sigma" in trial:
        # Replace the fixed sigma_g1 with the per-pt ranges from FREE_SIGMAS,
        # letting the signal width float in the fit.
        print("Processing free-sigma")
        for pt_cfg, free_sigma in zip(mc_cfg, FREE_SIGMAS):
            sig_fn = pt_cfg["components"]["sig"]["fn"]
            pt_cfg["components"]["sig"]["fn"] = re.sub(r"sigma_g1\[(.*?)\]",
                                                f"sigma_g1[{free_sigma}]", sig_fn)
    elif "poly3" in trial:
        # Extend the data background polynomial with a free a3 term.
        print("Processing poly3")
        for pt_cfg in data_cfg:
            bkg_fn = pt_cfg["components"]["bkg"]["fn"]
            pt_cfg["components"]["bkg"]["fn"] = re.sub(r"a2\[(.*?)\]",
                                                r"a2[\1], a3[-1e8, 1e8]", bkg_fn)
    elif "narrow2" in trial:
        # Shrink the fit range by 0.02 on each side (0.01 for "narrow").
        print("Processing narrow2")
        for pt_cfg in fit_cfg:
            pt_cfg["range"] = [pt_cfg["range"][0] + 0.02, pt_cfg["range"][1] - 0.02]
    elif "narrow" in trial:
        print("Processing narrow")
        for pt_cfg in fit_cfg:
            pt_cfg["range"] = [pt_cfg["range"][0] + 0.01, pt_cfg["range"][1] - 0.01]
    elif "wide2" in trial:
        # Widen the fit range, clamped to [2.10, 2.47].
        print("Processing wide2")
        for pt_cfg in fit_cfg:
            pt_cfg["range"] = [max(2.10, pt_cfg["range"][0] - 0.02),
                               min(2.47, pt_cfg["range"][1] + 0.02)]
    elif "wide" in trial:
        print("Processing wide")
        for pt_cfg in fit_cfg:
            pt_cfg["range"] = [max(2.10, pt_cfg["range"][0] - 0.01),
                               min(2.47, pt_cfg["range"][1] + 0.01)]
+
+
def main(db, db_dir, out_db_dir, resdir_pattern):
    """Generate one MLHEP database per trial combination.

    For each combination from generate_trials(BASE_TRIALS), the template
    database is copied to out_db_dir, the per-trial modifications are applied
    in place, and the result directory is pointed at
    DIR_PATH/<resdir_pattern><combination>.
    """
    db_ext = f"{db}.yml"
    db_path = f"{db_dir}/{db_ext}"
    combinations = generate_trials(BASE_TRIALS)

    for comb in combinations:
        print(comb)

        cur_cfg = f"{out_db_dir}/{db}{comb}.yml"
        shutil.copy2(db_path, cur_cfg)

        with open(cur_cfg, encoding="utf-8") as stream:
            cfg = yaml.safe_load(stream)

        ana_cfg = cfg["LcpKpi"]["analysis"]["Run3analysis"]
        fit_cfg = ana_cfg["mass_roofit"]
        # MC fit entries are tagged with level: mc; untagged entries are data.
        mc_cfg = [fit_params for fit_params in fit_cfg
                  if fit_params.get("level") == "mc"]
        data_cfg = [fit_params for fit_params in fit_cfg if "level" not in fit_params]

        resdir = f"{resdir_pattern}{comb}"
        respath = f"{DIR_PATH}/{resdir}/"
        ana_cfg["data"]["prefix_dir_res"] = respath
        ana_cfg["mc"]["prefix_dir_res"] = respath

        # comb starts with "_", so split() yields a leading "" which
        # process_trial treats as a no-op.
        for trial in comb.split("_"):
            process_trial(trial, ana_cfg, data_cfg, mc_cfg)

        with open(cur_cfg, "w", encoding="utf-8") as stream:
            yaml.dump(cfg, stream, sort_keys=False, width=10000, default_flow_style=None)
+
+
if __name__ == "__main__":
    # CLI: python run-mlhep-fitter-multitrial.py db db_dir out_db_dir resdir_pattern
    parser = argparse.ArgumentParser(description="Arguments to pass")
    parser.add_argument("db", help="MLHEP database without extension")
    parser.add_argument("db_dir", help="path to directory with MLHEP database")
    parser.add_argument("out_db_dir", help="path to output directory for generated MLHEP databases")
    parser.add_argument("resdir", help="MLHEP resdir pattern")
    args = parser.parse_args()

    main(args.db, args.db_dir, args.out_db_dir, args.resdir)
diff --git a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh
new file mode 100644
index 0000000000..4006d0790a
--- /dev/null
+++ b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh
@@ -0,0 +1,41 @@
#!/bin/bash
# Run the MLHEP fitter once per multitrial database in OUT_DB_DIR.
# See the README in this directory for the full workflow.

DB_PATTERN="database_ml_parameters_LcToPKPi_multiclass_fdd" # Original database to be used as template
DB_DIR="data/data_run3"
OUT_DB_DIR="multitrial-db" # Directory to store multitrial databases only
ext=".yml"

DIR_PATH="/data8/majak/MLHEP"
DIR_PATTERN="results-24022025-newtrain-multitrial-prompt" # Prefix of output directory for fit results

# Paths to input masshistos to fit
BASE_DIR="/data8/majak/MLHEP/results-24022025-newtrain-ptshape-prompt"
DATA_HIST="LHC23pp/Results/resultsdatatot/masshisto.root"
MC_HIST="LHC24pp_mc/Results/resultsmctot/masshisto.root"

# Run this only once to generate databases
# Then, you can comment this out if you don't change the *.py file
# The output analysis dir is set in databases to DIR_PATTERN + suffix with trial name
python run-mlhep-fitter-multitrial.py "${DB_PATTERN}" "${DB_DIR}" "${OUT_DB_DIR}" "${DIR_PATTERN}" || exit 1

for db in "${OUT_DB_DIR}"/*.yml ; do
    db_basename=$(basename "${db}")
    # Strip the .yml suffix, then the template-name prefix, leaving only the
    # trial suffix (empty for the central trial).
    db_basename_no_ext=${db_basename%%"${ext}"}
    echo "${db_basename_no_ext}"
    suffix=${db_basename_no_ext##"${DB_PATTERN}"}
    echo "suffix: ${suffix}"
    # Per-trial result directory; must match what the .py script wrote into
    # the database's prefix_dir_res fields.
    RESPATH="${DIR_PATH}/${DIR_PATTERN}${suffix}"
    echo "respath: ${RESPATH}"

    # Copy base masshistos so as to skip the masshisto step
    # Only the fit step needs to be activated in analyzer.yml
    # You need first to create the directory trees
    cp "${BASE_DIR}/${DATA_HIST}" "${RESPATH}/${DATA_HIST}"
    cp "${BASE_DIR}/${MC_HIST}" "${RESPATH}/${MC_HIST}"

    mlhep "logfile_${db_basename}.log" \
        -a Run3analysis \
        --run-config submission/analyzer.yml \
        --database-analysis "${db}"
done
+