From ff28a60bf1bdc0f3cde5cde919f20c59ad1e8936 Mon Sep 17 00:00:00 2001
From: saganatt <8majak8@gmail.com>
Date: Tue, 15 Jul 2025 11:59:58 +0200
Subject: [PATCH 1/5] Add multitrial scripts

---
 machine_learning_hep/multitrial.py            | 188 ++++++++++++++++++
 .../run-mlhep-fitter-multitrial.py            | 158 +++++++++++++++
 .../run-mlhep-fitter-multitrial.sh            |  47 +++++
 3 files changed, 393 insertions(+)
 create mode 100644 machine_learning_hep/multitrial.py
 create mode 100644 machine_learning_hep/run-mlhep-fitter-multitrial.py
 create mode 100644 machine_learning_hep/run-mlhep-fitter-multitrial.sh

diff --git a/machine_learning_hep/multitrial.py b/machine_learning_hep/multitrial.py
new file mode 100644
index 0000000000..38119c95fa
--- /dev/null
+++ b/machine_learning_hep/multitrial.py
@@ -0,0 +1,188 @@
+# pylint: disable=missing-function-docstring, invalid-name
+"""
+file: multitrial.py
+brief: Plot multitrial systematics based on multiple fit trials, one file per trial.
+usage: python3 multitrial.py config_multitrial.json
+author: Maja Karwowska <mkarwowska@cern.ch>, Warsaw University of Technology
+"""
+import argparse
+import glob
+import json
+import re
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MultipleLocator, AutoMinorLocator
+
+from ROOT import (  # pylint: disable=import-error,no-name-in-module
+    TFile,
+    gROOT,
+)
+
+
+def plot_text_box(ax, text):
+    ax.text(0.98, 0.97, text,
+            horizontalalignment="right", verticalalignment="top",
+            fontsize=40, va="top", transform=ax.transAxes,
+            bbox={"edgecolor": "black", "fill": False})
+
+
+def get_yields(cfg):
+    filenames = sorted(glob.glob(cfg["file_pattern"]),
+                       key=lambda filename: re.split("/", filename)[-2])
+    yields = {}
+    yields_err = {}
+    trials = {}
+    chis = {}
+    for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]):
+        yields[f"{pt_bin_min}_{pt_bin_max}"] = []
+        yields_err[f"{pt_bin_min}_{pt_bin_max}"] = []
+        trials[f"{pt_bin_min}_{pt_bin_max}"] = []
+        chis[f"{pt_bin_min}_{pt_bin_max}"] = []
+    for filename in filenames:
+        print(f"Reading {filename}")
+        with TFile.Open(filename) as fin:
+            hist = fin.Get(cfg["histoname"])
+            hist_sel = fin.Get(cfg["sel_histoname"])
+            if hist.ClassName() != "TH1F":
+                print(f"No hist in {filename}")
+            if hist_sel.ClassName() != "TH1F":
+                print(f"No hist sel in {filename}")
+            dirname = re.split("/", filename)[4] # [-2] for D2H fitter
+            trial_name = dirname.replace(cfg["dir_pattern"], "")
+            for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"],
+                                                               cfg["pt_bins_max"])):
+                if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \
+                        and hist.GetBinContent(ind + 1) > 1.0 :
+                    yields[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinContent(ind + 1))
+                    yields_err[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinError(ind + 1))
+                    trials[f"{pt_bin_min}_{pt_bin_max}"].append(trial_name)
+                    chis[f"{pt_bin_min}_{pt_bin_max}"].append(hist_sel.GetBinContent(ind + 1))
+                else:
+                    print(f"Rejected: {hist_sel.GetBinContent(ind + 1)} {trial_name} "\
+                          f"pt: {pt_bin_min}, {pt_bin_max}")
+                    if hist.GetBinContent(ind + 1) < 1.0:
+                        print("Yield 0")
+    return yields, yields_err, trials, chis
+
+
+def prepare_figure(cfg, y_label, ticks):
+    fig = plt.figure(figsize=(20, 15))
+    ax = plt.subplot(1, 1, 1)
+    ax.set_xlabel(cfg["x_axis"], fontsize=20)
+    ax.set_ylabel(y_label, fontsize=20)
+    ax.tick_params(which="both", width=2.5, direction="in")
+    ax.tick_params(which="major", labelsize=20, length=15)
+    ax.tick_params(which="minor", length=7)
+    ax.xaxis.set_major_locator(MultipleLocator(ticks))
+    ax.xaxis.set_minor_locator(AutoMinorLocator(5))
+    ax.yaxis.set_minor_locator(AutoMinorLocator(5))
+    return fig, ax
+
+
+def set_ax_limits(ax, pt_string, values, errs):
+    ax.margins(0.01, 0.2)
+    np_values = np.array(values, dtype="float32")
+    np_errs = np.array(errs, dtype="float32")
+    if ax.get_ylim()[1] - ax.get_ylim()[0] > 30.0 * np.std(np_values):
+        ax.set_ylim(np.mean(np_values) - 10.0 * np.std(np_values),
+                    np.mean(np_values) + 10.0 * np.std(np_values))
+        print(f"{pt_string} narrowing down the axis to {ax.get_ylim()}")
+
+
+def plot_trial_line(ax, central_trial_ind):
+    axis_lim = ax.get_ylim()
+    y_axis = np.linspace(*axis_lim, 100)
+    ax.plot([central_trial_ind] * len(y_axis), y_axis, c="m", ls="--", linewidth=4.0)
+    ax.set_ylim(*axis_lim)
+
+
+def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_string,
+                       central_trial_ind, central_yield):
+    fig, ax = prepare_figure(cfg, cfg["y_axis"], 100)
+    x_axis = range(len(trials))
+    ax.errorbar(x_axis, yields, yerr=yields_err,
+                fmt="o", c="b", elinewidth=2.5, linewidth=4.0)
+    set_ax_limits(ax, pt_string, yields, yields_err)
+    central_line = np.array([central_yield] * len(x_axis), dtype="float32")
+    ax.plot(x_axis, central_line, c="orange", ls="--", linewidth=4.0)
+    central_err = np.array([yields_err[central_trial_ind]] * len(x_axis), dtype="float32")
+    ax.fill_between(x_axis, central_line - central_err, central_line + central_err,
+                    facecolor="orange", edgecolor="none", alpha=0.3)
+    plot_trial_line(ax, central_trial_ind)
+    plot_text_box(ax, plot_pt_string)
+    fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_yields_trials_{pt_string}.png',
+                bbox_inches='tight')
+    plt.close()
+
+
+def plot_chis(chis, cfg, pt_string, plot_pt_string):
+    fig, ax = prepare_figure(cfg, "Chi2/ndf", 100)
+    x_axis = range(len(chis))
+    ax.scatter(x_axis, chis, c="b", marker="o")
+    set_ax_limits(ax, pt_string, chis, [0.0] * len(chis))
+    plot_text_box(ax, plot_pt_string)
+    fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png',
+                bbox_inches='tight')
+    plt.close()
+
+
+def plot_yields_distr(yields, cfg, pt_string, plot_pt_string, central_trial_ind, central_yield):
+    plt.figure(figsize=(20, 15))
+    ax = plt.subplot(1, 1, 1)
+    ax.set_xlabel("Ratio", fontsize=20)
+    ax.tick_params(labelsize=20, length=7, width=2.5)
+    ratios = [yield_ / central_yield for ind, yield_ in enumerate(yields) \
+              if ind != central_trial_ind]
+    ax.hist(ratios, color="b", linewidth=4.0)
+    mean = np.mean(yields)
+    std_dev = np.std(yields)
+    diffs = [(yield_ - central_yield) / central_yield \
+             for yield_ in yields[:central_trial_ind]]
+    diffs.extend([(yield_ - central_yield) / central_yield \
+                 for yield_ in yields[central_trial_ind+1:]])
+    rmse = np.sqrt(np.mean(np.array(diffs, dtype="float32")**2))
+    plot_text_box(ax, f"{plot_pt_string}\n"\
+                      f"mean:    {mean:.0f}\n"\
+                      f"std dev: {std_dev:.2f}\n"\
+                      f"RMSE:    {rmse:.2f}\n"\
+                      f"#trials: {len(yields)}")
+    plt.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_distr_{pt_string}.png', bbox_inches='tight')
+    plt.close()
+
+
+def main():
+    gROOT.SetBatch(True)
+
+    parser = argparse.ArgumentParser(description="Arguments to pass")
+    parser.add_argument("config", help="JSON config file")
+    args = parser.parse_args()
+
+    with open(args.config, encoding="utf8") as fil:
+        cfg = json.load(fil)
+
+        yields, yields_err, trials, chis = get_yields(cfg)
+
+        for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]):
+            plot_pt_string = f"${pt_bin_min} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {pt_bin_max}$"
+            pt_string = f"{pt_bin_min}_{pt_bin_max}"
+
+            try:
+                central_trial_ind = trials[pt_string].index(cfg["central_trial"])
+                central_yield = yields[pt_string][central_trial_ind]
+
+                plot_yields_trials(yields[pt_string], yields_err[pt_string], trials[pt_string], cfg,
+                                   pt_string, plot_pt_string, central_trial_ind, central_yield)
+                plot_yields_distr(yields[pt_string], cfg, pt_string, plot_pt_string,
+                                  central_trial_ind, central_yield)
+                plot_chis(chis[pt_string], cfg, pt_string, plot_pt_string)
+            except:
+                pass
+
+            with open(f'{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt',
+                      "w", encoding="utf-8") as ftext:
+                for trial in trials[pt_string]:
+                    ftext.write(f"{trial}\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/machine_learning_hep/run-mlhep-fitter-multitrial.py b/machine_learning_hep/run-mlhep-fitter-multitrial.py
new file mode 100644
index 0000000000..c5a06a1836
--- /dev/null
+++ b/machine_learning_hep/run-mlhep-fitter-multitrial.py
@@ -0,0 +1,158 @@
+# pylint: disable=missing-function-docstring, invalid-name
+"""
+file: run-mlhep-fitter-multitrial.py
+brief: Prepare MLHEP database files for different fit configurations for multitrial systematics.
+usage: python3 run-mlhep-fitter-multitrial.py
+author: Maja Karwowska <mkarwowska@cern.ch>, Warsaw University of Technology
+"""
+
+import argparse
+import re
+import shutil
+import yaml
+
+SIGMA02="0.007, 0.007, 0.013"
+SIGMA23="0.007, 0.007, 0.013"
+SIGMA34="0.007, 0.007, 0.012"
+SIGMA45="0.008, 0.008, 0.016"
+SIGMA56="0.010, 0.010, 0.016"
+SIGMA67="0.008, 0.008, 0.017"
+SIGMA78="0.012, 0.012, 0.018"
+SIGMA810="0.015, 0.012, 0.018"
+SIGMA1012="0.010, 0.010, 0.022"
+SIGMA1216="0.016, 0.016, 0.029"
+SIGMA1624="0.016, 0.016, 0.029"
+FREE_SIGMAS=[SIGMA02, SIGMA23, SIGMA34, SIGMA45, SIGMA56, SIGMA67, SIGMA78,
+             SIGMA810, SIGMA1012, SIGMA1216, SIGMA1624]
+
+CENTRAL_TRIAL=""
+
+BASE_TRIALS = (
+    ["alpha-15%", "alpha+15%"],
+    ["n-15%", "n+15%"],
+    ["rebin-1", "rebin+1"],
+    ["free-sigma"],
+    ["poly3"],
+    ["narrow", "narrow2", "wide", "wide2"]
+)
+
+def generate_trials(trial_classes):
+    combinations = [""]
+    for trial_class in trial_classes:
+        class_comb = []
+        for cur_comb in combinations:
+            for trial in trial_class:
+                class_comb.append(cur_comb + "_" + trial)
+                #print(f"{cur_comb}_{trial}")
+        combinations.extend(class_comb)
+    return combinations
+
+def replace_with_reval(var, in_str, frac):
+    pattern = fr"{var}\[([0-9.]*), .*?\]"
+    values = re.findall(pattern, in_str)
+    new_val = round(float(values[0]) * frac, 3)
+    return re.sub(pattern, f"{var}[{new_val}, {new_val}]", in_str)
+
+def process_trial(trial, ana_cfg, data_cfg, mc_cfg):
+    fit_cfg = ana_cfg["mass_roofit"]
+    if "alpha-15%" in trial:
+        print("Processing alpha-15%")
+        for pt_cfg in mc_cfg:
+            sig_fn = pt_cfg["components"]["sig"]["fn"]
+            pt_cfg["components"]["sig"]["fn"] = replace_with_reval("alpha1", sig_fn, 0.85)
+    elif "alpha+15%" in trial:
+        print("Processing alpha+15%")
+        for pt_cfg in mc_cfg:
+            sig_fn = pt_cfg["components"]["sig"]["fn"]
+            pt_cfg["components"]["sig"]["fn"] = replace_with_reval("alpha1", sig_fn, 1.15)
+    elif "n-15%" in trial:
+        print("Processing n-15%")
+        for pt_cfg in mc_cfg:
+            sig_fn = pt_cfg["components"]["sig"]["fn"]
+            pt_cfg["components"]["sig"]["fn"] = replace_with_reval("n1", sig_fn, 0.85)
+    elif "n+15%" in trial:
+        print("Processing n+15%")
+        for pt_cfg in mc_cfg:
+            sig_fn = pt_cfg["components"]["sig"]["fn"]
+            pt_cfg["components"]["sig"]["fn"] = replace_with_reval("n1", sig_fn, 1.15)
+    elif "rebin-1" in trial:
+        print("Processing rebin-1")
+        ana_cfg["n_rebin"] = [rebin - 1 for rebin in ana_cfg["n_rebin"]]
+    elif "rebin+1" in trial:
+        print("Processing rebin+1")
+        ana_cfg["n_rebin"] = [rebin + 1 for rebin in ana_cfg["n_rebin"]]
+    elif "free-sigma" in trial:
+        print("Processing free-sigma")
+        for pt_cfg, free_sigma in zip(mc_cfg, FREE_SIGMAS):
+            sig_fn = pt_cfg["components"]["sig"]["fn"]
+            pt_cfg["components"]["sig"]["fn"] = re.sub(r"sigma_g1\[(.*?)\]",
+                                                       f"sigma_g1[{free_sigma}]", sig_fn)
+    elif "poly3" in trial:
+        print("Processing poly3")
+        for pt_cfg in data_cfg:
+            bkg_fn = pt_cfg["components"]["bkg"]["fn"]
+            pt_cfg["components"]["bkg"]["fn"] = re.sub(r"a2\[(.*?)\]",
+                                                       r"a2[\1], a3[-1e8, 1e8]", bkg_fn)
+    elif "narrow2" in trial:
+        print("Processing narrow2")
+        for pt_cfg in fit_cfg:
+            pt_cfg["range"] = [pt_cfg["range"][0] + 0.02, pt_cfg["range"][1] - 0.02]
+    elif "narrow" in trial:
+        print("Processing narrow")
+        for pt_cfg in fit_cfg:
+            pt_cfg["range"] = [pt_cfg["range"][0] + 0.01, pt_cfg["range"][1] - 0.01]
+    elif "wide2" in trial:
+        print("Processing wide2")
+        for pt_cfg in fit_cfg:
+            pt_cfg["range"] = [max(2.10, pt_cfg["range"][0] - 0.02),
+                               min(2.47, pt_cfg["range"][1] + 0.02)]
+    elif "wide" in trial:
+        print("Processing wide")
+        for pt_cfg in fit_cfg:
+            pt_cfg["range"] = [max(2.10, pt_cfg["range"][0] - 0.01),
+                               min(2.47, pt_cfg["range"][1] + 0.01)]
+
+
+def main(db, db_dir, out_db_dir, resdir_pattern):
+    db_ext=f"{db}.yml"
+    db_path=f"{db_dir}/{db_ext}"
+    combinations = generate_trials(BASE_TRIALS)
+
+    for comb in combinations:
+        print(comb)
+
+        cur_cfg = f"{out_db_dir}/{db}{comb}.yml"
+        shutil.copy2(db_path, cur_cfg)
+
+        with open(cur_cfg, encoding="utf-8") as stream:
+            cfg = yaml.safe_load(stream)
+
+        ana_cfg = cfg["LcpKpi"]["analysis"]["Run3analysis"]
+        fit_cfg = ana_cfg["mass_roofit"]
+        mc_cfg = [fit_params for fit_params in fit_cfg \
+                    if "level" in fit_params and fit_params["level"] == "mc"]
+        data_cfg = [fit_params for fit_params in fit_cfg if not "level" in fit_params]
+
+        resdir = f"{resdir_pattern}{comb}"
+        respath = f"/data8/majak/MLHEP/{resdir}/"
+        ana_cfg["data"]["prefix_dir_res"] = respath
+        ana_cfg["mc"]["prefix_dir_res"] = respath
+
+        trials = comb.split("_")
+
+        for trial in trials:
+            process_trial(trial, ana_cfg, data_cfg, mc_cfg)
+
+        with open(cur_cfg, "w", encoding="utf-8") as stream:
+            yaml.dump(cfg, stream, sort_keys=False, width=10000, default_flow_style=None)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Arguments to pass")
+    parser.add_argument("db", help="MLHEP database without extension")
+    parser.add_argument("db_dir", help="path to directory with MLHEP database")
+    parser.add_argument("out_db_dir", help="path to output directory for generated MLHEP databases")
+    parser.add_argument("resdir", help="MLHEP resdir pattern")
+    args = parser.parse_args()
+
+    main(args.db, args.db_dir, args.out_db_dir, args.resdir)
diff --git a/machine_learning_hep/run-mlhep-fitter-multitrial.sh b/machine_learning_hep/run-mlhep-fitter-multitrial.sh
new file mode 100644
index 0000000000..26f25972af
--- /dev/null
+++ b/machine_learning_hep/run-mlhep-fitter-multitrial.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+DB_PATTERN="database_ml_parameters_LcToPKPi_multiclass_fdd" # Original database to be used as template
+DB_DIR="data/data_run3"
+OUT_DB_DIR="multitrial-db" # Directory to store multitrial databases only
+ext=".yml"
+
+DIR_PATH="/data8/majak/MLHEP"
+DIR_PATTERN="results-24022025-newtrain-multitrial-prompt" # Prefix of output directory for fit results
+
+# Paths to masshistos to fit
+BASE_DIR="/data8/majak/MLHEP/results-24022025-newtrain-ptshape-prompt"
+DATA_HIST="LHC23pp/Results/resultsdatatot/masshisto.root"
+MC_HIST="LHC24pp_mc/Results/resultsmctot/masshisto.root"
+
+# Run this only once to generate databases
+# Then, you can comment this out if you don't change the *.py file
+# The output analysis dir is set in databases to DIR_PATTERN + suffix with trial name
+python run-mlhep-fitter-multitrial.py "${DB_PATTERN}" "${DB_DIR}" "${OUT_DB_DIR}" "${DIR_PATTERN}" || exit 1
+
+for db in ${OUT_DB_DIR}/*.yml ; do
+  db_basename=`basename ${db}`
+  db_basename_no_ext=${db_basename%%${ext}}
+  echo ${db_basename_no_ext}
+  suffix=${db_basename_no_ext##${DB_PATTERN}}
+  echo "suffix: ${suffix}"
+  RESPATH="${DIR_PATH}/${DIR_PATTERN}${suffix}"
+  echo "respath: ${RESPATH}"
+
+  # Copy base masshistos so as to skip the masshisto step
+  # Only the fit step needs to be activated in analyzer.yml
+  # You first need to create the directory trees
+  cp "${BASE_DIR}/${DATA_HIST}" "${RESPATH}/${DATA_HIST}"
+  cp "${BASE_DIR}/${MC_HIST}" "${RESPATH}/${MC_HIST}"
+
+  mlhep logfile_${db_basename}.log \
+    -a Run3analysis \
+    --run-config submission/analyzer.yml \
+    --database-analysis ${db}
+
+  # Copy the plots from MachineLearningHEP/machine_learning_hep/fig/ to RESPATH
+  # It's not compulsory, it's just for you to see the fits
+  # It might be obsolete if you changed the default output fig/ location in MLHEP
+  rm -rf ${RESPATH}/fig/
+  mv fig/ ${RESPATH}/fig/
+done
+

From 7c0d2c2286c86da85cc02ea98ea20225f66c6236 Mon Sep 17 00:00:00 2001
From: saganatt <8majak8@gmail.com>
Date: Tue, 15 Jul 2025 12:45:48 +0200
Subject: [PATCH 2/5] Add multitrial scripts and README

---
 machine_learning_hep/multitrial/README.md     | 36 +++++++++++++++++++
 .../{ => multitrial}/multitrial.py            |  4 +--
 .../run-mlhep-fitter-multitrial.py            |  6 ++--
 .../run-mlhep-fitter-multitrial.sh            |  8 +----
 4 files changed, 43 insertions(+), 11 deletions(-)
 create mode 100644 machine_learning_hep/multitrial/README.md
 rename machine_learning_hep/{ => multitrial}/multitrial.py (98%)
 rename machine_learning_hep/{ => multitrial}/run-mlhep-fitter-multitrial.py (96%)
 rename machine_learning_hep/{ => multitrial}/run-mlhep-fitter-multitrial.sh (83%)

diff --git a/machine_learning_hep/multitrial/README.md b/machine_learning_hep/multitrial/README.md
new file mode 100644
index 0000000000..93054c751c
--- /dev/null
+++ b/machine_learning_hep/multitrial/README.md
@@ -0,0 +1,36 @@
+# Multitrial systematics with MLHEP
+
+## Generate configurations (MLHEP yml databases) for each trial
+
+File: `run-mlhep-fitter-multitrial.py`<br>
+Usage: `python run-mlhep-fitter-multitrial.py database_file in_db_dir out_db_dir mlhep_results_dir_pattern`
+
+Arguments:
+- `database_file`: filename of the template database without the .yml extension, e.g., `database_ml_parameters_LcToPKPi`
+- `in_db_dir`: path to the directory containing the database, e.g., `data/data_run3`
+- `out_db_dir`: path to the directory for output multitrial databases, e.g., `multitrial_db`
+- `mlhep_results_dir_pattern`: prefix of output directory name for fit results; for each trial, the trial name is appended to the directory name, and the resulting directory name is written under `Run3analysis/{data,mc}/prefix_dir_res` in the database file 
+
+Adjust `DIR_PATH` in the script. It is the path to the base directory where you store directories with MLHEP results.
+
+This script needs to be run only once to generate databases.
+
+Currently, the trials are hardcoded in the Python script. To add or modify a trial, you need to adjust the `BASE_TRIALS` variable and the `process_trial` function.
+
+## Get mass fits for each trial
+
+File: `run-mlhep-fitter-multitrial.sh`<br>
+Usage: `./run-mlhep-fitter-multitrial.sh`
+
+The `submission/analyzer.yml` config is used.
+The script automates running MLHEP for each trial. Mass histograms are copied before each MLHEP invocation, so that only the quick fit step needs to be activated in `submission/analyzer.yml`.
+
+Adjust the variables before the `for` loop.<br>
+The script also includes a call to `run-mlhep-fitter-multitrial.py`, which can be commented out. In this case, make sure to pass the same `OUT_DB_DIR`, `DB_PATTERN`, `DIR_PATTERN` values to the two scripts.
+
+Before running, you need to create the directory structure for each MLHEP output. You can, for example, run the `.sh` script with the `cp` lines commented out. Then, MLHEP creates directories for each trial and fails quietly. Next, run the script with `cp` lines uncommented, and you will get the final output.
+
+## Plot multitrial results
+
+Files: `multitrial.py`, `config_multitrial.json`<br>
+Usage: `python3 multitrial.py config_multitrial.json`
diff --git a/machine_learning_hep/multitrial.py b/machine_learning_hep/multitrial/multitrial.py
similarity index 98%
rename from machine_learning_hep/multitrial.py
rename to machine_learning_hep/multitrial/multitrial.py
index 38119c95fa..71194ce979 100644
--- a/machine_learning_hep/multitrial.py
+++ b/machine_learning_hep/multitrial/multitrial.py
@@ -68,7 +68,7 @@ def get_yields(cfg):
 def prepare_figure(cfg, y_label, ticks):
     fig = plt.figure(figsize=(20, 15))
     ax = plt.subplot(1, 1, 1)
-    ax.set_xlabel(cfg["x_axis"], fontsize=20)
+    ax.set_xlabel("Trial #", fontsize=20)
     ax.set_ylabel(y_label, fontsize=20)
     ax.tick_params(which="both", width=2.5, direction="in")
     ax.tick_params(which="major", labelsize=20, length=15)
@@ -98,7 +98,7 @@ def plot_trial_line(ax, central_trial_ind):
 
 def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_string,
                        central_trial_ind, central_yield):
-    fig, ax = prepare_figure(cfg, cfg["y_axis"], 100)
+    fig, ax = prepare_figure(cfg, "Raw yield", 100)
     x_axis = range(len(trials))
     ax.errorbar(x_axis, yields, yerr=yields_err,
                 fmt="o", c="b", elinewidth=2.5, linewidth=4.0)
diff --git a/machine_learning_hep/run-mlhep-fitter-multitrial.py b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py
similarity index 96%
rename from machine_learning_hep/run-mlhep-fitter-multitrial.py
rename to machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py
index c5a06a1836..d8307def4e 100644
--- a/machine_learning_hep/run-mlhep-fitter-multitrial.py
+++ b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py
@@ -2,7 +2,7 @@
 """
 file: run-mlhep-fitter-multitrial.py
 brief: Prepare MLHEP database files for different fit configurations for multitrial systematics.
-usage: python3 run-mlhep-fitter-multitrial.py
+usage: python run-mlhep-fitter-multitrial.py database_lc data/data_run3 trial_configs_dir mlhep_results_dir_pattern
 author: Maja Karwowska <mkarwowska@cern.ch>, Warsaw University of Technology
 """
 
@@ -36,6 +36,8 @@
     ["narrow", "narrow2", "wide", "wide2"]
 )
 
+DIR_PATH = "/data8/majak/MLHEP"
+
 def generate_trials(trial_classes):
     combinations = [""]
     for trial_class in trial_classes:
@@ -134,7 +136,7 @@ def main(db, db_dir, out_db_dir, resdir_pattern):
         data_cfg = [fit_params for fit_params in fit_cfg if not "level" in fit_params]
 
         resdir = f"{resdir_pattern}{comb}"
-        respath = f"/data8/majak/MLHEP/{resdir}/"
+        respath = f"{DIR_PATH}/{resdir}/"
         ana_cfg["data"]["prefix_dir_res"] = respath
         ana_cfg["mc"]["prefix_dir_res"] = respath
 
diff --git a/machine_learning_hep/run-mlhep-fitter-multitrial.sh b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh
similarity index 83%
rename from machine_learning_hep/run-mlhep-fitter-multitrial.sh
rename to machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh
index 26f25972af..de173ee0f5 100644
--- a/machine_learning_hep/run-mlhep-fitter-multitrial.sh
+++ b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh
@@ -8,7 +8,7 @@ ext=".yml"
 DIR_PATH="/data8/majak/MLHEP"
 DIR_PATTERN="results-24022025-newtrain-multitrial-prompt" # Prefix of output directory for fit results
 
-# Paths to masshistos to fit
+# Paths to input masshistos to fit
 BASE_DIR="/data8/majak/MLHEP/results-24022025-newtrain-ptshape-prompt"
 DATA_HIST="LHC23pp/Results/resultsdatatot/masshisto.root"
 MC_HIST="LHC24pp_mc/Results/resultsmctot/masshisto.root"
@@ -37,11 +37,5 @@ for db in ${OUT_DB_DIR}/*.yml ; do
     -a Run3analysis \
     --run-config submission/analyzer.yml \
     --database-analysis ${db}
-
-  # Copy the plots from MachineLearningHEP/machine_learning_hep/fig/ to RESPATH
-  # It's not compulsory, it's just for you to see the fits
-  # It might be obsolete if you changed the default output fig/ location in MLHEP
-  rm -rf ${RESPATH}/fig/
-  mv fig/ ${RESPATH}/fig/
 done
 

From 7c9873afb9d5583883f2c4870828b01c750dd694 Mon Sep 17 00:00:00 2001
From: saganatt <8majak8@gmail.com>
Date: Tue, 15 Jul 2025 13:37:54 +0200
Subject: [PATCH 3/5] Linter fixes

---
 machine_learning_hep/multitrial/multitrial.py   | 17 ++++++++---------
 .../multitrial/run-mlhep-fitter-multitrial.sh   | 14 +++++++-------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/machine_learning_hep/multitrial/multitrial.py b/machine_learning_hep/multitrial/multitrial.py
index 71194ce979..7d3c335a7a 100644
--- a/machine_learning_hep/multitrial/multitrial.py
+++ b/machine_learning_hep/multitrial/multitrial.py
@@ -51,7 +51,7 @@ def get_yields(cfg):
             trial_name = dirname.replace(cfg["dir_pattern"], "")
             for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"],
                                                                cfg["pt_bins_max"])):
-                if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \
+                if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \ # pylint: disable=eval-used
                         and hist.GetBinContent(ind + 1) > 1.0 :
                     yields[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinContent(ind + 1))
                     yields_err[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinError(ind + 1))
@@ -65,7 +65,7 @@ def get_yields(cfg):
     return yields, yields_err, trials, chis
 
 
-def prepare_figure(cfg, y_label, ticks):
+def prepare_figure(y_label, ticks):
     fig = plt.figure(figsize=(20, 15))
     ax = plt.subplot(1, 1, 1)
     ax.set_xlabel("Trial #", fontsize=20)
@@ -79,10 +79,9 @@ def prepare_figure(cfg, y_label, ticks):
     return fig, ax
 
 
-def set_ax_limits(ax, pt_string, values, errs):
+def set_ax_limits(ax, pt_string, values):
     ax.margins(0.01, 0.2)
     np_values = np.array(values, dtype="float32")
-    np_errs = np.array(errs, dtype="float32")
     if ax.get_ylim()[1] - ax.get_ylim()[0] > 30.0 * np.std(np_values):
         ax.set_ylim(np.mean(np_values) - 10.0 * np.std(np_values),
                     np.mean(np_values) + 10.0 * np.std(np_values))
@@ -98,11 +97,11 @@ def plot_trial_line(ax, central_trial_ind):
 
 def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_string,
                        central_trial_ind, central_yield):
-    fig, ax = prepare_figure(cfg, "Raw yield", 100)
+    fig, ax = prepare_figure("Raw yield", 100)
     x_axis = range(len(trials))
     ax.errorbar(x_axis, yields, yerr=yields_err,
                 fmt="o", c="b", elinewidth=2.5, linewidth=4.0)
-    set_ax_limits(ax, pt_string, yields, yields_err)
+    set_ax_limits(ax, pt_string, yields)
     central_line = np.array([central_yield] * len(x_axis), dtype="float32")
     ax.plot(x_axis, central_line, c="orange", ls="--", linewidth=4.0)
     central_err = np.array([yields_err[central_trial_ind]] * len(x_axis), dtype="float32")
@@ -116,10 +115,10 @@ def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_strin
 
 
 def plot_chis(chis, cfg, pt_string, plot_pt_string):
-    fig, ax = prepare_figure(cfg, "Chi2/ndf", 100)
+    fig, ax = prepare_figure("Chi2/ndf", 100)
     x_axis = range(len(chis))
     ax.scatter(x_axis, chis, c="b", marker="o")
-    set_ax_limits(ax, pt_string, chis, [0.0] * len(chis))
+    set_ax_limits(ax, pt_string, chis)
     plot_text_box(ax, plot_pt_string)
     fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png',
                 bbox_inches='tight')
@@ -175,7 +174,7 @@ def main():
                 plot_yields_distr(yields[pt_string], cfg, pt_string, plot_pt_string,
                                   central_trial_ind, central_yield)
                 plot_chis(chis[pt_string], cfg, pt_string, plot_pt_string)
-            except:
+            except: # pylint: disable=bare-except
                 pass
 
             with open(f'{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt',
diff --git a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh
index de173ee0f5..4006d0790a 100644
--- a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh
+++ b/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh
@@ -18,11 +18,11 @@ MC_HIST="LHC24pp_mc/Results/resultsmctot/masshisto.root"
 # The output analysis dir is set in databases to DIR_PATTERN + suffix with trial name
 python run-mlhep-fitter-multitrial.py "${DB_PATTERN}" "${DB_DIR}" "${OUT_DB_DIR}" "${DIR_PATTERN}" || exit 1
 
-for db in ${OUT_DB_DIR}/*.yml ; do
-  db_basename=`basename ${db}`
-  db_basename_no_ext=${db_basename%%${ext}}
-  echo ${db_basename_no_ext}
-  suffix=${db_basename_no_ext##${DB_PATTERN}}
+for db in "${OUT_DB_DIR}"/*.yml ; do
+  db_basename=$(basename "${db}")
+  db_basename_no_ext=${db_basename%%"${ext}"}
+  echo "${db_basename_no_ext}"
+  suffix=${db_basename_no_ext##"${DB_PATTERN}"}
   echo "suffix: ${suffix}"
   RESPATH="${DIR_PATH}/${DIR_PATTERN}${suffix}"
   echo "respath: ${RESPATH}"
@@ -33,9 +33,9 @@ for db in ${OUT_DB_DIR}/*.yml ; do
   cp "${BASE_DIR}/${DATA_HIST}" "${RESPATH}/${DATA_HIST}"
   cp "${BASE_DIR}/${MC_HIST}" "${RESPATH}/${MC_HIST}"
 
-  mlhep logfile_${db_basename}.log \
+  mlhep "logfile_${db_basename}.log" \
     -a Run3analysis \
     --run-config submission/analyzer.yml \
-    --database-analysis ${db}
+    --database-analysis "${db}"
 done
 

From 0b46c9acb4c4dc1d51fa3996ddb2b0cba1770b8d Mon Sep 17 00:00:00 2001
From: saganatt <8majak8@gmail.com>
Date: Tue, 15 Jul 2025 13:44:59 +0200
Subject: [PATCH 4/5] Nitpick

---
 machine_learning_hep/multitrial/multitrial.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/machine_learning_hep/multitrial/multitrial.py b/machine_learning_hep/multitrial/multitrial.py
index 7d3c335a7a..593a99dd31 100644
--- a/machine_learning_hep/multitrial/multitrial.py
+++ b/machine_learning_hep/multitrial/multitrial.py
@@ -51,8 +51,8 @@ def get_yields(cfg):
             trial_name = dirname.replace(cfg["dir_pattern"], "")
             for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"],
                                                                cfg["pt_bins_max"])):
-                if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \ # pylint: disable=eval-used
-                        and hist.GetBinContent(ind + 1) > 1.0 :
+                if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \
+                        and hist.GetBinContent(ind + 1) > 1.0 : # pylint: disable=eval-used
                     yields[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinContent(ind + 1))
                     yields_err[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinError(ind + 1))
                     trials[f"{pt_bin_min}_{pt_bin_max}"].append(trial_name)

From ecef407788416a7de370ff2f2551235a6e5f3a41 Mon Sep 17 00:00:00 2001
From: saganatt <8majak8@gmail.com>
Date: Thu, 17 Jul 2025 10:20:19 +0200
Subject: [PATCH 5/5] Add leftover JSON

---
 machine_learning_hep/multitrial/README.md     |  2 ++
 .../multitrial/config_multitrial.json         | 20 +++++++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 machine_learning_hep/multitrial/config_multitrial.json

diff --git a/machine_learning_hep/multitrial/README.md b/machine_learning_hep/multitrial/README.md
index 93054c751c..8437924336 100644
--- a/machine_learning_hep/multitrial/README.md
+++ b/machine_learning_hep/multitrial/README.md
@@ -34,3 +34,5 @@ Before running, you need to create the directory structure for each MLHEP output
 
 Files: `multitrial.py`, `config_multitrial.json`<br>
 Usage: `python3 multitrial.py config_multitrial.json`
+
+Adjust the sample `config_multitrial.json` to your needs.
diff --git a/machine_learning_hep/multitrial/config_multitrial.json b/machine_learning_hep/multitrial/config_multitrial.json
new file mode 100644
index 0000000000..35aa25a32e
--- /dev/null
+++ b/machine_learning_hep/multitrial/config_multitrial.json
@@ -0,0 +1,20 @@
+{
+  "file_pattern": "/data8/majak/MLHEP/results-24022025-newtrain-multitrial-prompt*/LHC23pp_pass4/Results/resultsdatatot/yields_LcpKpi_Run3analysis.root",
+  "_file_pattern": "glob pattern for all multitrial fit files; note the asterisk to match all trial suffixes",
+  "dir_pattern": "results-24022025-newtrain-multitrial-prompt",
+  "_dir_pattern": "the base directory prefix from the file pattern above",
+  "histoname": "hyields0",
+  "_histoname": "histogram with mass fit",
+  "sel_histoname": "hchi0",
+  "_sel_histoname": "histogram for filtering the results",
+  "selection": "lambda x : x < 5.0",
+  "_selection": "filter to apply with sel_histoname, e.g., chi < 5",
+  "pt_bins_min": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16],
+  "pt_bins_max": [2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24],
+  "central_trial": "",
+  "_central_trial": "suffix of the directory with the central trial",
+  "outdir": "/data8/majak/multitrial",
+  "_outdir": "output directory",
+  "outfile": "result-prompt-chi5",
+  "_outfile": "output file pattern"
+}