From d1f28d8699877cd26a95540ebc02381b90d3f127 Mon Sep 17 00:00:00 2001 From: Luigi Dello Stritto Date: Thu, 10 Jul 2025 10:47:25 +0200 Subject: [PATCH] Replace bitwise MC tag matching with direct value comparison --- ...ase_ml_parameters_LcToPKPi_CrystalBall.yml | 20 +++++++-------- machine_learning_hep/processer.py | 25 +++++++++++-------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_CrystalBall.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_CrystalBall.yml index 67be528008..86dc4c60c1 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_CrystalBall.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_CrystalBall.yml @@ -82,11 +82,11 @@ LcpKpi: #extra: #fY: log((sqrt(2.28646**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.28646**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated tags: - isstd: {var: fFlagMcMatchRec, req: [[1], []], level: mc} - ismcsignal: {var: fFlagMcMatchRec, req: [[1], []], abs: true, level: mc} - ismcbkg: {var: fFlagMcMatchRec, req: [[], [1]], abs: true, level: mc} - ismcprompt: {var: fOriginMcRec, req: [[0], []], level: mc} - ismcfd: {var: fOriginMcRec, req: [[1], []], level: mc} + isstd: {var: fFlagMcMatchRec, req: 17, level: mc} + ismcsignal: {var: fFlagMcMatchRec, req: 17, abs: true, level: mc} + ismcbkg: {var: ismcsignal, req: 0, level: mc} + ismcprompt: {var: fOriginMcRec, req: 1, level: mc} + ismcfd: {var: fOriginMcRec, req: 2, level: mc} swap: {cand: fCandidateSelFlag, var_swap: fIsCandidateSwapped, vars: [ismcsignal, ismcprompt, ismcfd], level: mc} gen: @@ -94,11 +94,11 @@ LcpKpi: trees: O2hflcpbase: [fIndexHFLCMCCOLLBASES, fPt, fY, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen] tags: - isstd: {var: fFlagMcMatchGen, req: [[1], []], level: mc} - ismcsignal: {var: fFlagMcMatchGen, req: [[1], []], abs: true, level: mc} - ismcbkg: {var: fFlagMcMatchGen, req: [[], [1]], abs: true, level: mc} - 
ismcprompt: {var: fOriginMcGen, req: [[0], []], level: mc} - ismcfd: {var: fOriginMcGen, req: [[1], []], level: mc} + isstd: {var: fFlagMcMatchGen, req: 17, level: mc} + ismcsignal: {var: fFlagMcMatchGen, req: 17, abs: true, level: mc} + ismcbkg: {var: ismcsignal, req: 0, level: mc} + ismcprompt: {var: fOriginMcGen, req: 1, level: mc} + ismcfd: {var: fOriginMcGen, req: 2, level: mc} #extra: #fY: log((sqrt(2.28646**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.28646**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated diff --git a/machine_learning_hep/processer.py b/machine_learning_hep/processer.py index 2bd62dff9b..7606de4f22 100644 --- a/machine_learning_hep/processer.py +++ b/machine_learning_hep/processer.py @@ -32,7 +32,6 @@ import uproot from pandas.api.types import is_numeric_dtype -from .bitwise import tag_bit_df from .io import dump_yaml_from_dict from .logger import get_logger from .utilities import ( @@ -217,8 +216,8 @@ def __init__( # Potentially mask certain values (e.g. 
nsigma TOF of -999) self.p_mask_values = datap["ml"].get("mask_values", None) - self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax)), "d") - self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax)), "d") + self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax, strict=False)), "d") + self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax, strict=False)), "d") bin_matching = [ [ptrange[0] <= bin[0] and ptrange[1] >= bin[1] for ptrange in self.bins_skimming].index(True) for bin in self.bins_analysis @@ -251,7 +250,7 @@ def __init__( for ipt in range(self.p_nptfinbins): mlsel_multi = [ f"y_test_prob{self.p_modelname}{label.replace('-', '_')} {comp} {probcut}" - for label, comp, probcut in zip(self.class_labels, comps, self.lpt_probcutfin[ipt]) + for label, comp, probcut in zip(self.class_labels, comps, self.lpt_probcutfin[ipt], strict=False) ] self.l_selml.append(" and ".join(mlsel_multi)) @@ -375,7 +374,7 @@ def dfread(rdir, trees, cols, idx_name=None): cols = [cols] # if all(type(var) is str for var in vars): vars = [vars] df = None - for tree, col in zip([rdir[name] for name in trees], cols): + for tree, col in zip([rdir[name] for name in trees], cols, strict=False): try: data = tree.arrays(expressions=col, library="np") dfnew = pd.DataFrame(columns=col, data=data) @@ -396,7 +395,7 @@ def dfread(rdir, trees, cols, idx_name=None): def dfappend(name: str, dfa): """Append DF row-wise""" - dfs[name] = pd.concat([dfs.get(name, None), dfa]) + dfs[name] = pd.concat([dfs.get(name), dfa]) def dfmerge(dfl, dfr, **kwargs): """Merge dfl and dfr""" @@ -437,7 +436,7 @@ def dfuse(df_spec): if dfuse(df_spec): trees = [] cols = [] - for tree, spec in zip(df_spec["trees"].keys(), df_spec["trees"].values()): + for tree, spec in zip(df_spec["trees"].keys(), df_spec["trees"].values(), strict=False): if isinstance(spec, list): trees.append(tree) cols.append(spec) @@ -467,9 +466,15 @@ def 
dfuse(df_spec): self.logger.debug(" %s -> tags", df_name) for tag, value in df_spec["tags"].items(): if dfuse(value): - dfs[df_name][tag] = np.array( - tag_bit_df(dfs[df_name], value["var"], value["req"], value.get("abs", False)), dtype=int - ) + var = dfs[df_name][value["var"]] + + if value.get("abs", False): + var = var.abs() + + dfs[df_name][tag] = (var == value["req"]).astype(int) + + # dfs[df_name][tag] = np.array( + # tag_bit_df(dfs[df_name], value["var"], value["req"], value.get("abs", False)), dtype=int) if "swap" in df_spec: self.logger.debug(" %s -> swap", df_name)