From ee4713b27766480a404122f1542612f67b216a40 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 5 Nov 2024 09:06:49 +0100 Subject: [PATCH 01/16] First completed code for cross section. To be tested --- .../analysis/analyzerdhadrons.py | 4 +- ...base_ml_parameters_LcToPKPi_multiclass.yml | 5 +- machine_learning_hep/hf_pt_spectrum.py | 59 +++++++++++-------- 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py index b0631c24bf..72f8683260 100644 --- a/machine_learning_hep/analysis/analyzerdhadrons.py +++ b/machine_learning_hep/analysis/analyzerdhadrons.py @@ -122,6 +122,7 @@ def __init__(self, datap, case, typean, period): self.p_anahpt = datap["analysis"]["anahptspectrum"] self.p_fd_method = datap["analysis"]["fd_method"] + self.p_crosssec_prompt = datap["analysis"]["crosssec_prompt"] self.p_cctype = datap["analysis"]["cctype"] self.p_inputfonllpred = datap["analysis"]["inputfonllpred"] self.p_triggereff = datap["analysis"][self.typean].get("triggereff", [1]) @@ -525,7 +526,8 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b selnorm, self.p_sigmamb, output_prompt, - fileoutcross) + fileoutcross, + self.p_crosssec_prompt) fileoutcrosstot = TFile.Open("%s/finalcross%s%stot.root" % (self.d_resultsallpdata, self.case, self.typean), "recreate") diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index 7549fc0853..3a94567740 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -367,10 +367,11 @@ LcpKpi: probcutoptimal: [[0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.5, 0.2, 0.0], [0.5, 0.2, 0.0]] #list of nbins analysis: 
anahptspectrum: "LctopKpi" # D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp - fd_method: "Nb" # fc, Nb + fd_method: "dd" # fc, Nb, ext, dd + crosssec_prompt: True # True for prompt, False for non-prompt cctype: "pp" sigmamb: 59.4e+9 # 50.87e+9 pp5TeV, 57.8e+9 pp13TeV, 59.4e+9 pp Run3, pb - inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root + inputfonllpred: /data8/majak/fdd-results/10092024/fdd-results-precise/CutVarLc_pp13TeV_LHC24d3.root dir_general_plots: analysis_plots Run3analysis: diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py index 472a5e8fca..d8c0da3e88 100644 --- a/machine_learning_hep/hf_pt_spectrum.py +++ b/machine_learning_hep/hf_pt_spectrum.py @@ -18,7 +18,7 @@ usage: python3 HfPtSpectrum.py CONFIG authors: Fabrizio Grosa , CERN Luigi Dello Stritto , CERN -Macro committed and manteined in O2Physics: +Macro committed and maintained in O2Physics: https://github.com/AliceO2Group/O2Physics/tree/master/PWGHF/D2H/Macros """ @@ -47,7 +47,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-arguments, too-many-statements, too-many-branches b_ratio, - inputfonllpred, + input_fonll_or_fdd_pred, frac_method, prompt_frac, eff_filename, @@ -58,7 +58,8 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument norm, sigmamb, output_prompt, - output_file): + output_file, + crosssec_prompt=True): # final plots style settings style_hist = TStyle('style_hist','Histo graphics style') @@ -87,7 +88,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument print(f"\033[91mERROR: channel {channel} not supported. Exit\033[0m") sys.exit(2) - if frac_method not in ["Nb", "fc", "ext"]: + if frac_method not in ["Nb", "fc", "ext", "dd"]: print( f"\033[91mERROR: method to subtract nonprompt" f" {frac_method} not supported. 
Exit\033[0m" @@ -105,20 +106,23 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument histos = {} - histos["FONLL"] = {"prompt": {}, "nonprompt": {}} - infile_fonll = TFile.Open(inputfonllpred) - for pred in ("central", "min", "max"): - histos["FONLL"]["nonprompt"][pred] = infile_fonll.Get( - f"{fonll_hist_name[channel]}fromBpred_{pred}_corr" - ) - histos["FONLL"]["nonprompt"][pred].SetDirectory(0) - if frac_method == "fc": - histos["FONLL"]["prompt"][pred] = infile_fonll.Get( - f"{fonll_hist_name[channel]}pred_{pred}" - ) - histos["FONLL"]["prompt"][pred].SetDirectory(0) - - infile_fonll.Close() + source_name = "FDD" if frac_method == "dd" else "FONLL" + histos[source_name] = {"prompt": {}, "nonprompt": {}} + with TFile.Open(input_fonll_or_fdd_pred) as infile_pred: + if frac_method == "dd": + histos["corryields_fdd"]["prompt"] = infile_pred.Get(f"hCorrYieldsPrompt") + histos["corryields_fdd"]["nonprompt"] = infile_pred.Get(f"hCorrYieldsNonPrompt") + else: + for pred in ("central", "min", "max"): + histos[source_name]["nonprompt"][pred] = infile_pred.Get( + f"{fonll_hist_name[channel]}fromBpred_{pred}_corr" + ) + histos[source_name]["nonprompt"][pred].SetDirectory(0) + if frac_method == "fc": + histos[source_name]["prompt"][pred] = infile_pred.Get( + f"{fonll_hist_name[channel]}pred_{pred}" + ) + histos[source_name]["prompt"][pred].SetDirectory(0) infile_rawy = TFile.Open(yield_filename) histos["rawyields"] = infile_rawy.Get(yield_histoname) @@ -200,17 +204,17 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument eff_times_acc_prompt = histos["acceffp"].GetBinContent(i_pt + 1) eff_times_acc_nonprompt = histos["acceffnp"].GetBinContent(i_pt + 1) ptmin_fonll = ( - histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmin * 1.0001) + histos[source_name]["nonprompt"]["central"].GetXaxis().FindBin(ptmin * 1.0001) ) ptmax_fonll = ( - histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmax * 
0.9999) + histos[source_name]["nonprompt"]["central"].GetXaxis().FindBin(ptmax * 0.9999) ) crosssec_nonprompt_fonll = [ - histos["FONLL"]["nonprompt"][pred].Integral( + histos[source_name]["nonprompt"][pred].Integral( ptmin_fonll, ptmax_fonll, "width" ) / (ptmax - ptmin) - for pred in histos["FONLL"]["nonprompt"] + for pred in histos[source_name]["nonprompt"] ] # compute prompt fraction @@ -229,11 +233,11 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument ) elif frac_method == "fc": crosssec_prompt_fonll = [ - histos["FONLL"]["prompt"][pred].Integral( + histos[source_name]["prompt"][pred].Integral( ptmin_fonll, ptmax_fonll, "width" ) / (ptmax - ptmin) - for pred in histos["FONLL"]["prompt"] + for pred in histos[source_name]["prompt"] ] frac, _ = compute_fraction_fc( eff_times_acc_prompt, @@ -243,6 +247,13 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument ) elif frac_method == "ext": frac[0] = prompt_frac[i_pt] + elif frac_method == "dd": + yield_times_acceff_prompt = histos["corryields_fdd"]["prompt"][pred].GetBinContent(i_pt + 1) * eff_times_acc_prompt + yield_times_acceff_nonprompt = histos["corryields_fdd"]["nonprompt"][pred].GetBinContent(i_pt + 1) * eff_times_acc_nonprompt + yield_times_acceff_own = yield_times_acceff_prompt if crosssec_prompt else yield_times_acceff_nonprompt + yield_times_acceff_other = yield_times_acceff_nonprompt if crosssec_prompt else yield_times_acceff_prompt + frac_v = yield_times_acceff_own / (yield_times_acceff_own + yield_times_acceff_other) + frac = [frac_v] * 3 # compute cross section times BR crosssec, crosssec_unc = compute_crosssection( From 0ca5ff5e1e3fd12845a01aff346c9ee0b4b76c52 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 7 Nov 2024 10:26:02 +0100 Subject: [PATCH 02/16] Fixes for working hf_pt_spectrum --- .../analysis/analyzerdhadrons.py | 9 +- ...base_ml_parameters_LcToPKPi_multiclass.yml | 176 ++++++++++-------- 
machine_learning_hep/hf_pt_spectrum.py | 43 ++--- machine_learning_hep/processer.py | 2 +- machine_learning_hep/submission/analyzer.yml | 24 +-- 5 files changed, 136 insertions(+), 118 deletions(-) diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py index 72f8683260..ca4e16b29f 100644 --- a/machine_learning_hep/analysis/analyzerdhadrons.py +++ b/machine_learning_hep/analysis/analyzerdhadrons.py @@ -396,7 +396,7 @@ def efficiency(self): print(self.n_fileff) lfileeff = TFile.Open(self.n_fileff) lfileeff.ls() - fileouteff = TFile.Open("%s/efficiencies%s%s.root" % (self.d_resultsallpmc, + fileouteff = TFile.Open("%s/%s%s%s.root" % (self.d_resultsallpmc, self.efficiency_filename, self.case, self.typean), "recreate") cEff = TCanvas('cEff', 'The Fit Canvas') cEff.SetCanvasSize(1900, 1500) @@ -474,11 +474,12 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b yield_filename = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root", None, [self.case, self.typean]) + yield_filename = "/data8/majak/MLHEP/input-fd-10092024/yields-1224_split_bkg_0.60_0.60_fd_0.00-fixed-sigma.root" if not os.path.exists(yield_filename): self.logger.fatal( "Yield file %s could not be found", yield_filename) - fileouteff = f"{self.d_resultsallpmc}/efficiencies{self.case}{self.typean}.root" + fileouteff = f"{self.d_resultsallpmc}/{self.efficiency_filename}{self.case}{self.typean}.root" if not os.path.exists(fileouteff): self.logger.fatal( "Efficiency file %s could not be found", fileouteff) @@ -488,7 +489,7 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b namehistoeffprompt = "eff" namehistoefffeed = "eff_fd" - nameyield = "hyields0" + nameyield = "hRawYields" histonorm = TH1F("histonorm", "histonorm", 1, 0, 1) @@ -535,7 +536,9 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b f_fileoutcross = TFile.Open(fileoutcross) if 
f_fileoutcross: hcross = f_fileoutcross.Get("hptspectrum") + hcrossbr = f_fileoutcross.Get("hptspectrum_wo_br") fileoutcrosstot.cd() hcross.Write() + hcrossbr.Write() histonorm.Write() fileoutcrosstot.Close() diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index 3a94567740..99d7d14972 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -19,12 +19,14 @@ LcpKpi: sel_gen_unp: "fPt>0" sel_cen_unp: null sel_good_evt_unp: null - sel_reco_skim: [null,null,null,null,null,null] - sel_gen_skim: [null,null,null,null,null,null] - sel_skim_binmin: [1,2,4,6,8,12] #list of nbins - sel_skim_binmax: [2,4,6,8,12,24] #list of nbins + sel_reco_skim: [null,null,null,null,null,null,null,null,null] + sel_gen_skim: [null,null,null,null,null,null,null,null,null] + sel_skim_binmin: [1,2,3,4,5,6,8,12,16] #list of nbins + sel_skim_binmax: [2,3,4,5,6,8,12,16,24] #list of nbins apply_yptacccut: false var_binning: fPt + do_ptshape: true + var_binning_ptshape: fPtWeighted dofullevtmerge: false var_cand: fCandidateSelFlag var_swap: fIsCandidateSwapped @@ -72,6 +74,7 @@ LcpKpi: O2hf3psel: level: mc vars: [fCandidateSelFlag] + O2hf3pml: [fMlScores] filter: "fPt > 1." 
extra: fY: log((sqrt(2.28646**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.28646**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated @@ -82,6 +85,9 @@ LcpKpi: ismcprompt: {var: fOriginMcRec, req: [[0],[]], level: mc} ismcfd: {var: fOriginMcRec, req: [[1],[]], level: mc} swap: {cand: fCandidateSelFlag, var_swap: fIsCandidateSwapped, vars: [ismcsignal, ismcprompt, ismcfd], level: mc} + extract_component: + - {var: fMlScores, newvar: fMlBkgScore, component: 0} + - {var: fMlScores, newvar: fMlNonPromptScore, component: 2} gen: level: mc @@ -148,6 +154,12 @@ LcpKpi: fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, + fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, + fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, + fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2], [fImpactParameter0, fImpactParameter1, fImpactParameter2, fCpa, fChi2PCA, fDecayLength, fDecayLengthXY, fNSigTpcTofPi0, fNSigTpcTofPr0, fNSigTpcTofKa1, fNSigTpcTofPi2, fNSigTpcTofPr2]] var_selected: [fM, fY, fEta, fPt, fCpa, fCpaXY, fChi2PCA, fDecayLength, fDecayLengthXY, @@ -245,59 +257,59 @@ LcpKpi: - 20. 
files_names: namefile_unmerged_tree: AO2D.root + namefile_reco: AnalysisResultsReco.parquet + namefile_evt: AnalysisResultsEvt.parquet namefile_collcnt: AnalysisResultsCollCnt.parquet namefile_evtvalroot: AnalysisResultsROOTEvtVal.root namefile_evtorig: AnalysisResultsEvtOrig.parquet namefile_gen: AnalysisResultsGen.parquet namefile_reco_applieddata: AnalysisResultsRecoAppliedData.parquet namefile_reco_appliedmc: AnalysisResultsRecoAppliedMC.parquet - namefile_mcweights: mcweights.root + namefile_mcweights: NonPromptLcPtWeigths.root treeoutput: "Lctree" histofilename: "masshisto.root" efffilename: "effhisto.root" + histoweights: "ptWeigths" respfilename: "resphisto.root" crossfilename: "cross_section_tot.root" + namefile_bccnt: "bccnt.root" multi: data: - nprocessesparallel: 60 + nprocessesparallel: 10 maxfiles : [-1] #list of periods chunksizeunp : [100] #list of periods - chunksizeskim: [100] #list of periods + chunksizeskim: [10] #list of periods fracmerge : [0.05] #list of periods seedmerge: [12] #list of periods period: [LHC22o] #list of periods select_period: [1] - prefix_dir: /data2/MLhep/ - unmerged_tree_dir: [real/train_158254/alice/cern.ch/user/a/alihyperloop/jobs/0029] #list of periods - pkl: [LHC22pp/period_LHC22o/pkldata] #list of periods - pkl_skimmed: [LHC22pp/period_LHC22o/pklskdata] #list of periods - pkl_skimmed_merge_for_ml: [LHC22pp/period_LHC22o/pklskmldata] #list of periods - pkl_skimmed_merge_for_ml_all: LHC22pp/mltotdata - pkl_evtcounter_all: LHC22pp/evttotdata + prefix_dir: /data8/majak/MLHEP-data/23082024/ + unmerged_tree_dir: [LHC22o_ml/alice/cern.ch/user/a/alihyperloop/jobs/0064] #list of periods + pkl: [LHC22pp_ml/period_LHC22o/pkldata] #list of periods + pkl_skimmed: [LHC22pp_ml_16/period_LHC22o/pklskdata] #list of periods + pkl_skimmed_merge_for_ml: [LHC22pp_ml_16/period_LHC22o/pklskmldata] #list of periods + pkl_skimmed_merge_for_ml_all: LHC22pp_ml_16/mltotdata + pkl_evtcounter_all: LHC22pp_ml_16/evttotdata #select_jobs: 
[[hy_189959], [hy_189000]] mcreweights: [../Analyses] mc: - nprocessesparallel: 60 - maxfiles : [-1, -1] #list of periods - chunksizeunp : [100, 100] #list of periods - chunksizeskim: [100, 100] #list of periods - fracmerge : [1.0, 1.0] #list of periods - seedmerge: [12, 12] #list of periods - period: [LHC22b1b, LHC22b1a] #list of periods - select_period: [1, 1] - prefix_dir: /data2/MLhep/ - unmerged_tree_dir: [sim/train_159856/alice/cern.ch/user/a/alihyperloop/jobs/0029, - sim/train_159854/alice/cern.ch/user/a/alihyperloop/jobs/0029] #list of periods - pkl: [LHC22pp_mc/prod_LHC22b1b/pklmc, - LHC22pp_mc/prod_LHC22b1a/pklmc] #list of periods - pkl_skimmed: [LHC22pp_mc/prod_LHC22b1b/pklskmc, - LHC22pp_mc/prod_LHC22b1a/pklskmc] #list of periods - pkl_skimmed_merge_for_ml: [LHC22pp_mc/prod_LHC22b1b/pklskmlmc, - LHC22pp_mc/prod_LHC22b1a/pklskmlmc] #list of periods - pkl_skimmed_merge_for_ml_all: LHC22pp_mc/prod_LHC22/mltotmc - pkl_evtcounter_all: LHC22pp_mc/prod_LHC22/evttotmc - mcreweights: [../Analyses, ../Analyses] + nprocessesparallel: 80 + maxfiles : [-1] #list of periods + chunksizeunp : [100] #list of periods + chunksizeskim: [100] #list of periods + fracmerge : [1.0] #list of periods + seedmerge: [12] #list of periods + period: [LHC24d3b] #list of periods + select_period: [1] + prefix_dir: /data8/majak/MLHEP-data/10062024/ + unmerged_tree_dir: [LHC24d3b/alice/cern.ch/user/a/alihyperloop/outputs] #list of periods + pkl: [LHC22pp_mc_tuner/prod_LHC24d3b/pklmc] #list of period + pkl_skimmed: [LHC22pp_mc_tuner_pt_weight_16/prod_LHC24d3b/pklskmc] #list of periods + pkl_skimmed_merge_for_ml: [LHC22pp_mc_tuner_pt_weight_16/prod_LHC24d3b/pklskmlmc] #list of periods + pkl_skimmed_merge_for_ml_all: LHC22pp_mc_tuner_pt_weight_16/prod_LHC24d3b/mltotmc + pkl_evtcounter_all: LHC22pp_mc_tuner_pt_weight_16/prod_LHC24d3b/evttotmc + mcreweights: [../Analyses] ml: evtsel: null triggersel: @@ -306,7 +318,7 @@ LcpKpi: nclasses: [200000, 200000, 200000] equalise_sig_bkg: True - 
mult_bkg: [1,1,1,1,1,1] + mult_bkg: [1,1,1,1,1,1,1,1] sampletags: [0, 1, 1] sel_bkg: fM < 2.22 or fM > 2.35 # for plotting significance; should agree with bkg selection in sel_ml # best to have non-prompt (the smallest class) last, so the plots won't complain about the middle class missing @@ -317,11 +329,11 @@ LcpKpi: rnd_splt: 12 rnd_all: 12 # Set to None for pure randomness test_frac: 0.2 - binmin: [1,2,4,6,8,12] # must be equal to sel_skim_binmin (sel_skim_binmin bins) - binmax: [2,4,6,8,12,24] # must be equal to sel_skim_binmax (sel_skim_binmin bins) + binmin: [1,2,3,4,5,6,8,12] # must be equal to sel_skim_binmin (sel_skim_binmin bins) + binmax: [2,3,4,5,6,8,12,24] # must be equal to sel_skim_binmax (sel_skim_binmin bins) mltype: MultiClassification ncorescrossval: 10 - prefix_dir_ml: /data2/MLhep/ + prefix_dir_ml: /data8/majak/MLHEP/results-data-1006-finer-bins-1224-merged/ mlplot: mlplot # to be removed mlout: mlout # to be removed @@ -339,35 +351,36 @@ LcpKpi: num_steps: 111 # number of steps used in efficiency and signif. estimation bkg_function: pol2 # fit function for bkg (among TH1 predefined fit functions, e.g. expo, pol1, pol2, ...) 
save_fit: True # save bkg fits with the various cuts on ML output - raahp: [1,1,1,1,1,1] # sel_skim_binmin bins + raahp: [1,1,1,1,1,1,1,1] # sel_skim_binmin bins presel_gen_eff: "abs(fY) < 0.8" #presel_gen_eff: "abs(fY) < 0.8 and abs(fPosZ) < 10" mlapplication: data: - prefix_dir_app: /data2/MLhep/ + prefix_dir_app: /data8/majak/MLHEP/results-data-2308-finer-bins-1224-merged-16/ pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods pkl_skimmed_decmerged: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged] #list of periods mc: - prefix_dir_app: /data2/MLhep/ - pkl_skimmed_dec: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmc, - LHC22pp_mc/MLapplication/prod_LHC22b1a/skpkldecmc,] #list of periods - pkl_skimmed_decmerged: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmcmerged, - LHC22pp_mc/MLapplication/prod_LHC22b1a/skpkldecmcmerged] #list of periods + prefix_dir_app: /data8/majak/MLHEP/results-mc-2908-finer-bins-1224-merged-pt-weight-16/ + pkl_skimmed_dec: [LHC22pp_mc/MLapplication/prod_LHC24d3b/skpkldecmc] #list of periods + pkl_skimmed_decmerged: [LHC22pp_mc/MLapplication/prod_LHC24d3b/skpkldecmcmerged] #list of periods modelname: xgboost modelsperptbin: [xgboost_classifierLcpKpi_dfselection_fPt_1.0_2.0.sav, - xgboost_classifierLcpKpi_dfselection_fPt_2.0_4.0.sav, - xgboost_classifierLcpKpi_dfselection_fPt_4.0_6.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_2.0_3.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_3.0_4.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_4.0_5.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_5.0_6.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_6.0_8.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_8.0_12.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav] probcutpresel: - data: [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.7, 0.0, 0.0], [0.7, 0.0, 0.0]] #list of nbins - mc: [[0.5, 0.0, 0.0], [0.5, 0.0, 
0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.7, 0.0, 0.0], [0.7, 0.0, 0.0]] #list of nbins - probcutoptimal: [[0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.5, 0.2, 0.0], [0.5, 0.2, 0.0]] #list of nbins + data: [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.8, 0.0, 0.0], [0.8, 0.0, 0.0], [0.8, 0.0, 0.0]] #list of nbins + mc: [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.8, 0.0, 0.0], [0.8, 0.0, 0.0], [0.8, 0.0, 0.0]] #list of nbins + probcutoptimal: [[0.02, 0.0, %fd12%], [0.02, 0.0, %fd23%], [0.02, 0.0, %fd34%], [0.06, 0.0, %fd45%], [0.06, 0.0, %fd56%], [0.10, 0.0, %fd68%], [0.20, 0.0, %fd812%], [%bkg1216%, 0.0, %fd1216%], [%bkg1624%, 0.0, %fd1624%]] #list of nbins analysis: - anahptspectrum: "LctopKpi" # D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp - fd_method: "dd" # fc, Nb, ext, dd + anahptspectrum: "LctopKpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp + fd_method: "dd" # fc, Nb crosssec_prompt: True # True for prompt, False for non-prompt cctype: "pp" sigmamb: 59.4e+9 # 50.87e+9 pp5TeV, 57.8e+9 pp13TeV, 59.4e+9 pp Run3, pb @@ -379,23 +392,25 @@ LcpKpi: useperiod: [1,1] plotbin: [1] usesinglebineff: 0 - sel_binmin2: [1,2,4,6,8,12] #list of nbins - sel_binmax2: [2,4,6,8,12,24] #list of nbins + sel_binmin2: [1,2,3,4,5,6,8,12,16] #list of nbins + sel_binmax2: [2,3,4,5,6,8,12,16,24] #list of nbins var_binning2: null triggerbit: '' - use_cuts: False + use_cuts: false cuts: - - "fDecayLength > 0.02" - - "fDecayLength > 0.02" - - "fDecayLength > 0.02" - - "fDecayLength > 0.02" - - "fDecayLength > 0.02" - - "fDecayLength > 0.02" - + - "fMlNonPromptScore >= %fd12%" + - "fMlNonPromptScore >= %fd23%" + - "fMlNonPromptScore >= %fd34%" + - "fMlNonPromptScore >= %fd45%" + - "fMlNonPromptScore >= %fd56%" + - "fMlNonPromptScore >= %fd68%" + - "fMlBkgScore <= 0.20 and fMlNonPromptScore >= %fd812%" + - "fMlBkgScore 
<= %bkg1216% and fMlNonPromptScore >= %fd1216%" + - "fMlBkgScore <= %bkg1624% and fMlNonPromptScore >= %fd1624%" - sel_an_binmin: [1,2,4,6,8,12] - sel_an_binmax: [2,4,6,8,12,24] - binning_matching: [0,1,2,3,4,5] + sel_an_binmin: [1,2,3,4,5,6,8,12,16] + sel_an_binmax: [2,3,4,5,6,8,12,16,24] + binning_matching: [0,1,2,3,4,5,6,7,8] presel_gen_eff: "abs(fY) < 0.5" evtsel: "abs(fPosZ < 10)" triggersel: @@ -405,17 +420,16 @@ LcpKpi: data: runselection: [null] #FIXME - prefix_dir_res: /data2/MLhep/ + prefix_dir_res: /data8/majak/MLHEP/%resdir%/ results: [LHC22pp/Results/prod_LHC22o/resultsdata] #list of periods resultsallp: LHC22pp/Results/resultsdatatot mc: runselection: [null, null] #FIXME - prefix_dir_res: /data2/MLhep/ - results: [LHC22pp_mc/Results/prod_LHC22b1b/resultsmc, - LHC22pp_mc/Results/prod_LHC22b1a/resultsmc] #list of periods - resultsallp: LHC22pp_mc/Results/prod_LHC22/resultsmctot + prefix_dir_res: /data8/majak/MLHEP/%resdir%/ + results: [LHC22pp_mc/Results/prod_LHC24d3b/resultsmc] #list of periods + resultsallp: LHC22pp_mc/Results/prod_LHC24d3b/resultsmctot - mass_fit_lim: [2.10, 2.47] # region for the fit of the invariant mass distribution [GeV/c^2] + mass_fit_lim: [2.00, 2.47] # region for the fit of the invariant mass distribution [GeV/c^2] bin_width: 0.001 # bin width of the invariant mass histogram # To initialize the individual fits in pT bins # Decide whether to take the sigma from MC or data for individual fits @@ -423,19 +437,19 @@ LcpKpi: sgnfunc: [kGaus,kGaus,kGaus,kGaus,kGaus,kGaus] bkgfunc: [Pol2,Pol2,Pol2,Pol2,Pol2,Pol2] masspeak: 2.286 - massmin: [2.18, 2.18, 2.16, 2.14, 2.13, 2.10] - massmax: [2.40, 2.40, 2.42, 2.436, 2.446, 2.47] - rebin: [5,5,6,7,8,14] - fix_mean: [false,false,false,false,false,false] - fix_sigma: [false,false,false,false,false,false] + massmin: [2.18, 2.16, 2.16, 2.14, 2.16, 2.11, 2.10, 2.10] + massmax: [2.40, 2.43, 2.43, 2.42, 2.40, 2.476, 2.476, 2.47] + rebin: [4,4,4,5,5,6,7,10] + fix_mean: 
[false,false,false,false,false,false,false,false] + fix_sigma: [false,false,false,false,false,false,false,false] # Fix mean and/or sigma FixedMean: False - SetFixGaussianSigma: [false,false,false,false,false,false] + SetFixGaussianSigma: [false,false,false,false,false,false,false,false] # Use value set for "masspeak" for initializing total fit, otherwise what is derived from MC fit is used SetInitialGaussianMean: true # Use values set for "sigmaarray" for initializing total fit (per pT bin), # otherwise what is derived from MC fit is used - SetInitialGaussianSigma: [false,false,false,false,false,false] + SetInitialGaussianSigma: [false,false,false,false,false,false,false,false] # Max percentage deviation in sigma (from init) to be considered as a good fit MaxPercSigmaDeviation: 0.5 # Number of initial signal sigmas around the mean to be excluded for side-band fit @@ -443,7 +457,7 @@ LcpKpi: # Sigma around mean where signal is integrated after total fit has been ne nsigma_signal: 3 dolikelihood: true - sigmaarray: [0.01,0.01,0.01,0.01,0.01,0.01] + sigmaarray: [0.0094,0.0098,0.0102,0.0119,0.0138,0.0155,0.0191,0.0220] FixedSigma: false fitcase: Lc latexnamehadron: "#Lambda_{c}^{pK#pi}" @@ -457,7 +471,7 @@ LcpKpi: useperiod: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] #period from where to define prob cuts ncutvar: 10 #number of looser and tighter variations maxperccutvar: 0.25 #max diff in efficiency for loosest/tightest var - cutvarminrange: [[0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.7, 0.9]] #Min starting point for scan - cutvarmaxrange: [[0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9]] #Max starting point for scan + cutvarminrange: [[0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.7, 0.9], [0.7, 0.9]] #Min starting point for scan + cutvarmaxrange: [[0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9]] #Max starting point for scan 
fixedmean: True #Fix mean cutvar histo to central fit fixedsigma: True #Fix sigma cutvar histo to central fit diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py index d8c0da3e88..041a742b9d 100644 --- a/machine_learning_hep/hf_pt_spectrum.py +++ b/machine_learning_hep/hf_pt_spectrum.py @@ -106,7 +106,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument histos = {} - source_name = "FDD" if frac_method == "dd" else "FONLL" + source_name = "corryields_fdd" if frac_method == "dd" else "FONLL" histos[source_name] = {"prompt": {}, "nonprompt": {}} with TFile.Open(input_fonll_or_fdd_pred) as infile_pred: if frac_method == "dd": @@ -114,15 +114,15 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument histos["corryields_fdd"]["nonprompt"] = infile_pred.Get(f"hCorrYieldsNonPrompt") else: for pred in ("central", "min", "max"): - histos[source_name]["nonprompt"][pred] = infile_pred.Get( + histos["FONLL"]["nonprompt"][pred] = infile_pred.Get( f"{fonll_hist_name[channel]}fromBpred_{pred}_corr" ) - histos[source_name]["nonprompt"][pred].SetDirectory(0) + histos["FONLL"]["nonprompt"][pred].SetDirectory(0) if frac_method == "fc": - histos[source_name]["prompt"][pred] = infile_pred.Get( + histos["FONLL"]["prompt"][pred] = infile_pred.Get( f"{fonll_hist_name[channel]}pred_{pred}" ) - histos[source_name]["prompt"][pred].SetDirectory(0) + histos["FONLL"]["prompt"][pred].SetDirectory(0) infile_rawy = TFile.Open(yield_filename) histos["rawyields"] = infile_rawy.Get(yield_histoname) @@ -203,19 +203,20 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument rawy_unc = histos["rawyields"].GetBinError(i_pt + 1) eff_times_acc_prompt = histos["acceffp"].GetBinContent(i_pt + 1) eff_times_acc_nonprompt = histos["acceffnp"].GetBinContent(i_pt + 1) - ptmin_fonll = ( - histos[source_name]["nonprompt"]["central"].GetXaxis().FindBin(ptmin * 1.0001) - ) - ptmax_fonll = ( 
- histos[source_name]["nonprompt"]["central"].GetXaxis().FindBin(ptmax * 0.9999) - ) - crosssec_nonprompt_fonll = [ - histos[source_name]["nonprompt"][pred].Integral( - ptmin_fonll, ptmax_fonll, "width" + if frac_method != "dd": + ptmin_fonll = ( + histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmin * 1.0001) ) - / (ptmax - ptmin) - for pred in histos[source_name]["nonprompt"] - ] + ptmax_fonll = ( + histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmax * 0.9999) + ) + crosssec_nonprompt_fonll = [ + histos["FONLL"]["nonprompt"][pred].Integral( + ptmin_fonll, ptmax_fonll, "width" + ) + / (ptmax - ptmin) + for pred in histos["FONLL"]["nonprompt"] + ] # compute prompt fraction frac = [0,0,0] @@ -233,11 +234,11 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument ) elif frac_method == "fc": crosssec_prompt_fonll = [ - histos[source_name]["prompt"][pred].Integral( + histos["FONLL"]["prompt"][pred].Integral( ptmin_fonll, ptmax_fonll, "width" ) / (ptmax - ptmin) - for pred in histos[source_name]["prompt"] + for pred in histos["FONLL"]["prompt"] ] frac, _ = compute_fraction_fc( eff_times_acc_prompt, @@ -248,8 +249,8 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument elif frac_method == "ext": frac[0] = prompt_frac[i_pt] elif frac_method == "dd": - yield_times_acceff_prompt = histos["corryields_fdd"]["prompt"][pred].GetBinContent(i_pt + 1) * eff_times_acc_prompt - yield_times_acceff_nonprompt = histos["corryields_fdd"]["nonprompt"][pred].GetBinContent(i_pt + 1) * eff_times_acc_nonprompt + yield_times_acceff_prompt = histos["corryields_fdd"]["prompt"].GetBinContent(i_pt + 1) * eff_times_acc_prompt + yield_times_acceff_nonprompt = histos["corryields_fdd"]["nonprompt"].GetBinContent(i_pt + 1) * eff_times_acc_nonprompt yield_times_acceff_own = yield_times_acceff_prompt if crosssec_prompt else yield_times_acceff_nonprompt yield_times_acceff_other = yield_times_acceff_nonprompt if 
crosssec_prompt else yield_times_acceff_prompt frac_v = yield_times_acceff_own / (yield_times_acceff_own + yield_times_acceff_other) diff --git a/machine_learning_hep/processer.py b/machine_learning_hep/processer.py index a4f2a825ad..b4380b1c8c 100644 --- a/machine_learning_hep/processer.py +++ b/machine_learning_hep/processer.py @@ -128,7 +128,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab self.s_gen_skim = datap["sel_gen_skim"] #bitmap - # self.b_mcrefl = datap["bitmap_sel"].get("ismcrefl", None) + self.b_mcrefl = datap["bitmap_sel"].get("ismcrefl", None) #variables name self.v_train = datap["variables"]["var_training"] diff --git a/machine_learning_hep/submission/analyzer.yml b/machine_learning_hep/submission/analyzer.yml index e08341b099..19161a1abc 100644 --- a/machine_learning_hep/submission/analyzer.yml +++ b/machine_learning_hep/submission/analyzer.yml @@ -60,22 +60,22 @@ analysis: # Do only merged (false) doperperiod: false data: - histomass: false # processer: process_histomass + histomass: true # processer: process_histomass mc: - histomass: false # processer: process_histomass - efficiency: false # processer: process_efficiency + histomass: true # processer: process_histomass + efficiency: true # processer: process_efficiency steps: # analyzer methods to run (uncomment to activate) ##### Inclusive hadrons - # fit: - # efficiency: - # makenormyields: + #fit: + efficiency: + makenormyields: ##### Jets - init: - calculate_efficiencies: - qa: - fit: - estimate_feeddown: - analyze_with_sidesub: + #init: + #calculate_efficiencies: + #qa: + #fit: + #estimate_feeddown: + #analyze_with_sidesub: # analyze_with_sigextr: systematics: From 8625bb2b3992f92abf3bae7e187f777c7060d203 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Thu, 14 Nov 2024 14:35:59 +0100 Subject: [PATCH 03/16] Database for Nb method --- .../data_run3/database_ml_parameters_LcToPKPi_multiclass.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index 99d7d14972..ca0e9856a4 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -380,11 +380,11 @@ LcpKpi: probcutoptimal: [[0.02, 0.0, %fd12%], [0.02, 0.0, %fd23%], [0.02, 0.0, %fd34%], [0.06, 0.0, %fd45%], [0.06, 0.0, %fd56%], [0.10, 0.0, %fd68%], [0.20, 0.0, %fd812%], [%bkg1216%, 0.0, %fd1216%], [%bkg1624%, 0.0, %fd1624%]] #list of nbins analysis: anahptspectrum: "LctopKpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp - fd_method: "dd" # fc, Nb + fd_method: "Nb" # fc, Nb crosssec_prompt: True # True for prompt, False for non-prompt cctype: "pp" sigmamb: 59.4e+9 # 50.87e+9 pp5TeV, 57.8e+9 pp13TeV, 59.4e+9 pp Run3, pb - inputfonllpred: /data8/majak/fdd-results/10092024/fdd-results-precise/CutVarLc_pp13TeV_LHC24d3.root + inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root dir_general_plots: analysis_plots Run3analysis: From a86ba396cfc2772ec2c35782608d5684984dbd6d Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Mon, 18 Nov 2024 17:25:58 +0100 Subject: [PATCH 04/16] Add histogram for fractions --- machine_learning_hep/hf_pt_spectrum.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py index 041a742b9d..8e98dce602 100644 --- a/machine_learning_hep/hf_pt_spectrum.py +++ b/machine_learning_hep/hf_pt_spectrum.py @@ -193,6 +193,12 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument 0, 1 ) + hfraction = TH1F( + "hfraction", + f";{axistit_pt};{axistit_fprompt}", + len(ptlims["rawyields"]) - 1, + ptlims["rawyields"], + ) for i_pt, (ptmin, ptmax) in enumerate( 
zip(ptlims["rawyields"][:-1], ptlims["rawyields"][1:]) @@ -281,6 +287,8 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument gfraction.SetPointError( i_pt, pt_delta / 2, pt_delta / 2, frac[0] - frac[1], frac[2] - frac[0] ) + hfraction.SetBinContent(i_pt + 1, frac[0]) + hfraction.SetBinError(i_pt + 1, 0.0) c = TCanvas("c", "c", 600, 800) c.Divide (1, 2) @@ -300,6 +308,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument hnorm.Write() if frac_method != "ext": gfraction.Write() + hfraction.Write() for _, value in histos.items(): if isinstance(value, TH1): From ae5bb30a0e8665bccbf673d6fcb8a7ebd5456fcd Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 19 Nov 2024 15:35:52 +0100 Subject: [PATCH 05/16] Back to dd config --- .../data_run3/database_ml_parameters_LcToPKPi_multiclass.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index ca0e9856a4..99d7d14972 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -380,11 +380,11 @@ LcpKpi: probcutoptimal: [[0.02, 0.0, %fd12%], [0.02, 0.0, %fd23%], [0.02, 0.0, %fd34%], [0.06, 0.0, %fd45%], [0.06, 0.0, %fd56%], [0.10, 0.0, %fd68%], [0.20, 0.0, %fd812%], [%bkg1216%, 0.0, %fd1216%], [%bkg1624%, 0.0, %fd1624%]] #list of nbins analysis: anahptspectrum: "LctopKpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp - fd_method: "Nb" # fc, Nb + fd_method: "dd" # fc, Nb crosssec_prompt: True # True for prompt, False for non-prompt cctype: "pp" sigmamb: 59.4e+9 # 50.87e+9 pp5TeV, 57.8e+9 pp13TeV, 59.4e+9 pp Run3, pb - inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root + inputfonllpred: 
/data8/majak/fdd-results/10092024/fdd-results-precise/CutVarLc_pp13TeV_LHC24d3.root dir_general_plots: analysis_plots Run3analysis: From a9db9545e5ee487b970e297a7d8f475ee06045ab Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Wed, 20 Nov 2024 15:40:36 +0100 Subject: [PATCH 06/16] Use configurable nevents for crosssec calculations --- machine_learning_hep/analysis/analyzerdhadrons.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py index ca4e16b29f..5ddee38a41 100644 --- a/machine_learning_hep/analysis/analyzerdhadrons.py +++ b/machine_learning_hep/analysis/analyzerdhadrons.py @@ -496,10 +496,14 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b filemass = TFile.Open(self.n_filemass) hevents = filemass.Get("all_events") hselevents = filemass.Get("sel_events") - norm, selnorm = self.calculate_norm(self.logger, hevents, hselevents) - histonorm.SetBinContent(1, selnorm) - self.logger.warning("Number of events %d", norm) - self.logger.warning("Number of events after event selection %d", selnorm) + if self.p_nevents is None: + norm, selnorm = self.calculate_norm(self.logger, hevents, hselevents) + histonorm.SetBinContent(1, selnorm) + self.logger.warning("Number of events %d", norm) + self.logger.warning("Number of events after event selection %d", selnorm) + else: + self.logger.warning("Number of events provided %d", self.p_nevents) + selnorm = self.p_nevents if self.p_dobkgfromsideband: fileoutbkg = TFile.Open("%s/Background_fromsidebands_%s_%s.root" % \ From fe5707f9b3a0914e517b1ef6167b1e71aa150409 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Wed, 20 Nov 2024 15:52:21 +0100 Subject: [PATCH 07/16] Fix nevents for Nb method. 
Fix input for dd method --- .../data_run3/database_ml_parameters_LcToPKPi_multiclass.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index 99d7d14972..f1e1dbffd7 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -384,7 +384,7 @@ LcpKpi: crosssec_prompt: True # True for prompt, False for non-prompt cctype: "pp" sigmamb: 59.4e+9 # 50.87e+9 pp5TeV, 57.8e+9 pp13TeV, 59.4e+9 pp Run3, pb - inputfonllpred: /data8/majak/fdd-results/10092024/fdd-results-precise/CutVarLc_pp13TeV_LHC24d3.root + inputfonllpred: /data8/majak/systematics/230824/finalCutVarMerged_0-24.root #inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root dir_general_plots: analysis_plots Run3analysis: @@ -462,7 +462,7 @@ LcpKpi: fitcase: Lc latexnamehadron: "#Lambda_{c}^{pK#pi}" latexbin2var: "n_{trkl}" - nevents: null + nevents: 47092223770 dodoublecross: false dobkgfromsideband: false From 52c89f77bc9654c06209c7d0353353f4b221a61b Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Fri, 22 Nov 2024 16:43:48 +0100 Subject: [PATCH 08/16] Remove redundant hfraction. Add propagated statistical uncertainties to dd result. 
--- machine_learning_hep/hf_analysis_utils.py | 57 ++++++++++++++++++++++- machine_learning_hep/hf_pt_spectrum.py | 41 ++++++++-------- 2 files changed, 78 insertions(+), 20 deletions(-) diff --git a/machine_learning_hep/hf_analysis_utils.py b/machine_learning_hep/hf_analysis_utils.py index cb6d547f43..f07e60b327 100644 --- a/machine_learning_hep/hf_analysis_utils.py +++ b/machine_learning_hep/hf_analysis_utils.py @@ -16,7 +16,7 @@ file: hf_analysis_utils.py brief: script with miscellanea utils methods for the HF analyses author: Fabrizio Grosa , CERN -Macro committed and manteined in O2Physics: +Macro committed and manteined in O2Physics: https://github.com/AliceO2Group/O2Physics/tree/master/PWGHF/D2H/Macros """ @@ -237,6 +237,10 @@ def compute_fraction_nb( / rawy / sigma_mb ) + print(f"Nb pp fraction {i_sigma} raa ratio {i_raa_ratio} sigma {sigma} " + f"delta_pt {delta_pt} delta_y {delta_y} " \ + f"acceff other {acc_eff_other} b_ratio {b_ratio} n_events {n_events} " \ + f"rawyields {rawy} sigmamb {sigma_mb} final frac {frac_cent}") else: # p-Pb or Pb-Pb: iterative evaluation of Raa needed delta_raa = 1.0 while delta_raa > 1.0e-3: @@ -280,6 +284,10 @@ def compute_fraction_nb( / rawy / sigma_mb ) + print(f"Nb pp fraction {i_sigma} raa ratio {i_raa_ratio} sigma {sigma} " + f"delta_pt {delta_pt} delta_y {delta_y} " \ + f"acceff other {acc_eff_other} b_ratio {b_ratio} n_events {n_events} " \ + f"rawyields {rawy} sigmamb {sigma_mb} final frac {frac_cent}") else: # p-Pb or Pb-Pb: iterative evaluation of Raa needed delta_raa = 1.0 frac_tmp = 1.0 @@ -321,6 +329,53 @@ def compute_fraction_nb( return frac +# pylint: disable=too-many-branches,too-many-arguments,too-many-locals,invalid-name +def compute_fraction_dd( + acc_eff_same, + acc_eff_other, + corryields_same, + corryields_other, + cov_same, + cov_other, + cov_comb +): + """ + Method to get fraction of prompt / FD fraction with data-driven method + + Parameters + ---------- + - acc_eff_same: efficiency times acceptance 
of prompt (non-prompt) D + - acc_eff_other: efficiency times acceptance of non-prompt (prompt) D + - corryields_same: corrected yield of prompt (non-prompt) D computed with cut variation + - corryields_other: corrected yield of non-prompt (prompt) D computed with cut variation + - cov_same: covariance of prompt (non-prompt) D computed with cut variation + - cov_other: covariance of non-prompt (prompt) D computed with cut variation + - cov_comb: covariance of prompt and non-prompt D computed with cut variation + + Returns + ---------- + - frac: list of fraction of prompt (non-prompt) D (central, min, max) + where min = frac - unc, max = frac + unc, and unc is the statistical + uncertainty propagated from efficiency and corrected yield uncertainties + """ + yield_times_acceff_same = corryields_same * acc_eff_same + yield_times_acceff_other = corryields_other * acc_eff_other + frac_v = yield_times_acceff_same / (yield_times_acceff_same + yield_times_acceff_other) + print(f"same yield times acceff: {yield_times_acceff_same} " \ + f"other {yield_times_acceff_other} final frac: {frac_v}") + + denom = (yield_times_acceff_same + yield_times_acceff_other) ** 2 + der_same_same = (acc_eff_same * (yield_times_acceff_same + yield_times_acceff_other) - \ + acc_eff_same**2 * corryields_same) / denom + der_same_other = -acc_eff_same * acc_eff_other * corryields_same / denom + unc = np.sqrt(der_same_same**2 * cov_same + der_same_other * cov_other + \ + 2 * der_same_same * der_same_other * cov_comb) + print(f"denom {denom} der_same_same {der_same_same} der_same_other {der_same_other} " \ + f"cov same {cov_same} cov other {cov_other} cov comb {cov_comb} final unc {unc}") + + return [frac_v, frac_v - unc, frac_v + unc] + + def get_hist_binlimits(histo): """ Method to retrieve bin limits of ROOT.TH1 diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py index 8e98dce602..bc34749946 100644 --- a/machine_learning_hep/hf_pt_spectrum.py +++ 
b/machine_learning_hep/hf_pt_spectrum.py @@ -42,6 +42,7 @@ compute_crosssection, compute_fraction_fc, compute_fraction_nb, + compute_fraction_dd, get_hist_binlimits, ) @@ -106,13 +107,15 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument histos = {} - source_name = "corryields_fdd" if frac_method == "dd" else "FONLL" - histos[source_name] = {"prompt": {}, "nonprompt": {}} with TFile.Open(input_fonll_or_fdd_pred) as infile_pred: if frac_method == "dd": - histos["corryields_fdd"]["prompt"] = infile_pred.Get(f"hCorrYieldsPrompt") - histos["corryields_fdd"]["nonprompt"] = infile_pred.Get(f"hCorrYieldsNonPrompt") + histos["corryields_fdd"] = [infile_pred.Get("hCorrYieldsPrompt"), + infile_pred.Get("hCorrYieldsNonPrompt")] + histos["covariances"] = [infile_pred.Get("hCovPromptPrompt"), + infile_pred.Get("hCovNonPromptNonPrompt"), + infile_pred.Get("hCovPromptNonPrompt")] else: + histos["FONLL"] = {"prompt": {}, "nonprompt": {}} for pred in ("central", "min", "max"): histos["FONLL"]["nonprompt"][pred] = infile_pred.Get( f"{fonll_hist_name[channel]}fromBpred_{pred}_corr" @@ -193,12 +196,6 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument 0, 1 ) - hfraction = TH1F( - "hfraction", - f";{axistit_pt};{axistit_fprompt}", - len(ptlims["rawyields"]) - 1, - ptlims["rawyields"], - ) for i_pt, (ptmin, ptmax) in enumerate( zip(ptlims["rawyields"][:-1], ptlims["rawyields"][1:]) @@ -255,12 +252,20 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument elif frac_method == "ext": frac[0] = prompt_frac[i_pt] elif frac_method == "dd": - yield_times_acceff_prompt = histos["corryields_fdd"]["prompt"].GetBinContent(i_pt + 1) * eff_times_acc_prompt - yield_times_acceff_nonprompt = histos["corryields_fdd"]["nonprompt"].GetBinContent(i_pt + 1) * eff_times_acc_nonprompt - yield_times_acceff_own = yield_times_acceff_prompt if crosssec_prompt else yield_times_acceff_nonprompt - yield_times_acceff_other = 
yield_times_acceff_nonprompt if crosssec_prompt else yield_times_acceff_prompt - frac_v = yield_times_acceff_own / (yield_times_acceff_own + yield_times_acceff_other) - frac = [frac_v] * 3 + eff_times_acc_own = eff_times_acc_prompt if crosssec_prompt else eff_times_acc_nonprompt + eff_times_acc_other = eff_times_acc_nonprompt if crosssec_prompt else eff_times_acc_prompt + pnp_ind = 0 if crosssec_prompt else 1 + print(f'bin {i_pt + 1} corr yields prompt {histos["corryields_fdd"][0].GetBinContent(i_pt + 1)} ' \ + f'non-prompt {histos["corryields_fdd"][1].GetBinContent(i_pt + 1)} ' \ + f'eff prompt {eff_times_acc_prompt} non-prompt {eff_times_acc_nonprompt}') + frac = compute_fraction_dd( + eff_times_acc_own, + eff_times_acc_other, + histos["corryields_fdd"][pnp_ind].GetBinContent(i_pt + 1), + histos["corryields_fdd"][1 - pnp_ind].GetBinContent(i_pt + 1), + histos["covariances"][pnp_ind].GetBinContent(i_pt + 1), + histos["covariances"][1 - pnp_ind].GetBinContent(i_pt + 1), + histos["covariances"][2].GetBinContent(i_pt + 1)) # compute cross section times BR crosssec, crosssec_unc = compute_crosssection( @@ -284,11 +289,10 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument if frac_method != "ext": output_prompt.append(frac[0]) gfraction.SetPoint(i_pt, pt_cent, frac[0]) + print(f"Errors in gfraction: {frac[0] - frac[1]}, {frac[2] - frac[0]}") gfraction.SetPointError( i_pt, pt_delta / 2, pt_delta / 2, frac[0] - frac[1], frac[2] - frac[0] ) - hfraction.SetBinContent(i_pt + 1, frac[0]) - hfraction.SetBinError(i_pt + 1, 0.0) c = TCanvas("c", "c", 600, 800) c.Divide (1, 2) @@ -308,7 +312,6 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument hnorm.Write() if frac_method != "ext": gfraction.Write() - hfraction.Write() for _, value in histos.items(): if isinstance(value, TH1): From 8c261c5147ec4d77b274647b29a4b78db9aafd5c Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Wed, 27 Nov 2024 17:40:41 
+0100 Subject: [PATCH 09/16] Narrow range for low pt reproduction --- .../data_run3/database_ml_parameters_LcToPKPi_multiclass.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index f1e1dbffd7..d88a4bf9da 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -429,7 +429,7 @@ LcpKpi: results: [LHC22pp_mc/Results/prod_LHC24d3b/resultsmc] #list of periods resultsallp: LHC22pp_mc/Results/prod_LHC24d3b/resultsmctot - mass_fit_lim: [2.00, 2.47] # region for the fit of the invariant mass distribution [GeV/c^2] + mass_fit_lim: [2.10, 2.47] # region for the fit of the invariant mass distribution [GeV/c^2] bin_width: 0.001 # bin width of the invariant mass histogram # To initialize the individual fits in pT bins # Decide whether to take the sigma from MC or data for individual fits From 8555f5b6e2d30d49b748f02d702ea7244cbd1161 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Mon, 2 Dec 2024 13:36:30 +0100 Subject: [PATCH 10/16] Add dd_N method to calculate cross section from final corrected yields from cut variation --- machine_learning_hep/hf_analysis_utils.py | 9 +++++++++ machine_learning_hep/hf_pt_spectrum.py | 7 +++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/machine_learning_hep/hf_analysis_utils.py b/machine_learning_hep/hf_analysis_utils.py index f07e60b327..9b97ebd7f6 100644 --- a/machine_learning_hep/hf_analysis_utils.py +++ b/machine_learning_hep/hf_analysis_utils.py @@ -62,6 +62,15 @@ def compute_crosssection( if rawy <= 0: crosssection = -9999 crosssec_unc = -1 + elif method_frac == "dd_N": + crosssection = ( + frac + * sigma_mb + / (2 * delta_pt * delta_y * n_events * b_ratio) + ) + # TODO: How to 
calculate the uncertainty? + # frac_unc / frac * crosssection? + crosssec_unc = 0. else: crosssection = ( rawy diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py index bc34749946..9105a91262 100644 --- a/machine_learning_hep/hf_pt_spectrum.py +++ b/machine_learning_hep/hf_pt_spectrum.py @@ -89,7 +89,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument print(f"\033[91mERROR: channel {channel} not supported. Exit\033[0m") sys.exit(2) - if frac_method not in ["Nb", "fc", "ext", "dd"]: + if frac_method not in ["Nb", "fc", "ext", "dd", "dd_N"]: print( f"\033[91mERROR: method to subtract nonprompt" f" {frac_method} not supported. Exit\033[0m" @@ -108,7 +108,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument histos = {} with TFile.Open(input_fonll_or_fdd_pred) as infile_pred: - if frac_method == "dd": + if frac_method in ("dd", "dd_N"): histos["corryields_fdd"] = [infile_pred.Get("hCorrYieldsPrompt"), infile_pred.Get("hCorrYieldsNonPrompt")] histos["covariances"] = [infile_pred.Get("hCovPromptPrompt"), @@ -266,6 +266,9 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument histos["covariances"][pnp_ind].GetBinContent(i_pt + 1), histos["covariances"][1 - pnp_ind].GetBinContent(i_pt + 1), histos["covariances"][2].GetBinContent(i_pt + 1)) + elif frac_method == "dd_N": + frac = [histos["corryields_fdd"][pnp_ind].GetBinContent(i_pt + 1)] * 3 + # compute cross section times BR crosssec, crosssec_unc = compute_crosssection( From 8d2d0339ad245e569ca6696fd0f07c8b442eaadd Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Mon, 2 Dec 2024 13:37:01 +0100 Subject: [PATCH 11/16] Configure proper merged input files for dd in database and analyser --- machine_learning_hep/analysis/analyzerdhadrons.py | 4 +++- .../data_run3/database_ml_parameters_LcToPKPi_multiclass.yml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git 
a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py index 5ddee38a41..020417a0f0 100644 --- a/machine_learning_hep/analysis/analyzerdhadrons.py +++ b/machine_learning_hep/analysis/analyzerdhadrons.py @@ -474,16 +474,18 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b yield_filename = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root", None, [self.case, self.typean]) - yield_filename = "/data8/majak/MLHEP/input-fd-10092024/yields-1224_split_bkg_0.60_0.60_fd_0.00-fixed-sigma.root" + fileouteff = "/data8/majak/crosssec/merged_yields_fdd_approvals_fd_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00.root" if not os.path.exists(yield_filename): self.logger.fatal( "Yield file %s could not be found", yield_filename) fileouteff = f"{self.d_resultsallpmc}/{self.efficiency_filename}{self.case}{self.typean}.root" + fileouteff = "/data8/majak/crosssec/merged_eff_fdd_approvals_fd_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00.root" if not os.path.exists(fileouteff): self.logger.fatal( "Efficiency file %s could not be found", fileouteff) + fileoutcross = "%s/finalcross%s%s.root" % \ (self.d_resultsallpdata, self.case, self.typean) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index d88a4bf9da..50ea3fd7cb 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -384,7 +384,7 @@ LcpKpi: crosssec_prompt: True # True for prompt, False for non-prompt cctype: "pp" sigmamb: 59.4e+9 # 50.87e+9 pp5TeV, 57.8e+9 pp13TeV, 59.4e+9 pp Run3, pb - inputfonllpred: /data8/majak/systematics/230824/finalCutVarMerged_0-24.root #inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root + inputfonllpred: 
/data8/majak/fdd-results/07112024-check-approvals/fdd-results-merged-sept-yields-eff/CutVarLc_pp13TeV_LHC24d3.root #inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root dir_general_plots: analysis_plots Run3analysis: From 71f573bfa80aa10818e18ce55436395213197997 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Mon, 13 Jan 2025 16:56:30 +0100 Subject: [PATCH 12/16] Working cross section estimators. Only multiclass optimization remains --- machine_learning_hep/analysis/analyzerdhadrons.py | 2 +- .../data_run3/database_ml_parameters_LcToPKPi_multiclass.yml | 5 +++-- machine_learning_hep/hf_analysis_utils.py | 2 +- machine_learning_hep/hf_pt_spectrum.py | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py index 020417a0f0..bcbf9fc952 100644 --- a/machine_learning_hep/analysis/analyzerdhadrons.py +++ b/machine_learning_hep/analysis/analyzerdhadrons.py @@ -474,7 +474,7 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b yield_filename = self.make_file_path(self.d_resultsallpdata, self.yields_filename, "root", None, [self.case, self.typean]) - fileouteff = "/data8/majak/crosssec/merged_yields_fdd_approvals_fd_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00.root" + yield_filename = "/data8/majak/crosssec/merged_yields_fdd_approvals_fd_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00.root" if not os.path.exists(yield_filename): self.logger.fatal( "Yield file %s could not be found", yield_filename) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index 50ea3fd7cb..438d72c99d 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ 
b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -380,11 +380,12 @@ LcpKpi: probcutoptimal: [[0.02, 0.0, %fd12%], [0.02, 0.0, %fd23%], [0.02, 0.0, %fd34%], [0.06, 0.0, %fd45%], [0.06, 0.0, %fd56%], [0.10, 0.0, %fd68%], [0.20, 0.0, %fd812%], [%bkg1216%, 0.0, %fd1216%], [%bkg1624%, 0.0, %fd1624%]] #list of nbins analysis: anahptspectrum: "LctopKpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp - fd_method: "dd" # fc, Nb + fd_method: "dd" # fc, Nb, dd, dd_N crosssec_prompt: True # True for prompt, False for non-prompt cctype: "pp" sigmamb: 59.4e+9 # 50.87e+9 pp5TeV, 57.8e+9 pp13TeV, 59.4e+9 pp Run3, pb - inputfonllpred: /data8/majak/fdd-results/07112024-check-approvals/fdd-results-merged-sept-yields-eff/CutVarLc_pp13TeV_LHC24d3.root #inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root + inputfonllpred: /data8/majak/fdd-results/07112024-check-approvals/fdd-results-merged-sept-yields-eff/CutVarLc_pp13TeV_LHC24d3.root + #inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root dir_general_plots: analysis_plots Run3analysis: diff --git a/machine_learning_hep/hf_analysis_utils.py b/machine_learning_hep/hf_analysis_utils.py index 9b97ebd7f6..251f302458 100644 --- a/machine_learning_hep/hf_analysis_utils.py +++ b/machine_learning_hep/hf_analysis_utils.py @@ -377,7 +377,7 @@ def compute_fraction_dd( der_same_same = (acc_eff_same * (yield_times_acceff_same + yield_times_acceff_other) - \ acc_eff_same**2 * corryields_same) / denom der_same_other = -acc_eff_same * acc_eff_other * corryields_same / denom - unc = np.sqrt(der_same_same**2 * cov_same + der_same_other * cov_other + \ + unc = np.sqrt(der_same_same**2 * cov_same + der_same_other**2 * cov_other + \ 2 * der_same_same * der_same_other * cov_comb) print(f"denom {denom} der_same_same {der_same_same} der_same_other {der_same_other} " \ f"cov same {cov_same} cov other {cov_other} cov comb {cov_comb} 
final unc {unc}") diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py index 9105a91262..d5210828da 100644 --- a/machine_learning_hep/hf_pt_spectrum.py +++ b/machine_learning_hep/hf_pt_spectrum.py @@ -206,7 +206,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument rawy_unc = histos["rawyields"].GetBinError(i_pt + 1) eff_times_acc_prompt = histos["acceffp"].GetBinContent(i_pt + 1) eff_times_acc_nonprompt = histos["acceffnp"].GetBinContent(i_pt + 1) - if frac_method != "dd": + if frac_method not in ("dd", "dd_N"): ptmin_fonll = ( histos["FONLL"]["nonprompt"]["central"].GetXaxis().FindBin(ptmin * 1.0001) ) @@ -267,6 +267,7 @@ def hf_pt_spectrum(channel, # pylint: disable=too-many-locals, too-many-argument histos["covariances"][1 - pnp_ind].GetBinContent(i_pt + 1), histos["covariances"][2].GetBinContent(i_pt + 1)) elif frac_method == "dd_N": + pnp_ind = 0 if crosssec_prompt else 1 frac = [histos["corryields_fdd"][pnp_ind].GetBinContent(i_pt + 1)] * 3 From 59b33adc36d8175801f8d826aab5e639d9073b84 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Wed, 5 Mar 2025 10:50:55 +0100 Subject: [PATCH 13/16] Defer opening of nevents files --- machine_learning_hep/analysis/analyzerdhadrons.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py index bcbf9fc952..5adacd2775 100644 --- a/machine_learning_hep/analysis/analyzerdhadrons.py +++ b/machine_learning_hep/analysis/analyzerdhadrons.py @@ -495,10 +495,10 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b histonorm = TH1F("histonorm", "histonorm", 1, 0, 1) - filemass = TFile.Open(self.n_filemass) - hevents = filemass.Get("all_events") - hselevents = filemass.Get("sel_events") if self.p_nevents is None: + filemass = TFile.Open(self.n_filemass) + hevents = filemass.Get("all_events") + 
hselevents = filemass.Get("sel_events") norm, selnorm = self.calculate_norm(self.logger, hevents, hselevents) histonorm.SetBinContent(1, selnorm) self.logger.warning("Number of events %d", norm) From 95c2ca854f2cef7025bab74a2bfd19f9affb45af Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 15 Jul 2025 13:23:58 +0200 Subject: [PATCH 14/16] Remove spurious lines --- machine_learning_hep/hf_analysis_utils.py | 4 ---- machine_learning_hep/hf_pt_spectrum.py | 1 - 2 files changed, 5 deletions(-) diff --git a/machine_learning_hep/hf_analysis_utils.py b/machine_learning_hep/hf_analysis_utils.py index cf1538f776..b56db71623 100644 --- a/machine_learning_hep/hf_analysis_utils.py +++ b/machine_learning_hep/hf_analysis_utils.py @@ -202,10 +202,6 @@ def compute_fraction_nb( frac_cent = ( 1 - sigma * delta_pt * delta_y * acc_eff_other * b_ratio * n_events * 2 / rawy / sigma_mb ) - print(f"Nb pp fraction {i_sigma} raa ratio {i_raa_ratio} sigma {sigma} " - f"delta_pt {delta_pt} delta_y {delta_y} " \ - f"acceff other {acc_eff_other} b_ratio {b_ratio} n_events {n_events} " \ - f"rawyields {rawy} sigmamb {sigma_mb} final frac {frac_cent}") else: # p-Pb or Pb-Pb: iterative evaluation of Raa needed delta_raa = 1.0 while delta_raa > 1.0e-3: diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py index dd5547e902..f8a4cacc57 100644 --- a/machine_learning_hep/hf_pt_spectrum.py +++ b/machine_learning_hep/hf_pt_spectrum.py @@ -107,7 +107,6 @@ def hf_pt_spectrum( histos = {} - infile_pred = TFile.Open(input_fonll_or_fdd_pred) with TFile.Open(input_fonll_or_fdd_pred) as infile_pred: if frac_method in ("dd", "dd_N"): histos["corryields_fdd"] = [infile_pred.Get("hCorrYieldsPrompt"), infile_pred.Get("hCorrYieldsNonPrompt")] From feb914e04e8dd80f05dfd6560118a4f616bb9e41 Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 15 Jul 2025 13:25:28 +0200 Subject: [PATCH 15/16] Exclude changes in database and submission files 
--- ...base_ml_parameters_LcToPKPi_multiclass.yml | 69 ++++++++----------- machine_learning_hep/submission/analyzer.yml | 24 +++---- 2 files changed, 39 insertions(+), 54 deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml index f4e46544a2..7920531766 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_multiclass.yml @@ -26,8 +26,6 @@ LcpKpi: sel_skim_binmax: [2, 4, 6, 8, 12, 24] #list of nbins apply_yptacccut: false var_binning: fPt - do_ptshape: true - var_binning_ptshape: fPtWeighted dofullevtmerge: false var_cand: fCandidateSelFlag var_swap: fIsCandidateSwapped @@ -71,7 +69,6 @@ LcpKpi: O2hf3psel: level: mc vars: [fCandidateSelFlag] - O2hf3pml: [fMlScores] filter: "fPt > 1." extra: fY: log((sqrt(2.28646**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.28646**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated @@ -82,9 +79,6 @@ LcpKpi: ismcprompt: {var: fOriginMcRec, req: [[0], []], level: mc} ismcfd: {var: fOriginMcRec, req: [[1], []], level: mc} swap: {cand: fCandidateSelFlag, var_swap: fIsCandidateSwapped, vars: [ismcsignal, ismcprompt, ismcfd], level: mc} - extract_component: - - {var: fMlScores, newvar: fMlBkgScore, component: 0} - - {var: fMlScores, newvar: fMlNonPromptScore, component: 2} gen: level: mc @@ -229,22 +223,18 @@ LcpKpi: - 20. 
files_names: namefile_unmerged_tree: AO2D.root - namefile_reco: AnalysisResultsReco.parquet - namefile_evt: AnalysisResultsEvt.parquet namefile_collcnt: AnalysisResultsCollCnt.parquet namefile_evtvalroot: AnalysisResultsROOTEvtVal.root namefile_evtorig: AnalysisResultsEvtOrig.parquet namefile_gen: AnalysisResultsGen.parquet namefile_reco_applieddata: AnalysisResultsRecoAppliedData.parquet namefile_reco_appliedmc: AnalysisResultsRecoAppliedMC.parquet - namefile_mcweights: NonPromptLcPtWeigths.root + namefile_mcweights: mcweights.root treeoutput: "Lctree" histofilename: "masshisto.root" efffilename: "effhisto.root" - histoweights: "ptWeigths" respfilename: "resphisto.root" crossfilename: "cross_section_tot.root" - namefile_bccnt: "bccnt.root" multi: data: @@ -256,13 +246,13 @@ LcpKpi: seedmerge: [12] #list of periods period: [LHC22o] #list of periods select_period: [1] - prefix_dir: /data8/majak/MLHEP-data/23082024/ - unmerged_tree_dir: [LHC22o_ml/alice/cern.ch/user/a/alihyperloop/jobs/0064] #list of periods - pkl: [LHC22pp_ml/period_LHC22o/pkldata] #list of periods - pkl_skimmed: [LHC22pp_ml_16/period_LHC22o/pklskdata] #list of periods - pkl_skimmed_merge_for_ml: [LHC22pp_ml_16/period_LHC22o/pklskmldata] #list of periods - pkl_skimmed_merge_for_ml_all: LHC22pp_ml_16/mltotdata - pkl_evtcounter_all: LHC22pp_ml_16/evttotdata + prefix_dir: /data2/MLhep/ + unmerged_tree_dir: [real/train_158254/alice/cern.ch/user/a/alihyperloop/jobs/0029] #list of periods + pkl: [LHC22pp/period_LHC22o/pkldata] #list of periods + pkl_skimmed: [LHC22pp/period_LHC22o/pklskdata] #list of periods + pkl_skimmed_merge_for_ml: [LHC22pp/period_LHC22o/pklskmldata] #list of periods + pkl_skimmed_merge_for_ml_all: LHC22pp/mltotdata + pkl_evtcounter_all: LHC22pp/evttotdata #select_jobs: [[hy_189959], [hy_189000]] mcreweights: [../Analyses] mc: @@ -305,7 +295,7 @@ LcpKpi: binmax: [2, 4, 6, 8, 12, 24] # must be equal to sel_skim_binmax (sel_skim_binmin bins) mltype: MultiClassification ncorescrossval: 
10 - prefix_dir_ml: /data8/majak/MLHEP/results-data-1006-finer-bins-1224-merged/ + prefix_dir_ml: /data2/MLhep/ mlplot: mlplot # to be removed mlout: mlout # to be removed @@ -329,7 +319,7 @@ LcpKpi: mlapplication: data: - prefix_dir_app: /data8/majak/MLHEP/results-data-2308-finer-bins-1224-merged-16/ + prefix_dir_app: /data2/MLhep/ pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods pkl_skimmed_decmerged: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged] #list of periods mc: @@ -339,17 +329,15 @@ LcpKpi: modelname: xgboost modelsperptbin: [xgboost_classifierLcpKpi_dfselection_fPt_1.0_2.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_2.0_4.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_4.0_6.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_6.0_8.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_8.0_12.0.sav, xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav] probcutpresel: - data: [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.8, 0.0, 0.0], [0.8, 0.0, 0.0], [0.8, 0.0, 0.0]] #list of nbins - mc: [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.8, 0.0, 0.0], [0.8, 0.0, 0.0], [0.8, 0.0, 0.0]] #list of nbins - probcutoptimal: [[0.02, 0.0, %fd12%], [0.02, 0.0, %fd23%], [0.02, 0.0, %fd34%], [0.06, 0.0, %fd45%], [0.06, 0.0, %fd56%], [0.10, 0.0, %fd68%], [0.20, 0.0, %fd812%], [%bkg1216%, 0.0, %fd1216%], [%bkg1624%, 0.0, %fd1624%]] #list of nbins + data: [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.7, 0.0, 0.0], [0.7, 0.0, 0.0]] #list of nbins + mc: [[0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.5, 0.0, 0.0], [0.7, 0.0, 0.0], [0.7, 0.0, 0.0]] #list of nbins + probcutoptimal: [[0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.2, 0.15, 0.0], [0.5, 0.2, 0.0], [0.5, 0.2, 0.0]] #list of nbins analysis: - anahptspectrum: "LctopKpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp 
- fd_method: "dd" # fc, Nb, dd, dd_N - crosssec_prompt: True # True for prompt, False for non-prompt + anahptspectrum: "LctopKpi" # D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp + fd_method: "Nb" # fc, Nb cctype: "pp" sigmamb: 59.4e+9 # 50.87e+9 pp5TeV, 57.8e+9 pp13TeV, 59.4e+9 pp Run3, pb - inputfonllpred: /data8/majak/fdd-results/07112024-check-approvals/fdd-results-merged-sept-yields-eff/CutVarLc_pp13TeV_LHC24d3.root - #inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root + inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root dir_general_plots: analysis_plots Run3analysis: @@ -361,17 +349,14 @@ LcpKpi: sel_binmax2: [2, 4, 6, 8, 12, 24] #list of nbins var_binning2: null triggerbit: '' - use_cuts: false + use_cuts: False cuts: - - "fMlNonPromptScore >= %fd12%" - - "fMlNonPromptScore >= %fd23%" - - "fMlNonPromptScore >= %fd34%" - - "fMlNonPromptScore >= %fd45%" - - "fMlNonPromptScore >= %fd56%" - - "fMlNonPromptScore >= %fd68%" - - "fMlBkgScore <= 0.20 and fMlNonPromptScore >= %fd812%" - - "fMlBkgScore <= %bkg1216% and fMlNonPromptScore >= %fd1216%" - - "fMlBkgScore <= %bkg1624% and fMlNonPromptScore >= %fd1624%" + - "fDecayLength > 0.02" + - "fDecayLength > 0.02" + - "fDecayLength > 0.02" + - "fDecayLength > 0.02" + - "fDecayLength > 0.02" + - "fDecayLength > 0.02" sel_an_binmin: [1, 2, 4, 6, 8, 12] sel_an_binmax: [2, 4, 6, 8, 12, 24] @@ -385,7 +370,7 @@ LcpKpi: data: runselection: [null] #FIXME - prefix_dir_res: /data8/majak/MLHEP/%resdir%/ + prefix_dir_res: /data2/MLhep/ results: [LHC22pp/Results/prod_LHC22o/resultsdata] #list of periods resultsallp: LHC22pp/Results/resultsdatatot mc: @@ -427,7 +412,7 @@ LcpKpi: fitcase: Lc latexnamehadron: "#Lambda_{c}^{pK#pi}" latexbin2var: "n_{trkl}" - nevents: 47092223770 + nevents: null dodoublecross: false dobkgfromsideband: false @@ -436,7 +421,7 @@ LcpKpi: useperiod: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] #period 
from where to define prob cuts ncutvar: 10 #number of looser and tighter variations maxperccutvar: 0.25 #max diff in efficiency for loosest/tightest var - cutvarminrange: [[0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.7, 0.9], [0.7, 0.9]] #Min starting point for scan - cutvarmaxrange: [[0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9]] #Max starting point for scan + cutvarminrange: [[0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.1, 0.3], [0.7, 0.9]] #Min starting point for scan + cutvarmaxrange: [[0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9], [0.7, 0.9]] #Max starting point for scan fixedmean: True #Fix mean cutvar histo to central fit fixedsigma: True #Fix sigma cutvar histo to central fit diff --git a/machine_learning_hep/submission/analyzer.yml b/machine_learning_hep/submission/analyzer.yml index 19161a1abc..e08341b099 100644 --- a/machine_learning_hep/submission/analyzer.yml +++ b/machine_learning_hep/submission/analyzer.yml @@ -60,22 +60,22 @@ analysis: # Do only merged (false) doperperiod: false data: - histomass: true # processer: process_histomass + histomass: false # processer: process_histomass mc: - histomass: true # processer: process_histomass - efficiency: true # processer: process_efficiency + histomass: false # processer: process_histomass + efficiency: false # processer: process_efficiency steps: # analyzer methods to run (uncomment to activate) ##### Inclusive hadrons - #fit: - efficiency: - makenormyields: + # fit: + # efficiency: + # makenormyields: ##### Jets - #init: - #calculate_efficiencies: - #qa: - #fit: - #estimate_feeddown: - #analyze_with_sidesub: + init: + calculate_efficiencies: + qa: + fit: + estimate_feeddown: + analyze_with_sidesub: # analyze_with_sigextr: systematics: From fe2b8346a669266ea7eb2625c9fad9eab85be31d Mon Sep 17 00:00:00 2001 From: saganatt <8majak8@gmail.com> Date: Tue, 15 Jul 2025 13:29:15 +0200 Subject: 
[PATCH 16/16] Fixed next lines --- machine_learning_hep/analysis/analyzerdhadrons.py | 3 +-- machine_learning_hep/hf_pt_spectrum.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py index 2b891dec63..24672b0303 100644 --- a/machine_learning_hep/analysis/analyzerdhadrons.py +++ b/machine_learning_hep/analysis/analyzerdhadrons.py @@ -495,7 +495,6 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b self.logger.fatal("Yield file %s could not be found", yield_filename) fileouteff = f"{self.d_resultsallpmc}/{self.efficiency_filename}{self.case}{self.typean}.root" - fileouteff = "/data8/majak/crosssec/merged_eff_fdd_approvals_fd_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00_0.00.root" if not os.path.exists(fileouteff): self.logger.fatal("Efficiency file %s could not be found", fileouteff) @@ -503,7 +502,7 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-b namehistoeffprompt = "eff" namehistoefffeed = "eff_fd" - nameyield = "hRawYields" + nameyield = "hyields0" histonorm = TH1F("histonorm", "histonorm", 1, 0, 1) diff --git a/machine_learning_hep/hf_pt_spectrum.py b/machine_learning_hep/hf_pt_spectrum.py index f8a4cacc57..efaf891870 100644 --- a/machine_learning_hep/hf_pt_spectrum.py +++ b/machine_learning_hep/hf_pt_spectrum.py @@ -43,7 +43,6 @@ compute_fraction_dd, compute_fraction_fc, compute_fraction_nb, - compute_fraction_dd, get_hist_binlimits, )