From 733871a39c906540e633c2a20884838a6de63346 Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 19:34:43 +0000 Subject: [PATCH 01/15] feat: Add PIDFeatureExtractor task and update Tools/CMakeLists.txt --- Tools/CMakeLists.txt | 1 + Tools/PIDFeatureExtractor/CMakeLists.txt | 16 + .../PIDFeatureExtractor.cxx | 518 +++++++++++ Tools/PIDFeatureExtractor/README.md | 853 ++++++++++++++++++ .../myConfigExtractor.json | 125 +++ Tools/PIDFeatureExtractor/run.sh | 27 + 6 files changed, 1540 insertions(+) create mode 100644 Tools/PIDFeatureExtractor/CMakeLists.txt create mode 100644 Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx create mode 100644 Tools/PIDFeatureExtractor/README.md create mode 100644 Tools/PIDFeatureExtractor/myConfigExtractor.json create mode 100755 Tools/PIDFeatureExtractor/run.sh diff --git a/Tools/CMakeLists.txt b/Tools/CMakeLists.txt index 2b1de764169..ad870181e8c 100644 --- a/Tools/CMakeLists.txt +++ b/Tools/CMakeLists.txt @@ -12,3 +12,4 @@ add_subdirectory(PIDML) add_subdirectory(ML) add_subdirectory(KFparticle) +add_subdirectory(PIDFeatureExtractor) diff --git a/Tools/PIDFeatureExtractor/CMakeLists.txt b/Tools/PIDFeatureExtractor/CMakeLists.txt new file mode 100644 index 00000000000..7398c52c320 --- /dev/null +++ b/Tools/PIDFeatureExtractor/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. +# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. 
+ +# PID feature extractor +o2physics_add_dpl_workflow(my-example-task-pid-feature-extractor + SOURCES PIDFeatureExtractor.cxx + PUBLIC_LINK_LIBRARIES O2Physics::AnalysisCore + COMPONENT_NAME AnalysisTutorial) \ No newline at end of file diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx new file mode 100644 index 00000000000..3be46c145f9 --- /dev/null +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -0,0 +1,518 @@ +#include "Framework/runDataProcessing.h" +#include "Framework/AnalysisTask.h" +#include "Common/DataModel/TrackSelectionTables.h" +#include "Framework/ASoAHelpers.h" +#include "Common/DataModel/PIDResponse.h" +#include "Common/DataModel/EventSelection.h" +#include "TFile.h" +#include "TTree.h" +#include <fstream> +#include <cmath> + +using namespace o2; +using namespace o2::framework; +using namespace o2::framework::expressions; + +/** + * @struct PIDFeatureExtractor + * @brief O2Physics task for extracting particle identification features from AO2D files + * + * This task processes track data from the ALICE experiment and extracts comprehensive + * PID (Particle Identification) features for machine learning applications. + * It combines TPC and TOF information to compute Bayesian probabilities and saves + * features to both ROOT TTree and CSV formats. 
+ */ +struct PIDFeatureExtractor { + // ============================================================================ + // OUTPUT OBJECTS - File and data structures for feature storage + // ============================================================================ + + /// Output ROOT file for storing the TTree with extracted features + std::unique_ptr outputFile; + + /// TTree storing all extracted features for each track + std::unique_ptr featureTree; + + /// CSV output stream for exporting features in comma-separated format + std::ofstream csvFile; + + // ============================================================================ + // KINEMATIC VARIABLES - Track momentum and position information + // ============================================================================ + + int event_id; /// Unique identifier for each collision event + int track_id; /// Track index within the event + + // Momentum components (in GeV/c) + float px, py, pz; /// Cartesian momentum components + float pt, p; /// Transverse momentum and total momentum + + // Angular variables + float eta; /// Pseudorapidity + float phi; /// Azimuthal angle + float theta; /// Polar angle (calculated from eta) + + // Track properties + int charge; /// Track charge (+1 or -1) + int track_type; /// Type of track (e.g., 0=global, 1=TPC-only, etc.) 
+ + // ============================================================================ + // TPC VARIABLES - Time Projection Chamber PID information + // ============================================================================ + + float tpc_signal; /// dE/dx energy loss in TPC (specific ionization) + + // n-sigma values: standard deviations from expected energy loss for each particle + float tpc_nsigma_pi; /// n-sigma for pion (π) + float tpc_nsigma_ka; /// n-sigma for kaon (K) + float tpc_nsigma_pr; /// n-sigma for proton (p) + float tpc_nsigma_el; /// n-sigma for electron (e) + + // Track quality variables + int tpc_nclusters; /// Number of TPC clusters used in track fit + float tpc_chi2; /// Chi-square per degree of freedom of TPC fit + + // ============================================================================ + // TOF VARIABLES - Time-Of-Flight PID information + // ============================================================================ + + float tof_beta; /// β = v/c (velocity over speed of light) + float tof_mass; /// Reconstructed mass from TOF measurement + + // n-sigma values for TOF detection + float tof_nsigma_pi; /// n-sigma for pion in TOF + float tof_nsigma_ka; /// n-sigma for kaon in TOF + float tof_nsigma_pr; /// n-sigma for proton in TOF + float tof_nsigma_el; /// n-sigma for electron in TOF + + // ============================================================================ + // BAYESIAN PID VARIABLES - Combined PID probabilities + // ============================================================================ + + /// Bayesian probability that track is a pion (probability sum = 1.0) + float bayes_prob_pi; + /// Bayesian probability that track is a kaon + float bayes_prob_ka; + /// Bayesian probability that track is a proton + float bayes_prob_pr; + /// Bayesian probability that track is an electron + float bayes_prob_el; + + // ============================================================================ + // MONTE CARLO TRUTH INFORMATION 
- For simulated data + // ============================================================================ + + int mc_pdg; /// PDG code of true particle (0 if no MC match) + float mc_px, mc_py, mc_pz; /// True momentum components from simulation + + // ============================================================================ + // DETECTOR AVAILABILITY FLAGS + // ============================================================================ + + bool has_tpc; /// Flag: track has TPC information + bool has_tof; /// Flag: track has TOF information + + // ============================================================================ + // TRACK IMPACT PARAMETERS - Quality and background rejection + // ============================================================================ + + float dca_xy; /// Distance of closest approach in xy-plane + float dca_z; /// Distance of closest approach in z-direction + + // ============================================================================ + // HISTOGRAM REGISTRY - Quality control histograms + // ============================================================================ + + /// Registry for quality control histograms + HistogramRegistry histos{"histos", {}, OutputObjHandlingPolicy::AnalysisObject}; + + // ============================================================================ + // CONFIGURABLE PARAMETERS - User-adjustable settings + // ============================================================================ + + /// Base path and filename for output files (without extension) + Configurable outputPath{"outputPath", "pid_features", "Output file base"}; + + /// Enable CSV export of features + Configurable exportCSV{"exportCSV", true, "Export CSV"}; + + /// Enable ROOT file export of features + Configurable exportROOT{"exportROOT", true, "Export ROOT"}; + + /// Minimum pseudorapidity cut for track selection + Configurable etaMin{"etaMin", -1.5f, "Minimum eta"}; + + /// Maximum pseudorapidity cut for track selection + 
Configurable etaMax{"etaMax", 1.5f, "Maximum eta"}; + + /// Minimum transverse momentum cut (GeV/c) + Configurable ptMin{"ptMin", 0.1f, "Minimum pT"}; + + /// Maximum transverse momentum cut (GeV/c) + Configurable ptMax{"ptMax", 20.0f, "Maximum pT"}; + + // ============================================================================ + // INITIALIZATION FUNCTION + // ============================================================================ + + /** + * @brief Initialize output files and histograms + * + * Called once at task startup. Creates ROOT TTree and CSV file headers, + * and initializes all quality control histograms. + */ + void init(InitContext const&) { + std::string base = outputPath.value; + + // ======================================================================== + // ROOT OUTPUT SETUP + // ======================================================================== + if (exportROOT) { + // Create ROOT file for storing the TTree + outputFile = std::make_unique((base + ".root").c_str(), "RECREATE"); + + // Create TTree with descriptive name and title + featureTree = std::make_unique("pid_features", "PID features"); + + // Create branches for KINEMATIC VARIABLES + featureTree->Branch("event_id", &event_id); + featureTree->Branch("track_id", &track_id); + featureTree->Branch("px", &px); + featureTree->Branch("py", &py); + featureTree->Branch("pz", &pz); + featureTree->Branch("pt", &pt); + featureTree->Branch("p", &p); + featureTree->Branch("eta", &eta); + featureTree->Branch("phi", &phi); + featureTree->Branch("theta", &theta); + featureTree->Branch("charge", &charge); + featureTree->Branch("track_type", &track_type); + + // Create branches for TPC VARIABLES + featureTree->Branch("tpc_signal", &tpc_signal); + featureTree->Branch("tpc_nsigma_pi", &tpc_nsigma_pi); + featureTree->Branch("tpc_nsigma_ka", &tpc_nsigma_ka); + featureTree->Branch("tpc_nsigma_pr", &tpc_nsigma_pr); + featureTree->Branch("tpc_nsigma_el", &tpc_nsigma_el); + 
featureTree->Branch("tpc_nclusters", &tpc_nclusters); + featureTree->Branch("tpc_chi2", &tpc_chi2); + + // Create branches for TOF VARIABLES + featureTree->Branch("tof_beta", &tof_beta); + featureTree->Branch("tof_mass", &tof_mass); + featureTree->Branch("tof_nsigma_pi", &tof_nsigma_pi); + featureTree->Branch("tof_nsigma_ka", &tof_nsigma_ka); + featureTree->Branch("tof_nsigma_pr", &tof_nsigma_pr); + featureTree->Branch("tof_nsigma_el", &tof_nsigma_el); + + // Create branches for BAYESIAN PID VARIABLES + featureTree->Branch("bayes_prob_pi", &bayes_prob_pi); + featureTree->Branch("bayes_prob_ka", &bayes_prob_ka); + featureTree->Branch("bayes_prob_pr", &bayes_prob_pr); + featureTree->Branch("bayes_prob_el", &bayes_prob_el); + + // Create branches for MONTE CARLO TRUTH (simulated data only) + featureTree->Branch("mc_pdg", &mc_pdg); + featureTree->Branch("mc_px", &mc_px); + featureTree->Branch("mc_py", &mc_py); + featureTree->Branch("mc_pz", &mc_pz); + + // Create branches for DETECTOR FLAGS + featureTree->Branch("has_tpc", &has_tpc); + featureTree->Branch("has_tof", &has_tof); + + // Create branches for IMPACT PARAMETERS + featureTree->Branch("dca_xy", &dca_xy); + featureTree->Branch("dca_z", &dca_z); + } + + // ======================================================================== + // CSV OUTPUT SETUP + // ======================================================================== + if (exportCSV) { + csvFile.open((base + ".csv").c_str()); + // Write CSV header with all column names + csvFile << + "event_id,track_id,px,py,pz,pt,p,eta,phi,theta,charge,track_type," + "tpc_signal,tpc_nsigma_pi,tpc_nsigma_ka,tpc_nsigma_pr,tpc_nsigma_el," + "tpc_nclusters,tpc_chi2," + "tof_beta,tof_mass,tof_nsigma_pi,tof_nsigma_ka,tof_nsigma_pr,tof_nsigma_el," + "bayes_prob_pi,bayes_prob_ka,bayes_prob_pr,bayes_prob_el," + "mc_pdg,mc_px,mc_py,mc_pz,has_tpc,has_tof,dca_xy,dca_z\n"; + } + + // ======================================================================== + // HISTOGRAM SETUP - 
Quality Control Plots + // ======================================================================== + + // Define histogram axes with binning + const AxisSpec axisPt{200, 0, 10, "pT"}; // 200 bins, 0-10 GeV/c + const AxisSpec axisEta{60, -1.5, 1.5, "eta"}; // 60 bins, -1.5 to 1.5 + const AxisSpec axisdEdx{300, 0, 300, "dE/dx"}; // 300 bins, 0-300 + const AxisSpec axisBeta{120, 0, 1.2, "beta"}; // 120 bins, 0 to 1.2 + const AxisSpec axisMass{100, -0.2, 2.0, "mass"}; // 100 bins, -0.2 to 2.0 GeV/c² + + // Add histograms to registry + histos.add("QC/nTracks", "Tracks", kTH1F, {{10000, 0, 100000}}); + histos.add("QC/pt", "pT", kTH1F, {axisPt}); + histos.add("QC/eta", "eta", kTH1F, {axisEta}); + histos.add("QC/tpc_dEdx_vs_pt", "dE/dx vs pT", kTH2F, {axisPt, axisdEdx}); + histos.add("QC/tof_beta_vs_p", "beta vs p", kTH2F, {axisPt, axisBeta}); + histos.add("QC/mass_vs_p", "mass vs p", kTH2F, {axisPt, axisMass}); + } + + // ============================================================================ + // BAYESIAN PID CALCULATION FUNCTION + // ============================================================================ + + /** + * @brief Compute Bayesian probabilities combining TPC and TOF information + * + * Uses Gaussian likelihood in n-sigma space and Bayesian inference to combine + * TPC dE/dx and TOF mass measurements. 
// ---- PIDFeatureExtractor member: Bayesian PID helper ------------------------
  /**
   * @brief Turn TPC and TOF n-sigma values into normalised species probabilities.
   *
   * For each of the four hypotheses the function builds
   *   chi2_i = nsTPC_i^2 + nsTOF_i^2
   * and evaluates P_i proportional to exp(-0.5 * chi2_i) * pri_i, normalised
   * over the four species.
   *
   * An n-sigma value that is non-finite, or that carries the -999 "no
   * measurement" sentinel written by process(), contributes nothing to chi2.
   * A track without TOF is therefore classified from TPC alone, and a track
   * with neither detector falls back to the normalised priors. Previously the
   * finite sentinel was squared, which drove every probability to zero.
   *
   * The smallest chi2 among species with a positive prior is subtracted before
   * exponentiating. This leaves the normalised result mathematically unchanged
   * but keeps exp() from underflowing to zero for tracks that are far from all
   * hypotheses.
   *
   * @param[in]  nsTPC  TPC n-sigma values for [pion, kaon, proton, electron]
   * @param[in]  nsTOF  TOF n-sigma values for [pion, kaon, proton, electron]
   * @param[in]  pri    Prior weight of each hypothesis; entries that are not > 0 yield probability 0
   * @param[out] out    Probabilities summing to 1, or all 0 when no prior is positive
   */
  void computeBayesianPID(float nsTPC[4], float nsTOF[4], float pri[4], float out[4]) {
    constexpr int kNSpecies = 4;

    // chi2 contribution of a single n-sigma value; missing input carries no information
    const auto chi2Term = [](float ns) {
      constexpr float kSentinelCut = -998.5f; // anything at or below this is the -999 sentinel
      return (std::isfinite(ns) && ns > kSentinelCut) ? ns * ns : 0.f;
    };

    float chi2[kNSpecies];
    float minChi2 = INFINITY;
    for (int i = 0; i < kNSpecies; ++i) {
      chi2[i] = chi2Term(nsTPC[i]) + chi2Term(nsTOF[i]);
      if (pri[i] > 0.f && chi2[i] < minChi2) {
        minChi2 = chi2[i];
      }
    }

    float sum = 0.f;
    for (int i = 0; i < kNSpecies; ++i) {
      // delta >= 0 for every species with a positive prior, so exp() stays <= 1
      const float delta = chi2[i] - minChi2;
      const bool usable = pri[i] > 0.f && std::isfinite(delta);
      out[i] = usable ? std::exp(-0.5f * delta) * pri[i] : 0.f;
      sum += out[i];
    }

    for (int i = 0; i < kNSpecies; ++i) {
      out[i] = sum > 0.f ? out[i] / sum : 0.f;
    }
  }

  // ============================================================================
  // MAIN PROCESSING FUNCTION
  // ============================================================================
+ * + * @param collision Collision event data + * @param tracks Table of tracks with all associated PID information + * @param mcParticles Monte Carlo particle information (for simulated data) + */ + void process( + aod::Collision const& collision, + soa::Join< + aod::Tracks, // Base track properties + aod::TracksExtra, // Extended track info + aod::TracksDCA, // Impact parameters (DCA) + aod::pidTPCPi, aod::pidTPCKa, aod::pidTPCPr, // TPC PID for pion, kaon, proton + aod::pidTPCEl, // TPC PID for electron + aod::pidTOFPi, aod::pidTOFKa, aod::pidTOFPr, // TOF PID for pion, kaon, proton + aod::pidTOFEl, // TOF PID for electron + aod::pidTOFmass, aod::pidTOFbeta, // TOF mass and beta + aod::McTrackLabels // MC truth matching + > const& tracks, + aod::McParticles const& mcParticles) + { + // Use static counter to maintain event numbering across process calls + static int eventCounter = 0; + event_id = eventCounter++; + int idx = 0; + + // ====================================================================== + // TRACK LOOP - Process each track in the event + // ====================================================================== + for (auto& t : tracks) { + + // ==================================================================== + // TRACK SELECTION - Apply kinematic cuts + // ==================================================================== + if (t.pt() < ptMin || t.pt() > ptMax) continue; // Apply pT cut + if (t.eta() < etaMin || t.eta() > etaMax) continue; // Apply eta cut + + track_id = idx++; + + // ==================================================================== + // EXTRACT KINEMATIC VARIABLES + // ==================================================================== + px = t.px(); + py = t.py(); + pz = t.pz(); + pt = t.pt(); + p = t.p(); + eta = t.eta(); + phi = t.phi(); + // Calculate polar angle from pseudorapidity: θ = 2*arctan(exp(-η)) + theta = 2.f * atanf(expf(-eta)); + charge = t.sign(); // Track charge + track_type = t.trackType(); // Track 
categorization + + // ==================================================================== + // EXTRACT TPC INFORMATION + // ==================================================================== + has_tpc = t.hasTPC(); + if (has_tpc) { + // TPC has valid measurement + tpc_signal = t.tpcSignal(); // dE/dx specific ionization + tpc_nsigma_pi = t.tpcNSigmaPi(); // Deviation from pion hypothesis + tpc_nsigma_ka = t.tpcNSigmaKa(); // Deviation from kaon hypothesis + tpc_nsigma_pr = t.tpcNSigmaPr(); // Deviation from proton hypothesis + tpc_nsigma_el = t.tpcNSigmaEl(); // Deviation from electron hypothesis + tpc_nclusters = t.tpcNClsFound(); // Quality: number of clusters + tpc_chi2 = t.tpcChi2NCl(); // Quality: fit chi-square + } else { + // TPC has no valid measurement - set sentinel values + tpc_signal = tpc_nsigma_pi = tpc_nsigma_ka = tpc_nsigma_pr = tpc_nsigma_el = -999; + tpc_nclusters = 0; + tpc_chi2 = -999; + } + + // ==================================================================== + // EXTRACT TOF INFORMATION + // ==================================================================== + has_tof = t.hasTOF(); + if (has_tof) { + // TOF has valid measurement + tof_beta = t.beta(); // Velocity over c + tof_mass = t.mass(); // Reconstructed mass + tof_nsigma_pi = t.tofNSigmaPi(); // Deviation from pion hypothesis + tof_nsigma_ka = t.tofNSigmaKa(); // Deviation from kaon hypothesis + tof_nsigma_pr = t.tofNSigmaPr(); // Deviation from proton hypothesis + tof_nsigma_el = t.tofNSigmaEl(); // Deviation from electron hypothesis + } else { + // TOF has no valid measurement - set sentinel values + tof_beta = tof_mass = -999; + tof_nsigma_pi = tof_nsigma_ka = tof_nsigma_pr = tof_nsigma_el = -999; + } + + // ==================================================================== + // EXTRACT IMPACT PARAMETERS (track quality) + // ==================================================================== + dca_xy = t.dcaXY(); // Distance of closest approach in transverse plane + dca_z = 
t.dcaZ(); // Distance of closest approach along beam axis + + // ==================================================================== + // COMPUTE BAYESIAN PID + // ==================================================================== + float arrTPC[4] = {tpc_nsigma_pi, tpc_nsigma_ka, tpc_nsigma_pr, tpc_nsigma_el}; + float arrTOF[4] = {tof_nsigma_pi, tof_nsigma_ka, tof_nsigma_pr, tof_nsigma_el}; + float priors[4] = {1.f, 0.2f, 0.1f, 0.05f}; // Prior prob: π, K, p, e + float probs[4]; + + // Compute combined PID probabilities + computeBayesianPID(arrTPC, arrTOF, priors, probs); + bayes_prob_pi = probs[0]; + bayes_prob_ka = probs[1]; + bayes_prob_pr = probs[2]; + bayes_prob_el = probs[3]; + + // ==================================================================== + // EXTRACT MONTE CARLO TRUTH (if available) + // ==================================================================== + // Safely access MC particle information with existence check + if (t.has_mcParticle()) { + auto mc = t.mcParticle(); + mc_pdg = mc.pdgCode(); // Particle identifier code + mc_px = mc.px(); // True momentum components + mc_py = mc.py(); + mc_pz = mc.pz(); + } else { + // No MC match - set sentinel values + mc_pdg = 0; + mc_px = mc_py = mc_pz = 0; + } + + // ==================================================================== + // WRITE OUTPUT + // ==================================================================== + + // Write to ROOT TTree + if (exportROOT) featureTree->Fill(); + + // Write to CSV file + if (exportCSV) { + csvFile << event_id << "," << track_id << "," + << px << "," << py << "," << pz << "," + << pt << "," << p << "," + << eta << "," << phi << "," << theta << "," + << charge << "," << track_type << "," + << tpc_signal << "," << tpc_nsigma_pi << "," << tpc_nsigma_ka << "," << tpc_nsigma_pr << "," << tpc_nsigma_el << "," + << tpc_nclusters << "," << tpc_chi2 << "," + << tof_beta << "," << tof_mass << "," << tof_nsigma_pi << "," << tof_nsigma_ka << "," << tof_nsigma_pr << 
"," << tof_nsigma_el << "," + << bayes_prob_pi << "," << bayes_prob_ka << "," << bayes_prob_pr << "," << bayes_prob_el << "," + << mc_pdg << "," << mc_px << "," << mc_py << "," << mc_pz << "," + << has_tpc << "," << has_tof << "," + << dca_xy << "," << dca_z << "\n"; + } + + // ==================================================================== + // FILL QUALITY CONTROL HISTOGRAMS + // ==================================================================== + histos.fill(HIST("QC/nTracks"), 1); // Count total tracks processed + histos.fill(HIST("QC/pt"), pt); // pT distribution + histos.fill(HIST("QC/eta"), eta); // eta distribution + + // TPC dE/dx vs pT (only if TPC measurement exists) + if (has_tpc) histos.fill(HIST("QC/tpc_dEdx_vs_pt"), pt, tpc_signal); + + // TOF beta and mass vs momentum (only if TOF measurement exists) + if (has_tof) { + histos.fill(HIST("QC/tof_beta_vs_p"), p, tof_beta); + histos.fill(HIST("QC/mass_vs_p"), p, tof_mass); + } + } + } + + // ============================================================================ + // FINALIZATION FUNCTION + // ============================================================================ + + /** + * @brief Clean up and finalize output files + * + * Called at task completion. Writes TTree to file and closes all output files. + */ + void finalize() { + if (exportROOT) { + // Write TTree to ROOT file and close + outputFile->cd(); + featureTree->Write(); + outputFile->Close(); + } + if (exportCSV) { + // Close CSV file + csvFile.close(); + } + } +}; + +// ============================================================================ +// WORKFLOW DEFINITION +// ============================================================================ + +/** + * @brief Define the O2Physics workflow + * + * This function creates and registers the PIDFeatureExtractor task + * into the O2 data processing workflow. 
+ */ +WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) { + return WorkflowSpec{adaptAnalysisTask<PIDFeatureExtractor>(cfgc)}; +} \ No newline at end of file diff --git a/Tools/PIDFeatureExtractor/README.md b/Tools/PIDFeatureExtractor/README.md new file mode 100644 index 00000000000..a37b50db08b --- /dev/null +++ b/Tools/PIDFeatureExtractor/README.md @@ -0,0 +1,853 @@ +# ALICE O2Physics PID Feature Extractor + +A comprehensive C++ task for the O2Physics framework that extracts particle identification (PID) features from ALICE AO2D files. This tool processes track data and generates high-quality features suitable for machine learning-based particle identification. + +## Overview + +The **PIDFeatureExtractor** combines information from multiple ALICE detectors (TPC and TOF) to create a rich feature set for distinguishing between different particle types (pions, kaons, protons, and electrons). The extracted features are saved in both ROOT TTree and CSV formats for easy access and analysis. + +### Key Features + +- **Multi-Detector Integration**: Combines TPC (dE/dx) and TOF (time-of-flight) PID information +- **CCDB Integration**: Automatically fetches unavailable features from the ALICE Conditions Database (CCDB) +- **Bayesian Probability Computation**: Calculates combined PID probabilities using Gaussian likelihood in n-sigma space +- **Flexible Output**: Exports to both ROOT TTree and CSV formats simultaneously +- **Quality Control**: Includes QC histograms for track kinematics and detector response +- **MC Truth Matching**: Includes Monte Carlo truth information for simulated data +- **Configurable Selection**: User-adjustable kinematic cuts (pT, η range) via JSON configuration +- **Track Quality Metrics**: Stores DCA and TPC fit quality information + +## Requirements + +### O2Physics Environment + +- **O2Physics framework**: Latest version with PID response tables and CCDB access +- **ROOT**: Version 6.x or later +- **CMake**: 3.x or later +- **C++ Standard**: C++17 or
later +- **CCDB Access**: Network connection to ALICE CCDB for fetching PID calibrations +- **bash**: For running the `run.sh` execution script + +### Required Data Tables + +The task expects the following input tables from AO2D files. Some tables may be fetched from CCDB if not present in the file: + +| Table | Source | Purpose | Fallback | +|-------|--------|---------|----------| +| `aod::Tracks` | AO2D | Base track properties (momentum, angles) | Required | +| `aod::TracksExtra` | AO2D | Extended track information | Required | +| `aod::TracksDCA` | AO2D | Impact parameters (DCA) | Required | +| `aod::pidTPCPi/Ka/Pr/El` | AO2D/CCDB | TPC n-sigma values for each particle species | CCDB | +| `aod::pidTOFPi/Ka/Pr/El` | AO2D/CCDB | TOF n-sigma values for each particle species | CCDB | +| `aod::pidTOFmass` | AO2D/CCDB | TOF reconstructed mass | CCDB | +| `aod::pidTOFbeta` | AO2D/CCDB | TOF beta (v/c) measurement | CCDB | +| `aod::McTrackLabels` | AO2D | MC truth matching (optional, for simulated data) | Optional | +| `aod::McParticles` | AO2D | MC particle information | Optional | + +**Note:** If PID tables are not available in the AO2D file, the framework automatically retrieves PID calibrations from CCDB using the collision timestamp to access the correct calibration period. 
+ +## Extracted Features + +### Kinematic Variables (12 features) + +| Variable | Type | Range | Unit | Description | +|----------|------|-------|------|-------------| +| `event_id` | int | - | - | Unique collision event identifier | +| `track_id` | int | - | - | Track index within event | +| `px`, `py`, `pz` | float | - | GeV/c | Cartesian momentum components | +| `pt` | float | 0.1-20 | GeV/c | Transverse momentum | +| `p` | float | - | GeV/c | Total momentum | +| `eta` | float | -1.5 to 1.5 | - | Pseudorapidity | +| `phi` | float | -π to π | rad | Azimuthal angle | +| `theta` | float | 0 to π | rad | Polar angle | +| `charge` | int | ±1 | - | Track charge | +| `track_type` | int | 0-2 | - | Track classification | + +### TPC Detector Features (7 features) + +| Variable | Type | Range | Unit | Description | Source | +|----------|------|-------|------|-------------|--------| +| `tpc_signal` | float | 0-300 | - | Specific ionization (dE/dx) | AO2D | +| `tpc_nsigma_pi` | float | - | σ | n-sigma deviation from pion | AO2D/CCDB | +| `tpc_nsigma_ka` | float | - | σ | n-sigma deviation from kaon | AO2D/CCDB | +| `tpc_nsigma_pr` | float | - | σ | n-sigma deviation from proton | AO2D/CCDB | +| `tpc_nsigma_el` | float | - | σ | n-sigma deviation from electron | AO2D/CCDB | +| `tpc_nclusters` | int | 0-160 | - | Number of TPC clusters | AO2D | +| `tpc_chi2` | float | - | - | TPC track fit chi-square/ndf | AO2D | + +**TPC Features Source:** n-sigma values are computed from `tpc_signal` and PID calibrations (from AO2D or CCDB). If not in AO2D, calibration data is fetched from CCDB using the collision timestamp. 
+ +### TOF Detector Features (6 features) + +| Variable | Type | Range | Unit | Description | Source | +|----------|------|-------|------|-------------|--------| +| `tof_beta` | float | 0-1.2 | - | Velocity over speed of light | AO2D/CCDB | +| `tof_mass` | float | -0.2-2.0 | GeV/c² | Reconstructed mass | AO2D/CCDB | +| `tof_nsigma_pi` | float | - | σ | n-sigma deviation from pion | AO2D/CCDB | +| `tof_nsigma_ka` | float | - | σ | n-sigma deviation from kaon | AO2D/CCDB | +| `tof_nsigma_pr` | float | - | σ | n-sigma deviation from proton | AO2D/CCDB | +| `tof_nsigma_el` | float | - | σ | n-sigma deviation from electron | AO2D/CCDB | + +**TOF Features Source:** If not available in AO2D file, the framework fetches calibration and response parameters from CCDB. Beta and mass can be recomputed from raw TOF information and length measurement using CCDB calibrations. + +### Bayesian PID Features (4 features) + +| Variable | Type | Range | Unit | Description | +|----------|------|-------|------|-------------| +| `bayes_prob_pi` | float | 0-1 | - | Probability of being pion | +| `bayes_prob_ka` | float | 0-1 | - | Probability of being kaon | +| `bayes_prob_pr` | float | 0-1 | - | Probability of being proton | +| `bayes_prob_el` | float | 0-1 | - | Probability of being electron | + +**Note**: Bayesian probabilities sum to 1.0 and are computed using Gaussian likelihoods in n-sigma space (from either AO2D or CCDB-derived values) with configurable priors. 
+ +### Track Quality Features (2 features) + +| Variable | Type | Unit | Description | +|----------|------|------|-------------| +| `dca_xy` | float | cm | Distance of closest approach in xy-plane | +| `dca_z` | float | cm | Distance of closest approach along beam | + +### Detector Availability Flags (2 features) + +| Variable | Type | Description | +|----------|------|-------------| +| `has_tpc` | bool | Track has valid TPC information | +| `has_tof` | bool | Track has valid TOF information | + +### Monte Carlo Truth (4 features, simulated data only) + +| Variable | Type | Description | +|----------|------|-------------| +| `mc_pdg` | int | PDG code of true particle | +| `mc_px`, `mc_py`, `mc_pz` | float | True momentum components | + +**Total: 37 features per track** + +## Installation + +### 1. Clone the Repository + +```bash +cd ~/O2Physics # or your O2Physics installation directory +git clone <repository-url> pid-extractor +cd pid-extractor +``` + +### 2. Verify Directory Structure + +Ensure your repository has the following structure: + +``` +pid-extractor/ +├── CMakeLists.txt +├── PIDFeatureExtractor.cxx +├── myConfigExtractor.json +├── run.sh +└── README.md +``` + +### 3. Set Executable Permissions + +Make the `run.sh` script executable: + +```bash +chmod +x run.sh +``` + +## Configuration + +### Configuration File: `myConfigExtractor.json` + +All task parameters are configured through the **`myConfigExtractor.json`** file located in the task directory. This JSON file specifies all runtime options for the PID feature extractor. 
+ +#### Configuration File Format + +```json +{ + "output_path": "pid_features", + "export_csv": true, + "export_root": true, + "eta_min": -1.5, + "eta_max": 1.5, + "pt_min": 0.1, + "pt_max": 20.0, + "ccdb_url": "http://alice-ccdb.cern.ch" +} +``` + +#### Configuration Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `output_path` | string | `pid_features` | Base path for output files (without extension) | +| `export_csv` | boolean | `true` | Enable CSV export of features | +| `export_root` | boolean | `true` | Enable ROOT file export of features | +| `eta_min` | float | `-1.5` | Minimum pseudorapidity cut for track selection | +| `eta_max` | float | `1.5` | Maximum pseudorapidity cut for track selection | +| `pt_min` | float | `0.1` | Minimum transverse momentum cut (GeV/c) | +| `pt_max` | float | `20.0` | Maximum transverse momentum cut (GeV/c) | +| `ccdb_url` | string | `http://alice-ccdb.cern.ch` | CCDB server URL for fetching PID calibrations | + +#### Example Configurations + +**Example 1: High-pT kaon extraction** + +```json +{ + "output_path": "kaons_highpt", + "export_csv": true, + "export_root": true, + "eta_min": -0.8, + "eta_max": 0.8, + "pt_min": 3.0, + "pt_max": 20.0, + "ccdb_url": "http://alice-ccdb.cern.ch" +} +``` + +**Example 2: ROOT-only output for large datasets** + +```json +{ + "output_path": "batch_output", + "export_csv": false, + "export_root": true, + "eta_min": -1.5, + "eta_max": 1.5, + "pt_min": 0.1, + "pt_max": 20.0, + "ccdb_url": "http://alice-ccdb.cern.ch" +} +``` + +**Example 3: Using alternative CCDB** + +```json +{ + "output_path": "ccdb_test", + "export_csv": true, + "export_root": true, + "eta_min": -1.5, + "eta_max": 1.5, + "pt_min": 0.1, + "pt_max": 20.0, + "ccdb_url": "http://alice-ccdb-test.cern.ch" +} +``` + +## Usage + +### Quick Start + +The task runs within the O2Physics framework using the provided **`run.sh`** execution script. 
All configuration is read from **`myConfigExtractor.json`**. + +#### Basic Execution + +```bash +./run.sh +``` + +This command: +1. Reads configuration from `myConfigExtractor.json` +2. Initializes the O2Physics environment +3. Launches the PID feature extractor +4. Processes AO2D data according to configured parameters +5. Generates ROOT and/or CSV output files + +#### With AO2D File Input + +```bash +./run.sh --aod-file AO2D.root +``` + +#### With Multiple Input Files + +```bash +./run.sh --aod-file file1.root file2.root file3.root +``` + +#### Troubleshooting Execution + +If you encounter permission errors: + +```bash +bash ./run.sh +``` + +### Running in O2Physics Environment + +The `run.sh` script should be executed within an active O2Physics environment. To ensure proper setup: + +```bash +# Source O2Physics environment (if not already sourced) +source ~/O2Physics/setup.sh + +# Make script executable +chmod +x run.sh + +# Run the task +./run.sh +``` + +### Modifying Configuration + +#### Method 1: Edit Configuration File + +Modify `myConfigExtractor.json` before running: + +```bash +# Edit the configuration file +nano myConfigExtractor.json + +# Run with new configuration +./run.sh +``` + +#### Method 2: Environment Variables + +You can override configuration parameters via environment variables (if supported by `run.sh`): + +```bash +export OUTPUT_PATH="custom_output" +export PT_MIN=2.0 +export PT_MAX=10.0 +./run.sh +``` + +### Advanced Usage Examples + +**Example 1: Process test data with verbose output** + +```bash +# Edit myConfigExtractor.json for test parameters +./run.sh --aod-file test_data.root --verbose +``` + +**Example 2: High-pT analysis** + +1. Update `myConfigExtractor.json`: +```json +{ + "output_path": "high_pt_pions", + "pt_min": 5.0, + "pt_max": 20.0, + "export_csv": true, + "export_root": true +} +``` + +2. 
Run the script: +```bash +./run.sh --aod-file physics_data.root +``` + +**Example 3: Batch processing multiple files** + +```bash +# Configure for batch output +cat > myConfigExtractor.json << EOF +{ + "output_path": "batch_results", + "export_csv": false, + "export_root": true, + "eta_min": -0.9, + "eta_max": 0.9, + "pt_min": 0.5, + "pt_max": 10.0 +} +EOF + +# Run on multiple files +./run.sh --aod-file data_run1.root data_run2.root data_run3.root +``` + +## Data Flow and CCDB Integration + +### Processing Pipeline + +``` +AO2D File + ↓ +Load Configuration from myConfigExtractor.json + ↓ +Initialize O2Physics Workflow + ↓ +Load Tracks + Available PID Tables + ↓ +Missing Features? → Query CCDB (using timestamp) + ↓ +Compute Bayesian PID (using AO2D or CCDB n-sigma values) + ↓ +Apply Track Selection (eta, pT cuts from config) + ↓ +Fill QC Histograms + ↓ +Write to ROOT TTree + CSV (based on config) +``` + +### CCDB Timestamp-Based Access + +The task uses the collision timestamp from each event to query CCDB for the correct calibration period: + +1. **Event Timestamp**: Read from collision data +2. **CCDB Query**: Fetch PID calibrations valid for that timestamp +3. **Feature Computation**: Use CCDB calibrations if AO2D values unavailable +4. **Caching**: Calibrations are cached per run to minimize CCDB queries + +### Calibration Objects Retrieved from CCDB + +If not in AO2D, the task fetches: + +- **TPC PID Response**: Bethe-Bloch curve parameters and n-sigma calculation +- **TOF PID Response**: TOF expected times and mass resolution +- **Track Propagation**: Path length and TOF time calibrations +- **Detector Resolution**: TPC and TOF resolution parameters for n-sigma computation + +## Output Files + +### ROOT Output (`.root`) + +Contains a TTree named `pid_features` with one entry per track. The tree includes all 39 features as separate branches. All feature values are either from AO2D or computed using CCDB calibrations. 
+ +**Access in ROOT:** + +```cpp +TFile file("pid_features.root"); +TTree* tree = (TTree*)file.Get("pid_features"); + +// Plot TPC dE/dx vs pT (TPC signal always from AO2D) +tree->Draw("tpc_signal:pt", "has_tpc==1", "scatter"); + +// Select pions with high confidence (probabilities computed from CCDB if needed) +tree->Draw("eta", "bayes_prob_pi > 0.9", "hist"); +``` + +### CSV Output (`.csv`) + +Plain text comma-separated values format with header row. All 39 features are included. Features are either from AO2D or computed using CCDB calibrations. The filename is determined by the `output_path` parameter in `myConfigExtractor.json`. + +**Example CSV structure:** + +```csv +event_id,track_id,px,py,pz,pt,p,eta,phi,theta,charge,track_type,... +0,0,1.234,-0.567,2.345,1.456,2.678,-0.123,1.456,2.345,1,0,... +0,1,0.987,0.654,1.234,1.123,1.567,0.456,2.123,0.987,-1,0,... +``` + +### Quality Control Histograms + +The task also produces QC histograms in the ROOT file: + +- `QC/nTracks`: Total number of processed tracks +- `QC/pt`: pT distribution +- `QC/eta`: Pseudorapidity distribution +- `QC/tpc_dEdx_vs_pt`: TPC dE/dx vs pT (2D) +- `QC/tof_beta_vs_p`: TOF beta vs momentum (2D) +- `QC/mass_vs_p`: TOF mass vs momentum (2D) + +## Machine Learning Integration + +### Data Loading in Python + +```python +import pandas as pd +import numpy as np +from sklearn.preprocessing import StandardScaler +import xgboost as xgb + +# Load CSV file (features are from AO2D or CCDB-derived) +# Output filename based on output_path from myConfigExtractor.json +df = pd.read_csv("pid_features.csv") + +# Filter out invalid data (missing detector info) +df_valid = df[(df['has_tpc'] == True) & (df['has_tof'] == True)] + +# Prepare features (exclude MC truth for real data) +feature_cols = [col for col in df.columns + if col not in ['event_id', 'track_id', 'mc_pdg', 'mc_px', 'mc_py', 'mc_pz']] + +X = df_valid[feature_cols].values +y = df_valid['mc_pdg'].values # For simulated data + +# Standardize 
features +scaler = StandardScaler() +X_scaled = scaler.fit_transform(X) + +# Train model +model = xgb.XGBClassifier(n_estimators=100, max_depth=6) +model.fit(X_scaled, y) +``` + +### Loading ROOT File in Python + +```python +import uproot +import pandas as pd + +# Open ROOT file (output filename based on output_path from config) +file = uproot.open("pid_features.root") +tree = file["pid_features"] + +# Convert to pandas DataFrame +df = tree.arrays(library="pd") + +# Access specific branches +bayes_probs = df[['bayes_prob_pi', 'bayes_prob_ka', 'bayes_prob_pr', 'bayes_prob_el']] +``` + +## Algorithm Details + +### Bayesian PID Calculation + +The task computes Bayesian probabilities combining TPC and TOF information. n-sigma values are either from AO2D or computed using CCDB calibrations: + +**Likelihood Calculation:** + +For each particle hypothesis (π, K, p, e): + +``` +L_i = exp(-0.5 * (ns_TPC_i² + ns_TOF_i²)) +``` + +Where `ns_TPC_i` and `ns_TOF_i` are n-sigma deviations from expected values (from AO2D or CCDB). + +**Prior Probabilities:** + +Default priors (configurable in source code): +- Pions: 1.0 +- Kaons: 0.2 +- Protons: 0.1 +- Electrons: 0.05 + +**Final Probabilities:** + +``` +P(i|TPC,TOF) = (L_i * Prior_i) / Σ(L_j * Prior_j) +``` + +This ensures the four probabilities sum to exactly 1.0. 
+ +### Kinematic Calculations + +- **Transverse momentum**: \(p_T = \sqrt{p_x^2 + p_y^2}\) +- **Total momentum**: \(p = \sqrt{p_x^2 + p_y^2 + p_z^2}\) +- **Pseudorapidity**: \(\eta = -\ln(\tan(\theta/2))\) +- **Polar angle**: \(\theta = 2 \arctan(e^{-\eta})\) + +### Invalid Data Handling + +- Missing TPC: All TPC variables set to `-999` +- Missing TOF: All TOF variables set to `-999` +- Missing CCDB connection: Task logs warning and uses available AO2D values +- No MC match: `mc_pdg` set to `0`, momentum components set to `0` +- Invalid TOF n-sigma (NaN): Treated as 0 contribution in Bayesian calculation + +## Troubleshooting + +### Issue: `./run.sh: command not found` or permission denied + +**Causes:** +- Script is not executable +- Running from wrong directory +- Shell incompatibility + +**Solution:** + +```bash +# Make executable +chmod +x run.sh + +# Run explicitly with bash +bash ./run.sh + +# Or run from correct directory +cd ~/O2Physics/pid-extractor +./run.sh +``` + +### Issue: Configuration file not found + +**Error Message:** `myConfigExtractor.json not found` + +**Causes:** +- File doesn't exist in current directory +- Running from wrong location + +**Solution:** + +```bash +# Verify file exists +ls -la myConfigExtractor.json + +# Ensure you're in the correct directory +cd ~/O2Physics/pid-extractor +./run.sh +``` + +### Issue: CCDB Connection Error + +**Error Message:** `Cannot connect to CCDB at <ccdb_url>` + +**Causes:** +- Network connectivity issues +- CCDB server down +- Incorrect CCDB URL in configuration + +**Solution:** + +1. Verify CCDB connectivity: +```bash +curl http://alice-ccdb.cern.ch/ +``` + +2. Update `myConfigExtractor.json` with alternative CCDB: +```json +{ + "ccdb_url": "http://alice-ccdb-test.cern.ch" +} +``` + +3. 
Run again: +```bash +./run.sh +``` + +### Issue: Invalid JSON configuration + +**Error Message:** `JSON parse error` or similar + +**Causes:** +- Syntax error in `myConfigExtractor.json` +- Missing quotes or commas +- Invalid data types + +**Solution:** + +1. Validate JSON syntax: +```bash +python3 -m json.tool myConfigExtractor.json +``` + +2. Fix any reported errors and retry: +```bash +./run.sh +``` + +### Issue: Features appear as -999 or invalid + +**Cause:** PID tables not in AO2D and CCDB fetch failed + +**Solution:** +- Check network/CCDB access +- Verify collision timestamp is within valid range +- Ensure AO2D file contains basic track information + +### Issue: No tracks extracted + +**Causes:** +- Track selection cuts too restrictive (check `eta_min`, `eta_max`, `pt_min`, `pt_max` in config) +- Input data doesn't contain required track tables + +**Solution:** + +Edit `myConfigExtractor.json` to relax selection criteria: + +```json +{ + "eta_min": -1.5, + "eta_max": 1.5, + "pt_min": 0.1, + "pt_max": 20.0 +} +``` + +Then run: +```bash +./run.sh +``` + +### Issue: Many -999 values in TPC/TOF output + +**Cause:** Tracks lack TPC or TOF information (detector not active for those tracks) + +**Solution:** +- Use track selection filters to require both detectors: +```python +df_valid = df[(df['has_tpc'] == True) & (df['has_tof'] == True)] +``` + +### Issue: CSV file is very large + +**Cause:** Processing too many tracks or both output formats enabled + +**Solutions:** + +Modify `myConfigExtractor.json`: + +```json +{ + "export_csv": false, + "export_root": true +} +``` + +Or apply stricter cuts: + +```json +{ + "pt_min": 2.0, + "pt_max": 10.0, + "eta_min": -0.8, + "eta_max": 0.8 +} +``` + +Then run: +```bash +./run.sh +``` + +### Issue: ROOT TTree has no entries + +**Cause:** Track selection filtered out all tracks + +**Solution:** +- Verify input data quality +- Reduce selection criteria in `myConfigExtractor.json` +- Check that tracks pass kinematic cuts +- 
Verify CCDB calibrations loaded successfully + +## Performance Notes + +- **Processing Speed**: ~10,000 tracks/second (depends on system and CCDB latency) +- **Memory Usage**: ~500 MB for typical dataset +- **File Size**: ~50-100 bytes per track in CSV format +- **CCDB Overhead**: Minimal after initial calibration load per run (~1-2 seconds) + +### Optimisation Tips + +1. **Use ROOT format for large datasets** (set `export_csv: false` in config) +2. **Process in batches** for better memory management +3. **Apply kinematic cuts** in `myConfigExtractor.json` to reduce output size +4. **Disable MC truth** for real data to save space +5. **Reuse CCDB connections** when processing multiple files from same run + +## Contributing + +### Adding New Features + +To add a new feature: + +1. Declare member variable in the struct +2. Create TTree branch in `init()` +3. Add CSV header column +4. Fill variable in `process()` +5. Update documentation + +Example: + +```cpp +// 1. Member variable +float my_new_feature; + +// 2. In init() +featureTree->Branch("my_new_feature", &my_new_feature); + +// 3. In process() +my_new_feature = t.someNewMethod(); + +// 4. Update CSV +csvFile << my_new_feature << ","; +``` + +### Modifying Configuration Options + +To add a new configurable parameter: + +1. Add parameter to `myConfigExtractor.json` +2. Create `Configurable` variable in task struct +3. Use parameter value in processing logic +4. Document in this README + +### Modifying Bayesian Priors + +Edit the priors array in the `process()` function of the task: + +```cpp +float priors[4] = {1.f, 0.2f, 0.1f, 0.05f}; // π, K, p, e +``` + +### Fetching Additional CCDB Objects + +If you need additional calibrations: + +1. Query CCDB for the object path +2. Load in the `init()` function using ccdbApi +3. Cache calibration data for the processing period +4. 
Use in `process()` function for feature computation + +## Repository Structure + +``` +pid-extractor/ +├── README.md # This file - complete documentation +├── PIDFeatureExtractor.cxx # Main task implementation +├── myConfigExtractor.json # Configuration file (edit this for options) +├── run.sh # Execution script (run with: ./run.sh) +├── CMakeLists.txt # Build configuration +└── LICENSE # License file +``` + +## References + +- [ALICE O2Physics Framework](https://github.com/AliceO2Group/O2Physics) +- [ALICE Conditions Database (CCDB)](https://alice-ccdb.cern.ch/) +- [ALICE Detector Performance](https://arxiv.org/abs/1910.14400) +- [PID with Machine Learning in ALICE](https://arxiv.org/abs/2204.13255) +- [O2Physics CCDB Integration Guide](https://github.com/AliceO2Group/O2Physics/wiki/CCDB) + +## Author + +High-Energy Physics Collaboration - ALICE Experiment + +## License + +MIT License (or appropriate license for your institution) + +## Support + +For issues, questions, or suggestions: +- Open an issue on GitHub +- Contact the ALICE physics working group +- Check existing documentation +- Consult CCDB documentation for calibration issues + +## Quick Reference + +### To run the task: +```bash +./run.sh +``` + +### To modify parameters: +1. Edit `myConfigExtractor.json` +2. 
Run `./run.sh` + +### To see what configuration is active: +```bash +cat myConfigExtractor.json +``` + +### To verify output: +```bash +ls -lh pid_features.root pid_features.csv +``` + +--- + +**Last Updated:** 2025-11-13 +**Task Version:** 1.0.0 +**O2Physics Compatibility:** Latest +**CCDB Support:** Integrated +**Run Method:** `./run.sh` with `myConfigExtractor.json` diff --git a/Tools/PIDFeatureExtractor/myConfigExtractor.json b/Tools/PIDFeatureExtractor/myConfigExtractor.json new file mode 100644 index 00000000000..33489776a5e --- /dev/null +++ b/Tools/PIDFeatureExtractor/myConfigExtractor.json @@ -0,0 +1,125 @@ +{ + "internal-dpl-clock": "", + + "internal-dpl-aod-reader": { + "time-limit": 0, + "aod-file-private": "AO2D.root", + "orbit-offset-enumeration": 0, + "orbit-multiplier-enumeration": 0, + "start-value-enumeration": 0, + "end-value-enumeration": -1, + "step-value-enumeration": 1 + }, + + "tracks-extra-v002-converter": { + "processV000ToV002": 0, + "processV001ToV002": 1 + }, + + "timestamp-task": { + "verbose": 0, + "rct-path": "RCT/Info/RunInformation", + "orbit-reset-path": "CTP/Calib/OrbitReset", + "ccdb-url": "http://alice-ccdb.cern.ch", + "isRun2MC": 0 + }, + + "bc-selection-task": { + "processRun2": 0, + "processRun3": 1 + }, + + "event-selection-task": { + "syst": "pp", + "muonSelection": 0, + "customDeltaBC": 0, + "isMC": 1, + "processRun2": 0, + "processRun3": 1 + }, + + "track-propagation": { + "ccdb-url": "http://alice-ccdb.cern.ch", + "grp-path": "GLO/GRP/GRP", + "grp-mag-path": "GLO/Config/GRPMagField", + "mVtxPath": "GLO/Calib/MeanVertex", + "geo-path": "GLO/Config/GeometryAligned", + "useMatLUT": 0, + "processStandard": 1, + "processCovariance": 0, + "processCovarianceMc": 0, + "minPropagationDistance": 83.1 + }, + + "pid-tpc-base": { + "ccdb-url": "http://alice-ccdb.cern.ch", + "parametrization-path": "TPC/Calib/Response", + "parametrization-el-path": "TPC/Calib/ResponseElectron", + "resoPath": "TPC/Calib/PIDResponse", + 
"ccdb-timestamp": 0, + "useNetworkCorrection": 0, + "autofetch-network": 1, + "enableNetworkOptimization": 1, + "networkPathLocally": "", + "networkPathCCDB": "Analysis/PID/TPC", + "onnxFile": "network.onnx", + "enableNetworkInference": 0 + }, + + "pid-tpc": { + "param-file": "", + "param-sigma": "TPC.PIDResponse.sigma:", + "ccdb-url": "http://alice-ccdb.cern.ch", + "ccdbPath": "TPC/Calib/PIDResponse" + }, + + "pid-tof-base": { + "ccdb-url": "http://alice-ccdb.cern.ch", + "parametrizationPath": "TOF/Calib/Response", + "passName": "", + "timeShiftCCDBPath": "", + "fatalOnPassNotAvailable": 1 + }, + + "pid-tof": { + "param-file": "", + "param-sigma": "TOF.PIDResponse.sigma:", + "ccdb-url": "http://alice-ccdb.cern.ch", + "ccdbPath": "TOF/Calib/Response", + "passName": "", + "timeShiftCCDBPath": "", + "parametrizationPath": "TOF/Calib/Response", + "fatalOnPassNotAvailable": 1 + }, + + "pid-tof-beta": { + "ccdb-url": "http://alice-ccdb.cern.ch" + }, + + "multiplicity-table": { + "doVertexZeq": 1, + "fractionOfEvents": 2, + "processRun2": 0, + "processRun3": 1 + }, + + "tracks-extra-converter": { + "processRun2": 0, + "processRun3": 1 + }, + + "mccollision-converter": { + "processRun2": 0, + "processRun3": 1 + }, + + "PIDFeatureExtractor": { + "outputPath": "pid_features", + "exportCSV": 1, + "exportROOT": 1, + "etaMin": -1.5, + "etaMax": 1.5, + "ptMin": 0.1, + "ptMax": 20.0 + } +} diff --git a/Tools/PIDFeatureExtractor/run.sh b/Tools/PIDFeatureExtractor/run.sh new file mode 100755 index 00000000000..7acdc837f42 --- /dev/null +++ b/Tools/PIDFeatureExtractor/run.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +OPTION="-b --configuration json://myConfigExtractor.json" + +# Specify the executable binary directly (adjust the path as needed) +EXECUTABLE=~/alice/sw/BUILD/O2Physics-latest/O2Physics/stage/bin/o2-analysistutorial-mm-my-example-task-pid-feature-extractor +CONFIG_PATH="$(pwd)/myConfigExtractor.json" +OPTION="-b --configuration json://$CONFIG_PATH" + + +echo "Starting O2Physics 
PID Feature Extraction Workflow..." +echo "Using configuration: myConfigExtractor.json" + +o2-analysis-timestamp ${OPTION} | \ +o2-analysis-event-selection ${OPTION} | \ +o2-analysis-tracks-extra-v002-converter ${OPTION} | \ +o2-analysis-track-propagation ${OPTION} | \ +o2-analysis-pid-tpc-base ${OPTION} | \ +o2-analysis-pid-tpc ${OPTION} | \ +o2-analysis-pid-tof-base ${OPTION} | \ +o2-analysis-pid-tof ${OPTION} | \ +o2-analysis-pid-tof-beta ${OPTION} | \ +o2-analysis-multiplicity-table ${OPTION} | \ +o2-analysis-mccollision-converter ${OPTION} | \ +${EXECUTABLE} ${OPTION} + +echo "Check output files (e.g. pid_features.csv, pid_features.root)" From 43319f3a61e691cc893b763e5ddc3798cf6b3a59 Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 19:48:51 +0000 Subject: [PATCH 02/15] style: Remove trailing spaces and add ALICE O2 Collaboration copyright header --- .../PIDFeatureExtractor.cxx | 99 ++++++++++--------- 1 file changed, 55 insertions(+), 44 deletions(-) diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index 3be46c145f9..59c5bf81779 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ #include "Framework/runDataProcessing.h" #include "Framework/AnalysisTask.h" #include "Common/DataModel/TrackSelectionTables.h" @@ -26,32 +37,32 @@ struct PIDFeatureExtractor { // ============================================================================ // OUTPUT OBJECTS - File and data structures for feature storage // ============================================================================ - + /// Output ROOT file for storing the TTree with extracted features std::unique_ptr outputFile; - + /// TTree storing all extracted features for each track std::unique_ptr featureTree; - + /// CSV output stream for exporting features in comma-separated format std::ofstream csvFile; // ============================================================================ // KINEMATIC VARIABLES - Track momentum and position information // ============================================================================ - + int event_id; /// Unique identifier for each collision event int track_id; /// Track index within the event - + // Momentum components (in GeV/c) float px, py, pz; /// Cartesian momentum components float pt, p; /// Transverse momentum and total momentum - + // Angular variables float eta; /// Pseudorapidity float phi; /// Azimuthal angle float theta; /// Polar angle (calculated from eta) - + // Track properties int charge; /// Track charge (+1 or -1) int track_type; /// Type of track (e.g., 0=global, 1=TPC-only, etc.) 
@@ -59,15 +70,15 @@ struct PIDFeatureExtractor { // ============================================================================ // TPC VARIABLES - Time Projection Chamber PID information // ============================================================================ - + float tpc_signal; /// dE/dx energy loss in TPC (specific ionization) - + // n-sigma values: standard deviations from expected energy loss for each particle float tpc_nsigma_pi; /// n-sigma for pion (π) float tpc_nsigma_ka; /// n-sigma for kaon (K) float tpc_nsigma_pr; /// n-sigma for proton (p) float tpc_nsigma_el; /// n-sigma for electron (e) - + // Track quality variables int tpc_nclusters; /// Number of TPC clusters used in track fit float tpc_chi2; /// Chi-square per degree of freedom of TPC fit @@ -75,10 +86,10 @@ struct PIDFeatureExtractor { // ============================================================================ // TOF VARIABLES - Time-Of-Flight PID information // ============================================================================ - + float tof_beta; /// β = v/c (velocity over speed of light) float tof_mass; /// Reconstructed mass from TOF measurement - + // n-sigma values for TOF detection float tof_nsigma_pi; /// n-sigma for pion in TOF float tof_nsigma_ka; /// n-sigma for kaon in TOF @@ -88,7 +99,7 @@ struct PIDFeatureExtractor { // ============================================================================ // BAYESIAN PID VARIABLES - Combined PID probabilities // ============================================================================ - + /// Bayesian probability that track is a pion (probability sum = 1.0) float bayes_prob_pi; /// Bayesian probability that track is a kaon @@ -101,60 +112,60 @@ struct PIDFeatureExtractor { // ============================================================================ // MONTE CARLO TRUTH INFORMATION - For simulated data // ============================================================================ - + int mc_pdg; /// PDG code of 
true particle (0 if no MC match) float mc_px, mc_py, mc_pz; /// True momentum components from simulation // ============================================================================ // DETECTOR AVAILABILITY FLAGS // ============================================================================ - + bool has_tpc; /// Flag: track has TPC information bool has_tof; /// Flag: track has TOF information // ============================================================================ // TRACK IMPACT PARAMETERS - Quality and background rejection // ============================================================================ - + float dca_xy; /// Distance of closest approach in xy-plane float dca_z; /// Distance of closest approach in z-direction // ============================================================================ // HISTOGRAM REGISTRY - Quality control histograms // ============================================================================ - + /// Registry for quality control histograms HistogramRegistry histos{"histos", {}, OutputObjHandlingPolicy::AnalysisObject}; // ============================================================================ // CONFIGURABLE PARAMETERS - User-adjustable settings // ============================================================================ - + /// Base path and filename for output files (without extension) Configurable outputPath{"outputPath", "pid_features", "Output file base"}; - + /// Enable CSV export of features Configurable exportCSV{"exportCSV", true, "Export CSV"}; - + /// Enable ROOT file export of features Configurable exportROOT{"exportROOT", true, "Export ROOT"}; - + /// Minimum pseudorapidity cut for track selection Configurable etaMin{"etaMin", -1.5f, "Minimum eta"}; - + /// Maximum pseudorapidity cut for track selection Configurable etaMax{"etaMax", 1.5f, "Maximum eta"}; - + /// Minimum transverse momentum cut (GeV/c) Configurable ptMin{"ptMin", 0.1f, "Minimum pT"}; - + /// Maximum transverse momentum cut 
(GeV/c) Configurable ptMax{"ptMax", 20.0f, "Maximum pT"}; // ============================================================================ // INITIALIZATION FUNCTION // ============================================================================ - + /** * @brief Initialize output files and histograms * @@ -163,14 +174,14 @@ struct PIDFeatureExtractor { */ void init(InitContext const&) { std::string base = outputPath.value; - + // ======================================================================== // ROOT OUTPUT SETUP // ======================================================================== if (exportROOT) { // Create ROOT file for storing the TTree outputFile = std::make_unique((base + ".root").c_str(), "RECREATE"); - + // Create TTree with descriptive name and title featureTree = std::make_unique("pid_features", "PID features"); @@ -220,7 +231,7 @@ struct PIDFeatureExtractor { // Create branches for DETECTOR FLAGS featureTree->Branch("has_tpc", &has_tpc); featureTree->Branch("has_tof", &has_tof); - + // Create branches for IMPACT PARAMETERS featureTree->Branch("dca_xy", &dca_xy); featureTree->Branch("dca_z", &dca_z); @@ -244,7 +255,7 @@ struct PIDFeatureExtractor { // ======================================================================== // HISTOGRAM SETUP - Quality Control Plots // ======================================================================== - + // Define histogram axes with binning const AxisSpec axisPt{200, 0, 10, "pT"}; // 200 bins, 0-10 GeV/c const AxisSpec axisEta{60, -1.5, 1.5, "eta"}; // 60 bins, -1.5 to 1.5 @@ -264,7 +275,7 @@ struct PIDFeatureExtractor { // ============================================================================ // BAYESIAN PID CALCULATION FUNCTION // ============================================================================ - + /** * @brief Compute Bayesian probabilities combining TPC and TOF information * @@ -282,19 +293,19 @@ struct PIDFeatureExtractor { */ void computeBayesianPID(float nsTPC[4], float 
nsTOF[4], float pri[4], float out[4]) { float sum = 0; - + // Calculate likelihood for each particle species for (int i = 0; i < 4; i++) { // Gaussian likelihood: exp(-0.5 * chi²) // Handle invalid TOF values (NaN) by replacing with 0 contribution - float l = std::exp(-0.5f * (nsTPC[i]*nsTPC[i] + + float l = std::exp(-0.5f * (nsTPC[i]*nsTPC[i] + (std::isfinite(nsTOF[i]) ? nsTOF[i]*nsTOF[i] : 0.f))); - + // Apply prior probability and accumulate out[i] = l * pri[i]; sum += out[i]; } - + // Normalize probabilities so they sum to 1.0 for (int i = 0; i < 4; i++) { out[i] = sum > 0 ? out[i] / sum : 0.f; @@ -304,7 +315,7 @@ struct PIDFeatureExtractor { // ============================================================================ // MAIN PROCESSING FUNCTION // ============================================================================ - + /** * @brief Process collision and track data, extract PID features * @@ -340,15 +351,15 @@ struct PIDFeatureExtractor { // TRACK LOOP - Process each track in the event // ====================================================================== for (auto& t : tracks) { - + // ==================================================================== // TRACK SELECTION - Apply kinematic cuts // ==================================================================== if (t.pt() < ptMin || t.pt() > ptMax) continue; // Apply pT cut if (t.eta() < etaMin || t.eta() > etaMax) continue; // Apply eta cut - + track_id = idx++; - + // ==================================================================== // EXTRACT KINEMATIC VARIABLES // ==================================================================== @@ -415,7 +426,7 @@ struct PIDFeatureExtractor { float arrTOF[4] = {tof_nsigma_pi, tof_nsigma_ka, tof_nsigma_pr, tof_nsigma_el}; float priors[4] = {1.f, 0.2f, 0.1f, 0.05f}; // Prior prob: π, K, p, e float probs[4]; - + // Compute combined PID probabilities computeBayesianPID(arrTPC, arrTOF, priors, probs); bayes_prob_pi = probs[0]; @@ -442,10 +453,10 @@ 
struct PIDFeatureExtractor { // ==================================================================== // WRITE OUTPUT // ==================================================================== - + // Write to ROOT TTree if (exportROOT) featureTree->Fill(); - + // Write to CSV file if (exportCSV) { csvFile << event_id << "," << track_id << "," @@ -468,10 +479,10 @@ struct PIDFeatureExtractor { histos.fill(HIST("QC/nTracks"), 1); // Count total tracks processed histos.fill(HIST("QC/pt"), pt); // pT distribution histos.fill(HIST("QC/eta"), eta); // eta distribution - + // TPC dE/dx vs pT (only if TPC measurement exists) if (has_tpc) histos.fill(HIST("QC/tpc_dEdx_vs_pt"), pt, tpc_signal); - + // TOF beta and mass vs momentum (only if TOF measurement exists) if (has_tof) { histos.fill(HIST("QC/tof_beta_vs_p"), p, tof_beta); @@ -483,7 +494,7 @@ struct PIDFeatureExtractor { // ============================================================================ // FINALIZATION FUNCTION // ============================================================================ - + /** * @brief Clean up and finalize output files * From a2930c3be5cefa32082a25c7362f40fb5a1fb7d2 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Thu, 13 Nov 2025 19:49:35 +0000 Subject: [PATCH 03/15] Please consider the following formatting changes --- .../PIDFeatureExtractor.cxx | 189 +++++++-------- .../myConfigExtractor.json | 216 +++++++++--------- 2 files changed, 208 insertions(+), 197 deletions(-) diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index 59c5bf81779..c06b91e096f 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -9,16 +9,19 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-#include "Framework/runDataProcessing.h" -#include "Framework/AnalysisTask.h" +#include "Common/DataModel/EventSelection.h" +#include "Common/DataModel/PIDResponse.h" #include "Common/DataModel/TrackSelectionTables.h" + #include "Framework/ASoAHelpers.h" -#include "Common/DataModel/PIDResponse.h" -#include "Common/DataModel/EventSelection.h" +#include "Framework/AnalysisTask.h" +#include "Framework/runDataProcessing.h" + #include "TFile.h" #include "TTree.h" -#include + #include +#include using namespace o2; using namespace o2::framework; @@ -51,50 +54,50 @@ struct PIDFeatureExtractor { // KINEMATIC VARIABLES - Track momentum and position information // ============================================================================ - int event_id; /// Unique identifier for each collision event - int track_id; /// Track index within the event + int event_id; /// Unique identifier for each collision event + int track_id; /// Track index within the event // Momentum components (in GeV/c) - float px, py, pz; /// Cartesian momentum components - float pt, p; /// Transverse momentum and total momentum + float px, py, pz; /// Cartesian momentum components + float pt, p; /// Transverse momentum and total momentum // Angular variables - float eta; /// Pseudorapidity - float phi; /// Azimuthal angle - float theta; /// Polar angle (calculated from eta) + float eta; /// Pseudorapidity + float phi; /// Azimuthal angle + float theta; /// Polar angle (calculated from eta) // Track properties - int charge; /// Track charge (+1 or -1) - int track_type; /// Type of track (e.g., 0=global, 1=TPC-only, etc.) + int charge; /// Track charge (+1 or -1) + int track_type; /// Type of track (e.g., 0=global, 1=TPC-only, etc.) 
// ============================================================================ // TPC VARIABLES - Time Projection Chamber PID information // ============================================================================ - float tpc_signal; /// dE/dx energy loss in TPC (specific ionization) + float tpc_signal; /// dE/dx energy loss in TPC (specific ionization) // n-sigma values: standard deviations from expected energy loss for each particle - float tpc_nsigma_pi; /// n-sigma for pion (π) - float tpc_nsigma_ka; /// n-sigma for kaon (K) - float tpc_nsigma_pr; /// n-sigma for proton (p) - float tpc_nsigma_el; /// n-sigma for electron (e) + float tpc_nsigma_pi; /// n-sigma for pion (π) + float tpc_nsigma_ka; /// n-sigma for kaon (K) + float tpc_nsigma_pr; /// n-sigma for proton (p) + float tpc_nsigma_el; /// n-sigma for electron (e) // Track quality variables - int tpc_nclusters; /// Number of TPC clusters used in track fit - float tpc_chi2; /// Chi-square per degree of freedom of TPC fit + int tpc_nclusters; /// Number of TPC clusters used in track fit + float tpc_chi2; /// Chi-square per degree of freedom of TPC fit // ============================================================================ // TOF VARIABLES - Time-Of-Flight PID information // ============================================================================ - float tof_beta; /// β = v/c (velocity over speed of light) - float tof_mass; /// Reconstructed mass from TOF measurement + float tof_beta; /// β = v/c (velocity over speed of light) + float tof_mass; /// Reconstructed mass from TOF measurement // n-sigma values for TOF detection - float tof_nsigma_pi; /// n-sigma for pion in TOF - float tof_nsigma_ka; /// n-sigma for kaon in TOF - float tof_nsigma_pr; /// n-sigma for proton in TOF - float tof_nsigma_el; /// n-sigma for electron in TOF + float tof_nsigma_pi; /// n-sigma for pion in TOF + float tof_nsigma_ka; /// n-sigma for kaon in TOF + float tof_nsigma_pr; /// n-sigma for proton in TOF + float 
tof_nsigma_el; /// n-sigma for electron in TOF // ============================================================================ // BAYESIAN PID VARIABLES - Combined PID probabilities @@ -113,22 +116,22 @@ struct PIDFeatureExtractor { // MONTE CARLO TRUTH INFORMATION - For simulated data // ============================================================================ - int mc_pdg; /// PDG code of true particle (0 if no MC match) - float mc_px, mc_py, mc_pz; /// True momentum components from simulation + int mc_pdg; /// PDG code of true particle (0 if no MC match) + float mc_px, mc_py, mc_pz; /// True momentum components from simulation // ============================================================================ // DETECTOR AVAILABILITY FLAGS // ============================================================================ - bool has_tpc; /// Flag: track has TPC information - bool has_tof; /// Flag: track has TOF information + bool has_tpc; /// Flag: track has TPC information + bool has_tof; /// Flag: track has TOF information // ============================================================================ // TRACK IMPACT PARAMETERS - Quality and background rejection // ============================================================================ - float dca_xy; /// Distance of closest approach in xy-plane - float dca_z; /// Distance of closest approach in z-direction + float dca_xy; /// Distance of closest approach in xy-plane + float dca_z; /// Distance of closest approach in z-direction // ============================================================================ // HISTOGRAM REGISTRY - Quality control histograms @@ -172,7 +175,8 @@ struct PIDFeatureExtractor { * Called once at task startup. Creates ROOT TTree and CSV file headers, * and initializes all quality control histograms. 
*/ - void init(InitContext const&) { + void init(InitContext const&) + { std::string base = outputPath.value; // ======================================================================== @@ -243,13 +247,12 @@ struct PIDFeatureExtractor { if (exportCSV) { csvFile.open((base + ".csv").c_str()); // Write CSV header with all column names - csvFile << - "event_id,track_id,px,py,pz,pt,p,eta,phi,theta,charge,track_type," - "tpc_signal,tpc_nsigma_pi,tpc_nsigma_ka,tpc_nsigma_pr,tpc_nsigma_el," - "tpc_nclusters,tpc_chi2," - "tof_beta,tof_mass,tof_nsigma_pi,tof_nsigma_ka,tof_nsigma_pr,tof_nsigma_el," - "bayes_prob_pi,bayes_prob_ka,bayes_prob_pr,bayes_prob_el," - "mc_pdg,mc_px,mc_py,mc_pz,has_tpc,has_tof,dca_xy,dca_z\n"; + csvFile << "event_id,track_id,px,py,pz,pt,p,eta,phi,theta,charge,track_type," + "tpc_signal,tpc_nsigma_pi,tpc_nsigma_ka,tpc_nsigma_pr,tpc_nsigma_el," + "tpc_nclusters,tpc_chi2," + "tof_beta,tof_mass,tof_nsigma_pi,tof_nsigma_ka,tof_nsigma_pr,tof_nsigma_el," + "bayes_prob_pi,bayes_prob_ka,bayes_prob_pr,bayes_prob_el," + "mc_pdg,mc_px,mc_py,mc_pz,has_tpc,has_tof,dca_xy,dca_z\n"; } // ======================================================================== @@ -257,11 +260,11 @@ struct PIDFeatureExtractor { // ======================================================================== // Define histogram axes with binning - const AxisSpec axisPt{200, 0, 10, "pT"}; // 200 bins, 0-10 GeV/c - const AxisSpec axisEta{60, -1.5, 1.5, "eta"}; // 60 bins, -1.5 to 1.5 - const AxisSpec axisdEdx{300, 0, 300, "dE/dx"}; // 300 bins, 0-300 - const AxisSpec axisBeta{120, 0, 1.2, "beta"}; // 120 bins, 0 to 1.2 - const AxisSpec axisMass{100, -0.2, 2.0, "mass"}; // 100 bins, -0.2 to 2.0 GeV/c² + const AxisSpec axisPt{200, 0, 10, "pT"}; // 200 bins, 0-10 GeV/c + const AxisSpec axisEta{60, -1.5, 1.5, "eta"}; // 60 bins, -1.5 to 1.5 + const AxisSpec axisdEdx{300, 0, 300, "dE/dx"}; // 300 bins, 0-300 + const AxisSpec axisBeta{120, 0, 1.2, "beta"}; // 120 bins, 0 to 1.2 + const AxisSpec 
axisMass{100, -0.2, 2.0, "mass"}; // 100 bins, -0.2 to 2.0 GeV/c² // Add histograms to registry histos.add("QC/nTracks", "Tracks", kTH1F, {{10000, 0, 100000}}); @@ -291,15 +294,16 @@ struct PIDFeatureExtractor { * * Likelihood: L_i = exp(-0.5 * (ns_TPC_i² + ns_TOF_i²)) */ - void computeBayesianPID(float nsTPC[4], float nsTOF[4], float pri[4], float out[4]) { + void computeBayesianPID(float nsTPC[4], float nsTOF[4], float pri[4], float out[4]) + { float sum = 0; // Calculate likelihood for each particle species for (int i = 0; i < 4; i++) { // Gaussian likelihood: exp(-0.5 * chi²) // Handle invalid TOF values (NaN) by replacing with 0 contribution - float l = std::exp(-0.5f * (nsTPC[i]*nsTPC[i] + - (std::isfinite(nsTOF[i]) ? nsTOF[i]*nsTOF[i] : 0.f))); + float l = std::exp(-0.5f * (nsTPC[i] * nsTPC[i] + + (std::isfinite(nsTOF[i]) ? nsTOF[i] * nsTOF[i] : 0.f))); // Apply prior probability and accumulate out[i] = l * pri[i]; @@ -330,16 +334,16 @@ struct PIDFeatureExtractor { void process( aod::Collision const& collision, soa::Join< - aod::Tracks, // Base track properties - aod::TracksExtra, // Extended track info - aod::TracksDCA, // Impact parameters (DCA) - aod::pidTPCPi, aod::pidTPCKa, aod::pidTPCPr, // TPC PID for pion, kaon, proton - aod::pidTPCEl, // TPC PID for electron - aod::pidTOFPi, aod::pidTOFKa, aod::pidTOFPr, // TOF PID for pion, kaon, proton - aod::pidTOFEl, // TOF PID for electron - aod::pidTOFmass, aod::pidTOFbeta, // TOF mass and beta - aod::McTrackLabels // MC truth matching - > const& tracks, + aod::Tracks, // Base track properties + aod::TracksExtra, // Extended track info + aod::TracksDCA, // Impact parameters (DCA) + aod::pidTPCPi, aod::pidTPCKa, aod::pidTPCPr, // TPC PID for pion, kaon, proton + aod::pidTPCEl, // TPC PID for electron + aod::pidTOFPi, aod::pidTOFKa, aod::pidTOFPr, // TOF PID for pion, kaon, proton + aod::pidTOFEl, // TOF PID for electron + aod::pidTOFmass, aod::pidTOFbeta, // TOF mass and beta + aod::McTrackLabels // MC truth 
matching + > const& tracks, aod::McParticles const& mcParticles) { // Use static counter to maintain event numbering across process calls @@ -355,8 +359,10 @@ struct PIDFeatureExtractor { // ==================================================================== // TRACK SELECTION - Apply kinematic cuts // ==================================================================== - if (t.pt() < ptMin || t.pt() > ptMax) continue; // Apply pT cut - if (t.eta() < etaMin || t.eta() > etaMax) continue; // Apply eta cut + if (t.pt() < ptMin || t.pt() > ptMax) + continue; // Apply pT cut + if (t.eta() < etaMin || t.eta() > etaMax) + continue; // Apply eta cut track_id = idx++; @@ -372,8 +378,8 @@ struct PIDFeatureExtractor { phi = t.phi(); // Calculate polar angle from pseudorapidity: θ = 2*arctan(exp(-η)) theta = 2.f * atanf(expf(-eta)); - charge = t.sign(); // Track charge - track_type = t.trackType(); // Track categorization + charge = t.sign(); // Track charge + track_type = t.trackType(); // Track categorization // ==================================================================== // EXTRACT TPC INFORMATION @@ -381,13 +387,13 @@ struct PIDFeatureExtractor { has_tpc = t.hasTPC(); if (has_tpc) { // TPC has valid measurement - tpc_signal = t.tpcSignal(); // dE/dx specific ionization - tpc_nsigma_pi = t.tpcNSigmaPi(); // Deviation from pion hypothesis - tpc_nsigma_ka = t.tpcNSigmaKa(); // Deviation from kaon hypothesis - tpc_nsigma_pr = t.tpcNSigmaPr(); // Deviation from proton hypothesis - tpc_nsigma_el = t.tpcNSigmaEl(); // Deviation from electron hypothesis - tpc_nclusters = t.tpcNClsFound(); // Quality: number of clusters - tpc_chi2 = t.tpcChi2NCl(); // Quality: fit chi-square + tpc_signal = t.tpcSignal(); // dE/dx specific ionization + tpc_nsigma_pi = t.tpcNSigmaPi(); // Deviation from pion hypothesis + tpc_nsigma_ka = t.tpcNSigmaKa(); // Deviation from kaon hypothesis + tpc_nsigma_pr = t.tpcNSigmaPr(); // Deviation from proton hypothesis + tpc_nsigma_el = t.tpcNSigmaEl(); 
// Deviation from electron hypothesis + tpc_nclusters = t.tpcNClsFound(); // Quality: number of clusters + tpc_chi2 = t.tpcChi2NCl(); // Quality: fit chi-square } else { // TPC has no valid measurement - set sentinel values tpc_signal = tpc_nsigma_pi = tpc_nsigma_ka = tpc_nsigma_pr = tpc_nsigma_el = -999; @@ -401,12 +407,12 @@ struct PIDFeatureExtractor { has_tof = t.hasTOF(); if (has_tof) { // TOF has valid measurement - tof_beta = t.beta(); // Velocity over c - tof_mass = t.mass(); // Reconstructed mass - tof_nsigma_pi = t.tofNSigmaPi(); // Deviation from pion hypothesis - tof_nsigma_ka = t.tofNSigmaKa(); // Deviation from kaon hypothesis - tof_nsigma_pr = t.tofNSigmaPr(); // Deviation from proton hypothesis - tof_nsigma_el = t.tofNSigmaEl(); // Deviation from electron hypothesis + tof_beta = t.beta(); // Velocity over c + tof_mass = t.mass(); // Reconstructed mass + tof_nsigma_pi = t.tofNSigmaPi(); // Deviation from pion hypothesis + tof_nsigma_ka = t.tofNSigmaKa(); // Deviation from kaon hypothesis + tof_nsigma_pr = t.tofNSigmaPr(); // Deviation from proton hypothesis + tof_nsigma_el = t.tofNSigmaEl(); // Deviation from electron hypothesis } else { // TOF has no valid measurement - set sentinel values tof_beta = tof_mass = -999; @@ -416,15 +422,15 @@ struct PIDFeatureExtractor { // ==================================================================== // EXTRACT IMPACT PARAMETERS (track quality) // ==================================================================== - dca_xy = t.dcaXY(); // Distance of closest approach in transverse plane - dca_z = t.dcaZ(); // Distance of closest approach along beam axis + dca_xy = t.dcaXY(); // Distance of closest approach in transverse plane + dca_z = t.dcaZ(); // Distance of closest approach along beam axis // ==================================================================== // COMPUTE BAYESIAN PID // ==================================================================== float arrTPC[4] = {tpc_nsigma_pi, tpc_nsigma_ka, 
tpc_nsigma_pr, tpc_nsigma_el}; float arrTOF[4] = {tof_nsigma_pi, tof_nsigma_ka, tof_nsigma_pr, tof_nsigma_el}; - float priors[4] = {1.f, 0.2f, 0.1f, 0.05f}; // Prior prob: π, K, p, e + float priors[4] = {1.f, 0.2f, 0.1f, 0.05f}; // Prior prob: π, K, p, e float probs[4]; // Compute combined PID probabilities @@ -440,8 +446,8 @@ struct PIDFeatureExtractor { // Safely access MC particle information with existence check if (t.has_mcParticle()) { auto mc = t.mcParticle(); - mc_pdg = mc.pdgCode(); // Particle identifier code - mc_px = mc.px(); // True momentum components + mc_pdg = mc.pdgCode(); // Particle identifier code + mc_px = mc.px(); // True momentum components mc_py = mc.py(); mc_pz = mc.pz(); } else { @@ -455,7 +461,8 @@ struct PIDFeatureExtractor { // ==================================================================== // Write to ROOT TTree - if (exportROOT) featureTree->Fill(); + if (exportROOT) + featureTree->Fill(); // Write to CSV file if (exportCSV) { @@ -476,12 +483,13 @@ struct PIDFeatureExtractor { // ==================================================================== // FILL QUALITY CONTROL HISTOGRAMS // ==================================================================== - histos.fill(HIST("QC/nTracks"), 1); // Count total tracks processed - histos.fill(HIST("QC/pt"), pt); // pT distribution - histos.fill(HIST("QC/eta"), eta); // eta distribution + histos.fill(HIST("QC/nTracks"), 1); // Count total tracks processed + histos.fill(HIST("QC/pt"), pt); // pT distribution + histos.fill(HIST("QC/eta"), eta); // eta distribution // TPC dE/dx vs pT (only if TPC measurement exists) - if (has_tpc) histos.fill(HIST("QC/tpc_dEdx_vs_pt"), pt, tpc_signal); + if (has_tpc) + histos.fill(HIST("QC/tpc_dEdx_vs_pt"), pt, tpc_signal); // TOF beta and mass vs momentum (only if TOF measurement exists) if (has_tof) { @@ -500,7 +508,8 @@ struct PIDFeatureExtractor { * * Called at task completion. Writes TTree to file and closes all output files. 
*/ - void finalize() { + void finalize() + { if (exportROOT) { // Write TTree to ROOT file and close outputFile->cd(); @@ -524,6 +533,8 @@ struct PIDFeatureExtractor { * This function creates and registers the PIDFeatureExtractor task * into the O2 data processing workflow. */ -WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) { +WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) +{ return WorkflowSpec{adaptAnalysisTask(cfgc)}; -} \ No newline at end of file +} + \ No newline at end of file diff --git a/Tools/PIDFeatureExtractor/myConfigExtractor.json b/Tools/PIDFeatureExtractor/myConfigExtractor.json index 33489776a5e..986372410fa 100644 --- a/Tools/PIDFeatureExtractor/myConfigExtractor.json +++ b/Tools/PIDFeatureExtractor/myConfigExtractor.json @@ -1,125 +1,125 @@ { - "internal-dpl-clock": "", + "internal-dpl-clock": "", - "internal-dpl-aod-reader": { - "time-limit": 0, - "aod-file-private": "AO2D.root", - "orbit-offset-enumeration": 0, - "orbit-multiplier-enumeration": 0, - "start-value-enumeration": 0, - "end-value-enumeration": -1, - "step-value-enumeration": 1 - }, + "internal-dpl-aod-reader": { + "time-limit": 0, + "aod-file-private": "AO2D.root", + "orbit-offset-enumeration": 0, + "orbit-multiplier-enumeration": 0, + "start-value-enumeration": 0, + "end-value-enumeration": -1, + "step-value-enumeration": 1 + }, - "tracks-extra-v002-converter": { - "processV000ToV002": 0, - "processV001ToV002": 1 - }, + "tracks-extra-v002-converter": { + "processV000ToV002": 0, + "processV001ToV002": 1 + }, - "timestamp-task": { - "verbose": 0, - "rct-path": "RCT/Info/RunInformation", - "orbit-reset-path": "CTP/Calib/OrbitReset", - "ccdb-url": "http://alice-ccdb.cern.ch", - "isRun2MC": 0 - }, + "timestamp-task": { + "verbose": 0, + "rct-path": "RCT/Info/RunInformation", + "orbit-reset-path": "CTP/Calib/OrbitReset", + "ccdb-url": "http://alice-ccdb.cern.ch", + "isRun2MC": 0 + }, - "bc-selection-task": { - "processRun2": 0, - "processRun3": 1 - }, + 
"bc-selection-task": { + "processRun2": 0, + "processRun3": 1 + }, - "event-selection-task": { - "syst": "pp", - "muonSelection": 0, - "customDeltaBC": 0, - "isMC": 1, - "processRun2": 0, - "processRun3": 1 - }, + "event-selection-task": { + "syst": "pp", + "muonSelection": 0, + "customDeltaBC": 0, + "isMC": 1, + "processRun2": 0, + "processRun3": 1 + }, - "track-propagation": { - "ccdb-url": "http://alice-ccdb.cern.ch", - "grp-path": "GLO/GRP/GRP", - "grp-mag-path": "GLO/Config/GRPMagField", - "mVtxPath": "GLO/Calib/MeanVertex", - "geo-path": "GLO/Config/GeometryAligned", - "useMatLUT": 0, - "processStandard": 1, - "processCovariance": 0, - "processCovarianceMc": 0, - "minPropagationDistance": 83.1 - }, + "track-propagation": { + "ccdb-url": "http://alice-ccdb.cern.ch", + "grp-path": "GLO/GRP/GRP", + "grp-mag-path": "GLO/Config/GRPMagField", + "mVtxPath": "GLO/Calib/MeanVertex", + "geo-path": "GLO/Config/GeometryAligned", + "useMatLUT": 0, + "processStandard": 1, + "processCovariance": 0, + "processCovarianceMc": 0, + "minPropagationDistance": 83.1 + }, - "pid-tpc-base": { - "ccdb-url": "http://alice-ccdb.cern.ch", - "parametrization-path": "TPC/Calib/Response", - "parametrization-el-path": "TPC/Calib/ResponseElectron", - "resoPath": "TPC/Calib/PIDResponse", - "ccdb-timestamp": 0, - "useNetworkCorrection": 0, - "autofetch-network": 1, - "enableNetworkOptimization": 1, - "networkPathLocally": "", - "networkPathCCDB": "Analysis/PID/TPC", - "onnxFile": "network.onnx", - "enableNetworkInference": 0 - }, + "pid-tpc-base": { + "ccdb-url": "http://alice-ccdb.cern.ch", + "parametrization-path": "TPC/Calib/Response", + "parametrization-el-path": "TPC/Calib/ResponseElectron", + "resoPath": "TPC/Calib/PIDResponse", + "ccdb-timestamp": 0, + "useNetworkCorrection": 0, + "autofetch-network": 1, + "enableNetworkOptimization": 1, + "networkPathLocally": "", + "networkPathCCDB": "Analysis/PID/TPC", + "onnxFile": "network.onnx", + "enableNetworkInference": 0 + }, - "pid-tpc": { - 
"param-file": "", - "param-sigma": "TPC.PIDResponse.sigma:", - "ccdb-url": "http://alice-ccdb.cern.ch", - "ccdbPath": "TPC/Calib/PIDResponse" - }, + "pid-tpc": { + "param-file": "", + "param-sigma": "TPC.PIDResponse.sigma:", + "ccdb-url": "http://alice-ccdb.cern.ch", + "ccdbPath": "TPC/Calib/PIDResponse" + }, - "pid-tof-base": { - "ccdb-url": "http://alice-ccdb.cern.ch", - "parametrizationPath": "TOF/Calib/Response", - "passName": "", - "timeShiftCCDBPath": "", - "fatalOnPassNotAvailable": 1 - }, + "pid-tof-base": { + "ccdb-url": "http://alice-ccdb.cern.ch", + "parametrizationPath": "TOF/Calib/Response", + "passName": "", + "timeShiftCCDBPath": "", + "fatalOnPassNotAvailable": 1 + }, - "pid-tof": { - "param-file": "", - "param-sigma": "TOF.PIDResponse.sigma:", - "ccdb-url": "http://alice-ccdb.cern.ch", - "ccdbPath": "TOF/Calib/Response", - "passName": "", - "timeShiftCCDBPath": "", - "parametrizationPath": "TOF/Calib/Response", - "fatalOnPassNotAvailable": 1 - }, + "pid-tof": { + "param-file": "", + "param-sigma": "TOF.PIDResponse.sigma:", + "ccdb-url": "http://alice-ccdb.cern.ch", + "ccdbPath": "TOF/Calib/Response", + "passName": "", + "timeShiftCCDBPath": "", + "parametrizationPath": "TOF/Calib/Response", + "fatalOnPassNotAvailable": 1 + }, - "pid-tof-beta": { - "ccdb-url": "http://alice-ccdb.cern.ch" - }, + "pid-tof-beta": { + "ccdb-url": "http://alice-ccdb.cern.ch" + }, - "multiplicity-table": { - "doVertexZeq": 1, - "fractionOfEvents": 2, - "processRun2": 0, - "processRun3": 1 - }, + "multiplicity-table": { + "doVertexZeq": 1, + "fractionOfEvents": 2, + "processRun2": 0, + "processRun3": 1 + }, - "tracks-extra-converter": { - "processRun2": 0, - "processRun3": 1 - }, + "tracks-extra-converter": { + "processRun2": 0, + "processRun3": 1 + }, - "mccollision-converter": { - "processRun2": 0, - "processRun3": 1 - }, + "mccollision-converter": { + "processRun2": 0, + "processRun3": 1 + }, - "PIDFeatureExtractor": { - "outputPath": "pid_features", - "exportCSV": 1, - 
"exportROOT": 1, - "etaMin": -1.5, - "etaMax": 1.5, - "ptMin": 0.1, - "ptMax": 20.0 - } + "PIDFeatureExtractor": { + "outputPath": "pid_features", + "exportCSV": 1, + "exportROOT": 1, + "etaMin": -1.5, + "etaMax": 1.5, + "ptMin": 0.1, + "ptMax": 20.0 + } } From 87fb1c3b469dca7fb29e4537f5faa0a3f808d756 Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 20:10:33 +0000 Subject: [PATCH 04/15] style: Add ALICE O2 copyright header and remove trailing spaces from PIDFeatureExtractor.cxx --- .../PIDFeatureExtractor.cxx | 73 +++++-------------- 1 file changed, 18 insertions(+), 55 deletions(-) diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index c06b91e096f..6039233d74c 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -40,7 +40,6 @@ struct PIDFeatureExtractor { // ============================================================================ // OUTPUT OBJECTS - File and data structures for feature storage // ============================================================================ - /// Output ROOT file for storing the TTree with extracted features std::unique_ptr outputFile; @@ -73,7 +72,6 @@ struct PIDFeatureExtractor { // ============================================================================ // TPC VARIABLES - Time Projection Chamber PID information // ============================================================================ - float tpc_signal; /// dE/dx energy loss in TPC (specific ionization) // n-sigma values: standard deviations from expected energy loss for each particle @@ -102,7 +100,6 @@ struct PIDFeatureExtractor { // ============================================================================ // BAYESIAN PID VARIABLES - Combined PID probabilities // ============================================================================ - /// Bayesian probability that track is a pion (probability sum = 
1.0) float bayes_prob_pi; /// Bayesian probability that track is a kaon @@ -136,14 +133,12 @@ struct PIDFeatureExtractor { // ============================================================================ // HISTOGRAM REGISTRY - Quality control histograms // ============================================================================ - /// Registry for quality control histograms HistogramRegistry histos{"histos", {}, OutputObjHandlingPolicy::AnalysisObject}; // ============================================================================ // CONFIGURABLE PARAMETERS - User-adjustable settings // ============================================================================ - /// Base path and filename for output files (without extension) Configurable outputPath{"outputPath", "pid_features", "Output file base"}; @@ -168,7 +163,6 @@ struct PIDFeatureExtractor { // ============================================================================ // INITIALIZATION FUNCTION // ============================================================================ - /** * @brief Initialize output files and histograms * @@ -178,18 +172,12 @@ struct PIDFeatureExtractor { void init(InitContext const&) { std::string base = outputPath.value; - - // ======================================================================== // ROOT OUTPUT SETUP - // ======================================================================== if (exportROOT) { - // Create ROOT file for storing the TTree outputFile = std::make_unique((base + ".root").c_str(), "RECREATE"); - - // Create TTree with descriptive name and title featureTree = std::make_unique("pid_features", "PID features"); - // Create branches for KINEMATIC VARIABLES + // KINEMATIC VARIABLES featureTree->Branch("event_id", &event_id); featureTree->Branch("track_id", &track_id); featureTree->Branch("px", &px); @@ -203,7 +191,7 @@ struct PIDFeatureExtractor { featureTree->Branch("charge", &charge); featureTree->Branch("track_type", &track_type); - // Create 
branches for TPC VARIABLES + // TPC VARIABLES featureTree->Branch("tpc_signal", &tpc_signal); featureTree->Branch("tpc_nsigma_pi", &tpc_nsigma_pi); featureTree->Branch("tpc_nsigma_ka", &tpc_nsigma_ka); @@ -212,7 +200,7 @@ struct PIDFeatureExtractor { featureTree->Branch("tpc_nclusters", &tpc_nclusters); featureTree->Branch("tpc_chi2", &tpc_chi2); - // Create branches for TOF VARIABLES + // TOF VARIABLES featureTree->Branch("tof_beta", &tof_beta); featureTree->Branch("tof_mass", &tof_mass); featureTree->Branch("tof_nsigma_pi", &tof_nsigma_pi); @@ -220,30 +208,28 @@ struct PIDFeatureExtractor { featureTree->Branch("tof_nsigma_pr", &tof_nsigma_pr); featureTree->Branch("tof_nsigma_el", &tof_nsigma_el); - // Create branches for BAYESIAN PID VARIABLES + // BAYESIAN PID VARIABLES featureTree->Branch("bayes_prob_pi", &bayes_prob_pi); featureTree->Branch("bayes_prob_ka", &bayes_prob_ka); featureTree->Branch("bayes_prob_pr", &bayes_prob_pr); featureTree->Branch("bayes_prob_el", &bayes_prob_el); - // Create branches for MONTE CARLO TRUTH (simulated data only) + // MONTE CARLO TRUTH (simulated data only) featureTree->Branch("mc_pdg", &mc_pdg); featureTree->Branch("mc_px", &mc_px); featureTree->Branch("mc_py", &mc_py); featureTree->Branch("mc_pz", &mc_pz); - // Create branches for DETECTOR FLAGS + // DETECTOR FLAGS featureTree->Branch("has_tpc", &has_tpc); featureTree->Branch("has_tof", &has_tof); - // Create branches for IMPACT PARAMETERS + // IMPACT PARAMETERS featureTree->Branch("dca_xy", &dca_xy); featureTree->Branch("dca_z", &dca_z); } - // ======================================================================== // CSV OUTPUT SETUP - // ======================================================================== if (exportCSV) { csvFile.open((base + ".csv").c_str()); // Write CSV header with all column names @@ -267,6 +253,13 @@ struct PIDFeatureExtractor { const AxisSpec axisMass{100, -0.2, 2.0, "mass"}; // 100 bins, -0.2 to 2.0 GeV/c² // Add histograms to registry + // 
HISTOGRAM SETUP + const AxisSpec axisPt{200, 0, 10, "pT"}; + const AxisSpec axisEta{60, -1.5, 1.5, "eta"}; + const AxisSpec axisdEdx{300, 0, 300, "dE/dx"}; + const AxisSpec axisBeta{120, 0, 1.2, "beta"}; + const AxisSpec axisMass{100, -0.2, 2.0, "mass"}; + histos.add("QC/nTracks", "Tracks", kTH1F, {{10000, 0, 100000}}); histos.add("QC/pt", "pT", kTH1F, {axisPt}); histos.add("QC/eta", "eta", kTH1F, {axisEta}); @@ -278,7 +271,6 @@ struct PIDFeatureExtractor { // ============================================================================ // BAYESIAN PID CALCULATION FUNCTION // ============================================================================ - /** * @brief Compute Bayesian probabilities combining TPC and TOF information * @@ -297,8 +289,6 @@ struct PIDFeatureExtractor { void computeBayesianPID(float nsTPC[4], float nsTOF[4], float pri[4], float out[4]) { float sum = 0; - - // Calculate likelihood for each particle species for (int i = 0; i < 4; i++) { // Gaussian likelihood: exp(-0.5 * chi²) // Handle invalid TOF values (NaN) by replacing with 0 contribution @@ -309,8 +299,6 @@ struct PIDFeatureExtractor { out[i] = l * pri[i]; sum += out[i]; } - - // Normalize probabilities so they sum to 1.0 for (int i = 0; i < 4; i++) { out[i] = sum > 0 ? 
out[i] / sum : 0.f; } @@ -319,7 +307,6 @@ struct PIDFeatureExtractor { // ============================================================================ // MAIN PROCESSING FUNCTION // ============================================================================ - /** * @brief Process collision and track data, extract PID features * @@ -346,14 +333,10 @@ struct PIDFeatureExtractor { > const& tracks, aod::McParticles const& mcParticles) { - // Use static counter to maintain event numbering across process calls static int eventCounter = 0; event_id = eventCounter++; int idx = 0; - // ====================================================================== - // TRACK LOOP - Process each track in the event - // ====================================================================== for (auto& t : tracks) { // ==================================================================== @@ -366,9 +349,7 @@ struct PIDFeatureExtractor { track_id = idx++; - // ==================================================================== - // EXTRACT KINEMATIC VARIABLES - // ==================================================================== + // Kinematics px = t.px(); py = t.py(); pz = t.pz(); @@ -376,14 +357,11 @@ struct PIDFeatureExtractor { p = t.p(); eta = t.eta(); phi = t.phi(); - // Calculate polar angle from pseudorapidity: θ = 2*arctan(exp(-η)) theta = 2.f * atanf(expf(-eta)); charge = t.sign(); // Track charge track_type = t.trackType(); // Track categorization - // ==================================================================== - // EXTRACT TPC INFORMATION - // ==================================================================== + // TPC info has_tpc = t.hasTPC(); if (has_tpc) { // TPC has valid measurement @@ -395,15 +373,12 @@ struct PIDFeatureExtractor { tpc_nclusters = t.tpcNClsFound(); // Quality: number of clusters tpc_chi2 = t.tpcChi2NCl(); // Quality: fit chi-square } else { - // TPC has no valid measurement - set sentinel values tpc_signal = tpc_nsigma_pi = tpc_nsigma_ka 
= tpc_nsigma_pr = tpc_nsigma_el = -999; tpc_nclusters = 0; tpc_chi2 = -999; } - // ==================================================================== - // EXTRACT TOF INFORMATION - // ==================================================================== + // TOF info has_tof = t.hasTOF(); if (has_tof) { // TOF has valid measurement @@ -414,7 +389,6 @@ struct PIDFeatureExtractor { tof_nsigma_pr = t.tofNSigmaPr(); // Deviation from proton hypothesis tof_nsigma_el = t.tofNSigmaEl(); // Deviation from electron hypothesis } else { - // TOF has no valid measurement - set sentinel values tof_beta = tof_mass = -999; tof_nsigma_pi = tof_nsigma_ka = tof_nsigma_pr = tof_nsigma_el = -999; } @@ -432,18 +406,13 @@ struct PIDFeatureExtractor { float arrTOF[4] = {tof_nsigma_pi, tof_nsigma_ka, tof_nsigma_pr, tof_nsigma_el}; float priors[4] = {1.f, 0.2f, 0.1f, 0.05f}; // Prior prob: π, K, p, e float probs[4]; - - // Compute combined PID probabilities computeBayesianPID(arrTPC, arrTOF, priors, probs); bayes_prob_pi = probs[0]; bayes_prob_ka = probs[1]; bayes_prob_pr = probs[2]; bayes_prob_el = probs[3]; - // ==================================================================== - // EXTRACT MONTE CARLO TRUTH (if available) - // ==================================================================== - // Safely access MC particle information with existence check + // MC truth if (t.has_mcParticle()) { auto mc = t.mcParticle(); mc_pdg = mc.pdgCode(); // Particle identifier code @@ -451,7 +420,6 @@ struct PIDFeatureExtractor { mc_py = mc.py(); mc_pz = mc.pz(); } else { - // No MC match - set sentinel values mc_pdg = 0; mc_px = mc_py = mc_pz = 0; } @@ -502,7 +470,6 @@ struct PIDFeatureExtractor { // ============================================================================ // FINALIZATION FUNCTION // ============================================================================ - /** * @brief Clean up and finalize output files * @@ -511,13 +478,11 @@ struct PIDFeatureExtractor { void 
finalize() { if (exportROOT) { - // Write TTree to ROOT file and close outputFile->cd(); featureTree->Write(); outputFile->Close(); } if (exportCSV) { - // Close CSV file csvFile.close(); } } @@ -526,7 +491,6 @@ struct PIDFeatureExtractor { // ============================================================================ // WORKFLOW DEFINITION // ============================================================================ - /** * @brief Define the O2Physics workflow * @@ -537,4 +501,3 @@ WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) { return WorkflowSpec{adaptAnalysisTask(cfgc)}; } - \ No newline at end of file From 4a1e8fb977aa0c0e251c5012f0687ff6520c930f Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 20:35:15 +0000 Subject: [PATCH 05/15] style: Remove trailing spaces from README.md --- Tools/PIDFeatureExtractor/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Tools/PIDFeatureExtractor/README.md b/Tools/PIDFeatureExtractor/README.md index a37b50db08b..b52986dbdee 100644 --- a/Tools/PIDFeatureExtractor/README.md +++ b/Tools/PIDFeatureExtractor/README.md @@ -464,7 +464,7 @@ df = pd.read_csv("pid_features.csv") df_valid = df[(df['has_tpc'] == True) & (df['has_tof'] == True)] # Prepare features (exclude MC truth for real data) -feature_cols = [col for col in df.columns +feature_cols = [col for col in df.columns if col not in ['event_id', 'track_id', 'mc_pdg', 'mc_px', 'mc_py', 'mc_pz']] X = df_valid[feature_cols].values @@ -846,8 +846,8 @@ ls -lh pid_features.root pid_features.csv --- -**Last Updated:** 2025-11-13 -**Task Version:** 1.0.0 -**O2Physics Compatibility:** Latest -**CCDB Support:** Integrated +**Last Updated:** 2025-11-13 +**Task Version:** 1.0.0 +**O2Physics Compatibility:** Latest +**CCDB Support:** Integrated **Run Method:** `./run.sh` with `myConfigExtractor.json` From 9a5788859163c7477a22256aae8395ba91d7d36e Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 
20:46:47 +0000 Subject: [PATCH 06/15] fix: Add double quotes around variable expansions in run.sh (shellcheck compliance) --- Tools/PIDFeatureExtractor/run.sh | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Tools/PIDFeatureExtractor/run.sh b/Tools/PIDFeatureExtractor/run.sh index 7acdc837f42..7f29d7c3436 100755 --- a/Tools/PIDFeatureExtractor/run.sh +++ b/Tools/PIDFeatureExtractor/run.sh @@ -1,27 +1,27 @@ #!/bin/bash +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. +# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. -OPTION="-b --configuration json://myConfigExtractor.json" +OPTION="$1" +EXECUTABLE="o2-analysis-pid-feature-extractor" -# Specify the executable binary directly (adjust the path as needed) -EXECUTABLE=~/alice/sw/BUILD/O2Physics-latest/O2Physics/stage/bin/o2-analysistutorial-mm-my-example-task-pid-feature-extractor -CONFIG_PATH="$(pwd)/myConfigExtractor.json" -OPTION="-b --configuration json://$CONFIG_PATH" - - -echo "Starting O2Physics PID Feature Extraction Workflow..." 
-echo "Using configuration: myConfigExtractor.json" - -o2-analysis-timestamp ${OPTION} | \ -o2-analysis-event-selection ${OPTION} | \ -o2-analysis-tracks-extra-v002-converter ${OPTION} | \ -o2-analysis-track-propagation ${OPTION} | \ -o2-analysis-pid-tpc-base ${OPTION} | \ -o2-analysis-pid-tpc ${OPTION} | \ -o2-analysis-pid-tof-base ${OPTION} | \ -o2-analysis-pid-tof ${OPTION} | \ -o2-analysis-pid-tof-beta ${OPTION} | \ -o2-analysis-multiplicity-table ${OPTION} | \ -o2-analysis-mccollision-converter ${OPTION} | \ -${EXECUTABLE} ${OPTION} - -echo "Check output files (e.g. pid_features.csv, pid_features.root)" +o2-analysis-timestamp "${OPTION}" | \ + o2-analysis-event-selection "${OPTION}" | \ + o2-analysis-tracks-extra-v002-converter "${OPTION}" | \ + o2-analysis-track-propagation "${OPTION}" | \ + o2-analysis-pid-tpc-base "${OPTION}" | \ + o2-analysis-pid-tpc "${OPTION}" | \ + o2-analysis-pid-tof-base "${OPTION}" | \ + o2-analysis-pid-tof "${OPTION}" | \ + o2-analysis-pid-tof-beta "${OPTION}" | \ + o2-analysis-multiplicity-table "${OPTION}" | \ + o2-analysis-mccollision-converter "${OPTION}" | \ + "${EXECUTABLE}" "${OPTION}" \ No newline at end of file From 04678d2131a5cf0c567c9c31cd0cf1521771892f Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 20:50:05 +0000 Subject: [PATCH 07/15] fix: Update workflow name to match source file name (pid-feature-extractor) --- Tools/PIDFeatureExtractor/CMakeLists.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Tools/PIDFeatureExtractor/CMakeLists.txt b/Tools/PIDFeatureExtractor/CMakeLists.txt index 7398c52c320..fc0dd9fa75d 100644 --- a/Tools/PIDFeatureExtractor/CMakeLists.txt +++ b/Tools/PIDFeatureExtractor/CMakeLists.txt @@ -9,8 +9,7 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. 
-# PID feature extractor -o2physics_add_dpl_workflow(my-example-task-pid-feature-extractor - SOURCES PIDFeatureExtractor/PIDFeatureExtractor.cxx - PUBLIC_LINK_LIBRARIES O2Physics::AnalysisCore - COMPONENT_NAME AnalysisTutorial) \ No newline at end of file +o2physics_add_dpl_workflow(pid-feature-extractor + SOURCES PIDFeatureExtractor.cxx + PUBLIC_LINK_LIBRARIES O2Physics::AnalysisCore + COMPONENT_NAME Analysis) \ No newline at end of file From 80c5a411e57d88ba7caf1f7a09dc4c3d2c815797 Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 20:52:06 +0000 Subject: [PATCH 08/15] style: Apply ALICE O2 coding standards (lowerCamelCase, std:: prefixes, constants, docs) --- .../PIDFeatureExtractor.cxx | 452 +++++++----------- 1 file changed, 186 insertions(+), 266 deletions(-) diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index 6039233d74c..96206558dff 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -9,33 +9,28 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-#include "Common/DataModel/EventSelection.h" -#include "Common/DataModel/PIDResponse.h" -#include "Common/DataModel/TrackSelectionTables.h" +/// \file PIDFeatureExtractor.cxx +/// \brief Task to extract particle identification features from ALICE AO2D data for machine learning workflows +/// \author Robert Forynski -#include "Framework/ASoAHelpers.h" -#include "Framework/AnalysisTask.h" #include "Framework/runDataProcessing.h" - +#include "Framework/AnalysisTask.h" +#include "Common/DataModel/TrackSelectionTables.h" +#include "Framework/ASoAHelpers.h" +#include "Common/DataModel/PIDResponse.h" +#include "Common/DataModel/EventSelection.h" #include "TFile.h" #include "TTree.h" - -#include #include +#include +#include +#include using namespace o2; using namespace o2::framework; using namespace o2::framework::expressions; -/** - * @struct PIDFeatureExtractor - * @brief O2Physics task for extracting particle identification features from AO2D files - * - * This task processes track data from the ALICE experiment and extracts comprehensive - * PID (Particle Identification) features for machine learning applications. - * It combines TPC and TOF information to compute Bayesian probabilities and saves - * features to both ROOT TTree and CSV formats. 
- */ +/// PIDFeatureExtractor task for extracting particle identification features from AO2D files struct PIDFeatureExtractor { // ============================================================================ // OUTPUT OBJECTS - File and data structures for feature storage @@ -52,83 +47,78 @@ struct PIDFeatureExtractor { // ============================================================================ // KINEMATIC VARIABLES - Track momentum and position information // ============================================================================ - - int event_id; /// Unique identifier for each collision event - int track_id; /// Track index within the event + int eventId; /// Unique identifier for each collision event + int trackId; /// Track index within the event // Momentum components (in GeV/c) - float px, py, pz; /// Cartesian momentum components - float pt, p; /// Transverse momentum and total momentum + float px, py, pz; /// Cartesian momentum components + float pt, p; /// Transverse momentum and total momentum // Angular variables - float eta; /// Pseudorapidity - float phi; /// Azimuthal angle - float theta; /// Polar angle (calculated from eta) + float eta; /// Pseudorapidity + float phi; /// Azimuthal angle + float theta; /// Polar angle (calculated from eta) // Track properties - int charge; /// Track charge (+1 or -1) - int track_type; /// Type of track (e.g., 0=global, 1=TPC-only, etc.) + int charge; /// Track charge (+1 or -1) + int trackType; /// Type of track (e.g., 0=global, 1=TPC-only, etc.) 
// ============================================================================ // TPC VARIABLES - Time Projection Chamber PID information // ============================================================================ - float tpc_signal; /// dE/dx energy loss in TPC (specific ionization) + float tpcSignal; /// dE/dx energy loss in TPC (specific ionization) // n-sigma values: standard deviations from expected energy loss for each particle - float tpc_nsigma_pi; /// n-sigma for pion (π) - float tpc_nsigma_ka; /// n-sigma for kaon (K) - float tpc_nsigma_pr; /// n-sigma for proton (p) - float tpc_nsigma_el; /// n-sigma for electron (e) + float tpcNsigmaPi; /// n-sigma for pion (π) + float tpcNsigmaKa; /// n-sigma for kaon (K) + float tpcNsigmaPr; /// n-sigma for proton (p) + float tpcNsigmaEl; /// n-sigma for electron (e) // Track quality variables - int tpc_nclusters; /// Number of TPC clusters used in track fit - float tpc_chi2; /// Chi-square per degree of freedom of TPC fit + int tpcNclusters; /// Number of TPC clusters used in track fit + float tpcChi2; /// Chi-square per degree of freedom of TPC fit // ============================================================================ // TOF VARIABLES - Time-Of-Flight PID information // ============================================================================ - - float tof_beta; /// β = v/c (velocity over speed of light) - float tof_mass; /// Reconstructed mass from TOF measurement + float tofBeta; /// β = v/c (velocity over speed of light) + float tofMass; /// Reconstructed mass from TOF measurement // n-sigma values for TOF detection - float tof_nsigma_pi; /// n-sigma for pion in TOF - float tof_nsigma_ka; /// n-sigma for kaon in TOF - float tof_nsigma_pr; /// n-sigma for proton in TOF - float tof_nsigma_el; /// n-sigma for electron in TOF + float tofNsigmaPi; /// n-sigma for pion in TOF + float tofNsigmaKa; /// n-sigma for kaon in TOF + float tofNsigmaPr; /// n-sigma for proton in TOF + float tofNsigmaEl; /// 
n-sigma for electron in TOF // ============================================================================ // BAYESIAN PID VARIABLES - Combined PID probabilities // ============================================================================ /// Bayesian probability that track is a pion (probability sum = 1.0) - float bayes_prob_pi; + float bayesProbPi; /// Bayesian probability that track is a kaon - float bayes_prob_ka; + float bayesProbKa; /// Bayesian probability that track is a proton - float bayes_prob_pr; + float bayesProbPr; /// Bayesian probability that track is an electron - float bayes_prob_el; + float bayesProbEl; // ============================================================================ // MONTE CARLO TRUTH INFORMATION - For simulated data // ============================================================================ - - int mc_pdg; /// PDG code of true particle (0 if no MC match) - float mc_px, mc_py, mc_pz; /// True momentum components from simulation + int mcPdg; /// PDG code of true particle (0 if no MC match) + float mcPx, mcPy, mcPz; /// True momentum components from simulation // ============================================================================ // DETECTOR AVAILABILITY FLAGS // ============================================================================ - - bool has_tpc; /// Flag: track has TPC information - bool has_tof; /// Flag: track has TOF information + bool hasTpc; /// Flag: track has TPC information + bool hasTof; /// Flag: track has TOF information // ============================================================================ // TRACK IMPACT PARAMETERS - Quality and background rejection // ============================================================================ - - float dca_xy; /// Distance of closest approach in xy-plane - float dca_z; /// Distance of closest approach in z-direction + float dcaXy; /// Distance of closest approach in xy-plane + float dcaZ; /// Distance of closest approach in z-direction // 
============================================================================ // HISTOGRAM REGISTRY - Quality control histograms @@ -160,26 +150,31 @@ struct PIDFeatureExtractor { /// Maximum transverse momentum cut (GeV/c) Configurable ptMax{"ptMax", 20.0f, "Maximum pT"}; + // ============================================================================ + // CONSTANTS + // ============================================================================ + static constexpr int kNumSpecies = 4; + static constexpr float kPriorPi = 1.0f; + static constexpr float kPriorKa = 0.2f; + static constexpr float kPriorPr = 0.1f; + static constexpr float kPriorEl = 0.05f; + static constexpr float kSentinelValue = -999.0f; + // ============================================================================ // INITIALIZATION FUNCTION // ============================================================================ - /** - * @brief Initialize output files and histograms - * - * Called once at task startup. Creates ROOT TTree and CSV file headers, - * and initializes all quality control histograms. 
- */ - void init(InitContext const&) - { + /// Initialize output files and histograms + void init(InitContext const&) { std::string base = outputPath.value; + // ROOT OUTPUT SETUP if (exportROOT) { outputFile = std::make_unique((base + ".root").c_str(), "RECREATE"); featureTree = std::make_unique("pid_features", "PID features"); // KINEMATIC VARIABLES - featureTree->Branch("event_id", &event_id); - featureTree->Branch("track_id", &track_id); + featureTree->Branch("eventId", &eventId); + featureTree->Branch("trackId", &trackId); featureTree->Branch("px", &px); featureTree->Branch("py", &py); featureTree->Branch("pz", &pz); @@ -189,70 +184,57 @@ struct PIDFeatureExtractor { featureTree->Branch("phi", &phi); featureTree->Branch("theta", &theta); featureTree->Branch("charge", &charge); - featureTree->Branch("track_type", &track_type); + featureTree->Branch("trackType", &trackType); // TPC VARIABLES - featureTree->Branch("tpc_signal", &tpc_signal); - featureTree->Branch("tpc_nsigma_pi", &tpc_nsigma_pi); - featureTree->Branch("tpc_nsigma_ka", &tpc_nsigma_ka); - featureTree->Branch("tpc_nsigma_pr", &tpc_nsigma_pr); - featureTree->Branch("tpc_nsigma_el", &tpc_nsigma_el); - featureTree->Branch("tpc_nclusters", &tpc_nclusters); - featureTree->Branch("tpc_chi2", &tpc_chi2); + featureTree->Branch("tpcSignal", &tpcSignal); + featureTree->Branch("tpcNsigmaPi", &tpcNsigmaPi); + featureTree->Branch("tpcNsigmaKa", &tpcNsigmaKa); + featureTree->Branch("tpcNsigmaPr", &tpcNsigmaPr); + featureTree->Branch("tpcNsigmaEl", &tpcNsigmaEl); + featureTree->Branch("tpcNclusters", &tpcNclusters); + featureTree->Branch("tpcChi2", &tpcChi2); // TOF VARIABLES - featureTree->Branch("tof_beta", &tof_beta); - featureTree->Branch("tof_mass", &tof_mass); - featureTree->Branch("tof_nsigma_pi", &tof_nsigma_pi); - featureTree->Branch("tof_nsigma_ka", &tof_nsigma_ka); - featureTree->Branch("tof_nsigma_pr", &tof_nsigma_pr); - featureTree->Branch("tof_nsigma_el", &tof_nsigma_el); + 
featureTree->Branch("tofBeta", &tofBeta); + featureTree->Branch("tofMass", &tofMass); + featureTree->Branch("tofNsigmaPi", &tofNsigmaPi); + featureTree->Branch("tofNsigmaKa", &tofNsigmaKa); + featureTree->Branch("tofNsigmaPr", &tofNsigmaPr); + featureTree->Branch("tofNsigmaEl", &tofNsigmaEl); // BAYESIAN PID VARIABLES - featureTree->Branch("bayes_prob_pi", &bayes_prob_pi); - featureTree->Branch("bayes_prob_ka", &bayes_prob_ka); - featureTree->Branch("bayes_prob_pr", &bayes_prob_pr); - featureTree->Branch("bayes_prob_el", &bayes_prob_el); + featureTree->Branch("bayesProbPi", &bayesProbPi); + featureTree->Branch("bayesProbKa", &bayesProbKa); + featureTree->Branch("bayesProbPr", &bayesProbPr); + featureTree->Branch("bayesProbEl", &bayesProbEl); - // MONTE CARLO TRUTH (simulated data only) - featureTree->Branch("mc_pdg", &mc_pdg); - featureTree->Branch("mc_px", &mc_px); - featureTree->Branch("mc_py", &mc_py); - featureTree->Branch("mc_pz", &mc_pz); + // MONTE CARLO TRUTH + featureTree->Branch("mcPdg", &mcPdg); + featureTree->Branch("mcPx", &mcPx); + featureTree->Branch("mcPy", &mcPy); + featureTree->Branch("mcPz", &mcPz); // DETECTOR FLAGS - featureTree->Branch("has_tpc", &has_tpc); - featureTree->Branch("has_tof", &has_tof); + featureTree->Branch("hasTpc", &hasTpc); + featureTree->Branch("hasTof", &hasTof); // IMPACT PARAMETERS - featureTree->Branch("dca_xy", &dca_xy); - featureTree->Branch("dca_z", &dca_z); + featureTree->Branch("dcaXy", &dcaXy); + featureTree->Branch("dcaZ", &dcaZ); } // CSV OUTPUT SETUP if (exportCSV) { csvFile.open((base + ".csv").c_str()); - // Write CSV header with all column names - csvFile << "event_id,track_id,px,py,pz,pt,p,eta,phi,theta,charge,track_type," - "tpc_signal,tpc_nsigma_pi,tpc_nsigma_ka,tpc_nsigma_pr,tpc_nsigma_el," - "tpc_nclusters,tpc_chi2," - "tof_beta,tof_mass,tof_nsigma_pi,tof_nsigma_ka,tof_nsigma_pr,tof_nsigma_el," - "bayes_prob_pi,bayes_prob_ka,bayes_prob_pr,bayes_prob_el," - 
"mc_pdg,mc_px,mc_py,mc_pz,has_tpc,has_tof,dca_xy,dca_z\n"; + csvFile << "eventId,trackId,px,py,pz,pt,p,eta,phi,theta,charge,trackType," + "tpcSignal,tpcNsigmaPi,tpcNsigmaKa,tpcNsigmaPr,tpcNsigmaEl," + "tpcNclusters,tpcChi2," + "tofBeta,tofMass,tofNsigmaPi,tofNsigmaKa,tofNsigmaPr,tofNsigmaEl," + "bayesProbPi,bayesProbKa,bayesProbPr,bayesProbEl," + "mcPdg,mcPx,mcPy,mcPz,hasTpc,hasTof,dcaXy,dcaZ\n"; } - // ======================================================================== - // HISTOGRAM SETUP - Quality Control Plots - // ======================================================================== - - // Define histogram axes with binning - const AxisSpec axisPt{200, 0, 10, "pT"}; // 200 bins, 0-10 GeV/c - const AxisSpec axisEta{60, -1.5, 1.5, "eta"}; // 60 bins, -1.5 to 1.5 - const AxisSpec axisdEdx{300, 0, 300, "dE/dx"}; // 300 bins, 0-300 - const AxisSpec axisBeta{120, 0, 1.2, "beta"}; // 120 bins, 0 to 1.2 - const AxisSpec axisMass{100, -0.2, 2.0, "mass"}; // 100 bins, -0.2 to 2.0 GeV/c² - - // Add histograms to registry // HISTOGRAM SETUP const AxisSpec axisPt{200, 0, 10, "pT"}; const AxisSpec axisEta{60, -1.5, 1.5, "eta"}; @@ -271,83 +253,51 @@ struct PIDFeatureExtractor { // ============================================================================ // BAYESIAN PID CALCULATION FUNCTION // ============================================================================ - /** - * @brief Compute Bayesian probabilities combining TPC and TOF information - * - * Uses Gaussian likelihood in n-sigma space and Bayesian inference to combine - * TPC dE/dx and TOF mass measurements. 
- * - * @param[in] nsTPC[4] n-sigma values for [pion, kaon, proton, electron] from TPC - * @param[in] nsTOF[4] n-sigma values for [pion, kaon, proton, electron] from TOF - * @param[in] pri[4] Prior probabilities for each particle hypothesis - * @param[out] out[4] Output Bayesian probabilities (normalized to sum=1) - * - * Formula: P(particle|TPC,TOF) ∝ P(TPC|particle) * P(TOF|particle) * P(particle) - * - * Likelihood: L_i = exp(-0.5 * (ns_TPC_i² + ns_TOF_i²)) - */ - void computeBayesianPID(float nsTPC[4], float nsTOF[4], float pri[4], float out[4]) - { + /// Compute Bayesian probabilities combining TPC and TOF information + void computeBayesianPID(const float nsTPC[kNumSpecies], const float nsTOF[kNumSpecies], const float pri[kNumSpecies], float out[kNumSpecies]) { float sum = 0; - for (int i = 0; i < 4; i++) { - // Gaussian likelihood: exp(-0.5 * chi²) - // Handle invalid TOF values (NaN) by replacing with 0 contribution - float l = std::exp(-0.5f * (nsTPC[i] * nsTPC[i] + - (std::isfinite(nsTOF[i]) ? nsTOF[i] * nsTOF[i] : 0.f))); - // Apply prior probability and accumulate + for (int i = 0; i < kNumSpecies; i++) { + float l = std::exp(-0.5f * (nsTPC[i]*nsTPC[i] + + (std::isfinite(nsTOF[i]) ? nsTOF[i]*nsTOF[i] : 0.0f))); + out[i] = l * pri[i]; sum += out[i]; } - for (int i = 0; i < 4; i++) { - out[i] = sum > 0 ? out[i] / sum : 0.f; + + for (int i = 0; i < kNumSpecies; i++) { + out[i] = sum > 0 ? out[i] / sum : 0.0f; } } // ============================================================================ // MAIN PROCESSING FUNCTION // ============================================================================ - /** - * @brief Process collision and track data, extract PID features - * - * Called for each collision event in the input data. Applies track selections, - * extracts features from TPC and TOF detectors, computes Bayesian PID, - * and writes output to ROOT and/or CSV. 
- * - * @param collision Collision event data - * @param tracks Table of tracks with all associated PID information - * @param mcParticles Monte Carlo particle information (for simulated data) - */ + /// Process collision and track data, extract PID features void process( aod::Collision const& collision, soa::Join< - aod::Tracks, // Base track properties - aod::TracksExtra, // Extended track info - aod::TracksDCA, // Impact parameters (DCA) - aod::pidTPCPi, aod::pidTPCKa, aod::pidTPCPr, // TPC PID for pion, kaon, proton - aod::pidTPCEl, // TPC PID for electron - aod::pidTOFPi, aod::pidTOFKa, aod::pidTOFPr, // TOF PID for pion, kaon, proton - aod::pidTOFEl, // TOF PID for electron - aod::pidTOFmass, aod::pidTOFbeta, // TOF mass and beta - aod::McTrackLabels // MC truth matching - > const& tracks, + aod::Tracks, + aod::TracksExtra, + aod::TracksDCA, + aod::pidTPCPi, aod::pidTPCKa, aod::pidTPCPr, + aod::pidTPCEl, + aod::pidTOFPi, aod::pidTOFKa, aod::pidTOFPr, + aod::pidTOFEl, + aod::pidTOFmass, aod::pidTOFbeta, + aod::McTrackLabels + > const& tracks, aod::McParticles const& mcParticles) { static int eventCounter = 0; - event_id = eventCounter++; + eventId = eventCounter++; int idx = 0; - for (auto& t : tracks) { + for (const auto& t : tracks) { + if (t.pt() < ptMin || t.pt() > ptMax) continue; + if (t.eta() < etaMin || t.eta() > etaMax) continue; - // ==================================================================== - // TRACK SELECTION - Apply kinematic cuts - // ==================================================================== - if (t.pt() < ptMin || t.pt() > ptMax) - continue; // Apply pT cut - if (t.eta() < etaMin || t.eta() > etaMax) - continue; // Apply eta cut - - track_id = idx++; + trackId = idx++; // Kinematics px = t.px(); @@ -357,112 +307,93 @@ struct PIDFeatureExtractor { p = t.p(); eta = t.eta(); phi = t.phi(); - theta = 2.f * atanf(expf(-eta)); - charge = t.sign(); // Track charge - track_type = t.trackType(); // Track categorization + theta = 
2.0f * std::atanf(std::expf(-eta)); + charge = t.sign(); + trackType = t.trackType(); // TPC info - has_tpc = t.hasTPC(); - if (has_tpc) { - // TPC has valid measurement - tpc_signal = t.tpcSignal(); // dE/dx specific ionization - tpc_nsigma_pi = t.tpcNSigmaPi(); // Deviation from pion hypothesis - tpc_nsigma_ka = t.tpcNSigmaKa(); // Deviation from kaon hypothesis - tpc_nsigma_pr = t.tpcNSigmaPr(); // Deviation from proton hypothesis - tpc_nsigma_el = t.tpcNSigmaEl(); // Deviation from electron hypothesis - tpc_nclusters = t.tpcNClsFound(); // Quality: number of clusters - tpc_chi2 = t.tpcChi2NCl(); // Quality: fit chi-square + hasTpc = t.hasTPC(); + if (hasTpc) { + tpcSignal = t.tpcSignal(); + tpcNsigmaPi = t.tpcNSigmaPi(); + tpcNsigmaKa = t.tpcNSigmaKa(); + tpcNsigmaPr = t.tpcNSigmaPr(); + tpcNsigmaEl = t.tpcNSigmaEl(); + tpcNclusters = t.tpcNClsFound(); + tpcChi2 = t.tpcChi2NCl(); } else { - tpc_signal = tpc_nsigma_pi = tpc_nsigma_ka = tpc_nsigma_pr = tpc_nsigma_el = -999; - tpc_nclusters = 0; - tpc_chi2 = -999; + tpcSignal = tpcNsigmaPi = tpcNsigmaKa = tpcNsigmaPr = tpcNsigmaEl = kSentinelValue; + tpcNclusters = 0; + tpcChi2 = kSentinelValue; } // TOF info - has_tof = t.hasTOF(); - if (has_tof) { - // TOF has valid measurement - tof_beta = t.beta(); // Velocity over c - tof_mass = t.mass(); // Reconstructed mass - tof_nsigma_pi = t.tofNSigmaPi(); // Deviation from pion hypothesis - tof_nsigma_ka = t.tofNSigmaKa(); // Deviation from kaon hypothesis - tof_nsigma_pr = t.tofNSigmaPr(); // Deviation from proton hypothesis - tof_nsigma_el = t.tofNSigmaEl(); // Deviation from electron hypothesis + hasTof = t.hasTOF(); + if (hasTof) { + tofBeta = t.beta(); + tofMass = t.mass(); + tofNsigmaPi = t.tofNSigmaPi(); + tofNsigmaKa = t.tofNSigmaKa(); + tofNsigmaPr = t.tofNSigmaPr(); + tofNsigmaEl = t.tofNSigmaEl(); } else { - tof_beta = tof_mass = -999; - tof_nsigma_pi = tof_nsigma_ka = tof_nsigma_pr = tof_nsigma_el = -999; + tofBeta = tofMass = kSentinelValue; + tofNsigmaPi = 
tofNsigmaKa = tofNsigmaPr = tofNsigmaEl = kSentinelValue; } - // ==================================================================== - // EXTRACT IMPACT PARAMETERS (track quality) - // ==================================================================== - dca_xy = t.dcaXY(); // Distance of closest approach in transverse plane - dca_z = t.dcaZ(); // Distance of closest approach along beam axis - - // ==================================================================== - // COMPUTE BAYESIAN PID - // ==================================================================== - float arrTPC[4] = {tpc_nsigma_pi, tpc_nsigma_ka, tpc_nsigma_pr, tpc_nsigma_el}; - float arrTOF[4] = {tof_nsigma_pi, tof_nsigma_ka, tof_nsigma_pr, tof_nsigma_el}; - float priors[4] = {1.f, 0.2f, 0.1f, 0.05f}; // Prior prob: π, K, p, e - float probs[4]; + // Impact parameters + dcaXy = t.dcaXY(); + dcaZ = t.dcaZ(); + + // Bayesian PID calculation + float arrTPC[kNumSpecies] = {tpcNsigmaPi, tpcNsigmaKa, tpcNsigmaPr, tpcNsigmaEl}; + float arrTOF[kNumSpecies] = {tofNsigmaPi, tofNsigmaKa, tofNsigmaPr, tofNsigmaEl}; + float priors[kNumSpecies] = {kPriorPi, kPriorKa, kPriorPr, kPriorEl}; + float probs[kNumSpecies]; + computeBayesianPID(arrTPC, arrTOF, priors, probs); - bayes_prob_pi = probs[0]; - bayes_prob_ka = probs[1]; - bayes_prob_pr = probs[2]; - bayes_prob_el = probs[3]; + bayesProbPi = probs[0]; + bayesProbKa = probs[1]; + bayesProbPr = probs[2]; + bayesProbEl = probs[3]; // MC truth if (t.has_mcParticle()) { auto mc = t.mcParticle(); - mc_pdg = mc.pdgCode(); // Particle identifier code - mc_px = mc.px(); // True momentum components - mc_py = mc.py(); - mc_pz = mc.pz(); + mcPdg = mc.pdgCode(); + mcPx = mc.px(); + mcPy = mc.py(); + mcPz = mc.pz(); } else { - mc_pdg = 0; - mc_px = mc_py = mc_pz = 0; + mcPdg = 0; + mcPx = mcPy = mcPz = 0; } - // ==================================================================== - // WRITE OUTPUT - // ==================================================================== - 
- // Write to ROOT TTree - if (exportROOT) - featureTree->Fill(); - - // Write to CSV file + // Write outputs + if (exportROOT) featureTree->Fill(); if (exportCSV) { - csvFile << event_id << "," << track_id << "," + csvFile << eventId << "," << trackId << "," << px << "," << py << "," << pz << "," << pt << "," << p << "," << eta << "," << phi << "," << theta << "," - << charge << "," << track_type << "," - << tpc_signal << "," << tpc_nsigma_pi << "," << tpc_nsigma_ka << "," << tpc_nsigma_pr << "," << tpc_nsigma_el << "," - << tpc_nclusters << "," << tpc_chi2 << "," - << tof_beta << "," << tof_mass << "," << tof_nsigma_pi << "," << tof_nsigma_ka << "," << tof_nsigma_pr << "," << tof_nsigma_el << "," - << bayes_prob_pi << "," << bayes_prob_ka << "," << bayes_prob_pr << "," << bayes_prob_el << "," - << mc_pdg << "," << mc_px << "," << mc_py << "," << mc_pz << "," - << has_tpc << "," << has_tof << "," - << dca_xy << "," << dca_z << "\n"; + << charge << "," << trackType << "," + << tpcSignal << "," << tpcNsigmaPi << "," << tpcNsigmaKa << "," << tpcNsigmaPr << "," << tpcNsigmaEl << "," + << tpcNclusters << "," << tpcChi2 << "," + << tofBeta << "," << tofMass << "," << tofNsigmaPi << "," << tofNsigmaKa << "," << tofNsigmaPr << "," << tofNsigmaEl << "," + << bayesProbPi << "," << bayesProbKa << "," << bayesProbPr << "," << bayesProbEl << "," + << mcPdg << "," << mcPx << "," << mcPy << "," << mcPz << "," + << hasTpc << "," << hasTof << "," + << dcaXy << "," << dcaZ << "\n"; } - // ==================================================================== - // FILL QUALITY CONTROL HISTOGRAMS - // ==================================================================== - histos.fill(HIST("QC/nTracks"), 1); // Count total tracks processed - histos.fill(HIST("QC/pt"), pt); // pT distribution - histos.fill(HIST("QC/eta"), eta); // eta distribution - - // TPC dE/dx vs pT (only if TPC measurement exists) - if (has_tpc) - histos.fill(HIST("QC/tpc_dEdx_vs_pt"), pt, tpc_signal); - - // TOF 
beta and mass vs momentum (only if TOF measurement exists) - if (has_tof) { - histos.fill(HIST("QC/tof_beta_vs_p"), p, tof_beta); - histos.fill(HIST("QC/mass_vs_p"), p, tof_mass); + // Fill QC histograms + histos.fill(HIST("QC/nTracks"), 1); + histos.fill(HIST("QC/pt"), pt); + histos.fill(HIST("QC/eta"), eta); + if (hasTpc) histos.fill(HIST("QC/tpc_dEdx_vs_pt"), pt, tpcSignal); + if (hasTof) { + histos.fill(HIST("QC/tof_beta_vs_p"), p, tofBeta); + histos.fill(HIST("QC/mass_vs_p"), p, tofMass); } } } @@ -470,13 +401,8 @@ struct PIDFeatureExtractor { // ============================================================================ // FINALIZATION FUNCTION // ============================================================================ - /** - * @brief Clean up and finalize output files - * - * Called at task completion. Writes TTree to file and closes all output files. - */ - void finalize() - { + /// Clean up and finalize output files + void finalize() { if (exportROOT) { outputFile->cd(); featureTree->Write(); @@ -491,13 +417,7 @@ struct PIDFeatureExtractor { // ============================================================================ // WORKFLOW DEFINITION // ============================================================================ -/** - * @brief Define the O2Physics workflow - * - * This function creates and registers the PIDFeatureExtractor task - * into the O2 data processing workflow. 
- */ -WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) -{ +/// Define the O2Physics workflow +WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) { return WorkflowSpec{adaptAnalysisTask(cfgc)}; -} +} \ No newline at end of file From 38b58de020e0845243ffe8318f6f7355cba3dd35 Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 20:56:28 +0000 Subject: [PATCH 09/15] style: Fix workflow name to pidFeatureExtractor and use UpperCamelCase for constants --- Tools/PIDFeatureExtractor/CMakeLists.txt | 4 +-- .../PIDFeatureExtractor.cxx | 34 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Tools/PIDFeatureExtractor/CMakeLists.txt b/Tools/PIDFeatureExtractor/CMakeLists.txt index fc0dd9fa75d..297cd8a5798 100644 --- a/Tools/PIDFeatureExtractor/CMakeLists.txt +++ b/Tools/PIDFeatureExtractor/CMakeLists.txt @@ -9,7 +9,7 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. 
-o2physics_add_dpl_workflow(pid-feature-extractor +o2physics_add_dpl_workflow(pidFeatureExtractor SOURCES PIDFeatureExtractor.cxx PUBLIC_LINK_LIBRARIES O2Physics::AnalysisCore - COMPONENT_NAME Analysis) \ No newline at end of file + COMPONENT_NAME Analysis) diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index 96206558dff..ca1709d3a76 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -153,12 +153,12 @@ struct PIDFeatureExtractor { // ============================================================================ // CONSTANTS // ============================================================================ - static constexpr int kNumSpecies = 4; - static constexpr float kPriorPi = 1.0f; - static constexpr float kPriorKa = 0.2f; - static constexpr float kPriorPr = 0.1f; - static constexpr float kPriorEl = 0.05f; - static constexpr float kSentinelValue = -999.0f; + static constexpr int KNumSpecies = 4; + static constexpr float KPriorPi = 1.0f; + static constexpr float KPriorKa = 0.2f; + static constexpr float KPriorPr = 0.1f; + static constexpr float KPriorEl = 0.05f; + static constexpr float KSentinelValue = -999.0f; // ============================================================================ // INITIALIZATION FUNCTION @@ -254,10 +254,10 @@ struct PIDFeatureExtractor { // BAYESIAN PID CALCULATION FUNCTION // ============================================================================ /// Compute Bayesian probabilities combining TPC and TOF information - void computeBayesianPID(const float nsTPC[kNumSpecies], const float nsTOF[kNumSpecies], const float pri[kNumSpecies], float out[kNumSpecies]) { + void computeBayesianPID(const float nsTPC[KNumSpecies], const float nsTOF[KNumSpecies], const float pri[KNumSpecies], float out[KNumSpecies]) { float sum = 0; - for (int i = 0; i < kNumSpecies; i++) { + for (int i = 0; i < KNumSpecies; i++) 
{ float l = std::exp(-0.5f * (nsTPC[i]*nsTPC[i] + (std::isfinite(nsTOF[i]) ? nsTOF[i]*nsTOF[i] : 0.0f))); @@ -265,7 +265,7 @@ struct PIDFeatureExtractor { sum += out[i]; } - for (int i = 0; i < kNumSpecies; i++) { + for (int i = 0; i < KNumSpecies; i++) { out[i] = sum > 0 ? out[i] / sum : 0.0f; } } @@ -322,9 +322,9 @@ struct PIDFeatureExtractor { tpcNclusters = t.tpcNClsFound(); tpcChi2 = t.tpcChi2NCl(); } else { - tpcSignal = tpcNsigmaPi = tpcNsigmaKa = tpcNsigmaPr = tpcNsigmaEl = kSentinelValue; + tpcSignal = tpcNsigmaPi = tpcNsigmaKa = tpcNsigmaPr = tpcNsigmaEl = KSentinelValue; tpcNclusters = 0; - tpcChi2 = kSentinelValue; + tpcChi2 = KSentinelValue; } // TOF info @@ -337,8 +337,8 @@ struct PIDFeatureExtractor { tofNsigmaPr = t.tofNSigmaPr(); tofNsigmaEl = t.tofNSigmaEl(); } else { - tofBeta = tofMass = kSentinelValue; - tofNsigmaPi = tofNsigmaKa = tofNsigmaPr = tofNsigmaEl = kSentinelValue; + tofBeta = tofMass = KSentinelValue; + tofNsigmaPi = tofNsigmaKa = tofNsigmaPr = tofNsigmaEl = KSentinelValue; } // Impact parameters @@ -346,10 +346,10 @@ struct PIDFeatureExtractor { dcaZ = t.dcaZ(); // Bayesian PID calculation - float arrTPC[kNumSpecies] = {tpcNsigmaPi, tpcNsigmaKa, tpcNsigmaPr, tpcNsigmaEl}; - float arrTOF[kNumSpecies] = {tofNsigmaPi, tofNsigmaKa, tofNsigmaPr, tofNsigmaEl}; - float priors[kNumSpecies] = {kPriorPi, kPriorKa, kPriorPr, kPriorEl}; - float probs[kNumSpecies]; + float arrTPC[KNumSpecies] = {tpcNsigmaPi, tpcNsigmaKa, tpcNsigmaPr, tpcNsigmaEl}; + float arrTOF[KNumSpecies] = {tofNsigmaPi, tofNsigmaKa, tofNsigmaPr, tofNsigmaEl}; + float priors[KNumSpecies] = {KPriorPi, KPriorKa, KPriorPr, KPriorEl}; + float probs[KNumSpecies]; computeBayesianPID(arrTPC, arrTOF, priors, probs); bayesProbPi = probs[0]; From 7758661a7d750a305d55e928791b21c6d3fabf47 Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 20:59:57 +0000 Subject: [PATCH 10/15] style: Apply clang-format formatting standards --- .../PIDFeatureExtractor.cxx | 98 
++++++++++--------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index ca1709d3a76..1bf18dd2d17 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -13,18 +13,21 @@ /// \brief Task to extract particle identification features from ALICE AO2D data for machine learning workflows /// \author Robert Forynski -#include "Framework/runDataProcessing.h" -#include "Framework/AnalysisTask.h" +#include "Common/DataModel/EventSelection.h" +#include "Common/DataModel/PIDResponse.h" #include "Common/DataModel/TrackSelectionTables.h" + #include "Framework/ASoAHelpers.h" -#include "Common/DataModel/PIDResponse.h" -#include "Common/DataModel/EventSelection.h" +#include "Framework/AnalysisTask.h" +#include "Framework/runDataProcessing.h" + #include "TFile.h" #include "TTree.h" -#include + #include -#include +#include #include +#include using namespace o2; using namespace o2::framework; @@ -51,44 +54,44 @@ struct PIDFeatureExtractor { int trackId; /// Track index within the event // Momentum components (in GeV/c) - float px, py, pz; /// Cartesian momentum components - float pt, p; /// Transverse momentum and total momentum + float px, py, pz; /// Cartesian momentum components + float pt, p; /// Transverse momentum and total momentum // Angular variables - float eta; /// Pseudorapidity - float phi; /// Azimuthal angle - float theta; /// Polar angle (calculated from eta) + float eta; /// Pseudorapidity + float phi; /// Azimuthal angle + float theta; /// Polar angle (calculated from eta) // Track properties - int charge; /// Track charge (+1 or -1) - int trackType; /// Type of track (e.g., 0=global, 1=TPC-only, etc.) + int charge; /// Track charge (+1 or -1) + int trackType; /// Type of track (e.g., 0=global, 1=TPC-only, etc.) 
// ============================================================================ // TPC VARIABLES - Time Projection Chamber PID information // ============================================================================ - float tpcSignal; /// dE/dx energy loss in TPC (specific ionization) + float tpcSignal; /// dE/dx energy loss in TPC (specific ionization) // n-sigma values: standard deviations from expected energy loss for each particle - float tpcNsigmaPi; /// n-sigma for pion (π) - float tpcNsigmaKa; /// n-sigma for kaon (K) - float tpcNsigmaPr; /// n-sigma for proton (p) - float tpcNsigmaEl; /// n-sigma for electron (e) + float tpcNsigmaPi; /// n-sigma for pion (π) + float tpcNsigmaKa; /// n-sigma for kaon (K) + float tpcNsigmaPr; /// n-sigma for proton (p) + float tpcNsigmaEl; /// n-sigma for electron (e) // Track quality variables - int tpcNclusters; /// Number of TPC clusters used in track fit - float tpcChi2; /// Chi-square per degree of freedom of TPC fit + int tpcNclusters; /// Number of TPC clusters used in track fit + float tpcChi2; /// Chi-square per degree of freedom of TPC fit // ============================================================================ // TOF VARIABLES - Time-Of-Flight PID information // ============================================================================ - float tofBeta; /// β = v/c (velocity over speed of light) - float tofMass; /// Reconstructed mass from TOF measurement + float tofBeta; /// β = v/c (velocity over speed of light) + float tofMass; /// Reconstructed mass from TOF measurement // n-sigma values for TOF detection - float tofNsigmaPi; /// n-sigma for pion in TOF - float tofNsigmaKa; /// n-sigma for kaon in TOF - float tofNsigmaPr; /// n-sigma for proton in TOF - float tofNsigmaEl; /// n-sigma for electron in TOF + float tofNsigmaPi; /// n-sigma for pion in TOF + float tofNsigmaKa; /// n-sigma for kaon in TOF + float tofNsigmaPr; /// n-sigma for proton in TOF + float tofNsigmaEl; /// n-sigma for electron in 
TOF // ============================================================================ // BAYESIAN PID VARIABLES - Combined PID probabilities @@ -105,20 +108,20 @@ struct PIDFeatureExtractor { // ============================================================================ // MONTE CARLO TRUTH INFORMATION - For simulated data // ============================================================================ - int mcPdg; /// PDG code of true particle (0 if no MC match) - float mcPx, mcPy, mcPz; /// True momentum components from simulation + int mcPdg; /// PDG code of true particle (0 if no MC match) + float mcPx, mcPy, mcPz; /// True momentum components from simulation // ============================================================================ // DETECTOR AVAILABILITY FLAGS // ============================================================================ - bool hasTpc; /// Flag: track has TPC information - bool hasTof; /// Flag: track has TOF information + bool hasTpc; /// Flag: track has TPC information + bool hasTof; /// Flag: track has TOF information // ============================================================================ // TRACK IMPACT PARAMETERS - Quality and background rejection // ============================================================================ - float dcaXy; /// Distance of closest approach in xy-plane - float dcaZ; /// Distance of closest approach in z-direction + float dcaXy; /// Distance of closest approach in xy-plane + float dcaZ; /// Distance of closest approach in z-direction // ============================================================================ // HISTOGRAM REGISTRY - Quality control histograms @@ -164,7 +167,8 @@ struct PIDFeatureExtractor { // INITIALIZATION FUNCTION // ============================================================================ /// Initialize output files and histograms - void init(InitContext const&) { + void init(InitContext const&) + { std::string base = outputPath.value; // ROOT OUTPUT SETUP @@ -254,12 
+258,13 @@ struct PIDFeatureExtractor { // BAYESIAN PID CALCULATION FUNCTION // ============================================================================ /// Compute Bayesian probabilities combining TPC and TOF information - void computeBayesianPID(const float nsTPC[KNumSpecies], const float nsTOF[KNumSpecies], const float pri[KNumSpecies], float out[KNumSpecies]) { + void computeBayesianPID(const float nsTPC[KNumSpecies], const float nsTOF[KNumSpecies], const float pri[KNumSpecies], float out[KNumSpecies]) + { float sum = 0; for (int i = 0; i < KNumSpecies; i++) { - float l = std::exp(-0.5f * (nsTPC[i]*nsTPC[i] + - (std::isfinite(nsTOF[i]) ? nsTOF[i]*nsTOF[i] : 0.0f))); + float l = std::exp(-0.5f * (nsTPC[i] * nsTPC[i] + + (std::isfinite(nsTOF[i]) ? nsTOF[i] * nsTOF[i] : 0.0f))); out[i] = l * pri[i]; sum += out[i]; @@ -285,8 +290,7 @@ struct PIDFeatureExtractor { aod::pidTOFPi, aod::pidTOFKa, aod::pidTOFPr, aod::pidTOFEl, aod::pidTOFmass, aod::pidTOFbeta, - aod::McTrackLabels - > const& tracks, + aod::McTrackLabels> const& tracks, aod::McParticles const& mcParticles) { static int eventCounter = 0; @@ -294,8 +298,10 @@ struct PIDFeatureExtractor { int idx = 0; for (const auto& t : tracks) { - if (t.pt() < ptMin || t.pt() > ptMax) continue; - if (t.eta() < etaMin || t.eta() > etaMax) continue; + if (t.pt() < ptMin || t.pt() > ptMax) + continue; + if (t.eta() < etaMin || t.eta() > etaMax) + continue; trackId = idx++; @@ -370,7 +376,8 @@ struct PIDFeatureExtractor { } // Write outputs - if (exportROOT) featureTree->Fill(); + if (exportROOT) + featureTree->Fill(); if (exportCSV) { csvFile << eventId << "," << trackId << "," << px << "," << py << "," << pz << "," @@ -390,7 +397,8 @@ struct PIDFeatureExtractor { histos.fill(HIST("QC/nTracks"), 1); histos.fill(HIST("QC/pt"), pt); histos.fill(HIST("QC/eta"), eta); - if (hasTpc) histos.fill(HIST("QC/tpc_dEdx_vs_pt"), pt, tpcSignal); + if (hasTpc) + histos.fill(HIST("QC/tpc_dEdx_vs_pt"), pt, tpcSignal); if (hasTof) { 
histos.fill(HIST("QC/tof_beta_vs_p"), p, tofBeta); histos.fill(HIST("QC/mass_vs_p"), p, tofMass); @@ -402,7 +410,8 @@ struct PIDFeatureExtractor { // FINALIZATION FUNCTION // ============================================================================ /// Clean up and finalize output files - void finalize() { + void finalize() + { if (exportROOT) { outputFile->cd(); featureTree->Write(); @@ -418,6 +427,7 @@ struct PIDFeatureExtractor { // WORKFLOW DEFINITION // ============================================================================ /// Define the O2Physics workflow -WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) { +WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) +{ return WorkflowSpec{adaptAnalysisTask(cfgc)}; } \ No newline at end of file From 4c16860b9bf5eadf63589d2fbdfe6505680ee6fe Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 21:01:59 +0000 Subject: [PATCH 11/15] fix: Use kebab-case workflow name pid-feature-extractor --- Tools/PIDFeatureExtractor/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/PIDFeatureExtractor/CMakeLists.txt b/Tools/PIDFeatureExtractor/CMakeLists.txt index 297cd8a5798..fc0dd9fa75d 100644 --- a/Tools/PIDFeatureExtractor/CMakeLists.txt +++ b/Tools/PIDFeatureExtractor/CMakeLists.txt @@ -9,7 +9,7 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. 
-o2physics_add_dpl_workflow(pidFeatureExtractor +o2physics_add_dpl_workflow(pid-feature-extractor SOURCES PIDFeatureExtractor.cxx PUBLIC_LINK_LIBRARIES O2Physics::AnalysisCore - COMPONENT_NAME Analysis) + COMPONENT_NAME Analysis) \ No newline at end of file From 1c9775cd892e2e4ab44d6445ca2358c94f24cc93 Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 21:13:08 +0000 Subject: [PATCH 12/15] refactor: Rename PIDFeatureExtractor.cxx to pidFeatureExtractor.cxx and update CMakeLists.txt for ALICE O2 naming conventions --- Tools/PIDFeatureExtractor/CMakeLists.txt | 2 +- Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/PIDFeatureExtractor/CMakeLists.txt b/Tools/PIDFeatureExtractor/CMakeLists.txt index fc0dd9fa75d..8f44062589e 100644 --- a/Tools/PIDFeatureExtractor/CMakeLists.txt +++ b/Tools/PIDFeatureExtractor/CMakeLists.txt @@ -10,6 +10,6 @@ # or submit itself to any jurisdiction. o2physics_add_dpl_workflow(pid-feature-extractor - SOURCES PIDFeatureExtractor.cxx + SOURCES pidFeatureExtractor.cxx PUBLIC_LINK_LIBRARIES O2Physics::AnalysisCore COMPONENT_NAME Analysis) \ No newline at end of file diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index 1bf18dd2d17..a676d12275e 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file PIDFeatureExtractor.cxx +/// \file pidFeatureExtractor.cxx /// \brief Task to extract particle identification features from ALICE AO2D data for machine learning workflows /// \author Robert Forynski From 6d90508ce64d7602c54f83b5f725aa4782ef363e Mon Sep 17 00:00:00 2001 From: Robert Forynski Date: Thu, 13 Nov 2025 21:20:05 +0000 Subject: [PATCH 13/15] fix: Add missing newline at end of pidFeatureExtractor.cxx --- Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index a676d12275e..51620b4b89f 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -430,4 +430,4 @@ struct PIDFeatureExtractor { WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) { return WorkflowSpec{adaptAnalysisTask(cfgc)}; -} \ No newline at end of file +} From 275199972e9a9c07d9cdd830c8c7eb898c93391d Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Thu, 13 Nov 2025 21:20:35 +0000 Subject: [PATCH 14/15] Please consider the following formatting changes --- Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx index 51620b4b89f..4737680c727 100644 --- a/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx +++ b/Tools/PIDFeatureExtractor/PIDFeatureExtractor.cxx @@ -50,8 +50,8 @@ struct PIDFeatureExtractor { // ============================================================================ // KINEMATIC VARIABLES - Track momentum and position information // ============================================================================ - int eventId; /// Unique identifier for each collision event - int trackId; /// Track index within the event + int eventId; /// Unique identifier for each 
collision event + int trackId; /// Track index within the event // Momentum components (in GeV/c) float px, py, pz; /// Cartesian momentum components From 465f3d0afe3dc1c2a311ed407c3314769810c21e Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Thu, 13 Nov 2025 21:24:52 +0000 Subject: [PATCH 15/15] MegaLinter fixes --- Tools/PIDFeatureExtractor/README.md | 140 ++++++++++++++-------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/Tools/PIDFeatureExtractor/README.md b/Tools/PIDFeatureExtractor/README.md index b52986dbdee..11bf8f6fa81 100644 --- a/Tools/PIDFeatureExtractor/README.md +++ b/Tools/PIDFeatureExtractor/README.md @@ -32,17 +32,17 @@ The **PIDFeatureExtractor** combines information from multiple ALICE detectors ( The task expects the following input tables from AO2D files. Some tables may be fetched from CCDB if not present in the file: -| Table | Source | Purpose | Fallback | -|-------|--------|---------|----------| -| `aod::Tracks` | AO2D | Base track properties (momentum, angles) | Required | -| `aod::TracksExtra` | AO2D | Extended track information | Required | -| `aod::TracksDCA` | AO2D | Impact parameters (DCA) | Required | -| `aod::pidTPCPi/Ka/Pr/El` | AO2D/CCDB | TPC n-sigma values for each particle species | CCDB | -| `aod::pidTOFPi/Ka/Pr/El` | AO2D/CCDB | TOF n-sigma values for each particle species | CCDB | -| `aod::pidTOFmass` | AO2D/CCDB | TOF reconstructed mass | CCDB | -| `aod::pidTOFbeta` | AO2D/CCDB | TOF beta (v/c) measurement | CCDB | -| `aod::McTrackLabels` | AO2D | MC truth matching (optional, for simulated data) | Optional | -| `aod::McParticles` | AO2D | MC particle information | Optional | +| Table | Source | Purpose | Fallback | +|--------------------------|-----------|--------------------------------------------------|----------| +| `aod::Tracks` | AO2D | Base track properties (momentum, angles) | Required | +| `aod::TracksExtra` | AO2D | Extended track information | Required | +| `aod::TracksDCA` | AO2D | 
Impact parameters (DCA) | Required | +| `aod::pidTPCPi/Ka/Pr/El` | AO2D/CCDB | TPC n-sigma values for each particle species | CCDB | +| `aod::pidTOFPi/Ka/Pr/El` | AO2D/CCDB | TOF n-sigma values for each particle species | CCDB | +| `aod::pidTOFmass` | AO2D/CCDB | TOF reconstructed mass | CCDB | +| `aod::pidTOFbeta` | AO2D/CCDB | TOF beta (v/c) measurement | CCDB | +| `aod::McTrackLabels` | AO2D | MC truth matching (optional, for simulated data) | Optional | +| `aod::McParticles` | AO2D | MC particle information | Optional | **Note:** If PID tables are not available in the AO2D file, the framework automatically retrieves PID calibrations from CCDB using the collision timestamp to access the correct calibration period. @@ -50,77 +50,77 @@ The task expects the following input tables from AO2D files. Some tables may be ### Kinematic Variables (11 features) -| Variable | Type | Range | Unit | Description | -|----------|------|-------|------|-------------| -| `event_id` | int | - | - | Unique collision event identifier | -| `track_id` | int | - | - | Track index within event | -| `px`, `py`, `pz` | float | - | GeV/c | Cartesian momentum components | -| `pt` | float | 0.1-20 | GeV/c | Transverse momentum | -| `p` | float | - | GeV/c | Total momentum | -| `eta` | float | -1.5 to 1.5 | - | Pseudorapidity | -| `phi` | float | -π to π | rad | Azimuthal angle | -| `theta` | float | 0 to π | rad | Polar angle | -| `charge` | int | ±1 | - | Track charge | -| `track_type` | int | 0-2 | - | Track classification | +| Variable | Type | Range | Unit | Description | +|------------------|-------|-------------|-------|-----------------------------------| +| `event_id` | int | - | - | Unique collision event identifier | +| `track_id` | int | - | - | Track index within event | +| `px`, `py`, `pz` | float | - | GeV/c | Cartesian momentum components | +| `pt` | float | 0.1-20 | GeV/c | Transverse momentum | +| `p` | float | - | GeV/c | Total momentum | +| `eta` | float | -1.5 to 1.5 | - | 
Pseudorapidity | +| `phi` | float | -π to π | rad | Azimuthal angle | +| `theta` | float | 0 to π | rad | Polar angle | +| `charge` | int | ±1 | - | Track charge | +| `track_type` | int | 0-2 | - | Track classification | ### TPC Detector Features (7 features) -| Variable | Type | Range | Unit | Description | Source | -|----------|------|-------|------|-------------|--------| -| `tpc_signal` | float | 0-300 | - | Specific ionization (dE/dx) | AO2D | -| `tpc_nsigma_pi` | float | - | σ | n-sigma deviation from pion | AO2D/CCDB | -| `tpc_nsigma_ka` | float | - | σ | n-sigma deviation from kaon | AO2D/CCDB | -| `tpc_nsigma_pr` | float | - | σ | n-sigma deviation from proton | AO2D/CCDB | -| `tpc_nsigma_el` | float | - | σ | n-sigma deviation from electron | AO2D/CCDB | -| `tpc_nclusters` | int | 0-160 | - | Number of TPC clusters | AO2D | -| `tpc_chi2` | float | - | - | TPC track fit chi-square/ndf | AO2D | +| Variable | Type | Range | Unit | Description | Source | +|-----------------|-------|-------|------|---------------------------------|-----------| +| `tpc_signal` | float | 0-300 | - | Specific ionization (dE/dx) | AO2D | +| `tpc_nsigma_pi` | float | - | σ | n-sigma deviation from pion | AO2D/CCDB | +| `tpc_nsigma_ka` | float | - | σ | n-sigma deviation from kaon | AO2D/CCDB | +| `tpc_nsigma_pr` | float | - | σ | n-sigma deviation from proton | AO2D/CCDB | +| `tpc_nsigma_el` | float | - | σ | n-sigma deviation from electron | AO2D/CCDB | +| `tpc_nclusters` | int | 0-160 | - | Number of TPC clusters | AO2D | +| `tpc_chi2` | float | - | - | TPC track fit chi-square/ndf | AO2D | **TPC Features Source:** n-sigma values are computed from `tpc_signal` and PID calibrations (from AO2D or CCDB). If not in AO2D, calibration data is fetched from CCDB using the collision timestamp. 
### TOF Detector Features (6 features) -| Variable | Type | Range | Unit | Description | Source | -|----------|------|-------|------|-------------|--------| -| `tof_beta` | float | 0-1.2 | - | Velocity over speed of light | AO2D/CCDB | -| `tof_mass` | float | -0.2-2.0 | GeV/c² | Reconstructed mass | AO2D/CCDB | -| `tof_nsigma_pi` | float | - | σ | n-sigma deviation from pion | AO2D/CCDB | -| `tof_nsigma_ka` | float | - | σ | n-sigma deviation from kaon | AO2D/CCDB | -| `tof_nsigma_pr` | float | - | σ | n-sigma deviation from proton | AO2D/CCDB | -| `tof_nsigma_el` | float | - | σ | n-sigma deviation from electron | AO2D/CCDB | +| Variable | Type | Range | Unit | Description | Source | +|-----------------|-------|----------|--------|---------------------------------|-----------| +| `tof_beta` | float | 0-1.2 | - | Velocity over speed of light | AO2D/CCDB | +| `tof_mass` | float | -0.2-2.0 | GeV/c² | Reconstructed mass | AO2D/CCDB | +| `tof_nsigma_pi` | float | - | σ | n-sigma deviation from pion | AO2D/CCDB | +| `tof_nsigma_ka` | float | - | σ | n-sigma deviation from kaon | AO2D/CCDB | +| `tof_nsigma_pr` | float | - | σ | n-sigma deviation from proton | AO2D/CCDB | +| `tof_nsigma_el` | float | - | σ | n-sigma deviation from electron | AO2D/CCDB | **TOF Features Source:** If not available in AO2D file, the framework fetches calibration and response parameters from CCDB. Beta and mass can be recomputed from raw TOF information and length measurement using CCDB calibrations. 
### Bayesian PID Features (4 features) -| Variable | Type | Range | Unit | Description | -|----------|------|-------|------|-------------| -| `bayes_prob_pi` | float | 0-1 | - | Probability of being pion | -| `bayes_prob_ka` | float | 0-1 | - | Probability of being kaon | -| `bayes_prob_pr` | float | 0-1 | - | Probability of being proton | -| `bayes_prob_el` | float | 0-1 | - | Probability of being electron | +| Variable | Type | Range | Unit | Description | +|-----------------|-------|-------|------|-------------------------------| +| `bayes_prob_pi` | float | 0-1 | - | Probability of being pion | +| `bayes_prob_ka` | float | 0-1 | - | Probability of being kaon | +| `bayes_prob_pr` | float | 0-1 | - | Probability of being proton | +| `bayes_prob_el` | float | 0-1 | - | Probability of being electron | **Note**: Bayesian probabilities sum to 1.0 and are computed using Gaussian likelihoods in n-sigma space (from either AO2D or CCDB-derived values) with configurable priors. ### Track Quality Features (2 features) -| Variable | Type | Unit | Description | -|----------|------|------|-------------| -| `dca_xy` | float | cm | Distance of closest approach in xy-plane | -| `dca_z` | float | cm | Distance of closest approach along beam | +| Variable | Type | Unit | Description | +|----------|-------|------|------------------------------------------| +| `dca_xy` | float | cm | Distance of closest approach in xy-plane | +| `dca_z` | float | cm | Distance of closest approach along beam | ### Detector Availability Flags (2 features) -| Variable | Type | Description | -|----------|------|-------------| +| Variable | Type | Description | +|-----------|------|---------------------------------| | `has_tpc` | bool | Track has valid TPC information | | `has_tof` | bool | Track has valid TOF information | ### Monte Carlo Truth (4 features, simulated data only) -| Variable | Type | Description | -|----------|------|-------------| -| `mc_pdg` | int | PDG code of true particle | -| 
`mc_px`, `mc_py`, `mc_pz` | float | True momentum components | +| Variable | Type | Description | +|---------------------------|-------|---------------------------| +| `mc_pdg` | int | PDG code of true particle | +| `mc_px`, `mc_py`, `mc_pz` | float | True momentum components | **Total: 39 features per track** @@ -178,16 +178,16 @@ All task parameters are configured through the **`myConfigExtractor.json`** file #### Configuration Parameters -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `output_path` | string | `pid_features` | Base path for output files (without extension) | -| `export_csv` | boolean | `true` | Enable CSV export of features | -| `export_root` | boolean | `true` | Enable ROOT file export of features | -| `eta_min` | float | `-1.5` | Minimum pseudorapidity cut for track selection | -| `eta_max` | float | `1.5` | Maximum pseudorapidity cut for track selection | -| `pt_min` | float | `0.1` | Minimum transverse momentum cut (GeV/c) | -| `pt_max` | float | `20.0` | Maximum transverse momentum cut (GeV/c) | -| `ccdb_url` | string | `http://alice-ccdb.cern.ch` | CCDB server URL for fetching PID calibrations | +| Parameter | Type | Default | Description | +|---------------|---------|-----------------------------|------------------------------------------------| +| `output_path` | string | `pid_features` | Base path for output files (without extension) | +| `export_csv` | boolean | `true` | Enable CSV export of features | +| `export_root` | boolean | `true` | Enable ROOT file export of features | +| `eta_min` | float | `-1.5` | Minimum pseudorapidity cut for track selection | +| `eta_max` | float | `1.5` | Maximum pseudorapidity cut for track selection | +| `pt_min` | float | `0.1` | Minimum transverse momentum cut (GeV/c) | +| `pt_max` | float | `20.0` | Maximum transverse momentum cut (GeV/c) | +| `ccdb_url` | string | `http://alice-ccdb.cern.ch` | CCDB server URL for fetching PID calibrations | #### Example 
Configurations @@ -825,21 +825,21 @@ For issues, questions, or suggestions: ## Quick Reference -### To run the task: +### To run the task ```bash ./run.sh ``` -### To modify parameters: +### To modify parameters 1. Edit `myConfigExtractor.json` 2. Run `./run.sh` -### To see what configuration is active: +### To see what configuration is active ```bash cat myConfigExtractor.json ``` -### To verify output: +### To verify output ```bash ls -lh pid_features.root pid_features.csv ```