diff --git a/UTILS/Parsers/READMEtreeFastCache.md b/UTILS/Parsers/READMEtreeFastCache.md new file mode 100644 index 000000000..19296ef11 --- /dev/null +++ b/UTILS/Parsers/READMEtreeFastCache.md @@ -0,0 +1,126 @@ +# 📦 ROOT Tree Fast Cache System + +This repository contains ROOT macros for fast lookup and interpolation of values from TTrees, using: + +- `treeFastCache1D.C`: 1D cache with nearest-neighbor lookup +- `treeFastCacheND.C`: ND cache with exact match in N–1 dimensions and nearest-neighbor in 1 dimension + +They are designed for interactive analysis with `TTree::Draw`, e.g., QA plots, calibration overlays, or smoothed time series. + +--- + +## 🔹 `treeFastCache1D.C` + +### ✅ Features + +- Register 1D lookup maps from TTrees +- Nearest-neighbor lookup from `std::map` +- Can register by ID or string name +- Fast evaluation inside `TTree::Draw` +- Alias integration for interactive sessions + +### 🧪 Example + +```cpp +TTree* tree = ...; +int mapID = registerMap1DByName("dcar_vs_time", "time", "dcar_value", tree, "subentry==127"); + +tree->SetAlias("dcar_smooth", ("getNearest1D(time," + std::to_string(mapID) + ")").c_str()); +tree->Draw("dcar_value:dcar_smooth", "indexType==1", "colz", 10000); +``` + +--- + +## 🔸 `treeFastCacheND.C` + +### ✅ Features + +- ND caching with: + - **Exact match** in N–1 dimensions + - **Nearest-neighbor** lookup in 1 dimension (e.g. 
`time`) +- Uses full `double` precision for all keys +- Alias support for `TTree::Draw` +- Registration by name with hashed map ID +- Variadic interface for direct use + +### 🧪 Example: Time Series + +```cpp +TTree* tree = ...; +int mapID = registerMapND("dcar_vs_time", tree, {"subentry"}, "time", "mTSITSTPC.mDCAr_A_NTracks_median", "1"); +setNearestNDAlias(tree, "dcar_smooth", "dcar_vs_time", "time", {"subentry"}); + +tree->Draw("mTSITSTPC.mDCAr_A_NTracks_median:dcar_smooth", "indexType==1", "colz", 10000); +``` + +### 🖊️ Parameters for `registerMapND` +```cpp +int registerMapND( + const std::string& name, // Unique name of the map + TTree* tree, // Source TTree + const std::vector<std::string>& exactDims, // Exact-match coordinate names + const std::string& nearestDim, // Nearest-match dimension (e.g. time) + const std::string& valueVar, // Variable to interpolate + const std::string& selection // TTree selection +); +``` + +### 🖊️ Parameters for `setNearestNDAlias` +```cpp +void setNearestNDAlias( + TTree* tree, // Target tree + const std::string& aliasName, // Alias to create + const std::string& mapName, // Name used in registration + const std::string& nearestCoordExpr, // Nearest-match expression + const std::vector<std::string>& exactCoordExprs // Exact match expressions +); +``` + +### ⚡️ Alternative: Direct expression +```cpp +tree->Draw("val:getNearestND(time,mapID,subentry)", ...); +``` + +--- + +## 📊 Internal Storage + +### 1D: +```cpp +std::map<int, std::map<double, float>> registeredMaps; +std::map<std::string, int> nameToMapID; +``` + +### ND: +```cpp +std::map<int, std::map<std::vector<double>, std::map<double, double>>> ndCaches; +std::map<std::string, int> ndNameToID; +``` + +--- + +## 📌 Best Practices + +- Use aliases to simplify `TTree::Draw` expressions +- Use double precision for stability in nearest search +- Store maps by string name to simplify re-registration +- Prefer `setNearestNDAlias()` over manual `getNearestND(...)` for readability + +--- + +## 📤 Future Ideas + +- Optional interpolation (linear, spline) +- Graceful handling of unmatched keys +- Caching diagnostics and 
summary statistics +- C++ class wrapper for lifecycle + reusability + +--- + +## 📜 License + +Intended for use in internal physics analyses. No warranty implied. + +--- + +For more details, see comments and examples inside `treeFastCache1D.C` and `treeFastCacheND.C`. diff --git a/UTILS/Parsers/treeFastCache.C b/UTILS/Parsers/treeFastCache.C new file mode 100644 index 000000000..624a9f02f --- /dev/null +++ b/UTILS/Parsers/treeFastCache.C @@ -0,0 +1,258 @@ +/* +.L $O2DPG/UTILS/Parsers/treeFastCache.C +*/ + +/* + treeFastCache.C + Simple caching system for fast lookup of 1D values from a TTree, using nearest-neighbor interpolation. + This utility allows registration of (X, Y) pairs from a TTree into a std::map, + indexed by a user-defined mapID or map name. The lookup function `getNearest1D(x, mapID)` + retrieves the Y value for the X closest to the query. + Features: + - Register maps via string name or numeric ID + - Query nearest-neighbor value for any X + - Graceful error handling and range checking + - Base for future ND extension +*/ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +// Map: mapID -> map +std::map> registeredMaps; +std::map nameToMapID; + +/// Hash a string to create a deterministic mapID +int hashMapName(const std::string& name) { + std::hash hasher; + return static_cast(hasher(name)); +} + +/// Register a 1D lookup map from TTree (X -> Y) +/// @param valX Name of the X-axis variable (lookup key) +/// @param valY Name of the Y-axis variable (value to retrieve) +/// @param tree Pointer to TTree to extract data from +/// @param selection Selection string (TTree::Draw-compatible) +/// @param mapID Integer ID to associate with this map +void registerMap1D(const std::string& valX, const std::string& valY, TTree* tree, const std::string& selection, int mapID) { + if (!tree) { + std::cerr << "[registerMap1D] Null TTree pointer." 
<< std::endl; + return; + } + + int entries = tree->Draw((valY + ":" + valX).c_str(), selection.c_str(), "goff"); + if (entries <= 0) { + std::cerr << "[registerMap1D] No entries matched for mapID=" << mapID << std::endl; + return; + } + + if (!tree->GetV1() || !tree->GetV2()) { + std::cerr << "[registerMap1D] Internal Draw buffer pointers are null." << std::endl; + return; + } + + std::map newMap; + for (int i = 0; i < entries; ++i) { + if (i >= tree->GetSelectedRows()) { + std::cerr << "[registerMap1D] Index out of range at i=" << i << std::endl; + break; + } + double x = tree->GetV2()[i]; // valX + float y = tree->GetV1()[i]; // valY + newMap[x] = y; + } + + registeredMaps[mapID] = std::move(newMap); + std::cout << "[registerMap1D] Registered map " << mapID << " with " << entries << " entries." << std::endl; +} + +/// Register by name; returns mapID computed from name +int registerMap1DByName(const std::string& mapName, const std::string& valX, const std::string& valY, TTree* tree, const std::string& selection) { + int mapID = hashMapName(mapName); + nameToMapID[mapName] = mapID; + registerMap1D(valX, valY, tree, selection, mapID); + return mapID; +} + +/// Get the nearest Y for a given X from the map registered with mapID +/// @param x Query value along X axis +/// @param mapID Map identifier used in registration +/// @return Y value corresponding to nearest X in the map +float getNearest1D(float x, int mapID) { + const auto itMap = registeredMaps.find(mapID); + if (itMap == registeredMaps.end()) { + std::cerr << "[getNearest1D] Map ID " << mapID << " not found." << std::endl; + return NAN; + } + + const auto& map = itMap->second; + if (map.empty()) { + std::cerr << "[getNearest1D] Map ID " << mapID << " is empty." 
<< std::endl; + return NAN; + } + + auto it = map.lower_bound(x); + if (it == map.begin()) return it->second; + if (it == map.end()) return std::prev(it)->second; + + auto prev = std::prev(it); + return (std::abs(prev->first - x) < std::abs(it->first - x)) ? prev->second : it->second; +} + +/// Convenience version: lookup by name +float getNearest1DByName(float x, const std::string& mapName) { + auto it = nameToMapID.find(mapName); + if (it == nameToMapID.end()) { + std::cerr << "[getNearest1DByName] Map name \"" << mapName << "\" not found." << std::endl; + return NAN; + } + return getNearest1D(x, it->second); +} + +/// Example usage +void example1D() { + TFile *f = TFile::Open("timeSeries10000_apass5.root"); + TTree * tree0=(TTree*)f->Get("timeSeries"); + // Fill tree here or load from file + int mapID = registerMap1DByName("dcar_vs_time", "time", "mTSITSTPC.mDCAr_A_NTracks_median", tree0, "subentry==127"); + tree0->SetAlias("mDCAr_A_NTracks_median_All" ,("getNearest1D(time, " + std::to_string(mapID) + ")").data()); + tree0->Draw("mTSITSTPC.mDCAr_A_NTracks_median:mDCAr_A_NTracks_median_All","indexType==1","",10000); +} +/* ------------------------------------------------------------------ + Statistics extension (non‑breaking) ------------------------------- + Added without changing previous API. 
+ + New options: + • Enum‑based interface for better ROOT compatibility + enum StatKind { kMean=0, kMedian=1, kStd=2 }; + float getStat(double x,int mapID,StatKind kind,double dx); + + • Convenience thin wrappers for ROOT aliases + getMean1D , getMedian1D , getStd1D + + • cacheStat unchanged (uses strings internally) + + ------------------------------------------------------------------*/ + +#include +#include +#include + +// --- enum for faster numeric calls -------------------------------- +enum StatKind { kMean=0, kMedian=1, kStd=2 }; + +// Cache: stat → mapID → dx → (x → value) +static std::map>> cacheMean; +static std::map>> cacheMedian; +static std::map>> cacheStd; + +static float _mean(const std::vector& v){ return v.empty()?NAN:std::accumulate(v.begin(),v.end(),0.0f)/v.size(); } +static float _median(std::vector v){ if(v.empty()) return NAN; size_t n=v.size()/2; std::nth_element(v.begin(),v.begin()+n,v.end()); return v[n]; } +static float _std(const std::vector& v){ if(v.size()<2) return NAN; float m=_mean(v); double s2=0; for(float e:v){ double d=e-m; s2+=d*d;} return std::sqrt(s2/(v.size()-1)); } + +//-------------------------------------------------------------------- +static float _computeStat(double x,int mapID,double dx,StatKind k){ + const auto itM=registeredMaps.find(mapID); + if(itM==registeredMaps.end()||itM->second.empty()) return NAN; + const auto &mp=itM->second; + std::vector buf; + for(auto it=mp.lower_bound(x-dx); it!=mp.end()&&it->first<=x+dx; ++it) buf.push_back(it->second); + if(buf.empty()) return NAN; + switch(k){ + case kMean: return _mean(buf); + case kMedian: return _median(buf); + case kStd: return _std(buf); + } + return NAN; +} + +//-------------------------------------------------------------------- +/** + * @brief Return a local statistic (mean / median / std) around a query point. + * + * This version is preferred inside **TTree::Draw** because it uses an enum + * instead of a string literal. 
+ * + * @param x Center of the window (same coordinate used in the cache) + * @param mapID ID passed to registerMap1D or returned by registerMap1DByName + * @param kind kMean (0), kMedian (1) or kStd (2) + * @param dx Half‑window size: the statistic is computed from all points + * with X in [x − dx, x + dx] + * + * Internally the first request builds (and caches) a map x → stat(x) + * for the given (mapID, dx, kind). Subsequent calls are O(log N). + */ +// Fast numeric interface (enum) ------------------------------------ +float getStat(double x,int mapID,StatKind kind,double dx){ + auto *pcache = (kind==kMean? &cacheMean : (kind==kMedian? &cacheMedian : &cacheStd)); + auto &byMap = (*pcache)[mapID]; + auto &byDx = byMap[dx]; + if(byDx.empty()){ + // build lazily for this dx + const auto itM=registeredMaps.find(mapID); + if(itM==registeredMaps.end()) return NAN; + for(const auto &kv: itM->second){ double cx=kv.first; byDx[cx]=_computeStat(cx,mapID,dx,kind);} } + const auto &statMap = byDx; + auto it=statMap.lower_bound(x); + if(it==statMap.begin()) return it->second; + if(it==statMap.end()) return std::prev(it)->second; + auto prev=std::prev(it); + return (fabs(prev->first-x)first-x)?prev->second:it->second); +} + +// String interface kept for backward compat. 
+float getStat(double x,int mapID,const char* st,double dx){ + std::string s(st); + if(s=="mean") return getStat(x,mapID,kMean ,dx); + if(s=="median") return getStat(x,mapID,kMedian,dx); + if(s=="std"||s=="sigma") return getStat(x,mapID,kStd,dx); + std::cerr<<"[getStat] Unknown statType="<(kind),dx); +} + +//-------------------------------------------------------------------- +// Pre‑cache requested stats (by name) ------------------------------- +bool cacheStat(int mapID,const std::vector& stats,double dx){ + for(const std::string &s:stats){ + if(s=="mean") getStat(0,mapID,kMean ,dx); // lazy build + else if(s=="median") getStat(0,mapID,kMedian,dx); + else if(s=="std"||s=="sigma") getStat(0,mapID,kStd,dx); + } + return true; +} + +//-------------------------------------------------------------------- +/// Example: statistics with enum wrappers +void exampleStat1D(){ + TFile *f=TFile::Open("timeSeries10000_apass5.root"); + TTree *t=(TTree*)f->Get("timeSeries"); + int id = registerMap1DByName("dcar_time_stat","time","mTSITSTPC.mDCAr_A_NTracks_median",t,"subentry==127"); + + // Pre‑cache mean & std for ±200 window + cacheStat(id,{"mean","std"},200); + + // Use integer selector (0 = mean, 2 = std). This avoids any ROOT + // overload ambiguity and works in TTree::Draw directly. 
+ t->SetAlias("dcar_mean", Form("getStat(time,%d,0,200)", id)); // 0 → kMean + t->SetAlias("dcar_sigma", Form("getStat(time,%d,2,200)", id)); // 2 → kStd + + t->Draw("mTSITSTPC.mDCAr_A_NTracks_median:dcar_mean","indexType==1","colz",10000); + t->Draw("getStat(time,591487517, 0 ,10000+0):getStat(time,591487517, 1 ,10000+0)","indexType==1","colz",100000); +} \ No newline at end of file diff --git a/UTILS/Parsers/treeFastCacheND.C b/UTILS/Parsers/treeFastCacheND.C new file mode 100644 index 000000000..541022746 --- /dev/null +++ b/UTILS/Parsers/treeFastCacheND.C @@ -0,0 +1,221 @@ +/* +.L $O2DPG/UTILS/Parsers/treeFastCacheND.C +*/ +/* + treeFastCacheND.C + + Multi-dimensional cache system for ROOT TTree lookup with mixed matching modes: + - Exact match in N-1 dimensions + - Nearest-neighbor in one dimension (typically time) + + This system complements treeFastCache1D by enabling efficient access to structured ND data. + + Features: + - Caches values based on N-dimensional keys using a combination of exact and nearest lookups + - Provides ROOT-friendly interface usable within TTree::Draw + - Uses full double precision for both exact and nearest match coordinates + - Works interactively with aliases and supports variadic access + + Author: ChatGPT for Marian +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +/// Generic ND key support +typedef std::map NearestMap; ///< 1D interpolation map (e.g., time → value) +typedef std::vector ExactKey; ///< Exact match dimensions (e.g., subentry, CRU, etc.) 
+ +/// Container of ND maps: outer map = mapID → {ExactKey → NearestMap} +std::map> ndCaches; +std::map ndNameToID; ///< Map from user-defined name to hash-based mapID + +/// Deterministically hash a name to a map ID +int hashMapNameND(const std::string& name) { + std::hash hasher; + return static_cast(hasher(name)); +} + +/// Register an ND lookup map from a TTree +/** + * @param name Unique name to identify the map + * @param tree Source TTree + * @param exactDims List of column names for exact-match dimensions + * @param nearestDim Name of nearest-search dimension (e.g., "time") + * @param valueVar Name of value column + * @param selection TTree::Draw-compatible selection expression + * @return mapID (generated from name) + */ +int registerMapND(const std::string& name, + TTree* tree, + const std::vector& exactDims, + const std::string& nearestDim, + const std::string& valueVar, + const std::string& selection) { + if (!tree) throw std::invalid_argument("[registerMapND] Null TTree."); + int mapID = hashMapNameND(name); + ndNameToID[name] = mapID; + + std::string expr = valueVar + ":" + nearestDim; + for (const auto& dim : exactDims) expr += ":" + dim; + int entries = tree->Draw(expr.c_str(), selection.c_str(), "goff"); + if (entries>=tree->GetEstimate()){ + tree->SetEstimate(entries*2); + entries = tree->Draw(expr.c_str(), selection.c_str(), "goff"); + } + if (entries <= 0) { + std::cerr << "[registerMapND] No entries selected." 
<< std::endl; + return mapID; + } + + int dimCount = 2 + exactDims.size(); + std::vector buffers(dimCount); + for (int i = 0; i < dimCount; ++i) { + buffers[i] = tree->GetVal(i); + if (!buffers[i]) throw std::runtime_error("[registerMapND] Missing Draw buffer at " + std::to_string(i)); + } + + std::map newMap; + for (int i = 0; i < entries; ++i) { + double val = buffers[0][i]; + double near = buffers[1][i]; + ExactKey key; + for (size_t j = 0; j < exactDims.size(); ++j) key.push_back(buffers[2 + j][i]); + newMap[key][near] = val; + } + ndCaches[mapID] = std::move(newMap); + std::cout << "[registerMapND] Registered ND map '" << name << "' with ID=" << mapID << " and " << entries << " entries." << std::endl; + return mapID; +} + +/// Query map using exact + nearest key +/** + * @param query Value for nearest-match dimension + * @param mapID ID of registered map + * @param exactKey Vector of exact-match dimensions (must match registration) + * @return Interpolated value or NaN if no match + */ +double getNearestND(double query, int mapID, const ExactKey& exactKey) { + const auto& map = ndCaches[mapID]; + auto itOuter = map.find(exactKey); + if (itOuter == map.end()) return NAN; + + const auto& innerMap = itOuter->second; + if (innerMap.empty()) return NAN; + + auto it = innerMap.lower_bound(query); + if (it == innerMap.begin()) return it->second; + if (it == innerMap.end()) return std::prev(it)->second; + + auto prev = std::prev(it); + return (std::abs(prev->first - query) < std::abs(it->first - query)) ? prev->second : it->second; +} + +/// Variadic interface to getNearestND for use with TTree::Draw +/** + * @tparam Dims Arbitrary number of scalar dimensions (int or float/double) + * @param query Nearest dimension (e.g., time) + * @param mapID Map ID registered via registerMapND + * @param dims... Dimensions to match exactly + */ +template +double getNearestND(double query, int mapID, Dims... 
dims) { + ExactKey exactKey{static_cast(dims)...}; + return getNearestND(query, mapID, exactKey); +} + +/// Lookup using map name +/** + * @param query Nearest-dimension value (e.g., time) + * @param exactKey Exact-dimension vector + * @param mapName Map name from registration + */ +double getNearestNDByName(double query, const ExactKey& exactKey, const std::string& mapName) { + auto itID = ndNameToID.find(mapName); + if (itID == ndNameToID.end()) return NAN; + return getNearestND(query, itID->second, exactKey); +} + +/// Register alias in tree for use in interactive Draw +/** + * @param tree TTree pointer + * @param aliasName Alias to create + * @param mapName Name of registered ND map + * @param nearestCoordExpr Expression for nearest dimension (e.g., "time") + * @param exactCoordExprs Expressions for exact dimensions (e.g., {"subentry"}) + */ +void setNearestNDAlias(TTree* tree, const std::string& aliasName, const std::string& mapName, const std::string& nearestCoordExpr, const std::vector& exactCoordExprs) { + auto it = ndNameToID.find(mapName); + if (it == ndNameToID.end()) { + std::cerr << "[setNearestNDAlias] Map not found: " << mapName << std::endl; + return; + } + int mapID = it->second; + + std::string expr = "getNearestND(" + nearestCoordExpr + "," + std::to_string(mapID); + for (const auto& ex : exactCoordExprs) expr += "," + ex; + expr += ")"; + tree->SetAlias(aliasName.c_str(), expr.c_str()); +} + +/// Example usage for ND map creation - for very high voltage queries for distortion calibration +void exampleND() { + TTree* tree = new TTree("tree", "demo"); + int mapID = registerMapND("test_map", tree, {"CRU", "iTF"}, "time", "val", "subentry==127"); + setNearestNDAlias(tree, "val_interp", "test_map", "time", {"CRU", "iTF"}); + tree->Draw("val:val_interp", "val!=0", "colz"); +} + +/// Example usage for time series ND lookup +void exampleTimeSeries() { + TFile *f = TFile::Open("timeSeries10000_apass5.root"); + TTree *tree = (TTree*)f->Get("timeSeries"); 
+ int mapID = registerMapND("dcar_vs_time", tree, {"subentry"}, "time", "mTSITSTPC.mDCAr_A_NTracks_median", "1"); + setNearestNDAlias(tree, "mDCAr_A_NTracks_median_interp", "dcar_vs_time", "time", {"subentry"}); + tree->Draw("mTSITSTPC.mDCAr_A_NTracks_median:mDCAr_A_NTracks_median_interp", "indexType==1", "", 10000); +} + +/// Example usage for time series ND lookup +void test_exampleTimeSeries() { + TFile *f5 = TFile::Open("timeSeries10000_LHC23zzx_apass5.root"); + TTree *tree5 = (TTree*)f5->Get("timeSeries"); + TFile *f4 = TFile::Open("timeSeries10000_LHC23zz_combo_apass4.root"); + TTree *tree4 = (TTree*)f4->Get("timeSeries"); + int mapID5A = registerMapND("mDCAr_A_Median_median5", tree5, {"subentry"}, "time", "mTSITSTPC.mDCAr_A_Median_median", "1"); + int mapID5C = registerMapND("mDCAr_C_Median_median5", tree5, {"subentry"}, "time", "mTSITSTPC.mDCAr_C_Median_median", "1"); + int mapID4A = registerMapND("mDCAr_A_Median_median4", tree4, {"subentry"}, "time", "mTSITSTPC.mDCAr_A_Median_median", "1"); + int mapID4C = registerMapND("mDCAr_C_Median_median4", tree4, {"subentry"}, "time", "mTSITSTPC.mDCAr_C_Median_median", "1"); + // + setNearestNDAlias(tree5, "mDCAr_A_Median_median_interp5", "mDCAr_A_Median_median5", "time", {"subentry"}); + setNearestNDAlias(tree5, "mDCAr_C_Median_median_interp5", "mDCAr_C_Median_median5", "time", {"subentry"}); + setNearestNDAlias(tree4, "mDCAr_C_Median_median_interp5", "mDCAr_A_Median_median5", "time", {"subentry"}); + // + setNearestNDAlias(tree5, "mDCAr_A_Median_median_interp4", "mDCAr_A_Median_median4", "time", {"subentry"}); + setNearestNDAlias(tree4, "mDCAr_A_Median_median_interp4", "mDCAr_A_Median_median4", "time", {"subentry"}); + + tree5->Draw("mTSITSTPC.mDCAr_A_Median_median:mDCAr_A_Median_median_interp4", "indexType==1", "", 10000); + // make unit test -RMS should be 0 + int val5=tree5->Draw("mTSITSTPC.mDCAr_A_Median_median==mDCAr_A_Median_median_interp5", "indexType==1", ""); + float rms5=tree5->GetHistogram()->GetRMS(); + 
float mean5=tree5->GetHistogram()->GetMean(); + //make unit test like output rms5==0, mean5==1 + int va4l=tree4->Draw("mTSITSTPC.mDCAr_A_Median_median==mDCAr_A_Median_median_interp4", "indexType==1", ""); + float rms4=tree4->GetHistogram()->GetRMS(); + float mean4=tree4->GetHistogram()->GetMean(); + //make unit test like output rms5==0, mean5==1 + if ( std::abs(rms4) < 1e-5 && std::abs(mean4 - 1.0) < 1e-5) { + std::cout << "[UnitTest] OK - Interpolation match for apass4 is exact." << std::endl; + } else { + std::cerr << "[UnitTest] ERROR - Interpolation mismatch for apass4. RMS=" << rms4 << ", Mean=" << mean4 << std::endl; + } +} + diff --git a/UTILS/Parsers/workflowToJSON.sh b/UTILS/Parsers/workflowToJSON.sh index 1b0f530cb..1b6163a58 100644 --- a/UTILS/Parsers/workflowToJSON.sh +++ b/UTILS/Parsers/workflowToJSON.sh @@ -1,4 +1,5 @@ # Source path for the script +# source $O2DPG/UTILS/Parsers/workflowToJSON.sh # source $NOTES/JIRA/ATO-648/workflowToJSON.sh # Description: @@ -110,6 +111,73 @@ jq -Rn ' ]' "$log_file" } + +# makeDiffWorkflow alien:///alice/data/2023/LHC23zzk/544515/apass5/1140/o2_ctf_run00544515_orbit0221337280_tf0000047516_epn242/workflowconfig.log alien:///alice/data/2023/LHC23zzk/544515/apass4/1140/o2_ctf_run00544515_orbit0221337280_tf0000047516_epn242/workflowconfig.log 1 gpu +makeDiffWorkflow() { + # Make diff of workflowConfig.log JSONs. + # Usage: + # makeDiffWorkflow + # file0: path or alien:// to first workflowconfig.log + # file1: path or alien:// to second workflowconfig.log + # diffType: 0 = unified diff, 1 = side-by-side (default: 1) + # filter: string to match command, e.g. gpu (default: gpu) + # Notes: + # Creates workflow0.json and workflow1.json from parsed input. + # Uses makeParse and jq for filtering and diffing. + # Supports Alien paths via alien.py cat. + + if [[ -z "$1" || -z "$2" ]]; then + cat <<'HELP_USAGE' | helpCat0 bash +makeDiffWorkflow: Compare two O2 workflowconfig logs (local or Alien). 
+Usage: + makeDiffWorkflow +Parameters: + file0 – path to first workflowconfig.log or alien:// path + file1 – path to second workflowconfig.log or alien:// path + diffType – (optional) 0 = unified diff, 1 = side-by-side diff (default: 1) + filter – (optional) command string filter, e.g. "gpu", "hlt" (default: gpu) +Example: + makeDiffWorkflow alien:///path/to/file0.log ./file1.log 1 gpu + makeDiffWorkflow alien:///alice/data/2023/LHC23zzk/544515/apass5/1140/o2_ctf_run00544515_orbit0221337280_tf0000047516_epn242/workflowconfig.log alien:///alice/data/2023/LHC23zzk/544515/apass4/1140/o2_ctf_run00544515_orbit0221337280_tf0000047516_epn242/workflowconfig.log 1 gpu + +HELP_USAGE + return + fi + file0="$1" + file1="$2" + diffType="${3:-1}" + filter="${4:-o2-gpu}" + # Download from alien if needed + if [[ "$file0" == alien://* ]]; then + echo "Fetching $file0 from Alien..." + alien.py cat "$file0" > "${TMPDIR:-/tmp}/workflow0.log" + file0="${TMPDIR:-/tmp}/workflow0.log" + fi + if [[ "$file1" == alien://* ]]; then + echo "Fetching $file1 from Alien..." + alien.py cat "$file1" > "${TMPDIR:-/tmp}/workflow1.log" + file1="${TMPDIR:-/tmp}/workflow1.log" + fi + + makeParse "$file0" > workflow0.json + makeParse "$file1" > workflow1.json + + # Apply filter to both JSON files + jq ".[] | select(.command | test(\"^${filter}\"))" workflow0.json | jq --sort-keys . > workflow0.filtered.json + jq ".[] | select(.command | test(\"^${filter}\"))" workflow1.json | jq --sort-keys . > workflow1.filtered.json + + echo "Comparing workflow commands filtered by '^o2-${filter}'..." + + if [[ "$diffType" -eq 1 ]]; then + diff --side-by-side --left-column --color=always workflow0.filtered.json workflow1.filtered.json | less -R + else + diff --color=always workflow0.filtered.json workflow1.filtered.json | less -R + fi +} + + + + makeDiffExample(){ cat <