diff --git a/Framework/basic.json b/Framework/basic.json
index d61ac6285d..1d8ff36da3 100644
--- a/Framework/basic.json
+++ b/Framework/basic.json
@@ -84,7 +84,7 @@
 }],
 "extendedCheckParameters": {
 "physics": {
- "PROTON-PROTON": {
+ "pp": {
 "myOwnKey1": "myOwnValue1c"
 }
 }
diff --git a/Framework/include/QualityControl/runnerUtils.h b/Framework/include/QualityControl/runnerUtils.h
index 2e864f4d1f..b8cbbe853c 100644
--- a/Framework/include/QualityControl/runnerUtils.h
+++ b/Framework/include/QualityControl/runnerUtils.h
@@ -42,8 +42,8 @@ std::string getFirstCheckName(const std::string& configurationSource);
bool hasChecks(const std::string& configSource);
template <typename T>
-requires std::is_arithmetic_v<T>
- T computeNumericalActivityField(framework::ServiceRegistryRef services, const std::string& name, T fallbackNumber = 0)
+ requires std::is_arithmetic_v<T>
+T computeNumericalActivityField(framework::ServiceRegistryRef services, const std::string& name, T fallbackNumber = 0)
{
T result = 0;
@@ -86,8 +86,6 @@ uint64_t getCurrentTimestamp();
void initInfologger(framework::InitContext& iCtx, core::LogDiscardParameters infologgerDiscardParameters, std::string facility, std::string detectorName = "");
-std::string translateBeamType(const std::string& pdpBeamType);
-
} // namespace o2::quality_control::core
#endif // QUALITYCONTROL_RUNNERUTILS_H
diff --git a/Framework/script/updatePdpBeam.sh b/Framework/script/updatePdpBeam.sh
new file mode 100755
index 0000000000..f4806e0ca7
--- /dev/null
+++ b/Framework/script/updatePdpBeam.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Default values
+PREFIX="o2/"
+TMP_DIR="./consul_kv_backups"
+mkdir -p "$TMP_DIR"
+UPDATE=false
+
+# --- Parse arguments ---
+if [[ "$#" -eq 0 ]]; then
+  echo "Use -h or --help for usage"
+  exit 0
+fi
+
+while [[ "$#" -gt 0 ]]; do
+  case "$1" in
+  -u | --update)
+    UPDATE=true
+    ;;
+  -p | --prefix)
+    PREFIX="$2"
+    shift
+    ;;
+  -h | --help)
+    echo "Script to change any occurrence of \"PROTON-PROTON\" -> \"pp\", \"Pb-PROTON\" -> \"pPb\", \"Pb-Pb\" -> \"PbPb\""
+    echo "Usage: $0 [-p|--prefix <prefix>] [-u|--update]"
+    echo
+    echo "  -p, --prefix <prefix>  Prefix to search in Consul KV (default: o2/)"
+    echo "  -u, --update           Apply changes back to Consul"
+    exit 0
+    ;;
+  *)
+    echo "❌ Unknown option: $1"
+    echo "Use -h or --help for usage"
+    exit 1
+    ;;
+  esac
+  shift
+done
+
+echo "🔍 Using Consul prefix: $PREFIX"
+[[ "$UPDATE" == true ]] && echo "🚀 Update mode: ON (values will be written back)" || echo "🔒 Dry-run mode: changes only printed/saved"
+
+# --- Define replacement logic ---
+replace() {
+  sed -e 's/\"PROTON-PROTON\"/"pp"/g' \
+    -e 's/\"Pb-PROTON\"/"pPb"/g' \
+    -e 's/\"Pb-Pb\"/"PbPb"/g'
+}
+
+# --- Fetch all keys ---
+KEYS=$(consul kv get -recurse -keys "$PREFIX")
+
+# --- Process each key ---
+while IFS= read -r key; do
+  VALUE=$(consul kv get "$key")
+  MODIFIED=$(echo "$VALUE" | replace)
+
+  if [[ "$VALUE" != "$MODIFIED" ]]; then
+    SAFE_NAME=$(echo "$key" | sed 's|/|__|g')
+
+    ORIG_FILE="$TMP_DIR/$SAFE_NAME.orig"
+    NEW_FILE="$TMP_DIR/$SAFE_NAME.new"
+    DIFF_FILE="$TMP_DIR/$SAFE_NAME.diff"
+
+    echo "$VALUE" >"$ORIG_FILE"
+    echo "$MODIFIED" >"$NEW_FILE"
+    diff -u "$ORIG_FILE" "$NEW_FILE" >"$DIFF_FILE"
+
+    echo "✅ Changed key: $key"
+    echo "   📄 $ORIG_FILE"
+    echo "   🆕 $NEW_FILE"
+    echo "   📑 $DIFF_FILE"
+
+    if [[ "$UPDATE" == true ]]; then
+      echo "$MODIFIED" | consul kv put "$key" -
+      echo "   🔁 Updated in Consul: $key"
+    fi
+
+    echo "---------------------------------------"
+  fi
+done <<<"$KEYS"
diff --git a/Framework/src/runnerUtils.cxx b/Framework/src/runnerUtils.cxx
index
7408ddb63b..cce2f36f6f 100644 --- a/Framework/src/runnerUtils.cxx +++ b/Framework/src/runnerUtils.cxx @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -28,6 +29,8 @@ #include #include +#include +#include #include namespace o2::quality_control::core @@ -91,25 +94,6 @@ std::string computeStringActivityField(framework::ServiceRegistryRef services, c return property; } -std::string translateBeamType(const std::string& pdpBeamType) -{ - // convert the beam type received from pdp into the format we use in flp/ecs - std::string result = ""; - if (pdpBeamType == "pp") { - result = "PROTON-PROTON"; - } else if (pdpBeamType == "PbPb") { - result = "Pb-Pb"; - } else if (pdpBeamType == "pPb") { - result = "Pb-PROTON"; - } else if (pdpBeamType == "cosmic") { - result = "cosmic"; - } else { - ILOG(Warning, Ops) << "Failed to convert the pdp beam type ('" << pdpBeamType << "'), returning an empty string" << ENDM; - } - ILOG(Debug, Devel) << "Translated pdp beam type '" << pdpBeamType << "' to '" << result << "'" << ENDM; - return result; -} - Activity computeActivity(framework::ServiceRegistryRef services, const Activity& fallbackActivity) { // for a complete list of the properties provided by ECS, see here: https://github.com/AliceO2Group/Control/blob/master/docs/handbook/configuration.md#variables-pushed-to-controlled-tasks @@ -122,7 +106,6 @@ Activity computeActivity(framework::ServiceRegistryRef services, const Activity& auto periodName = computeStringActivityField(services, "lhc_period", fallbackActivity.mPeriodName); auto fillNumber = computeNumericalActivityField(services, "fill_info_fill_number", fallbackActivity.mFillNumber); auto beam_type = computeStringActivityField(services, "pdp_beam_type", fallbackActivity.mBeamType); - beam_type = translateBeamType(beam_type); Activity activity( runNumber, diff --git a/Framework/test/testCustomParameters.cxx b/Framework/test/testCustomParameters.cxx index b9ea316b9e..70678488c2 100644 --- a/Framework/test/testCustomParameters.cxx +++ b/Framework/test/testCustomParameters.cxx @@ -138,23 +138,23 @@ TEST_CASE("test_at_optional") TEST_CASE("test_at_optional_activity") { Activity activity; - activity.mBeamType = "PROTON-PROTON"; + activity.mBeamType = "pp"; activity.mType = "PHYSICS"; CustomParameters cp; cp.set("aaa", "AAA"); cp.set("bbb", "BBB"); cp.set("aaa", "asdf", "PHYSICS"); - cp.set("aaa", "CCC", "PHYSICS", "PROTON-PROTON"); - cp.set("aaa", "DDD", "PHYSICS", "Pb-Pb"); - cp.set("aaa", "AAA", "TECHNICAL", "PROTON-PROTON"); + cp.set("aaa", "CCC", "PHYSICS", "pp"); + cp.set("aaa", "DDD", "PHYSICS", "PbPb"); + cp.set("aaa", "AAA", "TECHNICAL", "pp"); CHECK(cp.atOptional("aaa", activity).value() == "CCC"); CHECK(cp.atOptional("abc", activity).has_value() == false); CHECK(cp.atOptional("abc", activity).value_or("bla") == "bla"); Activity activity2; - activity.mBeamType = "Pb-Pb"; + activity.mBeamType = "PbPb"; activity.mType = "PHYSICS"; CHECK(cp.atOptional("aaa", activity).value() == "DDD"); } @@ -214,21 +214,21 @@ TEST_CASE("test_default_if_not_found_at_optional") // prepare the CP cp.set("key", "valueDefaultDefault", "default", "default"); cp.set("key", "valuePhysicsDefault", "PHYSICS", "default"); - cp.set("key", "valuePhysicsPbPb", "PHYSICS", "Pb-Pb"); + cp.set("key", "valuePhysicsPbPb", "PHYSICS", "PbPb"); cp.set("key", "valueCosmicsDefault", "COSMICS", "default"); - cp.set("key", "valueCosmicsDefault", "default", "PROTON-PROTON"); + cp.set("key", "valueCosmicsDefault", "default", "pp"); // check the data 
CHECK(cp.atOptional("key").value() == "valueDefaultDefault"); CHECK(cp.atOptional("key", "PHYSICS").value() == "valuePhysicsDefault"); - CHECK(cp.atOptional("key", "PHYSICS", "Pb-Pb").value() == "valuePhysicsPbPb"); + CHECK(cp.atOptional("key", "PHYSICS", "PbPb").value() == "valuePhysicsPbPb"); CHECK(cp.atOptional("key", "COSMICS", "default").value() == "valueCosmicsDefault"); - CHECK(cp.atOptional("key", "default", "PROTON-PROTON").value() == "valueCosmicsDefault"); + CHECK(cp.atOptional("key", "default", "pp").value() == "valueCosmicsDefault"); // check when something is missing - CHECK(cp.atOptional("key", "PHYSICS", "PROTON-PROTON").value() == "valuePhysicsDefault"); // key is not defined for pp - CHECK(cp.atOptional("key", "TECHNICAL", "STRANGE").value() == "valueDefaultDefault"); // key is not defined for run nor beam - CHECK(cp.atOptional("key", "TECHNICAL", "PROTON-PROTON").value() == "valueCosmicsDefault"); // key is not defined for technical + CHECK(cp.atOptional("key", "PHYSICS", "pp").value() == "valuePhysicsDefault"); // key is not defined for pp + CHECK(cp.atOptional("key", "TECHNICAL", "STRANGE").value() == "valueDefaultDefault"); // key is not defined for run nor beam + CHECK(cp.atOptional("key", "TECHNICAL", "pp").value() == "valueCosmicsDefault"); // key is not defined for technical } TEST_CASE("test_default_if_not_found_at") @@ -242,21 +242,21 @@ TEST_CASE("test_default_if_not_found_at") // prepare the CP cp.set("key", "valueDefaultDefault", "default", "default"); cp.set("key", "valuePhysicsDefault", "PHYSICS", "default"); - cp.set("key", "valuePhysicsPbPb", "PHYSICS", "Pb-Pb"); + cp.set("key", "valuePhysicsPbPb", "PHYSICS", "PbPb"); cp.set("key", "valueCosmicsDefault", "COSMICS", "default"); - cp.set("key", "valueCosmicsDefault", "default", "PROTON-PROTON"); + cp.set("key", "valueCosmicsDefault", "default", "pp"); // check the data CHECK(cp.at("key") == "valueDefaultDefault"); CHECK(cp.at("key", "PHYSICS") == "valuePhysicsDefault"); - CHECK(cp.at("key", "PHYSICS", "Pb-Pb") == "valuePhysicsPbPb"); + CHECK(cp.at("key", "PHYSICS", "PbPb") == "valuePhysicsPbPb"); CHECK(cp.at("key", "COSMICS", "default") == "valueCosmicsDefault"); - CHECK(cp.at("key", "default", "PROTON-PROTON") == "valueCosmicsDefault"); + CHECK(cp.at("key", "default", "pp") == "valueCosmicsDefault"); // check when something is missing - CHECK(cp.at("key", "PHYSICS", "PROTON-PROTON") == "valuePhysicsDefault"); // key is not defined for pp - CHECK(cp.at("key", "TECHNICAL", "STRANGE") == "valueDefaultDefault"); // key is not defined for run nor beam - CHECK(cp.at("key", "TECHNICAL", "PROTON-PROTON") == "valueCosmicsDefault"); // key is not defined for technical + CHECK(cp.at("key", "PHYSICS", "pp") == "valuePhysicsDefault"); // key is not defined for pp + CHECK(cp.at("key", "TECHNICAL", "STRANGE") == "valueDefaultDefault"); // key is not defined for run nor beam + CHECK(cp.at("key", "TECHNICAL", "pp") == "valueCosmicsDefault"); // key is not defined for technical } TEST_CASE("test_getAllDefaults") @@ -264,4 +264,4 @@ TEST_CASE("test_getAllDefaults") CustomParameters cp; auto result = cp.getAllDefaults(); CHECK(result.size() == 0); -} \ No newline at end of file +} diff --git a/Modules/CTP/src/qc-ctp.json b/Modules/CTP/src/qc-ctp.json index fea937226c..61a969abf4 100644 --- a/Modules/CTP/src/qc-ctp.json +++ b/Modules/CTP/src/qc-ctp.json @@ -58,12 +58,12 @@ "MB1inputName" : "MTVX", "MB2inputName" : "MVBA" }, - "PROTON-PROTON": { + "pp": { "MBclassName" : "CMTVX-B-NOPF", "MB1inputName" : "MTVX", "MB2inputName" : 
"MTVA" }, - "Pb-Pb": { + "PbPb": { "MBclassName" : "CMTCE-B-NOPF", "MB1inputName" : "MTSC", "MB2inputName" : "MTCE" diff --git a/Modules/Common/etc/reference-comparator-example.json b/Modules/Common/etc/reference-comparator-example.json index 3f653f379f..64b23cead3 100644 --- a/Modules/Common/etc/reference-comparator-example.json +++ b/Modules/Common/etc/reference-comparator-example.json @@ -34,7 +34,7 @@ } }, "PHYSICS": { - "PROTON-PROTON": { + "pp": { "referenceRun" : "551890" } } diff --git a/Modules/ITS/itsCluster.json b/Modules/ITS/itsCluster.json index 3095d2ae28..5298554303 100644 --- a/Modules/ITS/itsCluster.json +++ b/Modules/ITS/itsCluster.json @@ -11,7 +11,7 @@ "Activity": { "number": "42", "type": "PHYSICS", - "beamType" : "PbPb", "": "Beam type: `PROTON-PROTON`, `Pb-Pb`, `Pb-PROTON` " + "beamType" : "PbPb", "": "Beam type: `pp`, `PbPb`, `pPb` " }, "monitoring": { "url": "infologger:///debug?qc" @@ -91,7 +91,7 @@ "maxcluoccL5": "0.2", "maxcluoccL6": "0.2" }, - "PROTON-PROTON": { + "pp": { "maxcluoccL0": "5", "maxcluoccL1": "3", "maxcluoccL2": "3", @@ -100,7 +100,7 @@ "maxcluoccL5": "0.2", "maxcluoccL6": "0.2" }, - "Pb-Pb": { + "PbPb": { "maxcluoccL0": "65", "maxcluoccL1": "35", "maxcluoccL2": "25", diff --git a/Modules/ITS/itsFee.json b/Modules/ITS/itsFee.json index fc2a8f09be..a07981973b 100644 --- a/Modules/ITS/itsFee.json +++ b/Modules/ITS/itsFee.json @@ -11,7 +11,7 @@ "Activity": { "number": "42", "type": "PHYSICS", - "beamType" : "PbPb", "": "Beam type: `PROTON-PROTON`, `Pb-Pb`, `Pb-PROTON` " + "beamType" : "PbPb", "": "Beam type: `pp`, `PbPb`, `pPb` " }, "monitoring": { "url": "infologger:///debug?qc" @@ -78,10 +78,10 @@ "default": { "expectedROFperOrbit": "18" }, - "PROTON-PROTON": { + "pp": { "expectedROFperOrbit": "18" }, - "Pb-Pb": { + "PbPb": { "expectedROFperOrbit": "6" } }, diff --git a/doc/Advanced.md b/doc/Advanced.md index 6203b85ec6..d71fd959a2 100644 --- a/doc/Advanced.md +++ b/doc/Advanced.md @@ -5,68 +5,68 @@ Advanced topics - * [Advanced topics](#advanced-topics) +* [Advanced topics](#advanced-topics) * [Framework](#framework) - * [Plugging the QC to an existing DPL workflow](#plugging-the-qc-to-an-existing-dpl-workflow) - * [Production of QC objects outside this framework](#production-of-qc-objects-outside-this-framework) - * [Configuration](#configuration) - * [Example 1: basic](#example-1-basic) - * [Example 2: advanced](#example-2-advanced) - * [Limitations](#limitations) - * [Multi-node setups](#multi-node-setups) - * [Batch processing](#batch-processing) - * [Moving window](#moving-window) - * [Monitor cycles](#monitor-cycles) - * [Writing a DPL data producer](#writing-a-dpl-data-producer) - * [Custom merging](#custom-merging) - * [Critical, resilient and non-critical tasks](#critical-resilient-and-non-critical-tasks) - * [QC with DPL Analysis](#qc-with-dpl-analysis) - * [Uploading objects to QCDB](#uploading-objects-to-qcdb) - * [Propagating Check results to RCT in Bookkeeping](#propagating-check-results-to-rct-in-bookkeeping) - * [Conversion details](#conversion-details) + * [Plugging the QC to an existing DPL workflow](#plugging-the-qc-to-an-existing-dpl-workflow) + * [Production of QC objects outside this framework](#production-of-qc-objects-outside-this-framework) + * [Configuration](#configuration) + * [Example 1: basic](#example-1-basic) + * [Example 2: advanced](#example-2-advanced) + * [Limitations](#limitations) + * [Multi-node setups](#multi-node-setups) + * [Batch processing](#batch-processing) + * [Moving window](#moving-window) + * 
[Monitor cycles](#monitor-cycles) + * [Writing a DPL data producer](#writing-a-dpl-data-producer) + * [Custom merging](#custom-merging) + * [Critical, resilient and non-critical tasks](#critical-resilient-and-non-critical-tasks) + * [QC with DPL Analysis](#qc-with-dpl-analysis) + * [Uploading objects to QCDB](#uploading-objects-to-qcdb) + * [Propagating Check results to RCT in Bookkeeping](#propagating-check-results-to-rct-in-bookkeeping) + * [Conversion details](#conversion-details) * [Solving performance issues](#solving-performance-issues) - * [Dispatcher](#dispatcher) - * [QC Tasks](#qc-tasks-1) - * [Mergers](#mergers) + * [Dispatcher](#dispatcher) + * [QC Tasks](#qc-tasks-1) + * [Mergers](#mergers) * [Understanding and reducing memory footprint](#understanding-and-reducing-memory-footprint) - * [Analysing memory usage with valgrind](#analysing-memory-usage-with-valgrind) + * [Analysing memory usage with valgrind](#analysing-memory-usage-with-valgrind) * [CCDB / QCDB](#ccdb--qcdb) - * [Accessing objects in CCDB](#accessing-objects-in-ccdb) - * [Access GRP objects with GRP Geom Helper](#access-grp-objects-with-grp-geom-helper) - * [Global Tracking Data Request helper](#global-tracking-data-request-helper) - * [Custom metadata](#custom-metadata) - * [Details on the data storage format in the CCDB](#details-on-the-data-storage-format-in-the-ccdb) - * [Local CCDB setup](#local-ccdb-setup) - * [Instructions to move an object in the QCDB](#instructions-to-move-an-object-in-the-qcdb) + * [Accessing objects in CCDB](#accessing-objects-in-ccdb) + * [Access GRP objects with GRP Geom Helper](#access-grp-objects-with-grp-geom-helper) + * [Global Tracking Data Request helper](#global-tracking-data-request-helper) + * [Custom metadata](#custom-metadata) + * [Details on the data storage format in the CCDB](#details-on-the-data-storage-format-in-the-ccdb) + * [Local CCDB setup](#local-ccdb-setup) + * [Instructions to move an object in the QCDB](#instructions-to-move-an-object-in-the-qcdb) * [Asynchronous Data and Monte Carlo QC operations](#asynchronous-data-and-monte-carlo-qc-operations) * [QCG](#qcg) - * [Display a non-standard ROOT object in QCG](#display-a-non-standard-root-object-in-qcg) - * [Canvas options](#canvas-options) - * [Local QCG (QC GUI) setup](#local-qcg-qc-gui-setup) + * [Display a non-standard ROOT object in QCG](#display-a-non-standard-root-object-in-qcg) + * [Canvas options](#canvas-options) + * [Local QCG (QC GUI) setup](#local-qcg-qc-gui-setup) * [FLP Suite](#flp-suite) - * [Developing QC modules on a machine with FLP suite](#developing-qc-modules-on-a-machine-with-flp-suite) - * [Switch detector in the workflow readout-dataflow](#switch-detector-in-the-workflow-readout-dataflow) - * [Get all the task output to the infologger](#get-all-the-task-output-to-the-infologger) - * [Using a different config file with the general QC](#using-a-different-config-file-with-the-general-qc) - * [Enable the repo cleaner](#enable-the-repo-cleaner) + * [Developing QC modules on a machine with FLP suite](#developing-qc-modules-on-a-machine-with-flp-suite) + * [Switch detector in the workflow readout-dataflow](#switch-detector-in-the-workflow-readout-dataflow) + * [Get all the task output to the infologger](#get-all-the-task-output-to-the-infologger) + * [Using a different config file with the general QC](#using-a-different-config-file-with-the-general-qc) + * [Enable the repo cleaner](#enable-the-repo-cleaner) * [Configuration](#configuration-1) - * [Merging multiple configuration files into 
one](#merging-multiple-configuration-files-into-one) - * [Definition and access of simple user-defined task configuration ("taskParameters")](#definition-and-access-of-simple-user-defined-task-configuration-taskparameters) - * [Definition and access of user-defined configuration ("extendedTaskParameters")](#definition-and-access-of-user-defined-configuration-extendedtaskparameters) - * [Definition of new arguments](#definition-of-new-arguments) - * [Configuration files details](#configuration-files-details) - * [Global configuration structure](#global-configuration-structure) - * [Common configuration](#common-configuration) - * [QC Tasks configuration](#qc-tasks-configuration) - * [QC Checks configuration](#qc-checks-configuration) - * [QC Aggregators configuration](#qc-aggregators-configuration) - * [QC Post-processing configuration](#qc-post-processing-configuration) - * [External tasks configuration](#external-tasks-configuration) + * [Merging multiple configuration files into one](#merging-multiple-configuration-files-into-one) + * [Definition and access of simple user-defined task configuration ("taskParameters")](#definition-and-access-of-simple-user-defined-task-configuration-taskparameters) + * [Definition and access of user-defined configuration ("extendedTaskParameters")](#definition-and-access-of-user-defined-configuration-extendedtaskparameters) + * [Definition of new arguments](#definition-of-new-arguments) + * [Configuration files details](#configuration-files-details) + * [Global configuration structure](#global-configuration-structure) + * [Common configuration](#common-configuration) + * [QC Tasks configuration](#qc-tasks-configuration) + * [QC Checks configuration](#qc-checks-configuration) + * [QC Aggregators configuration](#qc-aggregators-configuration) + * [QC Post-processing configuration](#qc-post-processing-configuration) + * [External tasks configuration](#external-tasks-configuration) * [Miscellaneous](#miscellaneous) - * [Data Sampling monitoring](#data-sampling-monitoring) - * [Monitoring metrics](#monitoring-metrics) - * [Common check IncreasingEntries](#common-check-increasingentries) - * [Update the shmem segment size of a detector](#update-the-shmem-segment-size-of-a-detector) + * [Data Sampling monitoring](#data-sampling-monitoring) + * [Monitoring metrics](#monitoring-metrics) + * [Common check IncreasingEntries](#common-check-increasingentries) + * [Update the shmem segment size of a detector](#update-the-shmem-segment-size-of-a-detector) [← Go back to Post-processing](PostProcessing.md) | [↑ Go to the Table of Content ↑](../README.md) | [Continue to Frequently Asked Questions →](FAQ.md) @@ -78,11 +78,13 @@ Advanced topics Your existing DPL workflow can simply be considered a publisher. Therefore, replace `o2-qc-run-producer` with your own workflow. For example, if TPC wants to monitor the output `{"TPC", "CLUSTERS"}` of the workflow `o2-qc-run-tpcpid`, modify the config file to point to the correct data and do : + ``` o2-qc-run-tpcpid | o2-qc --config json://${QUALITYCONTROL_ROOT}/etc/tpcQCPID.json ``` ## Production of QC objects outside this framework + QC objects (e.g. histograms) are typically produced in a QC task. This is however not the only way. Some processing tasks such as the calibration might have already processed the data and produced histograms that should be @@ -92,6 +94,7 @@ push this QC object to the QC framework where it will be checked and stored. 
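+For orientation, here is a minimal sketch of what such a producing device could look like in the DPL. It is purely illustrative: the device name, the output spec and the histogram are made up for this example, and only the standard DPL `snapshot()` API is assumed.
+
+```c++
+#include <TH1F.h>
+#include <Framework/runDataProcessing.h>
+
+using namespace o2::framework;
+
+// Hypothetical source device publishing a ROOT histogram on {"TST", "HISTO", 0}.
+// The QC can then subscribe to this output as an "external task" (see below).
+WorkflowSpec defineDataProcessing(ConfigContext const&)
+{
+  return WorkflowSpec{
+    DataProcessorSpec{
+      "external-one",
+      Inputs{},
+      Outputs{OutputSpec{"TST", "HISTO", 0}},
+      AlgorithmSpec{[](ProcessingContext& ctx) {
+        TH1F histo("example", "example histogram", 100, 0., 99.);
+        histo.FillRandom("gaus", 1000);
+        // snapshot() ROOT-serializes the object and ships it on the output channel
+        ctx.outputs().snapshot(Output{"TST", "HISTO", 0}, histo);
+      }}}};
+}
+```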
### Configuration

Consider a device in the main data flow that produces a histogram on a channel defined as `TST/HISTO/0`. To get this histogram in the QC and check it, add to the configuration file an "external device":
+
```yaml
"externalTasks": {
"External-1": {
@@ -101,12 +104,14 @@ Let be a device in the main data flow that produces a histogram on a channel def
},
"checks": {
```
+
The "query" syntax is the same as the one used in the DPL and in the Dispatcher. It must match the output of another device, whether it is in the same workflow or in a piped one. The `binding` (first part, before the colon) is used in the path of the stored objects and thus we encourage using the task name to avoid confusion. Moreover, the `origin` (first element after the colon) is used as detectorName.

### Example 1: basic

As a basic example, we are going to produce histograms with the HistoProducer and collect them with the QC. The configuration is in [basic-external-histo.json](https://github.com/AliceO2Group/QualityControl/blob/master/Framework/basic-external-histo.json). An external task is defined and named "External-1" (see subsection above). It is then used in the Check QcCheck :
+
```yaml
"QcCheck": {
"active": "true",
@@ -121,9 +126,11 @@ As a basic example, we are going to produce histograms with the HistoProducer an
}]
}
```
+
When using this feature, make sure that the name of the MO in the Check definition matches the name of the object you are sending from the external device.

To run it, do:
+
```yaml
o2-qc-run-histo-producer | o2-qc --config json://${QUALITYCONTROL_ROOT}/etc/basic-external-histo.json
```
@@ -145,6 +152,7 @@ On top we see 3 histogram producers. `histoProducer-2` is not part of the QC, it
`histoProducer-0` produces an object that is used in a check (`QcCheck-External-1`). `histoProducer-1` objects are not used in any check but we generate one automatically to take care of the storage in the database.

To run it, do:
+
```yaml
o2-qc-run-producer | o2-qc-run-histo-producer --producers 3 --histograms 3 | o2-qc --config json://${QUALITYCONTROL_ROOT}/etc/advanced-external-histo.json
```
@@ -168,9 +176,9 @@ that for now we support cases with one or more local machines, but just only one
In our example, we assume having two local processing nodes (`localnode1`, `localnode2`) and one QC node (`qcnode`). There are two types of QC Tasks declared:
-- `MultiNodeLocal` which are executed on the local nodes and their results are merged and checked
+* `MultiNodeLocal` which are executed on the local nodes and their results are merged and checked
on the QC server.
-- `MultiNodeRemote` which runs on the QC server, receiving a small percent of data from
+* `MultiNodeRemote` which runs on the QC server, receiving a small percent of data from
`localnode2` only. Mergers are not needed in this case, but there is a process running Checks against Monitor Objects generated by this Task.
@@ -204,12 +212,13 @@ added:
}
},
```
+
List the local processing machines in the `localMachines` array. `remoteMachine` should contain the host name which will serve as a QC server and `remotePort` should be a port number on which Mergers will wait for upcoming MOs. Make
- sure it is not used by other service. If different QC Tasks are run in parallel, use separate ports for each. The
- `localControl` parameter allows to properly configure QC with respect to the control software it is run with. It can
+ sure it is not used by another service. If different QC Tasks are run in parallel, use separate ports for each.
The
+ `localControl` parameter allows configuring QC properly with respect to the control software it is run with. It can
be either `aliecs` (on FLPs) or `odc` (EPNs). It has no influence when running the software by hand.
-
+
One may also choose the merging mode - `delta` is the default and recommended (tasks are reset after each cycle, so they send only updates), but if it is not feasible, Mergers may expect `entire` objects - tasks are not reset, they always send entire objects and the latest versions are combined in Mergers.
@@ -244,6 +253,7 @@ In case the task is running remotely, data should be sampled. The minimal-effort
(see the example below). Use separate ports for each Data Sampling Policy.
If the same configuration file will be used on many nodes, but only some of them should apply a given sampling policy, one should also specify the list of machines to match (or generalized aliases, e.g. "flp", "epn").
+
```json
{
"dataSamplingPolicies": [
@@ -260,6 +270,7 @@ In case the task is running remotely, data should be sampled. The minimal-effort
]
}
```
+
By default, the channel is bound on the QC Task side. If this is not what you need, add `"bindLocation" : "local"` in the policy configuration (`"remote"` is the default value) and make sure to use valid host names.
@@ -277,7 +288,9 @@ iptables -I OUTPUT -p tcp -m conntrack --ctstate NEW,ESTABLISHED -d localnode1 -
iptables -I INPUT -p tcp -m conntrack --ctstate NEW,ESTABLISHED -s localnode2 -j ACCEPT
iptables -I OUTPUT -p tcp -m conntrack --ctstate NEW,ESTABLISHED -d localnode2 -j ACCEPT
```
+
If your network is isolated, you might consider disabling the firewall as an alternative. Be wary of the security risks.
+
```
systemctl stop firewalld # to disable until reboot
systemctl disable firewalld # to disable permanently
@@ -287,6 +300,7 @@ systemctl disable firewalld # to disable permanently
4. Run each part of the workflow. In this example `o2-qc-run-producer` represents any DPL workflow, here it is just a process which produces some random data. The `--host` argument is matched against the `machines` lists in the configuration files.
+
```
# On localnode1:
o2-qc-run-producer | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/multiNode.json --local --host localnode1 -b
@@ -312,6 +326,7 @@ If the file already exists, the new objects will be merged with those obtained s
At the end, one can run the rest of the processing chain (Checks, Aggregators) on the complete objects.
Here is a simple example:
+
```bash
# Remove any existing results
rm results.root
@@ -322,6 +337,7 @@ o2-qc-run-producer --message-amount 100 | o2-qc --config json:/${QUALITYCONTROL_
# Run Checks and Aggregators, publish results to QCDB
o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/basic.json --remote-batch results.root
```
+
Please note that the local batch QC workflow should not work on the same file at the same time. A semaphore mechanism is required if there is a risk they might be executed in parallel.
@@ -329,37 +345,40 @@ The file is organized into directories named after 3-letter detector codes and s
To browse the file, one needs the associated Quality Control environment loaded, since it contains QC-specific data structures.
It is worth remembering that this file is considered as intermediate storage, thus Monitor Objects do not have Checks applied and cannot be considered the final results.
The quick and easy way to inspect the contents of the file is to load a recent environment (e.g.
on lxplus) and open it with ROOT's `TBrowser`:
+
```shell
alienv enter O2PDPSuite/nightly-20221219-1
root
TBrowser t; // a browser window will pop-up
```
+
...or by browsing the file manually:
+
```shell
alienv enter O2PDPSuite/nightly-20221219-1
root
root [0] auto f = new TFile("QC_fullrun.root")
(TFile *) @0x7ffe84833dc8
root [1] f->ls()
-TFile** QC_fullrun.root
- TFile* QC_fullrun.root
- KEY: TDirectoryFile CPV;1 CPV
- KEY: TDirectoryFile EMC;1 EMC
- KEY: TDirectoryFile FDD;1 FDD
- KEY: TDirectoryFile FT0;1 FT0
- KEY: TDirectoryFile FV0;1 FV0
- KEY: TDirectoryFile GLO;1 GLO
- KEY: TDirectoryFile ITS;1 ITS
+TFile** QC_fullrun.root
+ TFile* QC_fullrun.root
+ KEY: TDirectoryFile CPV;1 CPV
+ KEY: TDirectoryFile EMC;1 EMC
+ KEY: TDirectoryFile FDD;1 FDD
+ KEY: TDirectoryFile FT0;1 FT0
+ KEY: TDirectoryFile FV0;1 FV0
+ KEY: TDirectoryFile GLO;1 GLO
+ KEY: TDirectoryFile ITS;1 ITS
...
root [2] f->cd("GLO")
(bool) true
root [3] f->ls()
-TFile** QC_fullrun.root
- TFile* QC_fullrun.root
- TDirectoryFile* GLO GLO
- KEY: o2::quality_control::core::MonitorObjectCollection MTCITSTPC;1
- KEY: o2::quality_control::core::MonitorObjectCollection Vertexing;1
- KEY: TDirectoryFile CPV;1 CPV
+TFile** QC_fullrun.root
+ TFile* QC_fullrun.root
+ TDirectoryFile* GLO GLO
+ KEY: o2::quality_control::core::MonitorObjectCollection MTCITSTPC;1
+ KEY: o2::quality_control::core::MonitorObjectCollection Vertexing;1
+ KEY: TDirectoryFile CPV;1 CPV
...
root [4] auto vtx = dynamic_cast<o2::quality_control::core::MonitorObjectCollection*>(f->Get("GLO/Vertexing"))
(o2::quality_control::core::MonitorObjectCollection *) @0x7ffe84833dc8
@@ -368,6 +387,7 @@ root [5] auto vtx_x = dynamic_cast(vt
root [6] vtx_x->getObject()->ClassName()
(const char *) "TH1F"
```
+
To merge several incomplete QC files, one can use the `o2-qc-file-merger` executable. It takes a list of input files, which may or may not reside on alien, and produces a merged file. One can select whether the executable should fail upon any error or continue for as long as possible.
Please see its `--help` output for usage details.
@@ -379,27 +399,30 @@ By default QC Tasks are never reset, thus the MOs they produce contain data from the full run.
However, if objects should have a shorter validity range, one may add the following options to QC Task configuration:
+
```json
"MovingWindowTaskA": {
...
"resetAfterCycles": "10",
}
```
+
In the case above the QC Task will have the `TaskInterface::reset()` method invoked every 10 cycles. Thus, all the plots generated by this task will be affected.
If the QC Task runs in parallel on many nodes and its results are merged, the effects will be different depending on the chosen merging mode:
-- If `"delta"` mode is used, the Merger in the last layer will implement the moving window, while the QC Tasks will
+* If `"delta"` mode is used, the Merger in the last layer will implement the moving window, while the QC Tasks will
still reset after each cycle. Please note that QC Tasks will fall out of sync during data acquisition, so the moving window might contain slightly misaligned data time ranges coming from different sources. Also, due to fluctuations of the data transfer, objects coming from different sources might appear more frequently than others. Thus, one might notice higher occupancy on stave A one time, but the next object might contain less than average data for the same stave.
-- In the `"entire"` mode, QC Tasks will reset MOs, while Mergers will use the latest available object version from each
+* In the `"entire"` mode, QC Tasks will reset MOs, while Mergers will use the latest available object version from each
Task. Please note that if one of the Tasks dies, an old version of the MO will still be used over and over.
Thus, `"delta"` mode is advised in most use cases.

In setups with Mergers one may also extend the Mergers cycle duration, which can help to even out any data fluctuations:
+
```json
"MovingWindowTaskB": {
...
@@ -409,9 +432,10 @@ In setups with Mergers one may also extend the Mergers cycle duration, which can
"resetAfterCycles": "1", "": "it could be still larger than 1"
}
```
+
In the presented case, the Merger will publish one set of complete MOs per 10 minutes, which should contain all deltas received during this last period. Since the QC Tasks cycle is 10 times shorter, the occupancy fluctuations should be
- less apparent. Please also note, that using this parameter in the `"entire"` merging mode does not make much sense,
+ less apparent. Please also note that using this parameter in the `"entire"` merging mode does not make much sense,
since Mergers would use every 10th incomplete MO version when merging.

### Moving windows of selected plots only

The following applies to synchronous setups which use Mergers in the delta mode.
One can obtain objects containing data from one cycle alongside the ones covering the whole run. These are saved in QCDB in the task subdirectory `mw` and also can be requested by Checks.
To specify which objects should get a moving window variant, add a `"movingWindows"` list to the task configuration:
+
```json
"MyTask": {
...
@@ -430,6 +455,7 @@ To specify which objects should get a moving window variant, add a `"movingWindo
```

To request these objects in a Check, use `TaskMovingWindow` data source, as in the example:
+
```json
"QcCheckMW": {
"dataSource": [{
@@ -439,6 +465,7 @@ To request these objects in a Check, use `TaskMovingWindow` data source, as in t
}]
}
```
+
It is possible to request both the integrated and single cycle plots by the same Check.
To test it in a small setup, one can run `o2-qc` with the `--full-chain` flag, which creates a complete workflow with a Merger for **local** QC tasks, even though it runs just one instance of them.
Please remember to use `"location" : "local"` in such a case.

In asynchronous QC, the moving window plots will appear in the intermediate QC file in the directory `mw` and will be uploaded to QCDB to `<taskName>/mw`. When testing, please make sure to let DPL know that it has to run in Grid mode, so that QC can compute object validity based on timestamps in the data:
+
```
export O2_DPL_DEPLOYMENT_MODE=Grid && o2-qc --local-batch QC.root ...
```

## Monitor cycles

-The QC tasks monitor and process data continuously during a so-called "monitor cycle". At the end of such a cycle they publish the QC objects that will then continue their way in the QC data flow.
+The QC tasks monitor and process data continuously during a so-called "monitor cycle". At the end of such a cycle they publish the QC objects that will then continue their way in the QC data flow.
+
+A monitor cycle lasts typically between **1 and 5 minutes**, some reaching 10 minutes but never less than 1 minute for performance reasons.
+It is defined in the config file this way:

-A monitor cycle lasts typically between __1 and 5 minutes__, some reaching 10 minutes but never less than 1 minute for performance reasons.
-It is defined in the config file this way:
```
"tasks": {
"dataSizeTask": {
@@ -475,18 +504,20 @@ It is possible to specify various durations for different period of times. It is
],
...
```
-In this example, a cycle of 60 seconds is used for the first 5 minutes (300 seconds), then a cycle of 3 minutes (180 seconds) between 5 minutes and 10 minutes after SOR, and finally a cycle of 5 minutes for the rest of the run. The last `validitySeconds` is not used and is just applied for the rest of the run.
-## Writing a DPL data producer
+In this example, a cycle of 60 seconds is used for the first 5 minutes (300 seconds), then a cycle of 3 minutes (180 seconds) between 5 minutes and 10 minutes after SOR, and finally a cycle of 5 minutes for the rest of the run. The last `validitySeconds` is not actually used: the corresponding cycle duration is simply applied for the rest of the run.
+
+## Writing a DPL data producer

For your convenience, and although it does not lie within the QC scope, we would like to document how to write a simple data producer in the DPL. The DPL documentation can be found [here](https://github.com/AliceO2Group/AliceO2/blob/dev/Framework/Core/README.md) and for questions please head to the [forum](https://alice-talk.web.cern.ch/).

As an example we take the `DataProducerExample` that you can find in the QC repository. It produces a number. By default it will be 1s, but one can specify a different number with the parameter `my-param`. It is made of 3 files :
+
* [runDataProducerExample.cxx](../Framework/src/runDataProducerExample.cxx) :
This is an executable with a basic data producer in the Data Processing Layer.
There are 2 important functions here :
- * `customize(...)` to add parameters to the executable. Note that it must be written before the includes for the dataProcessing.
- * `defineDataProcessing(...)` to define the workflow to be ran, in our case the device(s) publishing the number.
+ * `customize(...)` to add parameters to the executable. Note that it must be written before the includes for the dataProcessing.
+ * `defineDataProcessing(...)` to define the workflow to be run, in our case the device(s) publishing the number.
* [DataProducerExample.h](../Framework/include/QualityControl/DataProducerExample.h) :
The key elements are :
1. The include `#include `
@@ -508,27 +539,29 @@ Once a custom class is implemented, one should let QCG know how to display it co

## Critical, resilient and non-critical tasks

-DPL devices can be marked as expendable, resilient or critical. Expendable tasks can die without affecting the run.
-Resilient tasks can survive having one or all their inputs coming from an expendable task but they will stop the system if they themselves die.
-Critical tasks (default) will stop the system if they die and will not accept input from expendable tasks.
+DPL devices can be marked as expendable, resilient or critical. Expendable tasks can die without affecting the run.
+Resilient tasks can survive having one or all their inputs coming from an expendable task but they will stop the system if they themselves die.
+Critical tasks (default) will stop the system if they die and will not accept input from expendable tasks.

-In QC we use these `labels`.
+In QC we use these `labels`.
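+Under the hood this is expressed with the DPL `DataProcessorLabel` mechanism. As a rough, illustrative sketch (not the exact QC code), a workflow could mark one of its devices as expendable like this:
+
+```c++
+#include <Framework/DataProcessorSpec.h>
+
+// Illustrative only: attach the "expendable" label to a device so that
+// the control system may let it die without stopping the run.
+void markAsExpendable(o2::framework::DataProcessorSpec& spec)
+{
+  spec.labels.push_back({"expendable"});
+}
+```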
### QC tasks In QC, one can mark a task as critical or non-critical: + ```json "tasks": { "QcTask": { "active": "true", "critical": "false", "": "if false the task is allowed to die without stopping the workflow, default: true", ``` -By default they are `critical` meaning that their failure will stop the run. -If they are not critical, they will be `expendable` and will not stop the run if they die. -### Auto-generated proxies +By default they are `critical` meaning that their failure will stop the run. +If they are not critical, they will be `expendable` and will not stop the run if they die. + +### Auto-generated proxies -They adopt the criticality of the task they are proxying. +They adopt the criticality of the task they are proxying. ### QC mergers @@ -544,13 +577,15 @@ Aggregators are `resilient`. ### QC post-processing tasks -Post-processing tasks can be marked as critical or non-critical: +Post-processing tasks can be marked as critical or non-critical: + ```json "postprocessing": { "ExamplePostprocessing": { "active": "true", "critical": "false", "": "if false the task is allowed to die without stopping the workflow, default: true", ``` + By default, they are critical meaning that their failure will stop the run. If they are not critical, they will be `expendable` and will not stop the run if they die. @@ -559,6 +594,7 @@ If they are not critical, they will be `expendable` and will not stop the run if ### Uploading objects to QCDB To upload objects written to a file by an Analysis Task to QCDB, one may use the following command: + ```shell script o2-qc-upload-root-objects \ --input-file ./QAResults.root \ @@ -574,6 +610,7 @@ o2-qc-upload-root-objects \ See the `--help` message for explanation of the arguments. If everything went well, the objects should be accessible in [the test QCG instance](https://qcg-test.cern.ch) under the directories listed in the logs: + ``` 2021-10-05 10:59:41.408998 QC infologger initialized 2021-10-05 10:59:41.409053 Input file './QAResults.root' successfully open. @@ -584,6 +621,7 @@ the directories listed in the logs: 2021-10-05 10:59:41.594386 Storing MonitorObject qc_mc/TST/MO/AnalysisFromFileTest/hTimeT0Call 2021-10-05 10:59:41.597743 Successfully uploaded 10 objects to the QCDB. ``` + Notice that by default the executable will ignore the directory structure in the input file and upload all objects to one directory. If you need the directory structure preserved, add the argument `--preserve-directories`. @@ -592,9 +630,11 @@ If you need the directory structure preserved, add the argument `--preserve-dire The framework allows to propagate Quality Objects (QOs) produced by Checks and Aggregators to RCT in Bookkeeping. The synchronisation is done once, at the end of workflow runtime, i.e. at the End of Run or in the last stage of QC merging on Grid. Propagation can be enabled by adding the following key-value pair to Check/Aggregator configuration: + ```json "exportToBookkeeping": "true" ``` + Using it for Aggregators is discouraged, as the information on which exact Check failed is lost or at least obfuscated. Also, make sure that the configuration file includes the Bookkeeping URL. @@ -608,49 +648,49 @@ Below we describe some details of how the conversion is done. Good QOs are marked with green, Medium QOs are marked with orange and Bad QOs are marked with red. Null QOs are marked with purple. 
-- **Good QOs with no Flags associated are not converted to any Flags.** +* **Good QOs with no Flags associated are not converted to any Flags.** According to the preliminary design for Data Tagging, "bad" Flags always win, thus there is no need for explicit "good" Flags. It also implies that there is no need to explicitly add Good Flag to Good Quality. ![](images/qo_flag_conversion_01.svg) -- **Bad and Medium QOs with no Flags are converted to Flag 14 (Unknown).** +* **Bad and Medium QOs with no Flags are converted to Flag 14 (Unknown).** This means that Medium Quality data is by default bad for Analysis. ![](images/qo_flag_conversion_02.svg) -- **Null QOs with no Flags are converted to Flag 1 (Unknown Quality).** +* **Null QOs with no Flags are converted to Flag 1 (Unknown Quality).** ![](images/qo_flag_conversion_03.svg) -- **All QOs with Flags are converted to Flags, while the Quality is ignored.** +* **All QOs with Flags are converted to Flags, while the Quality is ignored.** As a consequence, one can customize the meaning of any Quality (Medium in particular) in terms of data usability. A warning is printed if a Check associates a good Flag to bad Quality or a bad Flag to good Quality. ![](images/qo_flag_conversion_04.svg) -- **Timespans not covered by a given QO are filled with Flag 1 (Unknown Quality).** +* **Timespans not covered by a given QO are filled with Flag 1 (Unknown Quality).** In other words, if an object was missing during a part of the run, we can state that the data quality is not known. ![](images/qo_flag_conversion_05.svg) -- **Overlapping or adjacent Flags with the same ID, comment and source (QO name) are merged.**. +* **Overlapping or adjacent Flags with the same ID, comment and source (QO name) are merged.**. This happens even if they were associated with different Qualities, e.g. Bad and Medium. Order of Flag arrival does not matter. ![](images/qo_flag_conversion_06.svg) ![](images/qo_flag_conversion_07.svg) -- **Flag 1 (Unknown Quality) is overwritten by any other Flag.** +* **Flag 1 (Unknown Quality) is overwritten by any other Flag.** This allows us to return Null Quality when there is not enough statistics to determine data quality, but it can be suppressed later, once we can return Good/Medium/Bad. ![](images/qo_flag_conversion_08.svg) -- **Good and Bad flags do not affect each other, they may coexist.** +* **Good and Bad flags do not affect each other, they may coexist.** ![](images/qo_flag_conversion_09.svg) -- **Flags for different QOs (QO names) do not affect each other. +* **Flags for different QOs (QO names) do not affect each other. Flag 1 (Unknown Quality) is added separately for each.** ![](images/qo_flag_conversion_10.svg) @@ -664,29 +704,29 @@ When observing performance issues with QC setups, consider the following actions ## Dispatcher Dispatcher will usually cause backpressure when it is requested to sample too much data. -In particular, copying many small messages takes more time than less messages of equivalent size. +In particular, copying many small messages takes more time than less messages of equivalent size. 
To improve the performance:
-- reduce the sampling rate
-- disable unused sampling policies
-- adapt the data format to pack data in fewer messages
-- when in need of 100% data, do not use Data Sampling, but connect to the data source directly
+* reduce the sampling rate
+* disable unused sampling policies
+* adapt the data format to pack data in fewer messages
+* when in need of 100% data, do not use Data Sampling, but connect to the data source directly

## QC Tasks

QC Tasks are implemented by the users, thus the maximum possible input data throughput largely depends on the task implementation. If a QC Task cannot cope with the input messages, consider:
-- sampling less data
-- using performance measurement tools (like `perf top`) to understand where the task spends the most time and optimize this part of code
-- if one task instance processes data, spawn one task per machine and merge the result objects instead
+* sampling less data
+* using performance measurement tools (like `perf top`) to understand where the task spends the most time and optimize this part of code
+* if one task instance processes data, spawn one task per machine and merge the result objects instead

## Mergers

The performance of Mergers depends on the type of objects being merged, as well as their number and size. The following points might help avoid backpressure:
-- increase QC tasks cycle duration
-- use less or smaller objects
-- if an object has its custom Merge() method, check if it could be optimized
-- enable multi-layer Mergers to split the computations across multiple processes (config parameter "mergersPerLayer")
+* increase QC tasks cycle duration
+* use less or smaller objects
+* if an object has its custom Merge() method, check if it could be optimized
+* enable multi-layer Mergers to split the computations across multiple processes (config parameter "mergersPerLayer")

# Understanding and reducing memory footprint

@@ -704,16 +744,19 @@ Consider loading only the parts of detector geometry which are being used by a g

2) The workflow will run and save files massif.out.

3) Generate a report for the file corresponding to the PID of the QC task:
+
```
ms_print massif.out.976329 > massif_abc_task.log
```
+
4) The generated report contains:
-- the command used to run the process
-- graph of the memory usage
-- grouped call stacks of all memory allocations on the heap (above certain threshold) within certain time intervals.
The left-most call contains all the calls which lead to it, represented on the right.
- For example, the call stack below means that the AbcTask created a TH2F histogram in the initalize method at the line
+ For example, the call stack below means that the AbcTask created a TH2F histogram in the initialize method at the line
AbcTask.cxx:82, which was 51,811,760B. In total, 130,269,568B worth of TH2F histograms were created in this time interval.
+* the command used to run the process
+* graph of the memory usage
+* grouped call stacks of all memory allocations on the heap (above certain threshold) within certain time intervals.
The left-most call contains all the calls which lead to it, represented on the right.
For example, the call stack below means that the AbcTask created a TH2F histogram in the initialize method at the line
AbcTask.cxx:82, which was 51,811,760B. In total, 130,269,568B worth of TH2F histograms were created in this time interval.
+
```
98.56% (256,165,296B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
->50.12% (130,269,568B) 0xFCBD1A6: TArrayF::Set(int) [clone .part.0] (TArrayF.cxx:111)
| ->19.93% (51,811,760B) 0x32416518: make_unique (unique_ptr.h:1065)
| | ->19.93% (51,811,760B) 0x32416518: o2::quality_control_modules::det::AbcTask::initialize(o2::framework::InitContext&) (AbcTask.cxx:82)
```
+
5) To get a lightweight and more digestible output, consider running the massif report through the following command to get the summary of the calls only within a QC module. This essentially tells you how much memory a given line allocates.
+
```
[O2PDPSuite/latest] ~/alice/test-rss $> grep quality_control_modules massif_abc_task.log | sed 's/^.*[0-9][0-9]\.[0-9][0-9]\% //g' | sort | uniq
(242,371,376B) 0x324166B2: o2::quality_control_modules::det::AbcTask::initialize(o2::framework::InitContext&) (AbcTask.cxx:88)
@@ -730,16 +775,18 @@ ms_print massif.out.976329 > massif_abc_task.log
(51,811,760B) 0x32416518: o2::quality_control_modules::det::AbcTask::initialize(o2::framework::InitContext&) (AbcTask.cxx:82)
(51,811,760B) 0x324165EB: o2::quality_control_modules::det::AbcTask::initialize(o2::framework::InitContext&) (AbcTask.cxx:85)
```
+
6) Consider reducing the size and number of the biggest histograms. Consider disabling histograms which will not be useful for async QC (no allocations, no startPublishing).

# CCDB / QCDB

## Accessing objects in CCDB

-The MonitorObjects generated by Quality Control are stored in a dedicated repository (QCDB), which is based on CCDB.
+The MonitorObjects generated by Quality Control are stored in a dedicated repository (QCDB), which is based on CCDB.
The run conditions, on the other hand, are located in another, separate database.
The recommended way (excluding postprocessing) to access these conditions is to use a `Lifetime::Condition` DPL input, which can be requested as in the query below:
+
```json
"tasks": {
"MyTask": {
...
@@ -751,9 +798,11 @@ The recommended way (excluding postprocessing) to access these conditions is to
}
},
```
+
The timestamp of the CCDB object will be aligned with the data timestamp.

If a task needs both sampled input and a CCDB object, it is advised to use two data sources as follows:
+
```json
"tasks": {
"MyTask": {
...
@@ -770,6 +819,7 @@ If a task needs both sampled input and a CCDB object, it is advised to use two d
```

The requested CCDB object can be accessed like any other DPL input in `monitorData`:
+
```
void QcMFTClusterTask::monitorData(o2::framework::ProcessingContext& ctx)
{
@@ -787,8 +837,9 @@ PostProcessingTasks do not take DPL inputs, so in this case `ConditionAccess::re

## Access GRP objects with GRP Geom Helper

-To get GRP objects via a central facility, add the following structure to the task definition and set its values
+To get GRP objects via a central facility, add the following structure to the task definition and set its values
according to the needs.
+
```json
"myTask": {
...
@@ -804,13 +855,14 @@ according to the needs.
}
}
```
-The requested objects will be available via [`GRPGeomHelper::instance()`](https://github.com/AliceO2Group/AliceO2/blob/dev/Detectors/Base/include/DetectorsBase/GRPGeomHelper.h) singleton.
+The requested objects will be available via the [`GRPGeomHelper::instance()`](https://github.com/AliceO2Group/AliceO2/blob/dev/Detectors/Base/include/DetectorsBase/GRPGeomHelper.h) singleton.
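+For illustration, a task could then access the objects during processing roughly as follows (a sketch assuming the request was configured as above; `MyTask` is hypothetical and error handling is omitted):
+
+```c++
+#include <DetectorsBase/GRPGeomHelper.h>
+
+void MyTask::monitorData(o2::framework::ProcessingContext& ctx)
+{
+  // let the helper pick up new or updated objects delivered with the data
+  o2::base::GRPGeomHelper::instance().checkUpdates(ctx);
+  // the requested GRP objects are then available from the singleton
+  if (const auto* magField = o2::base::GRPGeomHelper::instance().getGRPMagField()) {
+    // use the magnetic field information here
+  }
+}
+```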
## Global Tracking Data Request helper

To retrieve tracks and clusters for specific detectors or detector combinations, one can use the [`DataRequest`](https://github.com/AliceO2Group/AliceO2/blob/dev/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h) helper.
By adding the following structure to a QC task, the corresponding `InputSpecs` will be automatically added to the task.
+
```json
"myTask": {
...
@@ -823,7 +875,9 @@ By adding the following structure to a QC task, the corresponding `InputSpecs` w
}
}
```
+
Then, the corresponding tracks and clusters can be retrieved in the code using `RecoContainer`:
+
```c++
void MyTask::monitorData(o2::framework::ProcessingContext& ctx)
{
@@ -838,13 +892,16 @@ void MyTask::monitorData(o2::framework::ProcessingContext& ctx)

One can add custom metadata on the QC objects produced in a QC task.
Simply call `ObjectsManager::addMetadata(...)`, like in
+
```
// add a metadata entry on the histogram mHistogram: key is "custom" and value is "34"
getObjectsManager()->addMetadata(mHistogram->GetName(), "custom", "34");
```
+
This metadata will end up in the QCDB.

-It is also possible to add or update metadata of a MonitorObject directly:
+It is also possible to add or update metadata of a MonitorObject directly:
+
```
MonitorObject* mo = getMonitorObject(objectName);
mo->addOrUpdateMetadata(key, value);
```
@@ -858,6 +915,7 @@ It is therefore possible to easily open it with ROOT when loaded with alienv. It

The MonitorObjects are stored at a path which is enforced by the qc framework : `/qc/<detectorName>/MO/<taskName>/object/name`
Note that the name of the object can contain slashes (`/`) in order to build a sub-tree visible in the GUI.
The detector name and the taskname are set in the config file :
+
```json
"tasks": {
"QcTask": { <---------- task name
@@ -879,7 +937,7 @@ The QC framework is nevertheless backward compatible and can handle the old and

Having a central ccdb for test (ccdb-test) is handy but also means that everyone can access, modify or delete the data. If you prefer to have a local instance of the CCDB, for example in your lab or on your development machine, follow these instructions.

-1. Download the local repository service from http://alimonitor.cern.ch/download/local.jar
+1. Download the local repository service from <http://alimonitor.cern.ch/download/local.jar>

2. The service can simply be run with `java -jar local.jar`

@@ -890,11 +948,12 @@ By default the local repository is located in /tmp/QC (or java.io.tmpdir/QC to b

The address of the CCDB will have to be updated in the Tasks config file.

-At the moment, the description of the REST api can be found in this document : https://docs.google.com/presentation/d/1PJ0CVW7QHgnFzi0LELc06V82LFGPgmG3vsmmuurPnUg
+At the moment, the description of the REST api can be found in this document : <https://docs.google.com/presentation/d/1PJ0CVW7QHgnFzi0LELc06V82LFGPgmG3vsmmuurPnUg>

## Instructions to move an object in the QCDB

The script `o2-qc-repo-move-objects` lets the user move an object, and thus all the versions attached to it. E.g.:
+
```
python3 o2-qc-repo-move-objects --url http://ccdb-test.cern.ch:8080 --path qc/TST/MO/Bob --new-path qc/TST/MO/Bob2 --log-level 10
```
@@ -931,12 +990,13 @@ In such case, several masterjobs are run in parallel. Each produces a `QC.root`
file which contains all the statistics for a masterjob. The last masterjob to complete recognizes this fact and merges all `QC.root` into `QC_fullrun.root` and only then uploads the results to QCDB.
To find it, one can use `alien_find`:
+
```
> alien_find /alice/data/2022/LHC22m/523897/apass1_epn QC_fullrun.root
/alice/data/2022/LHC22m/523897/apass1_epn/0750/QC/001/QC_fullrun.root
```

-TODO explain how a connection to QCDB is made from Grid sites.
+TODO explain how a connection to QCDB is made from Grid sites.

# QCG

## Display a non-standard ROOT object in QCG

Users can publish objects inheriting from a custom class, e.g. not a TH2F but a class deriving from TH2F.

The solution depends on the strategy to adopt to display the object.

1. The custom class uses multiple inheritance and one of the base classes is a standard ROOT object which the QCG can display (e.g. a histogram). In such a case, add a member `mTreatMeAs` to your custom class and set it to the name of the class that should be used to interpret and display the data. There is an example in the Example module :
+
```c++
std::string mTreatMeAs = "TH2F"; // the name of the class this object should be considered as when drawing in QCG.
```
-2. [Not ready yet] The class encapsulates the object that should actually be drawn. Contact us if you need this feature, we can easily add it.
-3. [Not ready yet] The class cannot be drawn in the ways outlined above and need a custom piece of JS to actually do it. Contact us if you need this feature, it is not a trivial thing to do.
+
+2. [Not ready yet] The class encapsulates the object that should actually be drawn. Contact us if you need this feature, we can easily add it.
+3. [Not ready yet] The class cannot be drawn in the ways outlined above and needs a custom piece of JS to actually do it. Contact us if you need this feature, it is not a trivial thing to do.

## Canvas options

The developer of a Task might perfectly know how to display a plot or a graph but cannot set these options if they belong to the Canvas. This is typically the case of `drawOptions` such as `colz` or `alp`. It is also the case for canvases' properties such as logarithmic scale and grid. These options can be set by the end user in the QCG but it is likely that the developer wants to give pertinent default options.

To do so, one can use one of the two following methods.
+
* `getObjectsManager()->setDefaultDrawOptions(<objectName>, string& drawOptions)` where
`drawOptions` is a space-separated list of drawing options. E.g. "colz" or "alp lego1".
* `getObjectsManager()->setDisplayHint(<objectName>, string& hints)` where `hints` is a space-separated list of hints on how to draw the object. E.g. "logz" or "gridy logy".
@@ -968,7 +1031,7 @@ These methods must be called after the objects has been published, i.e. after th

## Local QCG (QC GUI) setup

-To install and run the QCG locally please follow these instructions : https://github.com/AliceO2Group/WebUi/tree/dev/QualityControl#installation
+To install and run the QCG locally please follow these instructions : <https://github.com/AliceO2Group/WebUi/tree/dev/QualityControl#installation>

# FLP Suite

The QC is part of the FLP Suite. The Suite is installed on FLPs through RPMs and is configured with ansible through Foreman.

## Developing QC modules on a machine with FLP suite

Development RPMs are available on the FLPs.
Start by installing them, then compile QC and finally tell aliECS to use it.

**Installation**

As root do:
+
```
yum install o2-QualityControl-devel git -y
```
@@ -988,6 +1052,7 @@ yum install o2-QualityControl-devel git -y

**Compilation**

As user `flp` do:
+
```
git clone https://github.com/AliceO2Group/QualityControl.git
cd QualityControl
@@ -999,9 +1064,10 @@ cmake -DCMAKE_INSTALL_PREFIX=/tmp/installdir -G Ninja -DCLANG_EXECUTABLE=/opt/o2
ninja -j16 install
```

-***Compilation on top of a local O2***
+_**Compilation on top of a local O2**_
+
+If you want to also build O2 locally, do

-If you want to build also O2 locally do
```
# O2
git clone https://github.com/AliceO2Group/AliceO2.git
@@ -1022,13 +1088,14 @@ cmake -DCMAKE_INSTALL_PREFIX=/tmp/installdir -G Ninja -DCLANG_EXECUTABLE=/opt/o2
ninja -j8 install
```

-***Important step in case several nodes are involved***
+_**Important step in case several nodes are involved**_

In case the workflows will span over several FLPs and/or QC machines, one should `scp` the `installdir` to the other machines in the same directory.

**Use it in aliECS**

-In the aliECS gui, in the panel "Advanced Configuration", et an extra variable `extra_env_vars` and set it to
+In the aliECS GUI, in the panel "Advanced Configuration", add an extra variable `extra_env_vars` and set it to
+
```
PATH=/tmp/installdir/bin/:$PATH; LD_LIBRARY_PATH=/tmp/installdir/lib/:/tmp/installdir/lib64/:$LD_LIBRARY_PATH; QUALITYCONTROL_ROOT=/tmp/installdir/; echo
```
@@ -1050,36 +1117,39 @@ Set the variable log_task_output=all

One can set the `QC URI` to a different config file that is used by the general QC when enabled. However, this is not the recommended way. One must make sure that the name of the task and the check are left untouched and that they are both enabled.

-## Enable the repo cleaner
+## Enable the repo cleaner

-If the CCDB used in an FLP setup is the local one, the repo cleaner might be necessary as to avoid filling up the disk of the machine.
+If the CCDB used in an FLP setup is the local one, the repo cleaner might be necessary so as to avoid filling up the disk of the machine.
+
+By default there is a _disabled_ cron job :

-By defaults there is a *disabled* cron job :
```shell
*/10 * * * * /opt/o2/bin/o2-qc-repo-cleaner --config /etc/flp.d/ccdb-sql/repocleaner.yaml --dry-run > /dev/null 2>> /tmp/cron-errors.txt
```

1. copy the config file /etc/flp.d/ccdb-sql/repocleaner.yaml
2. modify the config file to suit your needs
-3. run by hand the repo-cleaner to check that the config file is ok
-3. update the cron job to use the modified config file
+3. run the repo-cleaner by hand to check that the config file is OK
+4. update the cron job to use the modified config file
5. uncomment the cron job

-# Configuration
+# Configuration

## Merging multiple configuration files into one

To merge multiple QC configuration files into one, one can use `jq` in the following way:
+
```
jq -n 'reduce inputs as $s (input; .qc.tasks += ($s.qc.tasks) | .qc.checks += ($s.qc.checks) | .qc.externalTasks += ($s.qc.externalTasks) | .qc.postprocessing += ($s.qc.postprocessing)| .dataSamplingPolicies += ($s.dataSamplingPolicies))' $QC_JSON_GLOBAL $JSON_FILES > $MERGED_JSON_FILENAME
```

-However, one should pay attention to avoid duplicate task definition keys (e.g.
having RawTask twice, each for a different detector), otherwise only one of them would find its way to a merged file.
In such a case, one can add the `taskName` parameter in the body of a task configuration structure to use the preferred name, and change the root key to a unique id, which shall be used only for the purpose of navigating the configuration file. If `taskName` does not exist, it is taken from the root key value. Please remember to also update the references to the task in other actors which refer to it (e.g. in a Check's data source).

These two tasks will **not** be merged correctly:
+
```json
"RawTask": {
  "className": "o2::quality_control_modules::abc::RawTask",
@@ -1091,6 +1161,7 @@ These two tasks will **not** be merged correctly:
  }
}
```
+
```json
"RawTask": {
  "className": "o2::quality_control_modules::xyz::RawTask",
@@ -1102,7 +1173,9 @@ These two tasks will **not** be merged correctly:
  }
}
```
+
The following tasks will be merged correctly:
+
```json
"RawTaskA": {
  "taskName": "RawTask",
@@ -1115,6 +1188,7 @@ The following tasks will be merged correctly:
  }
}
```
+
```json
"RawTaskB": {
  "taskName": "RawTask",
@@ -1127,6 +1201,7 @@
  }
}
```
+
The same approach can be applied to other actors in the QC framework, like Checks (`checkName`), Aggregators (`aggregatorName`), External Tasks (`taskName`) and Postprocessing Tasks (`taskName`).

## Templating config files

> [!WARNING]
> Templating only works when using aliECS, i.e. in production and staging.

-The templating is provided by a template engine called `jinja`. You can use any of its feature. A couple are described below and should satisfy the vast majority of the needs.
+The templating is provided by a template engine called `jinja`. You can use any of its features. A couple of them are described below and should satisfy the vast majority of needs.

### Preparation

> [!IMPORTANT]
> Workflows have already been migrated to apricot. This should not be needed anymore.

-To template a config file, modify the corresponding workflow in `ControlWorkflows`. This is needed because we won't use directly `Consul` but instead go through `apricot` to template it.
+To template a config file, modify the corresponding workflow in `ControlWorkflows`. This is needed because we won't use `Consul` directly but instead go through `apricot` to template it.

1. Replace `consul-json` by `apricot`
2. Replace `consul_endpoint` by `apricot_endpoint`

Example:
```
o2-qc --config consul-json://{{ consul_endpoint }}/o2/components/qc/ANY/any/mch-qcmn-epn-full-track-matching --remote -b
```
+
becomes
+
```
o2-qc --config 'apricot://{{ apricot_endpoint }}/o2/components/qc/ANY/any/mch-qcmn-epn-full-track-matching' --remote -b
```
+
-Make sure that you are able to run with the new workflow before actually templating.
+Make sure that you are able to run with the new workflow before actually templating.

### Include a config file

-To include a config file (e.g. named `mch_digits`) add this line :
+To include a config file (e.g. named `mch_digits`) add this line :
+
```
{% include "MCH/mch_digits" %}
```
+
-The content of the file `mch_digits` is then copied into the config file. Thus make sure that you include all the commas and stuff.
+The content of the file `mch_digits` is then copied verbatim into the config file. Thus make sure that the result is still valid JSON, including all the necessary commas.
#### Configuration files organisation

@@ -1175,43 +1255,51 @@ Common config files includes are provided in the `COMMON` subfolder.

### Conditionals

The `if` looks like
+
```
{% if [condition] %}
…
{% endif %}
```
+
The condition typically requires some external info, such as the run type or a list of detectors. Thus you must pass the info in the ControlWorkflows.
-It could look like this
+It could look like this
+
```
o2-qc --config 'apricot://{{ apricot_endpoint }}/o2/components/qc/ANY/any/tpc-pulser-calib-qcmn?run_type={{ run_type }}' ...
```
-or
+
+or
+
```
o2-qc --config 'apricot://{{ apricot_endpoint }}/o2/components/qc/ANY/any/mch-qcmn-epn-full-track-matching?detectors={{ detectors }}' ...
```

Then use it like this:
+
```
{% if run_type == "PHYSICS" %}
...
{% endif %}
```
+
or like this respectively:
+
```
{% if "mch" in detectors|lower %}
...
{% endif %}
```

-### Test and debug
+### Test and debug

To see how a config file will look once templated, simply open a browser at this address:
`{{apricot_endpoint}}/components/qc/ANY/any/tpc-pulser-calib-qcmn?process=true`

Replace `{{apricot_endpoint}}` by the value you can find in Consul under `o2/runtime/aliecs/vars/apricot_endpoint` (it is different on staging and prod).

-*Note that there is no `o2` in the path!!!*
+_Note that there is no `o2` in the path!_

### Example

-We are going to create in staging a small example to demonstrate the above.
-First create 2 files if they don't exist yet:
+We are going to create a small example in staging to demonstrate the above.
+First create 2 files if they don't exist yet:

**o2/components/qc/ANY/any/templating_demo**

@@ -1223,7 +1311,8 @@ First create 2 files if they don't exist yet:
  }
}
```
+
-Here we simply include 1 file from a subfolder and add a piece if a certain condition is successful.
+Here we simply include a file from a subfolder and add a piece if a certain condition is met.

**o2/components/qc/ANY/any/TST/templating_included**

@@ -1235,26 +1324,29 @@ Here we simply include 1 file from a subfolder and add a piece if a certain cond
  }
}

-And now you can try it out:
+And now you can try it out:
+
```
http://alio2-cr1-hv-mvs00.cern.ch:32188/components/qc/ANY/any/templating_demo?process=true
```
+
---> the file is included inside the other.
+
+--> the file is included inside the other.

```
http://alio2-cr1-hv-mvs00.cern.ch:32188/components/qc/ANY/any/templating_demo?process=true&run_type=PHYSICS
```

---> the file is included and the condition is true thus we have an extra line.
+--> the file is included and the condition is true, thus we have an extra line.

## Definition and access of simple user-defined task configuration ("taskParameters")

-The new, extended, way of defining such parameters, not only in Tasks but also in Checks, Aggregators and PP tasks,
-is described in the next section.
+The new, extended way of defining such parameters, not only in Tasks but also in Checks, Aggregators and PP tasks,
+is described in the next section.

A task can access custom parameters declared in the configuration file at `qc.tasks.<task_name>.taskParameters`. They are stored inside an object of type `CustomParameters` named `mCustomParameters`, which is a protected member of `TaskInterface`.

The syntax is
+
```json
"tasks": {
  "QcTask": {
@@ -1262,14 +1354,15 @@ The syntax is
      "myOwnKey1": "myOwnValue1"
    },
```
+
It is accessed with : `mCustomParameters["myOwnKey1"]`.
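A small, hedged illustration of the typical access pattern may help here. The key name, the fallback and the numeric conversion below are illustrative only; `mCustomParameters` is the protected member described above:

```c++
// In a TaskInterface subclass, e.g. in initialize().
// Values are stored as strings, so convert them where a number is expected.
std::string value = mCustomParameters["myOwnKey1"]; // as declared under "taskParameters"
int myThreshold = std::stoi(value);                 // illustrative numeric conversion
```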
## Definition and access of user-defined configuration ("extendedTaskParameters")

-User code, whether it is a Task, a Check, an Aggregator or a PostProcessing task, can access custom parameters declared in the configuration file.
+User code, whether it is a Task, a Check, an Aggregator or a PostProcessing task, can access custom parameters declared in the configuration file.
They are stored inside an object of type `CustomParameters` named `mCustomParameters`, which is a protected member of `TaskInterface`.

-The following table gives the path in the config file and the name of the configuration parameter for the various types of user code:
+The following table gives the path in the config file and the name of the configuration parameter for the various types of user code:

| User code      | Config File item                                         |
|----------------|--------------------------------------------------------|
@@ -1279,6 +1372,7 @@ The following table gives the path in the config file and the name of the config
| PostProcessing | `qc.postprocessing.<task_name>.extendedTaskParameters` |

The new syntax is
+
```json
"tasks": {
  "QcTask": {
@@ -1295,10 +1389,10 @@ The new syntax is
        "myOwnKey1": "myOwnValue1b",
        "myOwnKey2": "myOwnValue2b"
      },
-      "PROTON-PROTON": {
+      "pp": {
        "myOwnKey1": "myOwnValue1c"
      },
-      "Pb-Pb": {
+      "PbPb": {
        "myOwnKey1": "myOwnValue1d"
      }
    },
@@ -1308,23 +1402,25 @@ The new syntax is
    }
  },
```
+
-It allows to have variations of the parameters depending on the run and beam types. The proper run types can be found here: [ECSDataAdapters.h](https://github.com/AliceO2Group/AliceO2/blob/dev/DataFormats/Parameters/include/DataFormatsParameters/ECSDataAdapters.h#L54). The `default` can be used
-to ignore the run or the beam type.
-The beam type is one of the following: `PROTON-PROTON`, `Pb-Pb`, `Pb-PROTON`, `cosmic`.
+It allows having variations of the parameters depending on the run and beam types. The proper run types can be found here: [ECSDataAdapters.h](https://github.com/AliceO2Group/AliceO2/blob/dev/DataFormats/Parameters/include/DataFormatsParameters/ECSDataAdapters.h#L54). The `default` can be used
+to ignore the run or the beam type.
+The beam type comes from the parameter `pdp_beam_type` set by ECS and can be one of the following: `pp`, `PbPb`, `pPb`, `pO`, `OO`, `NeNe`, `cosmic`, `technical`.
+See [readout-dataflow](https://github.com/AliceO2Group/ControlWorkflows/blob/master/workflows/readout-dataflow.yaml) to verify the possible values.

The values can be accessed in various ways described in the following sub-sections.

### Access optional values with or without activity

-The value for the key, runType and beamType is returned if found, or an empty value otherwise.
-However, before returning an empty value we try to substitute the runType and the beamType with "default".
+The value for the key, runType and beamType is returned if found, or an empty value otherwise.
+However, before returning an empty value we try to substitute the runType and the beamType with "default".

```c++
// returns an Optional if it finds the key `myOwnKey` for the runType and beamType of the provided activity,
// or if it can find the key with the runType or beamType substituted with "default".
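// (Illustrative note, restating the paragraph above: if neither the exact
// runType/beamType pair nor the "default" substitutions match, an empty value is returned.)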
-auto param = mCustomParameters.atOptional("myOwnKey1", activity); // activity is "PHYSICS", "Pb-Pb" , returns "myOwnValue1d"
+auto param = mCustomParameters.atOptional("myOwnKey1", activity); // activity is "PHYSICS", "PbPb", returns "myOwnValue1d"
// same but passing directly the run and beam types
-auto param = mCustomParameters.atOptional("myOwnKey1", "PHYSICS", "Pb-Pb"); // returns "myOwnValue1d"
+auto param = mCustomParameters.atOptional("myOwnKey1", "PHYSICS", "PbPb"); // returns "myOwnValue1d"
// or with only the run type
auto param = mCustomParameters.atOptional("myOwnKey1", "PHYSICS"); // returns "myOwnValue1b"
```
@@ -1340,8 +1436,8 @@ mCustomParameters.at("myOwnKey"); // returns `myOwnValue`
mCustomParameters.at("myOwnKey", "default"); // returns `myOwnValue`
mCustomParameters.at("myOwnKey", "default", "default"); // returns `myOwnValue`

-mCustomParameters.at("myOwnKey1", "PHYSICS", "PROTON-PROTON"); // returns `myOwnValue1c`
-mCustomParameters.at("myOwnKey1", "PHYSICS", "Pb-Pb"); // returns `myOwnValue1d`
+mCustomParameters.at("myOwnKey1", "PHYSICS", "pp"); // returns `myOwnValue1c`
+mCustomParameters.at("myOwnKey1", "PHYSICS", "PbPb"); // returns `myOwnValue1d`
mCustomParameters.at("myOwnKey2", "COSMICS"); // returns `myOwnValue2e`

mCustomParameters.at("myOwnKey1", activity); // result will depend on activity
@@ -1350,6 +1446,7 @@ mCustomParameters.at("myOwnKey1", activity); // result will depend on activity

### Access values and return default if not found

The correct way of accessing a parameter, defaulting to a value if it is not there, is the following:
+
```c++
std::string param = mCustomParameters.atOrDefaultValue("myOwnKey1", "1" /*default value*/, "physics", "pp");
int casted = std::stoi(param);
@@ -1358,9 +1455,10 @@ The correct way of accessing a parameter and to default to a value if it is not
std::string param = mCustomParameters.atOrDefaultValue("myOwnKey1", "1" /*default value*/, activity); // see below how to get the activity
```

-### Find a value
+### Find a value
+
+Finally, the way to search for a value and to act only if it is there is the following:

-Finally the way to search for a value and only act if it is there is the following:
```c++
if (auto param2 = mCustomParameters.find("myOwnKey1", "physics", "pp"); param2 != mCustomParameters.end()) {
  int casted = std::stoi(param2->second);
@@ -1382,6 +1480,7 @@ In a postprocessing task, it is available in the objects manager: `getObjectsMan

One can also tell the DPL driver to accept new arguments. This is done using the `customize` method at the top of your workflow definition (usually called "runXXX" in the QC). For example, to add two parameters of different types do :
+
```
void customize(std::vector<ConfigParamSpec>& workflowOptions)
{
@@ -1394,24 +1493,26 @@ void customize(std::vector<ConfigParamSpec>& workflowOptions)

## Reference data

-A reference object is an object from a previous run. It is usually used as a point of comparison.
+A reference object is an object from a previous run. It is usually used as a point of comparison.

### Get a reference plot in a check

-To retrieve a reference plot in your Check, use
+To retrieve a reference plot in your Check, use
+
```
std::shared_ptr<MonitorObject> CheckInterface::retrieveReference(std::string path, Activity referenceActivity);
```
-- `path` : the path of the object _without the provenance (e.g. `qc`)_
-- `referenceActivity` : the activity of reference (usually the current activity with a different run number)
+* `path` : the path of the object _without the provenance (e.g.
`qc`)_
* `referenceActivity` : the activity of reference (usually the current activity with a different run number)

If the reference is not found, it will return a `nullptr` and the quality will be `Null`.

### Compare to a reference plot

-The check `ReferenceComparatorCheck` in `Common` compares objects to their reference.
+The check `ReferenceComparatorCheck` in `Common` compares objects to their reference.
+
+The configuration looks like

-The configuration looks like
```
"QcCheck": {
  "active": "true",
@@ -1434,26 +1535,28 @@ The configuration looks like
      }
    },
    "PHYSICS": {
-      "PROTON-PROTON": {
+      "pp": {
        "referenceRun" : "551890"
      }
    }
  }
}
```
+
The check needs the following parameters:

-- `referenceRun` to specify what is the run of reference and retrieve the reference data.
-- `comparatorName` to decide how to compare, see below for their descriptions.
-- `threshold` to specifie the value used to discriminate between good and bad matches between the histograms.
+* `referenceRun` to specify the run of reference and retrieve the reference data.
+* `comparatorName` to decide how to compare, see below for the descriptions.
+* `threshold` to specify the value used to discriminate between good and bad matches between the histograms.

Three comparators are provided:
+
1. `o2::quality_control_modules::common::ObjectComparatorDeviation`: comparison based on the average relative deviation between the bins of the current and reference histograms; the `threshold` parameter represents in this case the maximum allowed deviation
2. `o2::quality_control_modules::common::ObjectComparatorChi2`: comparison based on a standard chi2 test between the current and reference histograms; the `threshold` parameter represents in this case the minimum allowed chi2 probability
3. `o2::quality_control_modules::common::ObjectComparatorKolmogorov`: comparison based on a standard Kolmogorov test between the current and reference histograms; the `threshold` parameter represents in this case the minimum allowed Kolmogorov probability

-Note that you can easily specify different reference runs for different run types and beam types.
+Note that you can easily specify different reference runs for different run types and beam types.

-The plot is beautified by the addition of a `TPaveText` containing the quality and the reason for the quality.
+The plot is beautified by the addition of a `TPaveText` containing the quality and the reason for the quality.

### Generate a canvas combining both the current and reference ratio histogram

@@ -1499,15 +1602,15 @@ This is the global structure of the configuration in QC.
```

There are six QC-related components:

-- "config" - contains global configuration of QC which apply to any component. It is required in any configuration
+* "config" - contains the global configuration of QC which applies to any component. It is required in any configuration
file.
-- "tasks" - contains declarations of QC Tasks. It is mandatory for running topologies with Tasks and
+* "tasks" - contains declarations of QC Tasks. It is mandatory for running topologies with
Checks.
-- "externalTasks" - contains declarations of external devices which sends objects to the QC to be checked and stored.
-- "checks" - contains declarations of QC Checks. It is mandatory for running topologies with
+* "externalTasks" - contains declarations of external devices which send objects to the QC to be checked and stored.
+* "checks" - contains declarations of QC Checks. It is mandatory for running topologies with
Tasks and Checks.
-- "aggregators" - contains declarations of QC Aggregators. It is not mandatory. -- "postprocessing" - contains declarations of PostProcessing Tasks. It is only needed only when Post-Processing is +* "aggregators" - contains declarations of QC Aggregators. It is not mandatory. +* "postprocessing" - contains declarations of PostProcessing Tasks. It is only needed only when Post-Processing is run. The configuration file can also include a path to Data Sampling configuration ("dataSamplingPoliciesFile") or the @@ -1540,7 +1643,7 @@ should not be present in real configuration files. "provenance": "qc", "": "Provenance - qc or qc_mc depending whether it is normal data or monte carlo data", "start" : "0", "": "Activity start time in ms since epoch. One can use it as a filter in post-processing", "end" : "1234", "": "Activity end time in ms since epoch. One can use it as a filter in post-processing", - "beamType" : "PROTON-PROTON", "": "Beam type: `PROTON-PROTON`, `Pb-Pb`, `Pb-PROTON` ", + "beamType" : "pp", "": "Beam type: `pp`, `PbPb`, `pPb` ", "partitionName" : "", "": "Partition name", "fillNumber" : "123", "": "Fill Number" }, @@ -1579,21 +1682,25 @@ should not be present in real configuration files. #### Common configuration in production -In production at P2 and in staging, some common items are defined globally in the file `QC/general-config-params`: - - QCDB - - monitoring - - consul - - conditionDB - - bookkeeping - - -It is mandatory to use them by including the file: +In production at P2 and in staging, some common items are defined globally in the file `QC/general-config-params`: + +* QCDB +* monitoring +* consul +* conditionDB +* bookkeeping +* + +It is mandatory to use them by including the file: + ``` "config": { {% include "QC/general-config-params" %} }, ``` -Other configuration items can still be added in your files as such (note the comma after the inclusion) : +Other configuration items can still be added in your files as such (note the comma after the inclusion) : + ``` "config": { {% include "QC/general-config-params" %}, @@ -1809,13 +1916,14 @@ Below the external task configuration structure is described. Note that more tha ## Data Sampling monitoring To have the monitoring metrics for the Data Sampling (the Dispatcher) sent to a specific sink (like influxdb), add the option `--monitoring-backend` when launching the DPL workflow. For example: + ```shell --monitoring-backend 'influxdb-udp://influxdb-server.cern.ch:8086' ``` -This will actually send the monitoring data of *all* DPL devices to this database. +This will actually send the monitoring data of _all_ DPL devices to this database. -__Note for mac users__: if you get a crash and the message "std::exception::what: send_to: Message too long", it means that you have to adapt a `udp` parameter. You can check the datagram size via `sudo sysctl net.inet.udp.maxdgram`. If it says something less than 64 kB, then increase size: `sudo sysctl -w net.inet.udp.maxdgram=65535` +**Note for mac users**: if you get a crash and the message "std::exception::what: send_to: Message too long", it means that you have to adapt a `udp` parameter. You can check the datagram size via `sudo sysctl net.inet.udp.maxdgram`. If it says something less than 64 kB, then increase size: `sudo sysctl -w net.inet.udp.maxdgram=65535` ## Monitoring metrics @@ -1828,13 +1936,14 @@ One can also enable publishing metrics related to CPU/memory usage. 
## Common check `IncreasingEntries`

-This check make sures that the number of entries has increased in the past cycle(s). If not, it will display a pavetext
-on the plot and set the quality to bad.
+This check makes sure that the number of entries has increased in the past cycle(s). If not, it will display a PaveText
+on the plot and set the quality to bad.

If you use `SetBinContent`, the number of entries does not increase, creating a false positive. Please call `ResetStats()` after using `SetBinContent`.

-The behaviour of the check can be inverted by setting the customparameter "mustIncrease" to "false" :
+The behaviour of the check can be inverted by setting the custom parameter `mustIncrease` to `false`:
+
```
"checkParameters": {
  "mustIncrease": "false"
}
```

The number of cycles during which we tolerate the number of entries not increasing (or increasing, respectively) can be set with the custom parameter `nBadCyclesLimit`:
+
```
"extendedCheckParameters": {
  "default": {
@@ -1851,11 +1961,12 @@ The number of cycles during which we tolerate increasing (or not respectively) t
    }
  }
}
```
+
-In the example above, the quality goes to bad when there are 3 cycles in a row with no increase in the number of entries.
+In the example above, the quality goes to bad when there are 3 cycles in a row with no increase in the number of entries.

## Common check `TrendCheck`

-This check compares the last point of a trending plot with some minimum and maximum thresholds.
+This check compares the last point of a trending plot with some minimum and maximum thresholds.

The thresholds can be defined in different ways, controlled by the `trendCheckMode` parameter:

@@ -1868,6 +1979,7 @@ For example:
```

means that the last point should not be lower than `(mean - 0.1 * |mean|)` and not higher than `(mean + 0.2 * |mean|)`.
+
* `"trendCheckMode": "StdDeviation"` ==> the thresholds represent the relative variation with respect to the total error of the N points preceding the last one (which is checked)

For example:
@@ -1878,7 +1990,6 @@
```

means that the last point should not be lower than `(mean - 1 * TotError)` and not higher than `(mean + 2 * TotError)`.
The total error takes into account the standard deviation of the N points before the current one, as well as the error associated with the current point.
-
In general, the threshold values are configured separately for the Bad and Medium qualities, like this:

```
@@ -1892,6 +2003,7 @@
It is also possible to customize the threshold values for specific plots:

"thresholdsBad:PlotName": "min,max",
"thresholdsMedium:PlotName": "min,max",
+
Here `PlotName` represents the name of the plot, stripped of the QCDB path.

The position and size of the text label that shows the check result can also be customized in the configuration:

@@ -1900,8 +2012,8 @@
"qualityLabelPosition": "0.5,0.8",
"qualityLabelSize": "0.5,0.1"
```
-The values are relative to the canvas size, so in the example above the label width is 50% of the canvas width and the label height is 10% of the canvas height.
+
+The values are relative to the canvas size, so in the example above the label width is 50% of the canvas width and the label height is 10% of the canvas height.
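To make the threshold arithmetic above concrete, here is a small self-contained sketch of the `Mean` mode acceptance interval. This is only an illustration of the formulas quoted above, not the actual `TrendCheck` implementation, and all numbers are made up:

```c++
#include <cmath>
#include <cstdio>

// Illustration of the "Mean" mode: thresholds "-0.1,0.2" define an interval
// around the mean of the N points preceding the checked one.
int main()
{
  double mean = 50.0;          // mean of the N preceding points (made up)
  double thresholdMin = -0.1;  // allowed relative variation below the mean
  double thresholdMax = 0.2;   // allowed relative variation above the mean
  double low = mean + thresholdMin * std::fabs(mean);  // 45.0
  double high = mean + thresholdMax * std::fabs(mean); // 60.0
  double lastPoint = 58.0;     // the trending point being checked (made up)
  bool good = lastPoint >= low && lastPoint <= high;
  std::printf("acceptance [%g, %g], last point %g -> %s\n", low, high, lastPoint, good ? "Good" : "Bad");
  return 0;
}
```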
#### Full configuration example

@@ -1996,9 +2108,6 @@ The values are relative to the canvas size, so in the example above the label wi
In consul go to `o2/runtime/aliecs/defaults` and modify the file corresponding to the detector:
[det]_qc_shm_segment_size
-
-
-
---

[← Go back to Post-processing](PostProcessing.md) | [↑ Go to the Table of Content ↑](../README.md) | [Continue to Frequently Asked Questions →](FAQ.md)
diff --git a/doc/PostProcessing.md b/doc/PostProcessing.md
index 0f9129e34f..b137e1f0cd 100644
--- a/doc/PostProcessing.md
+++ b/doc/PostProcessing.md
@@ -600,7 +600,7 @@ In the example configuration below, the relationship between the input and outpu
      }
    },
    "PHYSICS": {
-      "PROTON-PROTON": {
+      "pp": {
        "referenceRun" : "551890"
      }
    }
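As a closing note on the `referenceRun` parameter updated in the hunk above: in user code it resolves like any other extended parameter, falling back to the `default` run and beam type sections when no specific entry exists. A hedged sketch, assuming the check reads it through `mCustomParameters` (the fallback value is illustrative):

```c++
// Illustrative only: resolve "referenceRun" for a PHYSICS pp run.
std::string refRun = mCustomParameters.atOrDefaultValue("referenceRun", "0" /*illustrative fallback*/, "PHYSICS", "pp");
int referenceRunNumber = std::stoi(refRun); // 551890 with the configuration above
```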