diff --git a/.gitignore b/.gitignore
index 75ebcbed..bbe7388b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,10 @@
 # folders
-build/
+build/*
 build*/
+!build/.gitkeep
 log/
 logs/
+generation_logs/
 # ignore all analysis in the config folder apart from the unittest and the template
 analysis_configurations/*
 !analysis_configurations/unittest
diff --git a/build/.gitkeep b/build/.gitkeep
new file mode 100644
index 00000000..378eac25
--- /dev/null
+++ b/build/.gitkeep
@@ -0,0 +1 @@
+build
diff --git a/cmake/ConfigureCrownlib.cmake b/cmake/ConfigureCrownlib.cmake
index 134eb1ce..38353155 100644
--- a/cmake/ConfigureCrownlib.cmake
+++ b/cmake/ConfigureCrownlib.cmake
@@ -1,11 +1,18 @@
 # build a shared lib from all CROWN functions
 include_directories(${CMAKE_SOURCE_DIR}/src)
 include_directories(${CMAKE_SOURCE_DIR}/include)
+
+include_directories(${CMAKE_SOURCE_DIR}/analysis_configurations/${ANALYSIS}/cpp_addons/src)
+include_directories(${CMAKE_SOURCE_DIR}/analysis_configurations/${ANALYSIS}/cpp_addons/include)
+
 file(GLOB SOURCES_1 ${CMAKE_SOURCE_DIR}/src/*.cxx)
 file(GLOB SOURCES_2 ${CMAKE_SOURCE_DIR}/src/utility/*.cxx
      ${CMAKE_SOURCE_DIR}/src/RecoilCorrections/*.cxx
      ${CMAKE_SOURCE_DIR}/src/SVFit/*.cxx)
-set(SOURCES ${SOURCES_1} ${SOURCES_2})
+
+file(GLOB SOURCES_3 ${CMAKE_SOURCE_DIR}/analysis_configurations/${ANALYSIS}/cpp_addons/src/*.cxx)
+
+set(SOURCES ${SOURCES_1} ${SOURCES_2} ${SOURCES_3})
 
 if(BUILD_CROWNLIB_ONLY)
     message(STATUS "Building only the CROWNLIB library")
diff --git a/code_generation/analysis_template.cxx b/code_generation/analysis_template.cxx
index a88f05cc..730cb62d 100644
--- a/code_generation/analysis_template.cxx
+++ b/code_generation/analysis_template.cxx
@@ -33,6 +33,8 @@
 #include "include/triggers.hxx"
 #include "include/fakefactors.hxx"
 
+// {INCLUDE_ANALYSISADDONS}
+
 // {INCLUDES}
 
 int main(int argc, char *argv[]) {
diff --git a/code_generation/analysis_template_friends.cxx b/code_generation/analysis_template_friends.cxx
index 4bb38a76..2f1e6c26 100644
--- a/code_generation/analysis_template_friends.cxx
+++ b/code_generation/analysis_template_friends.cxx
@@ -33,6 +33,9 @@
 #include "include/topreco.hxx"
 #include "include/triggers.hxx"
 #include "include/tripleselection.hxx"
+
+// {INCLUDE_ANALYSISADDONS}
+
 // {INCLUDES}
 
 int validate_rootfile(std::string file, std::string &basetree) {
@@ -60,9 +63,16 @@ int validate_rootfile(std::string file, std::string &basetree) {
         Logger::get("main")->info("CROWN input_file: {} - {} Events", file,
                                   t1->GetEntries());
         return nevents;
+    } else if (list->FindObject("quantities")) {
+        TTree *t1 = (TTree *)f1->Get("quantities");
+        nevents += t1->GetEntries();
+        basetree = "ntuple";
+        Logger::get("main")->critical("CROWN input_file: {} - {} Events", file,
+                                      t1->GetEntries());
+        return nevents;
     } else {
         Logger::get("main")->critical("File {} does not contain a tree "
-                                      "named 'Events' or 'ntuple'",
+                                      "named 'Events' or 'ntuple' or 'quantities'",
                                       file);
         return -1;
     }
@@ -160,11 +170,13 @@ int main(int argc, char *argv[]) {
     }
     // initialize df
     ROOT::RDataFrame df0(dataset);
-    ROOT::RDF::Experimental::AddProgressBar(df0); // add progress bar
     // print all available branches to the log
-    Logger::get("main")->debug("Available branches:");
-    for (auto const &branch : df0.GetColumnNames()) {
-        Logger::get("main")->debug("{}", branch);
+    if (nevents != 0) {
+        ROOT::RDF::Experimental::AddProgressBar(df0); // add progress bar
+        Logger::get("main")->debug("Available branches:");
+        for (auto const &branch : df0.GetColumnNames()) {
+            Logger::get("main")->debug("{}", branch);
+        }
     }
     Logger::get("main")->info(
         "Starting Setup of Dataframe with {} events and {} friends", nevents,
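The guard added above keeps the progress bar and the branch listing away from dataframes that hold zero events. A minimal PyROOT sketch of the same check (the zero-entry dataframe is a stand-in for an empty input file; assumes ROOT >= 6.28, where ``ROOT::RDF::Experimental::AddProgressBar`` exists):

.. code-block:: python

    import ROOT  # assumption: ROOT >= 6.28 with PyROOT available

    # A zero-entry dataframe standing in for an empty input file.
    df = ROOT.RDataFrame(0)

    nevents = df.Count().GetValue()
    if nevents != 0:  # mirrors the new `if (nevents != 0)` guard above
        ROOT.RDF.Experimental.AddProgressBar(df)
        for branch in df.GetColumnNames():
            print(branch)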
Logger::get("main")->debug("{}", branch); + } } Logger::get("main")->info( "Starting Setup of Dataframe with {} events and {} friends", nevents, diff --git a/code_generation/code_generation.py b/code_generation/code_generation.py index b50d0f5f..6593831e 100644 --- a/code_generation/code_generation.py +++ b/code_generation/code_generation.py @@ -12,6 +12,32 @@ log = logging.getLogger(__name__) +def addon_includes(analysis_name: str, file_name: str) -> str: + """ + Add the includes all .hxx files from analysis configuration folder: + analysis_configurations/{analysis_name}/cpp_addons/include + Args: + analysis_name: the name of the analysis + file_name: Name of file that is templated + Returns: + str - the include statements for the cpp addons + """ + path = f"analysis_configurations/{analysis_name}/cpp_addons/include" + if os.path.exists(path) and os.path.isdir(path) and os.listdir(path): + log.debug( + f"Adding addons from {path} to {file_name}: {' '.join(os.listdir(path))}" + ) + paths = "\n".join( + f'#include "{os.path.abspath(os.path.join(path, item))}"' + for item in os.listdir(path) + if item.endswith(".hxx") + ) + return paths + else: + log.debug(f"No addons found in {path}") + return "" + + class CodeSubset(object): """ Class used to generate code for a smaller subset. For each subset, a new object must be created. @@ -24,6 +50,7 @@ class CodeSubset(object): folder: The folder in which the code will be generated. parameters: The parameters to be used for the generation. name: The name of the code subset. + analysis_name: Name of the analysis configuration. Returns: None @@ -38,6 +65,7 @@ def __init__( folder: str, configuration_parameters: Dict[str, Any], name: str, + analysis_name: str, ): self.file_name = file_name self.template = template @@ -48,6 +76,7 @@ def __init__( self.count = 0 self.folder = folder self.commands: List[str] = [] + self.analysis_name = analysis_name self.headerfile = os.path.join( self.folder, "include", self.scope, "{}.hxx".format(self.file_name) ) @@ -120,8 +149,11 @@ def write(self): with open(self.sourcefile + ".new", "w") as f: commandstring = "".join(self.commands) f.write( - self.template.replace("// { commands }", commandstring).replace( - "{subsetname}", self.name + self.template.replace("// { commands }", commandstring) + .replace("{subsetname}", self.name) + .replace( + "// {INCLUDE_ANALYSISADDONS}", + addon_includes(self.analysis_name, self.file_name), ) ) if os.path.isfile(self.sourcefile): @@ -350,6 +382,10 @@ def write_code(self, calls: str, includes: str, run_commands: str) -> None: " // {ZERO_EVENTS_FALLBACK}", self.zero_events_fallback() ) .replace(" // {CODE_GENERATION}", calls) + .replace( + "// {INCLUDE_ANALYSISADDONS}", + addon_includes(self.analysis_name, self.executable_name + ".cxx"), + ) .replace("// {INCLUDES}", includes) .replace(" // {RUN_COMMANDS}", run_commands) .replace("// {MULTITHREADING}", threadcall) @@ -458,6 +494,7 @@ def generate_subsets(self, scope: str) -> None: ), configuration_parameters=self.configuration.config_parameters[scope], name=producer_name + "_" + scope, + analysis_name=self.analysis_name, ) subset.create() subset.write() diff --git a/code_generation/configuration.py b/code_generation/configuration.py index 39f88c14..71184cdf 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -28,6 +28,7 @@ ) from code_generation.rules import ProducerRule, RemoveProducer from code_generation.systematics import SystematicShift, SystematicShiftByQuantity +from code_generation.helpers 
diff --git a/code_generation/configuration.py b/code_generation/configuration.py
index 39f88c14..71184cdf 100644
--- a/code_generation/configuration.py
+++ b/code_generation/configuration.py
@@ -28,6 +28,7 @@
 )
 from code_generation.rules import ProducerRule, RemoveProducer
 from code_generation.systematics import SystematicShift, SystematicShiftByQuantity
+from code_generation.helpers import is_empty
 
 log = logging.getLogger(__name__)
 # type aliases
@@ -257,12 +258,12 @@ def unpack_producergroups(
         """
 
         if isinstance(producers, list):
-            # we always want to know the toplevel producergroup, so if the parent is None, we set it to the first producer.
+            # we always want to know the toplevel producergroup, so if the parent evaluates to false, we set it to the first producer.
             # If a prent is given, we set it to the parent, since this means we are in a producergroup. This is important if we
             # have nested producergroups, this way every producer is assigned to the outermost producergroup, which is important for the
             # potential removal of a single producer.
             for producer in producers:
-                if parent is None:
+                if is_empty(parent):
                     parent_producer = producer
                 else:
                     parent_producer = parent
@@ -276,7 +277,7 @@ def unpack_producergroups(
         if isinstance(producers, ProducerGroup):
             log.debug("{} Unpacking ".format(" " * depth))
             for sub_producer in producers.producers[scope]:
-                if parent is None:
+                if is_empty(parent):
                     parent_producer = producers
                 else:
                     parent_producer = parent
@@ -287,7 +288,7 @@ def unpack_producergroups(
                     depth=depth + 1,
                 )
         else:
-            if parent is None:
+            if is_empty(parent):
                 log.debug("{} {}".format(" " * depth, producers))
                 self.unpacked_producers[scope][producers] = producers
             else:
@@ -333,11 +334,11 @@ def add_shift(
         Returns:
             None
         """
-        if exclude_samples is not None and samples is not None:
+        if not is_empty(exclude_samples) and not is_empty(samples):
             raise ConfigurationError(
                 f"You cannot use samples and exclude_samples at the same time -> Shift {shift}, samples {samples}, exclude_samples {exclude_samples}"
             )
-        if samples is not None:
+        if not is_empty(samples):
             if isinstance(samples, str):
                 samples = [samples]
             for sample in samples:
@@ -345,7 +346,7 @@ def add_shift(
                     raise ConfigurationError(
                         f"Sampletype {sample} is not available -> Shift {shift}, available_sample_types {self.available_sample_types}, sample_types {samples}"
                     )
-        if exclude_samples is not None:
+        if not is_empty(exclude_samples):
             if isinstance(exclude_samples, str):
                 exclude_samples = [exclude_samples]
             for excluded_sample in exclude_samples:
@@ -360,7 +361,7 @@ def add_shift(
             raise TypeError("shift must be of type SystematicShift")
         if isinstance(samples, str):
             samples = [samples]
-        if samples is None or self.sample in samples:
+        if is_empty(samples) or self.sample in samples:
             scopes_to_shift = [
                 scope for scope in shift.get_scopes() if scope in self.scopes
             ]
@@ -513,9 +514,9 @@ def _remove_empty_scopes(self) -> None:
         # we have to use a seperate list, because we cannot modify the list while iterating over it without breaking stuff
         scopes_to_test = [scope for scope in self.scopes]
         for scope in scopes_to_test:
-            if (len(self.producers[scope]) == 0) or (
-                scope not in self.selected_scopes and scope is not self.global_scope
-            ):
+            if (
+                len(self.producers[scope]) == 0 or scope not in self.selected_scopes
+            ) and scope is not self.global_scope:
                 log.warning("Removing unrequested / empty scope {}".format(scope))
                 self.scopes.remove(scope)
                 del self.producers[scope]
@@ -631,12 +632,7 @@ def _remove_empty_configkeys(self, config) -> None:
             if isinstance(value, dict):
                 self._remove_empty_configkeys(value)
 
-            elif (
-                config[key] is None
-                or config[key] == ""
-                or config[key] == []
-                or config[key] == {}
-            ):
+            elif is_empty(config[key]):
                 log.info(
                     "Removing {} since it is an empty configuration parameter".format(
                         key
@@ -767,9 +763,11 @@ def report(self) -> None:
         total_quantities = [
             sum(
                 [
-                    len(self.config_parameters[scope][output.vec_config])
-                    if isinstance(output, QuantityGroup)
-                    else 1
+                    (
+                        len(self.config_parameters[scope][output.vec_config])
+                        if isinstance(output, QuantityGroup)
+                        else 1
+                    )
                     for output in self.outputs[scope]
                 ]
             )
diff --git a/code_generation/exceptions.py b/code_generation/exceptions.py
index 9695a522..e598299c 100644
--- a/code_generation/exceptions.py
+++ b/code_generation/exceptions.py
@@ -1,6 +1,7 @@
 from __future__ import annotations  # needed for type annotations in > python 3.7
 from typing import List, Set, Union
 from code_generation.quantity import Quantity
+from code_generation.helpers import is_empty
 
 
 class ConfigurationError(Exception):
@@ -108,7 +109,7 @@ class InvalidShiftError(ConfigurationError):
     """
 
     def __init__(self, shift: str, sample: str, scope: Union[str, None] = None):
-        if scope is None:
+        if is_empty(scope):
            self.message = "Shift {} is not setup properly or not available for sampletype {}".format(
                shift, sample
            )
diff --git a/code_generation/helpers.py b/code_generation/helpers.py
new file mode 100644
index 00000000..75a5ce04
--- /dev/null
+++ b/code_generation/helpers.py
@@ -0,0 +1,24 @@
+from __future__ import annotations  # needed for type annotations in > python 3.7
+
+# File with helper functions for the CROWN code generation
+
+
+def is_empty(value):
+    """
+    Check if a value is empty.
+
+    Args:
+        value: The value that should be checked.
+
+    Returns:
+        bool: Whether the input value is considered 'empty'
+    """
+    # List of all values that should be considered empty despite not having a length.
+    empty_values = [None]
+
+    try:
+        length = len(value)
+    except TypeError:
+        length = -1
+    bool_val = value in empty_values or length == 0
+    return bool_val
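Since ``is_empty`` replaces the plain ``is None`` checks throughout this PR, note that its semantics are wider: anything with length zero now also counts as unset, while length-less values such as ``0`` and ``False`` do not. A few checks that follow directly from the implementation above (assuming ``code_generation.helpers`` is importable):

.. code-block:: python

    from code_generation.helpers import is_empty

    # Empty: None and anything with length zero.
    assert is_empty(None)
    assert is_empty("") and is_empty([]) and is_empty({}) and is_empty(set())

    # Not empty: values without a length (0, False) and non-empty containers.
    assert not is_empty(0)
    assert not is_empty(False)
    assert not is_empty([None])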
diff --git a/code_generation/modifiers.py b/code_generation/modifiers.py
index 1057e42d..7976a606 100644
--- a/code_generation/modifiers.py
+++ b/code_generation/modifiers.py
@@ -4,6 +4,7 @@
     SampleConfigurationError,
     EraConfigurationError,
 )
+from code_generation.helpers import is_empty
 
 ConfigurationParameters = Union[str, int, float, bool]
 
@@ -71,7 +72,7 @@ def apply(self, sample: str) -> ModifierResolved:
         """
         if sample in self.samples:
             return self.modifier_dict[sample]
-        elif self.default is not None:
+        elif not is_empty(self.default):
             return self.default
         else:
             raise SampleConfigurationError(sample, self.samples)
@@ -106,7 +107,7 @@ def apply(self, era: str) -> ModifierResolved:
         """
         if era in self.eras:
             return self.modifier_dict[era]
-        elif self.default is not None:
+        elif not is_empty(self.default):
             return self.default
         else:
             raise EraConfigurationError(era, self.eras)
diff --git a/code_generation/optimizer.py b/code_generation/optimizer.py
index 6864084c..fa14e3d4 100644
--- a/code_generation/optimizer.py
+++ b/code_generation/optimizer.py
@@ -1,6 +1,7 @@
 from __future__ import annotations  # needed for type annotations in > python 3.7
 from code_generation.quantity import NanoAODQuantity, Quantity
 from code_generation.producer import Filter, BaseFilter, Producer, ProducerGroup
+from code_generation.helpers import is_empty
 from typing import Set, Tuple, Union, List
 import logging
 
@@ -79,7 +80,7 @@ def get_global_outputs(self) -> List[Quantity]:
         """
         outputs: List[Quantity] = []
         for producer in self.global_producers:
-            if producer.get_outputs("global") is not None:
+            if not is_empty(producer.get_outputs("global")):
                 outputs.extend(
                     [
                         quantity
@@ -155,7 +156,7 @@ def Optimize(self) -> None:
                 log.error("Please check, if all needed producers are activated")
                 raise Exception
             wrongProducer, wrong_inputs = self.check_ordering()
-            if wrongProducer is not None:
+            if not is_empty(wrongProducer):
                 producers_to_relocate = self.find_inputs(wrongProducer, wrong_inputs)
                 # if len(producers_to_relocate) == 0:
                 #     self.optimized = True
@@ -197,7 +198,7 @@ def check_ordering(
             outputs = self.global_outputs
         for producer_to_check in self.ordering:
             temp_outputs = producer_to_check.get_outputs(self.scope)
-            if temp_outputs is not None:
+            if not is_empty(temp_outputs):
                 outputs.extend(
                     [
                         quantity
diff --git a/code_generation/producer.py b/code_generation/producer.py
index 9372c091..38c385fb 100644
--- a/code_generation/producer.py
+++ b/code_generation/producer.py
@@ -8,6 +8,7 @@
     InvalidProducerConfigurationError,
     ConfigurationError,
 )
+from code_generation.helpers import is_empty
 
 import code_generation.quantity as q
 
@@ -66,7 +67,7 @@ def __init__(
             inputdict = input
         self.input: Dict[str, List[q.Quantity]] = inputdict
         # keep track of variable dependencies
-        if self.output is not None:
+        if not is_empty(self.output):
             for scope in self.scopes:
                 for input_quantity in self.input[scope]:
                     for output_quantity in self.output:
@@ -75,7 +76,7 @@ def __init__(
         log.debug("| Producer: {}".format(self.name))
         log.debug("| Call: {}".format(self.call))
         for scope in self.scopes:
-            if self.input[scope] is None:
+            if is_empty(self.input[scope]):
                 log.debug("| Inputs ({}): None".format(scope))
             else:
                 log.debug(
@@ -83,7 +84,7 @@ def __init__(
                         scope, [input.name for input in self.input[scope]]
                     )
                 )
-        if self.output is None:
+        if is_empty(self.output):
             log.debug("| Output: None")
         else:
             log.debug("| Outputs: {}".format([output.name for output in self.output]))
@@ -142,7 +143,7 @@ def reserve_output(self, scope: str) -> None:
 
         """
 
-        if self.output is not None:
+        if not is_empty(self.output):
            for output_quantity in self.output:
                output_quantity.reserve_scope(scope)
 
@@ -162,10 +163,10 @@ def shift(self, name: str, scope: str = "global") -> None:
                 % (name, self.name, scope)
             )
             raise Exception
-        if self.output is None:
+        if is_empty(self.output):
             log.error(
-                "Exception (%s): output None cannot be shifted ! How did you end up here ?"
-                % name
+                "Exception (%s): output %s cannot be shifted ! How did you end up here ?"
+                % (name, self.output)
             )
             raise Exception
         for entry in self.output:
@@ -206,10 +207,10 @@ def ignore_shift(self, name: str, scope: str = "global") -> None:
                 % (name, self.name, scope)
             )
             raise Exception
-        if self.output is None:
+        if is_empty(self.output):
             log.error(
-                "Exception (%s): output None cannot be shifted ! How did you end up here ?"
-                % name
+                "Exception (%s): output %s cannot be shifted ! How did you end up here ?"
+                % (name, self.output)
             )
             raise Exception
         for entry in self.output:
@@ -231,7 +232,7 @@ def writecall(
         Returns:
             str: The generated C++ call
         """
-        if self.output is None:
+        if is_empty(self.output):
             config[shift]["output"] = ""
             config[shift]["output_vec"] = ""
         else:
@@ -310,7 +311,7 @@ def writecalls(
             )
             raise Exception
         calls = [self.writecall(config, scope)]
-        if self.output is not None:
+        if not is_empty(self.output):
             list_of_shifts = self.output[0].get_shifts(
                 scope
             )  # all entries must have same shifts
@@ -358,7 +359,7 @@ def get_outputs(self, scope: str) -> List[Union[q.QuantityGroup, q.Quantity]]:
                 )
             )
             raise Exception
-        if self.output is None:
+        if is_empty(self.output):
             return []
         else:
             return self.output
@@ -412,7 +413,7 @@ def writecalls(
         basecall = self.call
        calls: List[str] = []
        shifts = ["nominal"]
-        if self.output is not None:
+        if self.output:
             shifts.extend(self.output[0].get_shifts(scope))
         for shift in shifts:
             # check that all config lists (and output if applicable) have same length
@@ -427,7 +428,7 @@ def writecalls(
                         % (self.vec_configs[0], key)
                     )
                     raise Exception
-            if self.output is not None and len(self.output) != n_versions:
+            if not is_empty(self.output) and len(self.output) != n_versions:
                 log.error(
                     "{} expects either no output or same amount as entries in config lists !".format(
                         self
@@ -440,7 +441,7 @@ def writecalls(
                 helper_dict: Dict[Any, Any] = {}
                 for key in self.vec_configs:
                     helper_dict[key] = config[shift][key][i]
-                if self.output is not None:
+                if not is_empty(self.output):
                     helper_dict["output"] = (
                         '"' + self.output[i].get_leaf(shift, scope) + '"'
                     )
@@ -482,7 +483,7 @@ def __init__(
         # set the vec config key of the quantity group
         quantity_group.set_vec_config(vec_config)
         super().__init__(name, call, input, [quantity_group], scope)
-        if self.output is None:
+        if is_empty(self.output):
             raise InvalidProducerConfigurationError(self.name)
         # add the vec config to the parameters of the producer
         for scope in self.scopes:
@@ -496,7 +497,7 @@ def __repr__(self) -> str:
 
     @property
     def output_group(self) -> q.QuantityGroup:
-        if self.output is None:
+        if is_empty(self.output):
             raise Exception("ExtendedVectorProducer has no output!")
         if not isinstance(self.output[0], q.QuantityGroup):
             log.error("ExtendedVectorProducer expects a QuantityGroup as output!")
@@ -520,7 +521,7 @@ def writecalls(
         """
         n_versions = len(config["nominal"][self.vec_config])
         log.debug("Number of extended producers to be created {}".format(n_versions))
-        if self.output is None:
+        if is_empty(self.output):
             raise InvalidProducerConfigurationError(self.name)
         if not isinstance(self.output[0], q.QuantityGroup):
             log.error("ExtendedVectorProducer expects a QuantityGroup as output!")
@@ -666,7 +667,7 @@ def __init__(
         else:
             self.input = dict(input)
         # If call is provided, this is supposed to consume output of subproducers. Creating these internal products below:
-        if self.call is not None:
+        if not is_empty(self.call):
             log.debug("Constructing {}".format(self.name))
             log.debug("  --> Scopes: {}".format(self.scopes))
             for scope in self.scopes:
@@ -709,7 +710,7 @@ def __init__(
         log.debug("| ProducerGroup: {}".format(self.name))
         log.debug("| Call: {}".format(self.call))
         for scope in self.scopes:
-            if self.input[scope] is None:
+            if is_empty(self.input[scope]):
                 log.debug("| Inputs ({}): None".format(scope))
             else:
                 log.debug(
@@ -717,7 +718,7 @@ def __init__(
                         scope, [input.name for input in self.input[scope]]
                     )
                 )
-        if self.output is None:
+        if is_empty(self.output):
             log.debug("| Output: None")
         else:
             log.debug("| Outputs: {}".format([output.name for output in self.output]))
@@ -831,9 +832,9 @@ def writecalls(
         for producer in self.producers[scope]:
             # duplicate outputs of vector subproducers if they were generated automatically
             if (
-                self.call is not None
+                not is_empty(self.call)
                 and isinstance(producer, VectorProducer)
-                and producer.output is not None
+                and not is_empty(producer.output)
             ):
                 for i in range(len(config["nominal"][producer.vec_configs[0]]) - 1):
                     producer.output.append(
@@ -932,7 +933,7 @@ def CollectProducersOutput(
 ) -> Set[q.Quantity]:
     output: Set[q.Quantity] = set()
     for producer in producers:
-        if producer.output is not None:
+        if not is_empty(producer.output):
             output |= set(producer.output)
         if isinstance(producer, ProducerGroup):
             try:
@@ -951,7 +952,7 @@ def CollectProducerOutput(
     producer: Union[ProducerGroup, Producer], scope: str
 ) -> Set[q.Quantity]:
     output: Set[q.Quantity] = set()
-    if producer.output is not None:
+    if producer.output:
         output |= set(producer.output)
     if isinstance(producer, ProducerGroup):
         try:
diff --git a/code_generation/subset_template.cxx b/code_generation/subset_template.cxx
index 2ad37d10..e59cec98 100644
--- a/code_generation/subset_template.cxx
+++ b/code_generation/subset_template.cxx
@@ -32,6 +32,9 @@
 #include "include/topreco.hxx"
 #include "include/triggers.hxx"
 #include "include/fakefactors.hxx"
+
+// {INCLUDE_ANALYSISADDONS}
+
 ROOT::RDF::RNode {subsetname} (ROOT::RDF::RNode df0, OnnxSessionManager &onnxSessionManager, correctionManager::CorrectionManager &correctionManager) {
     // { commands }
diff --git a/code_generation/systematics.py b/code_generation/systematics.py
index 86448999..a0be7241 100644
--- a/code_generation/systematics.py
+++ b/code_generation/systematics.py
@@ -11,6 +11,7 @@
     TProducerStore,
 )
 from code_generation.quantity import NanoAODQuantity
+from code_generation.helpers import is_empty
 
 log = logging.getLogger(__name__)
 
@@ -181,7 +182,7 @@ def determine_scopes(self, scopes: Union[List[str], str, None]) -> Set[str]:
         Returns:
             set: Set of scopes that are affected by the systematic shift.
         """
-        if scopes is None:
+        if is_empty(scopes):
             scope_set: Set[str] = (
                 set(self.shift_config.keys())
                 | set(self.input_producers.keys())
@@ -302,7 +303,7 @@ def add_ignore_producer(
         Returns:
             None
         """
-        if scopes is None:
+        if is_empty(scopes):
             scopes = self.scopes
         if isinstance(scopes, str):
             scopes = set(scopes)
diff --git a/docs/sphinx_source/build_root.rst b/docs/sphinx_source/build_root.rst
index 3fca6031..66e0ee1a 100644
--- a/docs/sphinx_source/build_root.rst
+++ b/docs/sphinx_source/build_root.rst
@@ -1,11 +1,11 @@
 How to build ROOT with CVMFS on CentOS 7
 =========================================
 
-For profiling with debug symbols or just to test the newest ROOT features, you may want to use your own ROOT version. Here are the commands, which allow you to build ROOT with a given build type, ROOT release tag and C++ 17.
+For profiling with debug symbols or just to test the latest ROOT features, you may want to use your own ROOT version. Here are the commands that allow you to build ROOT with a given build type, ROOT release tag and C++17.
 
-Most likely, you want to use :code:`RelWithDebInfo` as build type so you get debug symbols but also a realistic performance due to compiler optimizations.
+Most likely, you want to use :code:`RelWithDebInfo` as build type. This provides debug symbols while maintaining a realistic performance due to compiler optimizations.
 
-To look up the release tags, go to https://github.com/root-project/root and see the tags (not the branches!).
+To look up the release tags, visit https://github.com/root-project/root and check the tags (not the branches!).
 
 .. code-block:: console
diff --git a/docs/sphinx_source/changelog.rst b/docs/sphinx_source/changelog.rst
index e50f7aef..66456c68 100644
--- a/docs/sphinx_source/changelog.rst
+++ b/docs/sphinx_source/changelog.rst
@@ -4,20 +4,20 @@ Changelog
 
 May 2024 - Version 0.4.0
 
 * Switch to ROOT 6.30, supporting now RHEL8 and RHEL9.
-* Introduced support for ML inference via `OnnxRuntime`. A generic producer is avialable at this link: https://github.com/KIT-CMS/CROWN/blob/main/include/ml.hxx
-* Introduced a CorrectionManager, that is responsible for loading correction files and sharing them among the different producers. This allows to load the corrections only once and share them among the different producers, resulting in a signifiant speedup of the initial loading time. In the course of this implementation, Many functions now have a deprecated version, that does not use the `CorrectionManager`. The old functions will be removed in the next release. A more detailed description can be found in the :ref:`The Correction Manager` page.
+* Introduced support for ML inference via `OnnxRuntime`. A generic producer is available at this link: https://github.com/KIT-CMS/CROWN/blob/main/include/ml.hxx
+* Introduced a CorrectionManager that is responsible for loading correction files and sharing them across producers. This allows the corrections to be loaded only once and shared among the different producers, resulting in a significant speedup of the initial loading time. As part of this implementation, many functions now have a deprecated version that does not use the `CorrectionManager`. The old functions will be removed in the next major release. A more detailed description can be found on :ref:`The Correction Manager` page.
 
 Sept. 2023 - Version 0.3.0
 
 * Switched to ROOT 6.28 via LCG 104, resulting in about 20% faster processing times.
-* Added support for the generation of friend trees with additional friends as input. For more details, check :ref:`FriendTree Generation`.
-* Added option to compile the CROWNlib only, allowing to reuse the same libary for multiple CROWN executables.
+* Added support for generating friend trees with additional input friends. For more details, check :ref:`FriendTree Generation`.
+* Added option to compile the CROWNlib only, allowing to reuse the same library for multiple CROWN executables.
 
 Feb. 2023
 
 * Added support for the generation of friend trees. For more details, check :ref:`FriendTree Generation`.
-* Added documentation on ntuple and friend production via KingMaker. For more details, check :ref:`KingMaker`.
+* Added documentation on Ntuple and friend production via KingMaker. For more details, check :ref:`Workflow Management`.
 
 Jan. 2023
 
-* Added Quantities <-> Shifts mapping to the output files to allow an easier Postprocessing. For more details, check :ref:`Quantity mapping`.
+* Added Quantities <-> Shifts mapping to the output files to allow easier postprocessing. For more details, check :ref:`Quantity mapping`.
diff --git a/docs/sphinx_source/contrib.rst b/docs/sphinx_source/contrib.rst
index ed6c242f..7f0f0d1d 100644
--- a/docs/sphinx_source/contrib.rst
+++ b/docs/sphinx_source/contrib.rst
@@ -1,18 +1,18 @@
 Writing a new producer
 =======================
 
-Writing a new producer requires two main parts, adding the :ref:`C++ function` and the required :ref:`python part`.
+Writing a new producer involves two main parts, adding the :ref:`C++ function` and the required :ref:`python component`.
 
-If the C++ function is written generally enough, it can be used in multiple producers and multiple purposes in the end.
+If the C++ function is written generically enough, it can be used in multiple producers and for multiple purposes in the end.
 For example, the producer generating the pt_1 quantity can be used regardless of what particle is being considered.
 
-In the following, an introduction on how to add a new producer is given. As an example, we will add a new producer, which can be used to calculate the Lorentz vectors of particles, in our case electrons. For simplicity, we only want to calculate one single Lorentz vector for a given index. First, we will do the C++ implementation of the function followed by the Python definition. Keep in mind, that those two parts are connected.
+In the following, an introduction to adding a new producer is given. As an example, we will add a new producer, which can be used to calculate the Lorentz vectors of particles, in our case electrons. For simplicity, we only want to calculate one single Lorentz vector for a given index. First, we will do the C++ implementation of the function followed by the Python definition. Keep in mind that those two parts are connected.
 
 Writing a new C++ function
 ============================
 
-For a new C++ function, a definition in the header file, and the implementation in the source file are required. As good practice, we will add the function to a namespace called ``lorentzvector``, and call the function ``build``.
-The return type of any function in CROWN should always be ``ROOT::RDF::RNode`` and the first argument of the function should always be the RDataframe, where we want to Define our new quantity. This means the basic definition of the function should look like this:
+For a new C++ function, both a definition in the header file and the implementation in the source file are required. As good practice, we will add the function to a namespace called ``lorentzvector``, and call the function ``build``.
+The return type of any function in CROWN should always be ``ROOT::RDF::RNode``, and the first argument of the function should always be the RDataframe, where we want to Define our new quantity. This means the basic definition of the function should look like this:
 
 .. code-block:: cpp
diff --git a/docs/sphinx_source/correction_manager.rst b/docs/sphinx_source/correction_manager.rst
index d70034df..2a31b491 100644
--- a/docs/sphinx_source/correction_manager.rst
+++ b/docs/sphinx_source/correction_manager.rst
@@ -13,7 +13,7 @@ For now, the CorrectionManager supports the following correction files:
 - correctionlib files of type ``correction::CompoundCorrection`` using the :cpp:func:`correctionManager::CorrectionManager::loadCompoundCorrection` function
 - json files using the :cpp:func:`correctionManager::CorrectionManager::loadjson` function
 
-A Documentation of all Correction Manager functions can be found in :ref:`Namespace:Correctionmanager`
+Documentation of all Correction Manager functions can be found in :ref:`Namespace: Correctionmanager`
 
 Required Changes
 ******************
diff --git a/docs/sphinx_source/cpp_addons.rst b/docs/sphinx_source/cpp_addons.rst
new file mode 100644
index 00000000..311ca5c6
--- /dev/null
+++ b/docs/sphinx_source/cpp_addons.rst
@@ -0,0 +1,61 @@
+C++ Add-ons
+===========
+
+In some cases, the core codebase of CROWN (CROWNLIB) may not include all the features required for an analysis. To address this, users can add custom C++ code within their analysis configurations. These add-ons are automatically integrated into the C++ code during the code generation process.
+
+Location and directory structure
+--------------------------------
+
+The expected structure within the analysis configuration is as follows:
+
+.. code-block:: console
+
+    analysis_configurations
+    └── <analysis_name>
+        └── cpp_addons
+            ├── include
+            │   ├── <file_1>.hxx
+            │   ├── <file_2>.hxx
+            │   └── ...
+            └── src
+                ├── <file_1>.cxx
+                ├── <file_2>.cxx
+                └── ...
+
+
+If an analysis does not require any additional C++ code and can rely solely on CROWNLIB, the ``cpp_addons`` folder can be omitted entirely from the analysis configuration.
+
+``.cxx`` and ``.hxx`` File structure
+------------------------------------
+
+This functionality considers files in ``analysis_configurations/<analysis_name>/cpp_addons/src`` and ``analysis_configurations/<analysis_name>/cpp_addons/include`` during compilation. The following points should be followed when adding and using custom C++ code:
+
+* Use unique guards for each ``.cxx`` file you introduce, especially concerning CROWNLIB. For the corresponding ``.hxx`` file(s), the same unique guard(s) should be applied.
+* Use a unique function name or function signature if the custom function needs to reside in a namespace that already exists in CROWNLIB.
+* Use ``../../../../include/<file>.hxx`` if you explicitly want to import functionality from CROWNLIB. Importing CROWNLIB files using different relative paths can lead to unexpected behavior.
+
+An example ``.cxx`` file could have the following structure:
+
+
+.. code-block:: cpp
+
+    #ifndef UNIQUE_GUARD_NAME_H // unique w.r.t. CROWNLIB and other files in cpp_addons
+    #define UNIQUE_GUARD_NAME_H
+
+    // Include CROWNLIB functionalities
+    #include "../../../../include/utility/CorrectionManager.hxx"
+    #include "../../../../include/utility/Logger.hxx"
+
+    // Feature.hxx file defined in cpp_addons
+    #include "../Feature.hxx"
+
+    // Globally present, i.e. from the ROOT framework
+    #include "ROOT/RDataFrame.hxx"
+    #include "correction.h"
+
+    /* Your code here */
+
+    // End of the file
+    #endif // UNIQUE_GUARD_NAME_H
+
+
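On the configuration side, an addon function is wired up like any CROWNLIB function. A hypothetical sketch (the producer, quantity and namespace names are illustrative only; the ``Producer`` signature follows ``code_generation/producer.py``):

.. code-block:: python

    from code_generation.producer import Producer
    import code_generation.quantity as q

    # Assumed addon: myaddon::build_feature defined in cpp_addons/src, declared
    # in cpp_addons/include and pulled in via // {INCLUDE_ANALYSISADDONS}.
    MyFeature = Producer(
        name="MyFeature",
        call="myaddon::build_feature({df}, {output}, {input})",
        input=[q.Quantity("pt_1"), q.Quantity("eta_1")],
        output=[q.Quantity("my_feature")],
        scopes=["mt"],
    )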
diff --git a/docs/sphinx_source/friend_trees.rst b/docs/sphinx_source/friend_trees.rst
index 9e38c3f6..948788d0 100644
--- a/docs/sphinx_source/friend_trees.rst
+++ b/docs/sphinx_source/friend_trees.rst
@@ -1,21 +1,21 @@
 FriendTree Generation
 ===========================
 
-CROWN can be used, to generate FriendTrees based on a CROWN ntuple. The concept of FriendTrees is explained here: https://root.cern/manual/trees/#widening-a-ttree-through-friends. They allow to extend an existing ntuple with new quantities. Common use cases are new high-level variables like neural network outputs or additional correction factors.
+CROWN can be used to generate FriendTrees based on a CROWN Ntuple. The concept of FriendTrees is explained here: https://root.cern/manual/trees/#widening-a-ttree-through-friends. They allow extending an existing Ntuple with new quantities. Common use cases are new high-level variables like neural network outputs or additional correction factors.
 
 .. image:: ../images/root_friends.png
     :width: 900
     :align: center
    :alt: Sketch of how Friend trees work
 
-The the example depicted above, two additional friends to the main NTuple are created. During analysis, the quantities stored in the friend trees can be added by using the ``AddFriend`` method. The quantities are then available in the TTree as if they were part of the original NTuple.
+In the example depicted above, two additional friends to the main NTuple are created. During analysis, the quantities stored in the friend trees can be added by using the ``AddFriend`` method. The quantities are then available in the TTree as if they were part of the original NTuple.
 
 A FriendTree is generated using a FriendTreeConfiguration. Such a configuration has some major differences, compared to a regular configuration:
 
-1. The input file is a CROWN ntuple, not a ROOT file.
+1. The input file is a CROWN Ntuple, not a ROOT file.
 2. Only one scope per user is allowed.
 3. No global scope is required
-4. The available inputs have to be specified. The available inputs can be provided by using a CROWN ntuple as input, or a JSON file. The ntuple can be used for debugging proposes, when running a production, it is recommended to use a JSON file. The basic structure of this quantities map is listed below. Such a JSON can then be used for multiple eras, sample types and scopes.
+4. The available inputs must be specified. The available inputs can be provided by using a CROWN Ntuple as input, or a JSON file. The Ntuple can be used for debugging purposes; when running a production, it is recommended to use a JSON file. The basic structure of this quantities map is listed below. Such a JSON can then be used for multiple eras, sample types and scopes.
 
 .. code-block:: JSON
 
@@ -43,28 +43,28 @@ A FriendTree is generated using a FriendTreeConfiguration. Such a configuration
 
 
 
-The recommended way of producing FriendTrees is to use a workflow tool, that manages the submission of jobs, generation of tarballs and organizing the output. One possible workflow tool choice is KingMaker (https://github.com/KIT-CMS/KingMaker). A more detailed description of the KingMaker workflow can be found in :ref:`KingMaker`.
+The recommended way of producing FriendTrees is to use a workflow tool that manages the submission of jobs, the generation of tarballs and the organization of the output. One possible workflow tool choice is KingMaker (https://github.com/KIT-CMS/KingMaker). A more detailed description of the KingMaker workflow can be found in :ref:`Workflow Management`.
 
 Writing a FriendTreeConfiguration
 ---------------------------------
 
-The basic structure of a FriendTreeConfiguration is identical to a regular configuration. When creating a new FriendTree executable, an additional argument has to be provided:
+The basic structure of a FriendTreeConfiguration is identical to a regular configuration. When creating a new FriendTree executable, you must provide an additional argument:
 
-* ``DQUANTITIESMAP`` - The path to the quantities map JSON file or the crown ntuple root file.
+* ``DQUANTITIESMAP`` - The path to the quantities map JSON file or the CROWN Ntuple ROOT file.
 
-All other parameters are identical to the regular configuration. Setting up producers, outputs and new systematic shifts works the same way as before. The configuration has to be of type ``FriendTreeConfiguration``. During the configuration, the available inputs are checked for consistency, to catch any possible misconfiguration early. In addition, as for CROWN ntuples, only required shifts are executed.
+All other parameters are identical to the regular configuration. Setting up producers, outputs and new systematic shifts works the same way as before. The configuration must be of type ``FriendTreeConfiguration``. During the configuration, the available inputs are checked for consistency, to catch any possible misconfiguration early. In addition, as for CROWN Ntuples, only required shifts are executed.
 
 FriendTrees with multiple input friend trees
 --------------------------------------------
 
-Starting from version 0.3 of CROWN, it is also possible to use multiple input friend trees. A typical use case for this feature is the evaluation of Classifiers, and storing the output of the classifier in the friend tree. This way, the classifier can utilize quantities from both the main ntuple and from additional friend trees. The interface for configuring such a FriendTree executable is similar to the regular FriendTree configuration, with the following differences:
+Starting from version 0.3 of CROWN, it is also possible to use multiple input friend trees. A typical use case for this feature is the evaluation of classifiers and storing their output in the friend tree. This way, the classifier can utilize quantities from both the main Ntuple and additional friend trees. The interface for configuring such a FriendTree executable is similar to the regular FriendTree configuration, with the following differences:
 
-* The information for all input files has to be provided. This means that the ``DQUANTITIESMAP`` has to be extended. It is possible to
-    1. provide a single JSON file, that contains the input information for all input files (the crown ntuple + all additional files)
+* The information for all input files must be provided. This means that the ``DQUANTITIESMAP`` must be extended. It is possible to
+    1. provide a single JSON file that contains the input information for all files (the CROWN Ntuple + all additional files)
     2. provide a list of JSON files, each containing the input information for one input file
-    3. provide a list of root files (crown ntuple + all additional files)
+    3. provide a list of ROOT files (the CROWN Ntuple + all additional files)
 
-During the execution, all input files have to be provided, resulting in a command line like this:
+During execution, all input files must be provided, resulting in a command line like this:
 
 .. code-block:: bash
@@ -74,4 +74,4 @@ During the execution, all input files have to be provided, resulting in a command line like this:
 Before execution, the input files are checked for consistency. This means that the following checks are performed:
 
 * All input files have to contain the same number of entries
-* All input files have to be readable (no missing files)
\ No newline at end of file
+* All input files must be readable and present (no missing files)
diff --git a/docs/sphinx_source/index.rst b/docs/sphinx_source/index.rst
index 0db17070..561edbd9 100644
--- a/docs/sphinx_source/index.rst
+++ b/docs/sphinx_source/index.rst
@@ -50,6 +50,7 @@ Documentation
    contrib.rst
    py_configuration.rst
    correction_manager.rst
+   cpp_addons.rst
 
 .. toctree::
    :maxdepth: 2
diff --git a/docs/sphinx_source/introduction.rst b/docs/sphinx_source/introduction.rst
index 809a764c..caf4c976 100644
--- a/docs/sphinx_source/introduction.rst
+++ b/docs/sphinx_source/introduction.rst
@@ -1,7 +1,7 @@
 Introduction
 =============
 
-The **C** ++-based **RO** OT **W** orkflow for **N** -tuples (CROWN) is a fast new way to convert NanoAOD samples into flat :code:`TTrees` to be used in further analysis. The main focus of the framework is to provide a fast and clean way of selecting events and calculating quantities and weights. The framework has minimal dependencies and only uses ROOT and it's Dataframe as a backend.
+The **C** ++-based **RO** OT **W** orkflow for **N** -tuples (CROWN) is a fast new way to convert NanoAOD samples into flat :code:`TTrees` to be used in further analysis. The main focus of the framework is to provide a fast and clean way of selecting events and calculating quantities and weights. The framework has minimal dependencies and only uses ROOT and its Dataframe as a backend.
 
 
 Design Idea
diff --git a/docs/sphinx_source/kingmaker.rst b/docs/sphinx_source/kingmaker.rst
index 885b271a..344ea649 100644
--- a/docs/sphinx_source/kingmaker.rst
+++ b/docs/sphinx_source/kingmaker.rst
@@ -13,7 +13,7 @@ Setup
 
 .. code-block:: bash
 
-    git clone --recursive git@github.com:KIT-CMS/KingMaker.git
+    git clone git@github.com:KIT-CMS/KingMaker.git
     cd KingMaker
     source setup.sh KingMaker
 
@@ -30,6 +30,34 @@ Samples can be managed manually or using the ``sample_manager``, which can be st
 
 This starts a CLI, which can be used to add more samples to the database, update samples or quickly generate a sample list for producing ntuples.
 
+Information on CMS datasets
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To search for CMS datasets, we first need some information on what these dataset names look like. We refer to the CMS dataset names as ``DAS nicks``, since we will search for them using the
+Data Aggregation Service (DAS) of CMS. The datasets can be searched for at https://cmsweb.cern.ch/das/, or alternatively via ``dasgoclient`` (https://github.com/dmwm/dasgoclient) in a CMSSW
+command-line environment. Our ``sample_manager`` integrates the corresponding software components and puts them into a questionnaire logic.
+
+The naming convention of CMS datasets follows https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookLocatingDataSamples:
+
+.. code-block:: bash
+
+    # Convention:
+    /PrimaryDataset/ProcessedDataset/DataTier
+    # Examples:
+    ## MC Simulation:
+    /DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM
+    ## Data:
+    /Tau/Run2016B-ver2_HIPM_UL2016_MiniAODv2-v1/MINIAOD
+    ## User-produced Dataset:
+    /Tau/aakhmets-data_2016ULpreVFP_tau_Tau_Run2016B-ver2_HIPM_1736940678-00000000000000000000000000000000/USER
+
+- ``PrimaryDataset`` usually represents the superset of data recorded by the experiment in case of Data, and the simulated process in case of MC simulation. In general, for User-produced Datasets this can be anything; however, users are responsible for choosing meaningful names.
+- ``ProcessedDataset`` provides details on the actual production or processing campaigns of the dataset, including conditions (the so-called ``GlobalTag``), version, etc. Again, user Datasets can contain anything here, but users are encouraged to choose something meaningful.
+- ``DataTier`` represents the data format of the dataset. A list of some more popular formats is given here: https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookDataFormats#EvenT. We are mostly interested in NANOAOD(SIM) and MINIAOD(SIM), tailored for analyses. The ``USER`` datatier represents anything that a user can produce.
+
+All centrally produced datasets from CMS are stored under the ``prod/global`` DAS instance, while there is a dedicated DAS instance for user datasets, ``prod/phys03``.
+See https://cmsweb.cern.ch/das/services for more details.
+
 Addition of new Samples
 ~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -43,6 +71,7 @@ When adding a new sample, follow the instructions of the ``sample_manager``. In
     Database loaded
     The database contains 581 samples, split over 4 era(s) and 22 sampletype(s)
     ? What do you want to do? Add a new sample
+    ? Select the DAS instance for the search prod/global
     ? Enter a DAS nick to add /DYJetsToLL_M-50_*/RunIISummer20UL16NanoAOD*v9-106X*/NANOAODSIM
     Multiple results found
     ? Which dataset do you want to add ? (Use arrow keys to move, <space> to select, <a> to toggle, <i> to invert)
@@ -293,4 +322,4 @@ The ``problematic_eras`` option is used to define eras, where only one file per
 
 .. warning:: For friend trees, multiprocessing is not possible, since the resulting friend tree must have the same order as the input tree. Therefore, the ``htcondor_request_cpus`` option has to be set to 1, which will disable multiprocessing.
 
-For a more complete description of the different options, please refer to the overcomplete configuration in the law repository (https://github.com/riga/law/blob/master/law.cfg.example).
\ No newline at end of file
+For a more complete description of the different options, please refer to the overcomplete configuration in the law repository (https://github.com/riga/law/blob/master/law.cfg.example).
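The same DAS lookup the ``sample_manager`` performs can also be reproduced directly. A sketch using ``dasgoclient`` from Python (assumes ``dasgoclient`` is in ``PATH`` and a valid CMS grid environment; the query pattern is the example nick from above):

.. code-block:: python

    import subprocess

    # Query DAS for NanoAOD datasets matching a pattern.
    query = "dataset dataset=/DYJetsToLL_M-50_*/RunIISummer20UL16NanoAOD*v9-106X*/NANOAODSIM"
    result = subprocess.run(
        ["dasgoclient", f"--query={query}"],
        capture_output=True,
        text=True,
        check=True,
    )
    print(result.stdout)  # one matching DAS nick per line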
diff --git a/docs/sphinx_source/postprocessing.rst b/docs/sphinx_source/postprocessing.rst
index 28e0569a..53b9887e 100644
--- a/docs/sphinx_source/postprocessing.rst
+++ b/docs/sphinx_source/postprocessing.rst
@@ -1,13 +1,13 @@
 Ntuples in Postprocessing
 ===========================
 
-The CROWN Ntuples can be used by any Postprocessing framework. Some things have to be kept in mind, in order to ensure an easy processing.
-Most important difference is, that only quantities affected by a shift are recalculated. This means the prostprocessing framework must be able to use a mixture of the original and the shifted quantities, when applying shifts. In order to make this step a bit easier, the information, which quantities are affected by a shift, is stored in the Ntuple.
+The CROWN Ntuples can be used by any postprocessing framework. There are a few things to keep in mind to ensure easy processing.
+The most important difference is that only quantities affected by a shift are recalculated. This means the postprocessing framework must be able to use a mixture of the original and shifted quantities when applying shifts. To make this step a bit easier, the information about which quantities are affected by a shift is stored in the Ntuple.
 
 Quantity mapping
 *****************
 
-To read the mapping from a NTuple, the python function listed below may be used. Two types of mapping are available, depending on the actual usecase. In the first, the mapping is sorted by shift; in the second the mapping is sorted by quantity.
+To read the mapping from an Ntuple, the Python function listed below may be used. Two types of mapping are available, depending on the actual use case. In the first, the mapping is sorted by shift; in the second, the mapping is sorted by quantity.
 
 .. code-block:: python