From 727bada1a277c50328be7b80970e1b1c5a51b532 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 11:39:45 +0000 Subject: [PATCH 1/9] First commit re-implementation CMS_WCHARM_7TEV --- .../commondata/CMS_WCHARM_7TEV/metadata.yaml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/metadata.yaml index a741e0159d..9286466e5a 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/metadata.yaml @@ -1,17 +1,21 @@ setname: CMS_WCHARM_7TEV -version: 1 -version_comment: Port of old commondata + nnpdf_metadata: nnpdf31_process: DY CC experiment: CMS + arXiv: url: https://arxiv.org/abs/1310.1138 journal: JHEP 02 (2014) 013 iNSPIRE: - url: '' + url: https://inspirehep.net/literature/1256938 hepdata: - url: '' - version: -1 + url: https://www.hepdata.net/record/ins1256938 + version: 1 + +version: 2 +version_comment: Implementation in the new format + implemented_observables: - observable_name: WPWM-RATIO observable: @@ -62,6 +66,7 @@ implemented_observables: - uncertainties_WPWM-RATIO_sys_10.yaml data_central: data_legacy_WPWM-RATIO.yaml ported_from: CMSWCHARMRAT + - observable_name: WPWM-TOT observable: description: Jet Rapidity Distribution From e4cb7bd32efa66f98f7e3886864e8ffb3584aa70 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 14:13:12 +0000 Subject: [PATCH 2/9] Add filter files --- .../commondata/CMS_WCHARM_7TEV/filter.py | 18 ++ .../CMS_WCHARM_7TEV/filter_utils.py | 280 ++++++++++++++++++ .../commondata/CMS_WCHARM_7TEV/metadata.yaml | 74 ++--- 3 files changed, 327 insertions(+), 45 deletions(-) create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py new file mode 100644 index 0000000000..97444511cb --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py @@ -0,0 +1,18 @@ +''' +Filter script for CMS_WCHARM_7TEV +''' + +import logging + +from filter_utils import Extractor +import numpy as np + +logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s') + + +if __name__ == "__main__": + CMS_WCHARM_TOT = Extractor("./metadata.yaml", "WPWM-TOT", mult_factor=1000) + _, _, _ = CMS_WCHARM_TOT.generate_data(variant='default', save_to_yaml=True) + + CMS_WCHARM_RATIO = Extractor("./metadata.yaml", "WPWM-RATIO", mult_factor=1000) + _, _, _ = CMS_WCHARM_RATIO.generate_data(variant='default', save_to_yaml=True) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py new file mode 100644 index 0000000000..78096bafba --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py @@ -0,0 +1,280 @@ +import logging + +import numpy as np +import yaml + +from nnpdf_data.filter_utils.utils import prettify_float + +yaml.add_representer(float, prettify_float) + +SQRTS = 8000 +MW2 = 80.385**2 +CMSLUMI13 = 2.5 + +# List of systematic uncertainties that shuold +# be considered uncorrelated +UNCORR_SYS_UNC = ['UnfoldMCstat', 'UnfoldOtherGen', 'UnfoldReweight'] +ART_LABEL = 'art_corr' +STAT_LABEL = 'stat_uncorr' +TABLE_TOKEN = 'Table' + + +class Extractor: + """ + Extracts kinematics, central data, and uncertainties for a given dataset + + Parameters + ---------- + metadata_file: str + Path to the metadata file + observable: str + The name of the observable for which the data is extracted. The name must + be listed in the metadata file. + """ + + def __init__(self, metadata_file, observable, mult_factor=1): + + # Open metadata and select process + with open(metadata_file, 'r') as file: + metadata = yaml.safe_load(file) + self.metadata = next( + ( + md + for md in metadata["implemented_observables"] + if md['observable_name'] == observable + ), + None, + ) + if self.metadata is None: + raise Exception(f"{observable} is not listed in the metadata file.") + + # Initialise dict of tables + self.tables = {} + self.observable = observable + self.mult_factor = mult_factor + self.kin_labels = self.metadata['kinematic_coverage'] + self.ndata = self.metadata['ndata'] + + def __retrieve_table(self, table_id): + """ + Implementation of the lazy loading for the tables. If the table + is loaded for the first time, it is stored into an internal + container of the class, so that it will not be loaded each time. + + When called, this functions checks if the table has already been stored + and, if that is the case, returns the stored table. + + Parameters + ---------- + table_id: int + Index that specifies the table + + Return + ------ + The table specified by `table_id`. If not previously loaded, it is also + stored into the internal container for future use. + """ + try: + table = self.tables[str(table_id)] + except KeyError: + logging.debug( + f'Table {table_id} has not already been used or stored.' f' Storing the table...' + ) + with open(f'./rawdata/{TABLE_TOKEN}{table_id}.yaml', 'r') as tab: + tab_dict = yaml.safe_load(tab) + self.tables[str(table_id)] = tab_dict + table = tab_dict + return table + + def __extract_kinematics(self, table: dict): + """ + Extracts the kinematic variables of the single differential + distribution given a table. + + For each bin, it computes the max, min, and mid value of the transverse + momentum of the boson. + + Parameters + ---------- + table: dict + Dictionary containing the bins in the transverse momentum + tab_number: int + Index to select the range of the second kinematic variable + + Return + ------ + List of bins containing min, max, and mid values for each of the kinematic + observables listed in the `kinematic_coverage` of the metadata file. + + """ + data = table['independent_variables'][0] + label = self.kin_labels + kinematics = [] + for bin in data['values']: + abs_eta_min = bin['low'] + abs_eta_max = bin['high'] + kin_bin = { + label[0]: { + 'min': abs_eta_min, + 'mid': (abs_eta_max + abs_eta_min) / 2, + 'max': abs_eta_max, + }, + label[1]: {'min': None, 'mid': MW2, 'max': None}, + } + kinematics.append(kin_bin) + return kinematics + + def generate_kinematics(self): + """ + Function that generates the kinematics by looping over all the + tables specified in the metadata file. The resulting kinematics + is then saved to a yaml file. It relies on the method + `__extract_kinematics`. + """ + + logging.info(f"Generating kinematics for ATLAS_{self.observable}...") + + # Initialise kinematics list + kinematics = [] + ndata = 0 + table = self.metadata["tables"][0] + tab_dict = self.__retrieve_table(table) + kin = self.__extract_kinematics(tab_dict) + kinematics = np.concatenate([kinematics, kin]) + ndata += len(kin) + + # Check number of data agrees with metadata + try: + assert self.metadata['ndata'] is not None + assert self.metadata['ndata'] == ndata + except AssertionError as e: + logging.warning( + f"The number of data in the metafile is either wrong or unspecified." + f" The correct number is {ndata}. Please, update the metafile." + ) + return + return kinematics.tolist() + + def generate_data_and_unc(self, mult_factor=1.0): + """ + Same as `generate_kinematics`, but for central data points. + """ + logging.info(f"Generating central data for CMS_{self.observable}...") + dat_central = [] + stat_unc = [] + asy_sys_unc = [] + table = self.metadata['tables'][0] + tab_dict = self.__retrieve_table(table) + + # Select data with pT > 25 GeV + tab_dict = tab_dict['dependent_variables'][0]['values'] + + # Loop over bins + for rap_bin in tab_dict: + dat_central.append(rap_bin['value'] * mult_factor) + stat_unc.append(rap_bin['errors'][0]['symerror'] * mult_factor) + asy_sys_unc.append(rap_bin['errors'][1]['symerror'] * mult_factor) + return dat_central, stat_unc, asy_sys_unc + + def __build_unc_definitions(self, variant='default'): + unc_definitions = {} + + # Statistical uncertainty + unc_definitions[STAT_LABEL] = { + 'description': f'Statistical uncertainty', + 'treatment': 'ADD', + 'type': 'UNCORR', + } + + if self.observable == 'WPWM-RATIO': + unc_definitions['ART_LABEL'] = { + 'description': f'Correlated systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + } + elif self.observable == 'WPWM-TOT': + for idx in range(self.ndata): + unc_definitions[f'{ART_LABEL}_{idx+1}'] = { + 'description': f'Correlated systematic uncertainty {idx+1}', + 'treatment': 'MULT', + 'type': 'CORR', + } + + if variant != 'default': + raise ValueError(f'The variant {variant} is not implemented yet.') + + return unc_definitions + + def generate_covmat(self, diag_stat_uncs=None): + table = self.metadata["tables"][1] + tab_dict = self.__retrieve_table(table) + matlist = tab_dict['dependent_variables'][0]['values'] + matlist = [d['value'] for d in matlist] + covmat = np.zeros((self.ndata, self.ndata)) + for i in range(self.ndata): + for j in range(self.ndata): + covmat[i, j] = matlist[i + self.ndata * j] * diag_stat_uncs[i] * diag_stat_uncs[j] + return covmat + + def generate_data(self, variant='default', save_to_yaml=False, path='./'): + # Get central data and kinematics + central_data, stat_unc, sys_unc = self.generate_data_and_unc(self.mult_factor) + kinematics = self.generate_kinematics() + + # Uncertainty definitions + unc_definitions = self.__build_unc_definitions(variant=variant) + sys_artificial = [] # Initialize vector of artificial uncertainties + + if self.observable == 'WPWM-TOT': + covmat = self.generate_covmat(stat_unc) + eigvals, eigvecs = np.linalg.eig(covmat) + art_unc = np.sqrt(eigvals) * eigvecs + + # Loop over bins + for data_idx, data in enumerate(central_data): + # Statistical uncertainty + unc_dict = {STAT_LABEL: stat_unc[data_idx]} + for sys_idx, art_sys in enumerate(art_unc[data_idx, :]): + unc_dict[f'{ART_LABEL}_{sys_idx+1}'] = float(art_sys) + sys_artificial.append(unc_dict) + + elif self.observable == 'WPWM-RATIO': + for data_idx, data in enumerate(central_data): + # Statistical uncertainty + unc_dict = {STAT_LABEL: stat_unc[data_idx]} + unc_dict[f'{ART_LABEL}'] = sys_unc[data_idx] + sys_artificial.append(unc_dict) + + if save_to_yaml: + # Save kinematics into file + logging.info("Dumping kinematics to file...") + kinematics_yaml = {'bins': kinematics} + with open(path + self.metadata['kinematics']['file'], 'w') as kin_out_file: + yaml.dump(kinematics_yaml, kin_out_file, sort_keys=False) + logging.info("Done!") + + # Save central data into file + logging.info("Dumping kinematics to file...") + dat_central_yaml = {'data_central': central_data} + file_name = self.metadata['data_central'] + with open(path + file_name, 'w') as dat_out_file: + yaml.dump(dat_central_yaml, dat_out_file, sort_keys=False) + logging.info("Done!") + + # Save unertainties + logging.info("Dumping kinematics to file...") + uncertainties_yaml = {'definitions': unc_definitions, 'bins': sys_artificial} + file_name = ( + self.metadata['data_uncertainties'][0] + if variant == 'default' + else self.metadata['variants'][variant]['data_uncertainties'][0] + ) + with open(path + file_name, 'w') as dat_out_file: + yaml.dump(uncertainties_yaml, dat_out_file, sort_keys=False) + logging.info("Done!") + return kinematics, central_data, sys_artificial + else: + return kinematics, central_data, sys_artificial + + def get_table(self, table_id): + return self.__retrieve_table(table_id) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/metadata.yaml index 9286466e5a..2e60beeb07 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/metadata.yaml @@ -21,42 +21,35 @@ implemented_observables: observable: description: Jet Rapidity Distribution label: CMS $W+c$ ratio - units: '' - process_type: EWJ_RAP - tables: [] - npoints: [] + units: '[fb]' + process_type: DY_W_ETA + tables: [9] ndata: 5 plotting: kinematics_override: ewj_rap_sqrt_scale dataset_label: CMS $W+c$ ratio y_label: $\sigma(W^+ + \bar{c})/\sigma(W^- + c)$ - plot_x: k1 - kinematic_coverage: - - k1 - - k2 - - k3 + plot_x: abs_eta + kinematic_coverage: [abs_eta, m_W2] kinematics: variables: - k1: - description: Variable k1 - label: k1 - units: '' - k2: - description: Variable k2 - label: k2 - units: '' - k3: - description: Variable k3 - label: k3 + abs_eta: + description: Absolute pseudo-rapidity of the Z boson + label: $|\eta|$ units: '' + m_W2: + description: Mass of the W boson squared + label: $m_W^2$ + units: GeV$^{2}$ file: kinematics_WPWM-RATIO.yaml + data_central: data_WPWM-RATIO.yaml + data_uncertainties: [uncertainties_WPWM-RATIO.yaml] theory: conversion_factor: 1.0 operation: ratio FK_tables: - - CMSWCHARMRAT-CMSWCHARM-WpCb-eta4 - - CMSWCHARMRAT-CMSWCHARM-WmC-eta3 - data_uncertainties: [] variants: legacy: data_uncertainties: @@ -64,41 +57,34 @@ implemented_observables: legacy_10: data_uncertainties: - uncertainties_WPWM-RATIO_sys_10.yaml - data_central: data_legacy_WPWM-RATIO.yaml ported_from: CMSWCHARMRAT - observable_name: WPWM-TOT observable: - description: Jet Rapidity Distribution + description: identity label: CMS $W+c$ total - units: '' - process_type: EWJ_RAP - tables: [] - npoints: [] + units: '[fb]' + process_type: DY_W_ETA + tables: [5, 6] ndata: 5 plotting: - kinematics_override: ewj_rap_sqrt_scale + kinematics_override: identity dataset_label: CMS $W+c$ total y_label: $d\sigma(W+c)/d|\eta_l|$ (fb) plot_x: k1 - kinematic_coverage: - - k1 - - k2 - - k3 + kinematic_coverage: [abs_eta, m_W2] + data_uncertainties: [uncertainties_WPWM-TOT.yaml] + data_central: data_WPWM-TOT.yaml kinematics: variables: - k1: - description: Variable k1 - label: k1 - units: '' - k2: - description: Variable k2 - label: k2 - units: '' - k3: - description: Variable k3 - label: k3 + abs_eta: + description: Absolute pseudo-rapidity of the Z boson + label: $|\eta|$ units: '' + m_W2: + description: Mass of the W boson squared + label: $m_W^2$ + units: GeV$^{2}$ file: kinematics_WPWM-TOT.yaml theory: conversion_factor: 1.0 @@ -106,7 +92,6 @@ implemented_observables: FK_tables: - - CMSWCHARM-CMSWCHARM-WmC-eta3 - - CMSWCHARM-CMSWCHARM-WpCb-eta4 - data_uncertainties: [] variants: legacy: data_uncertainties: @@ -114,5 +99,4 @@ implemented_observables: legacy_10: data_uncertainties: - uncertainties_WPWM-TOT_sys_10.yaml - data_central: data_legacy_WPWM-TOT.yaml ported_from: CMSWCHARMTOT From 962643784f3bf10c61c042da2841eed6eef7c4fb Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 14:14:11 +0000 Subject: [PATCH 3/9] Add generated files --- .../CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml | 6 ++ .../CMS_WCHARM_7TEV/data_WPWM-TOT.yaml | 6 ++ .../CMS_WCHARM_7TEV/rawdata/Table5.yaml | 69 ++++++++++++++ .../CMS_WCHARM_7TEV/rawdata/Table6.yaml | 91 +++++++++++++++++++ .../CMS_WCHARM_7TEV/rawdata/Table9.yaml | 69 ++++++++++++++ .../uncertainties_WPWM-RATIO.yaml | 20 ++++ .../uncertainties_WPWM-TOT.yaml | 56 ++++++++++++ 7 files changed, 317 insertions(+) create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-TOT.yaml create mode 100755 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table5.yaml create mode 100755 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table6.yaml create mode 100755 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table9.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-RATIO.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-TOT.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml new file mode 100644 index 0000000000..4b4660e02d --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml @@ -0,0 +1,6 @@ +data_central: +- 1.01300000e+03 +- 960.0 +- 897.0 +- 1062.0 +- 776.0 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-TOT.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-TOT.yaml new file mode 100644 index 0000000000..328dc6c22b --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-TOT.yaml @@ -0,0 +1,6 @@ +data_central: +- 68700.0 +- 59900.0 +- 56700.0 +- 44800.0 +- 35100.0 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table5.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table5.yaml new file mode 100755 index 0000000000..2d329707b5 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table5.yaml @@ -0,0 +1,69 @@ +dependent_variables: +- header: {name: D(SIG)/DABS(ETARAP(LEPTON)), units: PB} + qualifiers: + - {name: ABS(ETARAP(JET)), value: < 2.5} + - {name: JETS, value: 'ANTI-KT, R=1'} + - {name: PT(JET), units: GEV, value: '> 25'} + - {name: PT(LEPTON), units: GEV, value: '> 25'} + - {name: RE, value: P P --> W+ < LEPTON+ NU > CHARMBAR X} + - {name: RE, value: P P --> W- < LEPTON- NUBAR > CHARM X} + - {name: SQRT(S), units: GeV, value: '7000.0'} + values: + - errors: + - {label: stat, symerror: 2.7} + - {label: sys, symerror: 4.6} + value: 68.7 + - errors: + - {label: stat, symerror: 2.5} + - {label: sys, symerror: 4.0} + value: 59.9 + - errors: + - {label: stat, symerror: 2.4} + - {label: sys, symerror: 3.8} + value: 56.7 + - errors: + - {label: stat, symerror: 1.9} + - {label: sys, symerror: 3.2} + value: 44.8 + - errors: + - {label: stat, symerror: 1.7} + - {label: sys, symerror: 2.4} + value: 35.1 +- header: {name: D(SIG)/DABS(ETARAP(LEPTON)), units: PB} + qualifiers: + - {name: ABS(ETARAP(JET)), value: < 2.5} + - {name: JETS, value: 'ANTI-KT, R=1'} + - {name: PT(JET), units: GEV, value: '> 25'} + - {name: PT(LEPTON), units: GEV, value: '> 35'} + - {name: RE, value: P P --> W+ < LEPTON+ NU > CHARMBAR X} + - {name: RE, value: P P --> W- < LEPTON- NUBAR > CHARM X} + - {name: SQRT(S), units: GeV, value: '7000.0'} + values: + - errors: + - {label: stat, symerror: 1.7} + - {label: sys, symerror: 3.2} + value: 52.3 + - errors: + - {label: stat, symerror: 1.6} + - {label: sys, symerror: 3.0} + value: 49.2 + - errors: + - {label: stat, symerror: 1.5} + - {label: sys, symerror: 2.7} + value: 45.5 + - errors: + - {label: stat, symerror: 1.2} + - {label: sys, symerror: 2.1} + value: 34.2 + - errors: + - {label: stat, symerror: 1.0} + - {label: sys, symerror: 1.7} + value: 26.6 +independent_variables: +- header: {name: ABS(ETARAP(LEPTON))} + values: + - {high: 0.35, low: 0.0} + - {high: 0.7, low: 0.35} + - {high: 1.1, low: 0.7} + - {high: 1.6, low: 1.1} + - {high: 2.1, low: 1.6} diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table6.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table6.yaml new file mode 100755 index 0000000000..5ea79d8856 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table6.yaml @@ -0,0 +1,91 @@ +dependent_variables: +- header: {name: ''} + qualifiers: + - {name: ABS(ETARAP(JET)), value: < 2.5} + - {name: JETS, value: 'ANTI-KT, R=1'} + - {name: PT(JET), units: GEV, value: '> 25'} + - {name: PT(LEPTON), units: GEV, value: '> 25'} + - {name: RE, value: P P --> W+ < LEPTON+ NU > CHARMBAR X} + - {name: RE, value: P P --> W- < LEPTON- NUBAR > CHARM X} + - {name: SQRT(S), units: GeV, value: '7000.0'} + values: + - {value: 1.0} + - {value: 0.77} + - {value: 0.78} + - {value: 0.76} + - {value: 0.72} + - {value: 0.77} + - {value: 1.0} + - {value: 0.76} + - {value: 0.74} + - {value: 0.7} + - {value: 0.78} + - {value: 0.76} + - {value: 1.0} + - {value: 0.74} + - {value: 0.7} + - {value: 0.76} + - {value: 0.74} + - {value: 0.74} + - {value: 1.0} + - {value: 0.69} + - {value: 0.72} + - {value: 0.7} + - {value: 0.7} + - {value: 0.69} + - {value: 1.0} +independent_variables: +- header: {name: ABS(ETARAP(LEPTON))} + values: + - {high: 0.35, low: 0.0} + - {high: 0.7, low: 0.35} + - {high: 1.1, low: 0.7} + - {high: 1.6, low: 1.1} + - {high: 2.1, low: 1.6} + - {high: 0.35, low: 0.0} + - {high: 0.7, low: 0.35} + - {high: 1.1, low: 0.7} + - {high: 1.6, low: 1.1} + - {high: 2.1, low: 1.6} + - {high: 0.35, low: 0.0} + - {high: 0.7, low: 0.35} + - {high: 1.1, low: 0.7} + - {high: 1.6, low: 1.1} + - {high: 2.1, low: 1.6} + - {high: 0.35, low: 0.0} + - {high: 0.7, low: 0.35} + - {high: 1.1, low: 0.7} + - {high: 1.6, low: 1.1} + - {high: 2.1, low: 1.6} + - {high: 0.35, low: 0.0} + - {high: 0.7, low: 0.35} + - {high: 1.1, low: 0.7} + - {high: 1.6, low: 1.1} + - {high: 2.1, low: 1.6} +- header: {name: ABS(ETARAP(LEPTON))} + values: + - {high: 0.35, low: 0.0} + - {high: 0.35, low: 0.0} + - {high: 0.35, low: 0.0} + - {high: 0.35, low: 0.0} + - {high: 0.35, low: 0.0} + - {high: 0.7, low: 0.35} + - {high: 0.7, low: 0.35} + - {high: 0.7, low: 0.35} + - {high: 0.7, low: 0.35} + - {high: 0.7, low: 0.35} + - {high: 1.1, low: 0.7} + - {high: 1.1, low: 0.7} + - {high: 1.1, low: 0.7} + - {high: 1.1, low: 0.7} + - {high: 1.1, low: 0.7} + - {high: 1.6, low: 1.1} + - {high: 1.6, low: 1.1} + - {high: 1.6, low: 1.1} + - {high: 1.6, low: 1.1} + - {high: 1.6, low: 1.1} + - {high: 2.1, low: 1.6} + - {high: 2.1, low: 1.6} + - {high: 2.1, low: 1.6} + - {high: 2.1, low: 1.6} + - {high: 2.1, low: 1.6} diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table9.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table9.yaml new file mode 100755 index 0000000000..f260ee1f54 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/rawdata/Table9.yaml @@ -0,0 +1,69 @@ +dependent_variables: +- header: {name: SIG(W+ CHARMBAR) / SIG(W- CHARM)} + qualifiers: + - {name: ABS(ETARAP(JET)), value: < 2.5} + - {name: JETS, value: 'ANTI-KT, R=1'} + - {name: PT(JET), units: GEV, value: '> 25'} + - {name: PT(LEPTON), units: GEV, value: '> 25'} + - {name: RE(W+ CHARMBAR), value: P P --> W+ < LEPTON+ NU > CHARMBAR X} + - {name: RE(W- CHARM), value: P P --> W- < LEPTON- NUBAR > CHARM X} + - {name: SQRT(S), units: GeV, value: '7000.0'} + values: + - errors: + - {label: stat, symerror: 0.052} + - {label: sys, symerror: 0.005} + value: 1.013 + - errors: + - {label: stat, symerror: 0.053} + - {label: sys, symerror: 0.005} + value: 0.96 + - errors: + - {label: stat, symerror: 0.051} + - {label: sys, symerror: 0.008} + value: 0.897 + - errors: + - {label: stat, symerror: 0.061} + - {label: sys, symerror: 0.014} + value: 1.062 + - errors: + - {label: stat, symerror: 0.058} + - {label: sys, symerror: 0.016} + value: 0.776 +- header: {name: SIG(W+ CHARMBAR) / SIG(W- CHARM)} + qualifiers: + - {name: ABS(ETARAP(JET)), value: < 2.5} + - {name: JETS, value: 'ANTI-KT, R=1'} + - {name: PT(JET), units: GEV, value: '> 25'} + - {name: PT(LEPTON), units: GEV, value: '> 35'} + - {name: RE(W+ CHARMBAR), value: P P --> W+ < LEPTON+ NU > CHARMBAR X} + - {name: RE(W- CHARM), value: P P --> W- < LEPTON- NUBAR > CHARM X} + - {name: SQRT(S), units: GeV, value: '7000.0'} + values: + - errors: + - {label: stat, symerror: 0.041} + - {label: sys, symerror: 0.007} + value: 0.993 + - errors: + - {label: stat, symerror: 0.039} + - {label: sys, symerror: 0.007} + value: 0.977 + - errors: + - {label: stat, symerror: 0.04} + - {label: sys, symerror: 0.008} + value: 0.927 + - errors: + - {label: stat, symerror: 0.046} + - {label: sys, symerror: 0.01} + value: 0.948 + - errors: + - {label: stat, symerror: 0.05} + - {label: sys, symerror: 0.011} + value: 0.784 +independent_variables: +- header: {name: ABS(ETARAP(LEPTON))} + values: + - {high: 0.35, low: 0.0} + - {high: 0.7, low: 0.35} + - {high: 1.1, low: 0.7} + - {high: 1.6, low: 1.1} + - {high: 2.1, low: 1.6} diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-RATIO.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-RATIO.yaml new file mode 100644 index 0000000000..b3b6336a7f --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-RATIO.yaml @@ -0,0 +1,20 @@ +definitions: + stat_uncorr: + description: Statistical uncertainty + treatment: ADD + type: UNCORR + ART_LABEL: + description: Correlated systematic uncertainty + treatment: MULT + type: CORR +bins: +- stat_uncorr: 52.0 + art_corr: 5.0 +- stat_uncorr: 53.0 + art_corr: 5.0 +- stat_uncorr: 51.0 + art_corr: 8.0 +- stat_uncorr: 61.0 + art_corr: 14.0 +- stat_uncorr: 58.0 + art_corr: 16.0 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-TOT.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-TOT.yaml new file mode 100644 index 0000000000..d53545763f --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-TOT.yaml @@ -0,0 +1,56 @@ +definitions: + stat_uncorr: + description: Statistical uncertainty + treatment: ADD + type: UNCORR + art_corr_1: + description: Correlated systematic uncertainty 1 + treatment: MULT + type: CORR + art_corr_2: + description: Correlated systematic uncertainty 2 + treatment: MULT + type: CORR + art_corr_3: + description: Correlated systematic uncertainty 3 + treatment: MULT + type: CORR + art_corr_4: + description: Correlated systematic uncertainty 4 + treatment: MULT + type: CORR + art_corr_5: + description: Correlated systematic uncertainty 5 + treatment: MULT + type: CORR +bins: +- stat_uncorr: 2700.0 + art_corr_1: -2.50638824e+03 + art_corr_2: -7.95697332e+02 + art_corr_3: 5.18818026e+02 + art_corr_4: 3.24524090e+02 + art_corr_5: 1.98921572e+01 +- stat_uncorr: 2500.0 + art_corr_1: -2.26430174e+03 + art_corr_2: 9.54417285e+02 + art_corr_3: 3.80750387e+02 + art_corr_4: 2.58001370e+02 + art_corr_5: 2.21295434e+01 +- stat_uncorr: 2400.0 + art_corr_1: -2.16891682e+03 + art_corr_2: -4.98172990e+01 + art_corr_3: -9.87453634e+02 + art_corr_4: 2.78677305e+02 + art_corr_5: 2.43385786e+01 +- stat_uncorr: 1900.0 + art_corr_1: -1.64250437e+03 + art_corr_2: -1.68762567e+01 + art_corr_3: -1.03617739e+01 + art_corr_4: -6.53762602e+02 + art_corr_5: -6.95975341e+02 +- stat_uncorr: 1700.0 + art_corr_1: -1.39269626e+03 + art_corr_2: -2.22542263e+01 + art_corr_3: -2.70634325e+00 + art_corr_4: -6.66474311e+02 + art_corr_5: 7.11130479e+02 From a11c031afaf757818fde8e0cf18cea42b76f3e57 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 14:18:31 +0000 Subject: [PATCH 4/9] Correct bug --- .../CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml | 10 +-- .../commondata/CMS_WCHARM_7TEV/filter.py | 2 +- .../kinematics_WPWM-RATIO.yaml | 72 +++++++------------ .../CMS_WCHARM_7TEV/kinematics_WPWM-TOT.yaml | 72 +++++++------------ .../uncertainties_WPWM-RATIO.yaml | 20 +++--- 5 files changed, 68 insertions(+), 108 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml index 4b4660e02d..7454c33d79 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_WPWM-RATIO.yaml @@ -1,6 +1,6 @@ data_central: -- 1.01300000e+03 -- 960.0 -- 897.0 -- 1062.0 -- 776.0 +- 1.013 +- 0.96 +- 0.897 +- 1.062 +- 0.776 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py index 97444511cb..365a92a2d3 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py @@ -14,5 +14,5 @@ CMS_WCHARM_TOT = Extractor("./metadata.yaml", "WPWM-TOT", mult_factor=1000) _, _, _ = CMS_WCHARM_TOT.generate_data(variant='default', save_to_yaml=True) - CMS_WCHARM_RATIO = Extractor("./metadata.yaml", "WPWM-RATIO", mult_factor=1000) + CMS_WCHARM_RATIO = Extractor("./metadata.yaml", "WPWM-RATIO", mult_factor=1.0) _, _, _ = CMS_WCHARM_RATIO.generate_data(variant='default', save_to_yaml=True) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/kinematics_WPWM-RATIO.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/kinematics_WPWM-RATIO.yaml index d56da464e5..b3549529e5 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/kinematics_WPWM-RATIO.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/kinematics_WPWM-RATIO.yaml @@ -1,61 +1,41 @@ bins: -- k1: - min: null +- abs_eta: + min: 0.0 mid: 0.175 - max: null - k2: - min: null - mid: 6463.838404 - max: null - k3: - min: null - mid: 7000.0 - max: null -- k1: - min: null - mid: 0.525 - max: null - k2: + max: 0.35 + m_W2: min: null - mid: 6463.838404 + mid: 6.46174823e+03 max: null - k3: +- abs_eta: + min: 0.35 + mid: 5.25000000e-01 + max: 0.7 + m_W2: min: null - mid: 7000.0 + mid: 6.46174823e+03 max: null -- k1: - min: null +- abs_eta: + min: 0.7 mid: 0.9 - max: null - k2: + max: 1.1 + m_W2: min: null - mid: 6463.838404 + mid: 6.46174823e+03 max: null - k3: - min: null - mid: 7000.0 - max: null -- k1: - min: null +- abs_eta: + min: 1.1 mid: 1.35 - max: null - k2: - min: null - mid: 6463.838404 - max: null - k3: + max: 1.6 + m_W2: min: null - mid: 7000.0 + mid: 6.46174823e+03 max: null -- k1: - min: null +- abs_eta: + min: 1.6 mid: 1.85 - max: null - k2: - min: null - mid: 6463.838404 - max: null - k3: + max: 2.1 + m_W2: min: null - mid: 7000.0 + mid: 6.46174823e+03 max: null diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/kinematics_WPWM-TOT.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/kinematics_WPWM-TOT.yaml index d56da464e5..b3549529e5 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/kinematics_WPWM-TOT.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/kinematics_WPWM-TOT.yaml @@ -1,61 +1,41 @@ bins: -- k1: - min: null +- abs_eta: + min: 0.0 mid: 0.175 - max: null - k2: - min: null - mid: 6463.838404 - max: null - k3: - min: null - mid: 7000.0 - max: null -- k1: - min: null - mid: 0.525 - max: null - k2: + max: 0.35 + m_W2: min: null - mid: 6463.838404 + mid: 6.46174823e+03 max: null - k3: +- abs_eta: + min: 0.35 + mid: 5.25000000e-01 + max: 0.7 + m_W2: min: null - mid: 7000.0 + mid: 6.46174823e+03 max: null -- k1: - min: null +- abs_eta: + min: 0.7 mid: 0.9 - max: null - k2: + max: 1.1 + m_W2: min: null - mid: 6463.838404 + mid: 6.46174823e+03 max: null - k3: - min: null - mid: 7000.0 - max: null -- k1: - min: null +- abs_eta: + min: 1.1 mid: 1.35 - max: null - k2: - min: null - mid: 6463.838404 - max: null - k3: + max: 1.6 + m_W2: min: null - mid: 7000.0 + mid: 6.46174823e+03 max: null -- k1: - min: null +- abs_eta: + min: 1.6 mid: 1.85 - max: null - k2: - min: null - mid: 6463.838404 - max: null - k3: + max: 2.1 + m_W2: min: null - mid: 7000.0 + mid: 6.46174823e+03 max: null diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-RATIO.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-RATIO.yaml index b3b6336a7f..64740a4566 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-RATIO.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-RATIO.yaml @@ -8,13 +8,13 @@ definitions: treatment: MULT type: CORR bins: -- stat_uncorr: 52.0 - art_corr: 5.0 -- stat_uncorr: 53.0 - art_corr: 5.0 -- stat_uncorr: 51.0 - art_corr: 8.0 -- stat_uncorr: 61.0 - art_corr: 14.0 -- stat_uncorr: 58.0 - art_corr: 16.0 +- stat_uncorr: 0.052 + art_corr: 0.005 +- stat_uncorr: 0.053 + art_corr: 0.005 +- stat_uncorr: 0.051 + art_corr: 0.008 +- stat_uncorr: 0.061 + art_corr: 0.014 +- stat_uncorr: 0.058 + art_corr: 0.016 From b75c99d98ec539ee293f65fdb64dead79e74c6be Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 14:32:05 +0000 Subject: [PATCH 5/9] Remove legacy data --- .../commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-RATIO.yaml | 6 ------ .../commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-TOT.yaml | 6 ------ 2 files changed, 12 deletions(-) delete mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-RATIO.yaml delete mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-TOT.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-RATIO.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-RATIO.yaml deleted file mode 100644 index 7454c33d79..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-RATIO.yaml +++ /dev/null @@ -1,6 +0,0 @@ -data_central: -- 1.013 -- 0.96 -- 0.897 -- 1.062 -- 0.776 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-TOT.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-TOT.yaml deleted file mode 100644 index 328dc6c22b..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/data_legacy_WPWM-TOT.yaml +++ /dev/null @@ -1,6 +0,0 @@ -data_central: -- 68700.0 -- 59900.0 -- 56700.0 -- 44800.0 -- 35100.0 From 5a8e2cb44aa936b4a9e8d50474778d970e77cc0f Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 13 Dec 2024 10:08:18 +0000 Subject: [PATCH 6/9] Change artificial uncertainties from MUL to ADD --- .../CMS_WCHARM_7TEV/filter_utils.py | 10 ++-- .../uncertainties_WPWM-TOT.yaml | 60 +++++++++---------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py index 78096bafba..e6e33eb232 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py @@ -196,7 +196,7 @@ def __build_unc_definitions(self, variant='default'): for idx in range(self.ndata): unc_definitions[f'{ART_LABEL}_{idx+1}'] = { 'description': f'Correlated systematic uncertainty {idx+1}', - 'treatment': 'MULT', + 'treatment': 'ADD', 'type': 'CORR', } @@ -205,7 +205,7 @@ def __build_unc_definitions(self, variant='default'): return unc_definitions - def generate_covmat(self, diag_stat_uncs=None): + def generate_covmat(self, diag_uncs=None): table = self.metadata["tables"][1] tab_dict = self.__retrieve_table(table) matlist = tab_dict['dependent_variables'][0]['values'] @@ -213,7 +213,7 @@ def generate_covmat(self, diag_stat_uncs=None): covmat = np.zeros((self.ndata, self.ndata)) for i in range(self.ndata): for j in range(self.ndata): - covmat[i, j] = matlist[i + self.ndata * j] * diag_stat_uncs[i] * diag_stat_uncs[j] + covmat[i, j] = matlist[i + self.ndata * j] * diag_uncs[i] * diag_uncs[j] return covmat def generate_data(self, variant='default', save_to_yaml=False, path='./'): @@ -226,7 +226,7 @@ def generate_data(self, variant='default', save_to_yaml=False, path='./'): sys_artificial = [] # Initialize vector of artificial uncertainties if self.observable == 'WPWM-TOT': - covmat = self.generate_covmat(stat_unc) + covmat = self.generate_covmat(sys_unc) eigvals, eigvecs = np.linalg.eig(covmat) art_unc = np.sqrt(eigvals) * eigvecs @@ -242,6 +242,8 @@ def generate_data(self, variant='default', save_to_yaml=False, path='./'): for data_idx, data in enumerate(central_data): # Statistical uncertainty unc_dict = {STAT_LABEL: stat_unc[data_idx]} + + # Systematic uncertainty unc_dict[f'{ART_LABEL}'] = sys_unc[data_idx] sys_artificial.append(unc_dict) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-TOT.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-TOT.yaml index d53545763f..19e422c314 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-TOT.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/uncertainties_WPWM-TOT.yaml @@ -5,52 +5,52 @@ definitions: type: UNCORR art_corr_1: description: Correlated systematic uncertainty 1 - treatment: MULT + treatment: ADD type: CORR art_corr_2: description: Correlated systematic uncertainty 2 - treatment: MULT + treatment: ADD type: CORR art_corr_3: description: Correlated systematic uncertainty 3 - treatment: MULT + treatment: ADD type: CORR art_corr_4: description: Correlated systematic uncertainty 4 - treatment: MULT + treatment: ADD type: CORR art_corr_5: description: Correlated systematic uncertainty 5 - treatment: MULT + treatment: ADD type: CORR bins: - stat_uncorr: 2700.0 - art_corr_1: -2.50638824e+03 - art_corr_2: -7.95697332e+02 - art_corr_3: 5.18818026e+02 - art_corr_4: 3.24524090e+02 - art_corr_5: 1.98921572e+01 + art_corr_1: -4.30085960e+03 + art_corr_2: 1.48958525e+02 + art_corr_3: -1.49014534e+03 + art_corr_4: -3.33760715e+02 + art_corr_5: 5.55417584e+02 - stat_uncorr: 2500.0 - art_corr_1: -2.26430174e+03 - art_corr_2: 9.54417285e+02 - art_corr_3: 3.80750387e+02 - art_corr_4: 2.58001370e+02 - art_corr_5: 2.21295434e+01 + art_corr_1: -3.61128670e+03 + art_corr_2: 1.46245103e+02 + art_corr_3: 1.37172210e+03 + art_corr_4: -3.95171209e+02 + art_corr_5: 9.48387525e+02 - stat_uncorr: 2400.0 - art_corr_1: -2.16891682e+03 - art_corr_2: -4.98172990e+01 - art_corr_3: -9.87453634e+02 - art_corr_4: 2.78677305e+02 - art_corr_5: 2.43385786e+01 + art_corr_1: -3.42032648e+03 + art_corr_2: 1.58201894e+02 + art_corr_3: 2.94011007e+02 + art_corr_4: -5.54441014e+02 + art_corr_5: -1.52397231e+03 - stat_uncorr: 1900.0 - art_corr_1: -1.64250437e+03 - art_corr_2: -1.68762567e+01 - art_corr_3: -1.03617739e+01 - art_corr_4: -6.53762602e+02 - art_corr_5: -6.95975341e+02 + art_corr_1: -2.77487954e+03 + art_corr_2: 3.53874801e+02 + art_corr_3: 1.26728625e+02 + art_corr_4: 1.53882437e+03 + art_corr_5: -1.75429767e+02 - stat_uncorr: 1700.0 - art_corr_1: -1.39269626e+03 - art_corr_2: -2.22542263e+01 - art_corr_3: -2.70634325e+00 - art_corr_4: -6.66474311e+02 - art_corr_5: 7.11130479e+02 + art_corr_1: -1.94086828e+03 + art_corr_2: -1.38692811e+03 + art_corr_3: 5.04690564e+01 + art_corr_4: 2.51872601e+02 + art_corr_5: -5.89390783e+01 From 40b42e8b9b16b266d5c2a6e33f9052127839054a Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:42:51 +0100 Subject: [PATCH 7/9] Remove `save_to_yaml` and variant --- .../commondata/CMS_WCHARM_7TEV/filter.py | 4 +- .../CMS_WCHARM_7TEV/filter_utils.py | 77 ++++++++----------- 2 files changed, 33 insertions(+), 48 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py index 365a92a2d3..8ce27320c6 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py @@ -12,7 +12,7 @@ if __name__ == "__main__": CMS_WCHARM_TOT = Extractor("./metadata.yaml", "WPWM-TOT", mult_factor=1000) - _, _, _ = CMS_WCHARM_TOT.generate_data(variant='default', save_to_yaml=True) + CMS_WCHARM_TOT.generate_data() CMS_WCHARM_RATIO = Extractor("./metadata.yaml", "WPWM-RATIO", mult_factor=1.0) - _, _, _ = CMS_WCHARM_RATIO.generate_data(variant='default', save_to_yaml=True) + CMS_WCHARM_RATIO.generate_data() diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py index e6e33eb232..097d5dc0de 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py @@ -7,13 +7,9 @@ yaml.add_representer(float, prettify_float) -SQRTS = 8000 MW2 = 80.385**2 CMSLUMI13 = 2.5 -# List of systematic uncertainties that shuold -# be considered uncorrelated -UNCORR_SYS_UNC = ['UnfoldMCstat', 'UnfoldOtherGen', 'UnfoldReweight'] ART_LABEL = 'art_corr' STAT_LABEL = 'stat_uncorr' TABLE_TOKEN = 'Table' @@ -98,8 +94,6 @@ def __extract_kinematics(self, table: dict): ---------- table: dict Dictionary containing the bins in the transverse momentum - tab_number: int - Index to select the range of the second kinematic variable Return ------ @@ -176,7 +170,7 @@ def generate_data_and_unc(self, mult_factor=1.0): asy_sys_unc.append(rap_bin['errors'][1]['symerror'] * mult_factor) return dat_central, stat_unc, asy_sys_unc - def __build_unc_definitions(self, variant='default'): + def __build_unc_definitions(self): unc_definitions = {} # Statistical uncertainty @@ -200,9 +194,6 @@ def __build_unc_definitions(self, variant='default'): 'type': 'CORR', } - if variant != 'default': - raise ValueError(f'The variant {variant} is not implemented yet.') - return unc_definitions def generate_covmat(self, diag_uncs=None): @@ -216,13 +207,13 @@ def generate_covmat(self, diag_uncs=None): covmat[i, j] = matlist[i + self.ndata * j] * diag_uncs[i] * diag_uncs[j] return covmat - def generate_data(self, variant='default', save_to_yaml=False, path='./'): + def generate_data(self): # Get central data and kinematics central_data, stat_unc, sys_unc = self.generate_data_and_unc(self.mult_factor) kinematics = self.generate_kinematics() # Uncertainty definitions - unc_definitions = self.__build_unc_definitions(variant=variant) + unc_definitions = self.__build_unc_definitions() sys_artificial = [] # Initialize vector of artificial uncertainties if self.observable == 'WPWM-TOT': @@ -246,37 +237,31 @@ def generate_data(self, variant='default', save_to_yaml=False, path='./'): # Systematic uncertainty unc_dict[f'{ART_LABEL}'] = sys_unc[data_idx] sys_artificial.append(unc_dict) - - if save_to_yaml: - # Save kinematics into file - logging.info("Dumping kinematics to file...") - kinematics_yaml = {'bins': kinematics} - with open(path + self.metadata['kinematics']['file'], 'w') as kin_out_file: - yaml.dump(kinematics_yaml, kin_out_file, sort_keys=False) - logging.info("Done!") - - # Save central data into file - logging.info("Dumping kinematics to file...") - dat_central_yaml = {'data_central': central_data} - file_name = self.metadata['data_central'] - with open(path + file_name, 'w') as dat_out_file: - yaml.dump(dat_central_yaml, dat_out_file, sort_keys=False) - logging.info("Done!") - - # Save unertainties - logging.info("Dumping kinematics to file...") - uncertainties_yaml = {'definitions': unc_definitions, 'bins': sys_artificial} - file_name = ( - self.metadata['data_uncertainties'][0] - if variant == 'default' - else self.metadata['variants'][variant]['data_uncertainties'][0] - ) - with open(path + file_name, 'w') as dat_out_file: - yaml.dump(uncertainties_yaml, dat_out_file, sort_keys=False) - logging.info("Done!") - return kinematics, central_data, sys_artificial - else: - return kinematics, central_data, sys_artificial - - def get_table(self, table_id): - return self.__retrieve_table(table_id) + + # Local path for yaml files + path='./' + + # Save kinematics into file + logging.info("Dumping kinematics to file...") + kinematics_yaml = {'bins': kinematics} + with open(path + self.metadata['kinematics']['file'], 'w') as kin_out_file: + yaml.dump(kinematics_yaml, kin_out_file, sort_keys=False) + logging.info("Done!") + + # Save central data into file + logging.info("Dumping kinematics to file...") + dat_central_yaml = {'data_central': central_data} + file_name = self.metadata['data_central'] + with open(path + file_name, 'w') as dat_out_file: + yaml.dump(dat_central_yaml, dat_out_file, sort_keys=False) + logging.info("Done!") + + # Save unertainties + logging.info("Dumping kinematics to file...") + uncertainties_yaml = {'definitions': unc_definitions, 'bins': sys_artificial} + file_name = ( + self.metadata['data_uncertainties'][0] + ) + with open(path + file_name, 'w') as dat_out_file: + yaml.dump(uncertainties_yaml, dat_out_file, sort_keys=False) + logging.info("Done!") From 21bc618a300664b68e376dd5ad2665e229645ccd Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:44:48 +0100 Subject: [PATCH 8/9] Add docstring to `generate_data` --- .../nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py index 097d5dc0de..1dbc3e44d5 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py @@ -208,6 +208,10 @@ def generate_covmat(self, diag_uncs=None): return covmat def generate_data(self): + ''' + Collect central data, kinematics, and uncertainties and save them into + yaml files. + ''' # Get central data and kinematics central_data, stat_unc, sys_unc = self.generate_data_and_unc(self.mult_factor) kinematics = self.generate_kinematics() From bd47a55257be8c3c4db0d82a56f161718fed9185 Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 8 Jan 2025 10:30:17 +0000 Subject: [PATCH 9/9] Clean-up + pre-commit --- .../commondata/CMS_WCHARM_7TEV/filter.py | 7 +- .../CMS_WCHARM_7TEV/filter_utils.py | 233 +++++++++--------- 2 files changed, 115 insertions(+), 125 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py index 8ce27320c6..c0604b80d7 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter.py @@ -3,16 +3,17 @@ ''' import logging +import os from filter_utils import Extractor -import numpy as np logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s') +CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) if __name__ == "__main__": - CMS_WCHARM_TOT = Extractor("./metadata.yaml", "WPWM-TOT", mult_factor=1000) + CMS_WCHARM_TOT = Extractor(f"{CURRENT_DIR}/metadata.yaml", "WPWM-TOT", mult_factor=1000) CMS_WCHARM_TOT.generate_data() - CMS_WCHARM_RATIO = Extractor("./metadata.yaml", "WPWM-RATIO", mult_factor=1.0) + CMS_WCHARM_RATIO = Extractor(f"{CURRENT_DIR}/metadata.yaml", "WPWM-RATIO", mult_factor=1.0) CMS_WCHARM_RATIO.generate_data() diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py index 1dbc3e44d5..691d251bdd 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_7TEV/filter_utils.py @@ -1,4 +1,6 @@ +import functools import logging +import os import numpy as np import yaml @@ -8,28 +10,29 @@ yaml.add_representer(float, prettify_float) MW2 = 80.385**2 -CMSLUMI13 = 2.5 +CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) ART_LABEL = 'art_corr' STAT_LABEL = 'stat_uncorr' TABLE_TOKEN = 'Table' class Extractor: - """ - Extracts kinematics, central data, and uncertainties for a given dataset - - Parameters - ---------- - metadata_file: str - Path to the metadata file - observable: str - The name of the observable for which the data is extracted. The name must - be listed in the metadata file. - """ def __init__(self, metadata_file, observable, mult_factor=1): - + """ + Parameters + ---------- + metadata_file: str + Path to the metadata file + observable: str + The name of the observable for which the data is extracted. The name + must be listed in the metadata file. + mult_factor: float + Multiplication factor to apply to the central data points. This is + useful to convert the data in the metadata file to the desired + units. + """ # Open metadata and select process with open(metadata_file, 'r') as file: metadata = yaml.safe_load(file) @@ -44,69 +47,51 @@ def __init__(self, metadata_file, observable, mult_factor=1): if self.metadata is None: raise Exception(f"{observable} is not listed in the metadata file.") - # Initialise dict of tables - self.tables = {} self.observable = observable self.mult_factor = mult_factor - self.kin_labels = self.metadata['kinematic_coverage'] - self.ndata = self.metadata['ndata'] - def __retrieve_table(self, table_id): + @functools.cache + def _retrieve_table(self, table_id): """ - Implementation of the lazy loading for the tables. If the table - is loaded for the first time, it is stored into an internal - container of the class, so that it will not be loaded each time. - - When called, this functions checks if the table has already been stored - and, if that is the case, returns the stored table. + Implementation of the loading for the table. Parameters ---------- table_id: int - Index that specifies the table + Index that specifies the table. Return ------ - The table specified by `table_id`. If not previously loaded, it is also - stored into the internal container for future use. + The table specified by `table_id`. """ - try: - table = self.tables[str(table_id)] - except KeyError: - logging.debug( - f'Table {table_id} has not already been used or stored.' f' Storing the table...' - ) - with open(f'./rawdata/{TABLE_TOKEN}{table_id}.yaml', 'r') as tab: - tab_dict = yaml.safe_load(tab) - self.tables[str(table_id)] = tab_dict - table = tab_dict - return table + with open(f'{CURRENT_DIR}/rawdata/{TABLE_TOKEN}{table_id}.yaml') as tab: + tab_dict = yaml.safe_load(tab) + return tab_dict - def __extract_kinematics(self, table: dict): + def _generate_kinematics(self): """ - Extracts the kinematic variables of the single differential - distribution given a table. - - For each bin, it computes the max, min, and mid value of the transverse - momentum of the boson. - - Parameters - ---------- - table: dict - Dictionary containing the bins in the transverse momentum + The function generates the kinematics by reading and processing it from + the referenced table. Kinematics is processed in the format of a list of + dictionaries. The keys in each dictionaries specify the label (i.e. name) + for the kinematic variables. For this dataset, they are 'abs_eta' and 'm_W2'. + The labels are taken from the matadata file. The corresponding values are + 'min', 'mid', and 'max'. + + For this dataset, 'm_W2' is used in the computation of the (x,Q2)-map and + does not have any active role in the fit. For that reason, every bin has the + same value. Moreover, only the mid value is used. + """ + logging.info(f"Generating kinematics for CMS_{self.observable}...") - Return - ------ - List of bins containing min, max, and mid values for each of the kinematic - observables listed in the `kinematic_coverage` of the metadata file. + table_ID = self.metadata["tables"][0] + tab_dict = self._retrieve_table(table_ID) - """ - data = table['independent_variables'][0] - label = self.kin_labels + data = tab_dict['independent_variables'][0] + label = self.metadata['kinematic_coverage'] kinematics = [] - for bin in data['values']: - abs_eta_min = bin['low'] - abs_eta_max = bin['high'] + for eta_bin in data['values']: + abs_eta_max = eta_bin['high'] + abs_eta_min = eta_bin['low'] kin_bin = { label[0]: { 'min': abs_eta_min, @@ -116,61 +101,47 @@ def __extract_kinematics(self, table: dict): label[1]: {'min': None, 'mid': MW2, 'max': None}, } kinematics.append(kin_bin) - return kinematics - - def generate_kinematics(self): - """ - Function that generates the kinematics by looping over all the - tables specified in the metadata file. The resulting kinematics - is then saved to a yaml file. It relies on the method - `__extract_kinematics`. - """ - - logging.info(f"Generating kinematics for ATLAS_{self.observable}...") - - # Initialise kinematics list - kinematics = [] - ndata = 0 - table = self.metadata["tables"][0] - tab_dict = self.__retrieve_table(table) - kin = self.__extract_kinematics(tab_dict) - kinematics = np.concatenate([kinematics, kin]) - ndata += len(kin) # Check number of data agrees with metadata - try: - assert self.metadata['ndata'] is not None - assert self.metadata['ndata'] == ndata - except AssertionError as e: - logging.warning( - f"The number of data in the metafile is either wrong or unspecified." - f" The correct number is {ndata}. Please, update the metafile." + ndata = len(kinematics) + if not self.metadata['ndata'] == ndata: + raise ValueError( + f"Mismatch in 'ndata': expected {self.metadata['ndata']}, but got {ndata}" ) - return - return kinematics.tolist() + self.ndata = ndata + return kinematics - def generate_data_and_unc(self, mult_factor=1.0): + def _generate_data_and_unc(self): """ - Same as `generate_kinematics`, but for central data points. + Return a list with central data points and two additional lists with the corresponding + statistical and systematic uncertainties. For this dataset, uncertainties are always + symmetric. Uncertainties are given as absolute values. + + Note that, for the total x-sec, the correlation matrix is provided. The corresponding + covariance matrix is constructed in `_generate_covmat`. """ logging.info(f"Generating central data for CMS_{self.observable}...") dat_central = [] stat_unc = [] asy_sys_unc = [] - table = self.metadata['tables'][0] - tab_dict = self.__retrieve_table(table) + table_ID = self.metadata['tables'][0] + tab_dict = self._retrieve_table(table_ID) # Select data with pT > 25 GeV tab_dict = tab_dict['dependent_variables'][0]['values'] # Loop over bins for rap_bin in tab_dict: - dat_central.append(rap_bin['value'] * mult_factor) - stat_unc.append(rap_bin['errors'][0]['symerror'] * mult_factor) - asy_sys_unc.append(rap_bin['errors'][1]['symerror'] * mult_factor) + dat_central.append(rap_bin['value'] * self.mult_factor) + stat_unc.append(rap_bin['errors'][0]['symerror'] * self.mult_factor) + asy_sys_unc.append(rap_bin['errors'][1]['symerror'] * self.mult_factor) return dat_central, stat_unc, asy_sys_unc - def __build_unc_definitions(self): + def _build_unc_definitions(self): + """ + Build the dictionary containing the definitions of the uncertainties to be + used in the uncertainty data file. + """ unc_definitions = {} # Statistical uncertainty @@ -196,9 +167,22 @@ def __build_unc_definitions(self): return unc_definitions - def generate_covmat(self, diag_uncs=None): - table = self.metadata["tables"][1] - tab_dict = self.__retrieve_table(table) + def _generate_covmat(self, diag_uncs): + """ + Generate the covariance matrix for the total x-sec. This function requires + the diagonal systematic uncertainties as argument. The diagonal uncertainties + are used to construct the covariance matrix from the correlation matrix stored + in the HepData table. + + Note that such a correlation matrix exists for the total x-sec only, while the + ratio observable does not provide this information. + """ + if not self.observable == 'WPWM-TOT': + raise ValueError( + "The construction of the covariance matrix is defined for the total x-sec only." + ) + table_ID = self.metadata["tables"][1] + tab_dict = self._retrieve_table(table_ID) matlist = tab_dict['dependent_variables'][0]['values'] matlist = [d['value'] for d in matlist] covmat = np.zeros((self.ndata, self.ndata)) @@ -208,64 +192,69 @@ def generate_covmat(self, diag_uncs=None): return covmat def generate_data(self): - ''' - Collect central data, kinematics, and uncertainties and save them into - yaml files. - ''' + """ + The function collects central data, kinematics, and uncertainties ans save them + into yaml files. + + The systematic uncertainties are given as percentages relative the central data point. + The absolute value of the uncertainty is obtained from the central data point before + the shifts are applied. + """ # Get central data and kinematics - central_data, stat_unc, sys_unc = self.generate_data_and_unc(self.mult_factor) - kinematics = self.generate_kinematics() + central_data, stat_unc, sys_unc = self._generate_data_and_unc() + kinematics = self._generate_kinematics() # Uncertainty definitions - unc_definitions = self.__build_unc_definitions() + unc_definitions = self._build_unc_definitions() sys_artificial = [] # Initialize vector of artificial uncertainties if self.observable == 'WPWM-TOT': - covmat = self.generate_covmat(sys_unc) + # Generate covmat and perform eigen decomposition + covmat = self._generate_covmat(sys_unc) eigvals, eigvecs = np.linalg.eig(covmat) art_unc = np.sqrt(eigvals) * eigvecs # Loop over bins - for data_idx, data in enumerate(central_data): + for data_idx in range(len(central_data)): # Statistical uncertainty unc_dict = {STAT_LABEL: stat_unc[data_idx]} + + # Artificial systematic uncertainties for sys_idx, art_sys in enumerate(art_unc[data_idx, :]): unc_dict[f'{ART_LABEL}_{sys_idx+1}'] = float(art_sys) + + # Append to list sys_artificial.append(unc_dict) elif self.observable == 'WPWM-RATIO': - for data_idx, data in enumerate(central_data): + for data_idx in range(len(central_data)): # Statistical uncertainty unc_dict = {STAT_LABEL: stat_unc[data_idx]} # Systematic uncertainty unc_dict[f'{ART_LABEL}'] = sys_unc[data_idx] sys_artificial.append(unc_dict) - - # Local path for yaml files - path='./' # Save kinematics into file logging.info("Dumping kinematics to file...") kinematics_yaml = {'bins': kinematics} - with open(path + self.metadata['kinematics']['file'], 'w') as kin_out_file: - yaml.dump(kinematics_yaml, kin_out_file, sort_keys=False) + kins_file_name = self.metadata['kinematics']['file'] + with open(CURRENT_DIR + '/' + kins_file_name, 'w') as file: + yaml.dump(kinematics_yaml, file, sort_keys=False) logging.info("Done!") # Save central data into file logging.info("Dumping kinematics to file...") dat_central_yaml = {'data_central': central_data} - file_name = self.metadata['data_central'] - with open(path + file_name, 'w') as dat_out_file: - yaml.dump(dat_central_yaml, dat_out_file, sort_keys=False) + data_file_name = self.metadata['data_central'] + with open(CURRENT_DIR + '/' + data_file_name, 'w') as file: + yaml.dump(dat_central_yaml, file, sort_keys=False) logging.info("Done!") # Save unertainties logging.info("Dumping kinematics to file...") uncertainties_yaml = {'definitions': unc_definitions, 'bins': sys_artificial} - file_name = ( - self.metadata['data_uncertainties'][0] - ) - with open(path + file_name, 'w') as dat_out_file: - yaml.dump(uncertainties_yaml, dat_out_file, sort_keys=False) + unc_file_name = self.metadata['data_uncertainties'][0] + with open(CURRENT_DIR + '/' + unc_file_name, 'w') as file: + yaml.dump(uncertainties_yaml, file, sort_keys=False) logging.info("Done!")