From 422f1d197b634ec168095a852d1b2a3cb962087a Mon Sep 17 00:00:00 2001 From: Tanjona Rabemananjara Date: Wed, 8 Jan 2025 11:47:04 +0300 Subject: [PATCH] Run `pre-commit` hooks --- .../commondata/DYE605_Z0_38P8GEV_DW/filter.py | 137 +++++++------- .../commondata/DYE866_Z0_800GEV/filter.py | 120 +++++++----- .../DYE866_Z0_800GEV_DW_RATIO/filter.py | 125 +++++++------ .../commondata/DYE906_Z0_120GEV_DW/filter.py | 176 +++++++++--------- 4 files changed, 299 insertions(+), 259 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py index 6a93e5a4a1..d25f9bf325 100644 --- a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py @@ -1,99 +1,104 @@ -from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close -from pathlib import Path from dataclasses import dataclass +from os import PathLike +from pathlib import Path import typing from typing import List + import numpy as np import pandas as pd -from os import PathLike import yaml +from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close + + def mergetables() -> pd.DataFrame: - table_paths = [] - for i in range(1,8): - table_paths.append(Path(f"./rawdata/Table{i}.csv")) + table_paths = [] + for i in range(1, 8): + table_paths.append(Path(f"./rawdata/Table{i}.csv")) + + # List with the rapidity bins for tables 1 to 7. + yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4] - # List with the rapidity bins for tables 1 to 7. - yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4] + col_names = ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm"] + col_names_all = col_names + ["y", "sqrts"] - col_names = ["M2","dsig","statp","statm","normp","normm","sysp","sysm"] - col_names_all = col_names + ["y", "sqrts"] + combined_df = pd.DataFrame(columns=col_names_all) + for i, path in enumerate(table_paths): + df = pd.read_csv(path, header=11, names=col_names) + df["y"] = yrap[i] + df["sqrts"] = 38.8 + df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()] + combined_df = pd.concat([combined_df, df], ignore_index=True) - combined_df = pd.DataFrame(columns=col_names_all) - for i, path in enumerate(table_paths): - df = pd.read_csv(path, header=11, names=col_names) - df["y"]=yrap[i] - df["sqrts"]=38.8 - df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()] - combined_df = pd.concat([combined_df,df],ignore_index=True) + # In the table we have sqrt(tau) not M2; compute M2=tau*s + combined_df["M2"] = (combined_df["M2"] * 38.8) ** 2 - # In the table we have sqrt(tau) not M2; compute M2=tau*s - combined_df["M2"] = (combined_df["M2"]*38.8)**2 + return combined_df - return combined_df def nuclear_uncert_dw(tableN: PathLike, tablep: PathLike): - dfN = pd.read_table(tableN) - dfp = pd.read_table(tablep) - return dfN, dfp + dfN = pd.read_table(tableN) + dfp = pd.read_table(tablep) + return dfN, dfp + @dataclass class E605_commondata(commondata): - def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): + def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): - # Kinematic quantities. - self.central_values = data["dsig"].astype(float).to_numpy() - self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy() - self.kinematic_quantities = ["y", "M2", "sqrts"] + # Kinematic quantities. + self.central_values = data["dsig"].astype(float).to_numpy() + self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy() + self.kinematic_quantities = ["y", "M2", "sqrts"] - # Statistical uncertainties. - self.statistical_uncertainties = data["statp"] + # Statistical uncertainties. + self.statistical_uncertainties = data["statp"] - # the overall 10% statistical uncertainty is treated as - # additive, while normalisation uncertainty is always treated - # multiplicatively - syst = pd.DataFrame(0.1 * self.central_values) + # the overall 10% statistical uncertainty is treated as + # additive, while normalisation uncertainty is always treated + # multiplicatively + syst = pd.DataFrame(0.1 * self.central_values) - # Systematic uncertainties. - syst["norm"] = (self.central_values - *data["normp"].str.strip("%").astype(float)/100) + # Systematic uncertainties. + syst["norm"] = self.central_values * data["normp"].str.strip("%").astype(float) / 100 + # self.systematic_uncertainties = np.dstack((stat,norm))[0] + self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")] - #self.systematic_uncertainties = np.dstack((stat,norm))[0] - self.systypes = [("ADD","UNCORR"),("MULT", "CORR")] + # Compute the point-to-point uncertainties + nrep = 999 + norm = np.sqrt(nrep) + dfN, dfp = nuclear_uncert_dw( + "rawdata/nuclear/output/tables/group_result_table.csv", + "rawdata/proton_ite/output/tables/group_result_table.csv", + ) - # Compute the point-to-point uncertainties - nrep=999 - norm=np.sqrt(nrep) - dfN, dfp = nuclear_uncert_dw("rawdata/nuclear/output/tables/group_result_table.csv", - "rawdata/proton_ite/output/tables/group_result_table.csv") + for rep in range(1, nrep + 1): + Delta = (dfN[f"rep_{rep:05d}"] - dfp["theory_central"]) / norm + syst[f"NUCLEAR{rep:05d}"] = Delta + self.systypes.append(("ADD", f"NUCLEAR{rep:05d}")) - for rep in range(1,nrep+1): - Delta = (dfN[f"rep_{rep:05d}"]-dfp["theory_central"])/norm - syst[f"NUCLEAR{rep:05d}"]=Delta - self.systypes.append(("ADD", f"NUCLEAR{rep:05d}")) + self.systematic_uncertainties = syst.to_numpy() - self.systematic_uncertainties = syst.to_numpy() + self.process = process + self.dataset_name = dataset_name - self.process = process - self.dataset_name = dataset_name def main(): - data = mergetables() - # First create the commondata variant without the nuclear uncertainties. - DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0") - DYE605.write_new_commondata(Path("data_reimplemented_PXSEC.yaml"), - Path("kinematics_reimplemented_PXSEC.yaml"), - Path("uncertainties_reimplemented_PXSEC.yaml")) - if(covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented")): - print("covmat is close") - else: - print("covmat is different.") - -if __name__ == "__main__": - main() - - + data = mergetables() + # First create the commondata variant without the nuclear uncertainties. + DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0") + DYE605.write_new_commondata( + Path("data_reimplemented_PXSEC.yaml"), + Path("kinematics_reimplemented_PXSEC.yaml"), + Path("uncertainties_reimplemented_PXSEC.yaml"), + ) + if covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented"): + print("covmat is close") + else: + print("covmat is different.") +if __name__ == "__main__": + main() diff --git a/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV/filter.py b/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV/filter.py index b0db5bf59b..1b505606a0 100644 --- a/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV/filter.py @@ -1,76 +1,94 @@ -from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close -from pathlib import Path from dataclasses import dataclass +from os import PathLike +from pathlib import Path import typing from typing import List + import numpy as np import pandas as pd -from os import PathLike import yaml +from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close + + def readdata() -> pd.DataFrame: - col_names = ["xF","Mmin","Mmax","Mavg","xFavg","pt","dsig","stat","syst","kfact","rsig","rstat","rsyst"] - table_path = Path(f"./rawdata/table.csv") - df = pd.read_csv(table_path,names=col_names) - return df + col_names = [ + "xF", + "Mmin", + "Mmax", + "Mavg", + "xFavg", + "pt", + "dsig", + "stat", + "syst", + "kfact", + "rsig", + "rstat", + "rsyst", + ] + table_path = Path(f"./rawdata/table.csv") + df = pd.read_csv(table_path, names=col_names) + return df + @dataclass class E866commondata(commondata): - def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): + def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): - # Definitions, compute Jacobian, get dsig/dy/dM - M = (data["Mmax"]+data["Mmin"])/2 - M2=M*M - sqrts=M/M*38.8 - s=sqrts**2 - tau=M**2/s - tau=tau.to_numpy() - xF=data["xF"] - y=np.arcsinh(xF/np.sqrt(tau)/2) - jac=np.sqrt(xF**2+4*tau) - dsigdydM = data["dsig"]*jac + # Definitions, compute Jacobian, get dsig/dy/dM + M = (data["Mmax"] + data["Mmin"]) / 2 + M2 = M * M + sqrts = M / M * 38.8 + s = sqrts**2 + tau = M**2 / s + tau = tau.to_numpy() + xF = data["xF"] + y = np.arcsinh(xF / np.sqrt(tau) / 2) + jac = np.sqrt(xF**2 + 4 * tau) + dsigdydM = data["dsig"] * jac + # Set the central values + self.central_values = dsigdydM.astype(float).to_numpy() - # Set the central values - self.central_values = dsigdydM.astype(float).to_numpy() + # Pick the the kinematic quantities + kin = pd.concat([y, M2, sqrts], axis=1) + kin = kin.set_axis(["y", "M2", "sqrts"], axis=1) + self.kinematics = kin.astype(float).to_numpy() + self.kinematic_quantities = ["y", "M2", "sqrts"] - # Pick the the kinematic quantities - kin=pd.concat([y,M2,sqrts],axis=1) - kin=kin.set_axis(["y","M2","sqrts"],axis=1) - self.kinematics = kin.astype(float).to_numpy() - self.kinematic_quantities = ["y", "M2", "sqrts"] + # Statistical uncertainties. + self.statistical_uncertainties = data["stat"] * jac - # Statistical uncertainties. - self.statistical_uncertainties = data["stat"]*jac + # Systematic uncertainty + syst = data["syst"] * jac - # Systematic uncertainty - syst = data["syst"]*jac + # Normalisation uncertainty of 6.5% from beam intensity calibration. + norm = 6.5 / 100 + norm = norm * self.central_values - # Normalisation uncertainty of 6.5% from beam intensity calibration. - norm = 6.5/100 - norm = norm * self.central_values + self.systematic_uncertainties = np.dstack((syst, norm))[0] + self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")] - self.systematic_uncertainties = np.dstack((syst,norm))[0] - self.systypes = [("ADD", "UNCORR"),("MULT","CORR")] + self.process = process + self.dataset_name = dataset_name - self.process = process - self.dataset_name = dataset_name def main(): - data = readdata() - # First create the commondata variant without the nuclear uncertainties. - DYE866 = E866commondata(data, "DYE866_Z0", "Z0") - DYE866.write_new_commondata(Path("data_reimplemented_PXSEC.yaml"), - Path("kinematics_reimplemented_PXSEC.yaml"), - Path("uncertainties_reimplemented_PXSEC.yaml")) - - if(covmat_is_close("DYE866_Z0_800GEV_PXSEC", "legacy", "reimplemented")): - print("covmat is close") - else: - print("covmat is different.") -if __name__ == "__main__": - main() - + data = readdata() + # First create the commondata variant without the nuclear uncertainties. + DYE866 = E866commondata(data, "DYE866_Z0", "Z0") + DYE866.write_new_commondata( + Path("data_reimplemented_PXSEC.yaml"), + Path("kinematics_reimplemented_PXSEC.yaml"), + Path("uncertainties_reimplemented_PXSEC.yaml"), + ) + if covmat_is_close("DYE866_Z0_800GEV_PXSEC", "legacy", "reimplemented"): + print("covmat is close") + else: + print("covmat is different.") +if __name__ == "__main__": + main() diff --git a/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV_DW_RATIO/filter.py b/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV_DW_RATIO/filter.py index ca82e7b83b..87628cb5bc 100644 --- a/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV_DW_RATIO/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV_DW_RATIO/filter.py @@ -1,78 +1,85 @@ -from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close -from pathlib import Path from dataclasses import dataclass +from os import PathLike +from pathlib import Path import typing from typing import List + import numpy as np import pandas as pd -from os import PathLike import yaml +from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close + + def readdata() -> pd.DataFrame: - col_names = ["x2","xlow","xhigh","xf","pt","M","sig","errp","errm"] - table_path = Path(f"./rawdata/table11.csv") - df = pd.read_csv(table_path,header=12,names=col_names, nrows=15) - return df + col_names = ["x2", "xlow", "xhigh", "xf", "pt", "M", "sig", "errp", "errm"] + table_path = Path(f"./rawdata/table11.csv") + df = pd.read_csv(table_path, header=12, names=col_names, nrows=15) + return df + def nuclear_uncert_dw(tableN: PathLike, tablep: PathLike): - dfN = pd.read_table(tableN) - dfp = pd.read_table(tablep) - return dfN, dfp + dfN = pd.read_table(tableN) + dfp = pd.read_table(tablep) + return dfN, dfp + @dataclass class E866_DW_RATIO_commondata(commondata): - def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): - - # Kinematic quantities. - self.central_values = data["sig"].astype(float).to_numpy() - x2 = 0.5*(data["xhigh"]+data["xlow"]) - x1 = data["xf"]+data["x2"] - s=x2/x2*38.8*38.8 - tau = data["M"]**2/38.8**2 - y = np.log((data["M"]/np.sqrt(s))/data["x2"]) - kin=pd.concat([y,data["M"]**2,np.sqrt(s)],axis=1) - kin=kin.set_axis(["y","M2","s"],axis=1) - self.kinematics = kin.astype(float).to_numpy() - self.kinematic_quantities = ["y", "M2", "sqrts"] - - # Statistical uncertainties. - self.statistical_uncertainties = data["errp"].astype(float).to_numpy() - - # Systematic uncertainties. Taken from Phys. Rev. D. Vol. 64, 052002 table 10 - syst = 0.97/100 - syst = pd.DataFrame(syst * data["sig"]) - systypes = [("ADD", "CORR")] - - # Compute the point-to-point uncertainties - nrep=100 - norm=np.sqrt(nrep) - dfN, dfp = nuclear_uncert_dw("rawdata/nuclear_ite/output/tables/group_result_table.csv", - "rawdata/proton_ite/output/tables/group_result_table.csv") - for rep in range(1,nrep+1): - Delta = (dfN[f"rep_{rep:05d}"]-dfp["theory_central"])/norm - syst[f"NUCLEAR{rep:03d}"]=Delta - systypes.append(("ADD", f"NUCLEAR{rep:03d}")) - - self.systematic_uncertainties = syst.astype(float).to_numpy() - self.systypes = systypes - self.process = process - self.dataset_name = dataset_name + def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): -def main(): - data = readdata() - # First create the commondata variant without the nuclear uncertainties. - DYE866 = E866_DW_RATIO_commondata(data, "DYE866_Z0_DW_RATIO", "Z0") - DYE866.write_new_commondata(Path("data_reimplemented_PDXSECRATIO.yaml"), - Path("kinematics_reimplemented_PDXSECRATIO.yaml"), - Path("uncertainties_reimplemented_PDXSECRATIO.yaml")) - if(covmat_is_close("DYE866_Z0_800GEV_DW_RATIO_PDXSECRATIO", "legacy", "reimplemented")): - print("covmat is close") - else: - print("covmat is different.") + # Kinematic quantities. + self.central_values = data["sig"].astype(float).to_numpy() + x2 = 0.5 * (data["xhigh"] + data["xlow"]) + x1 = data["xf"] + data["x2"] + s = x2 / x2 * 38.8 * 38.8 + tau = data["M"] ** 2 / 38.8**2 + y = np.log((data["M"] / np.sqrt(s)) / data["x2"]) + kin = pd.concat([y, data["M"] ** 2, np.sqrt(s)], axis=1) + kin = kin.set_axis(["y", "M2", "s"], axis=1) + self.kinematics = kin.astype(float).to_numpy() + self.kinematic_quantities = ["y", "M2", "sqrts"] -if __name__ == "__main__": - main() + # Statistical uncertainties. + self.statistical_uncertainties = data["errp"].astype(float).to_numpy() + + # Systematic uncertainties. Taken from Phys. Rev. D. Vol. 64, 052002 table 10 + syst = 0.97 / 100 + syst = pd.DataFrame(syst * data["sig"]) + systypes = [("ADD", "CORR")] + # Compute the point-to-point uncertainties + nrep = 100 + norm = np.sqrt(nrep) + dfN, dfp = nuclear_uncert_dw( + "rawdata/nuclear_ite/output/tables/group_result_table.csv", + "rawdata/proton_ite/output/tables/group_result_table.csv", + ) + for rep in range(1, nrep + 1): + Delta = (dfN[f"rep_{rep:05d}"] - dfp["theory_central"]) / norm + syst[f"NUCLEAR{rep:03d}"] = Delta + systypes.append(("ADD", f"NUCLEAR{rep:03d}")) + self.systematic_uncertainties = syst.astype(float).to_numpy() + self.systypes = systypes + self.process = process + self.dataset_name = dataset_name +def main(): + data = readdata() + # First create the commondata variant without the nuclear uncertainties. + DYE866 = E866_DW_RATIO_commondata(data, "DYE866_Z0_DW_RATIO", "Z0") + DYE866.write_new_commondata( + Path("data_reimplemented_PDXSECRATIO.yaml"), + Path("kinematics_reimplemented_PDXSECRATIO.yaml"), + Path("uncertainties_reimplemented_PDXSECRATIO.yaml"), + ) + if covmat_is_close("DYE866_Z0_800GEV_DW_RATIO_PDXSECRATIO", "legacy", "reimplemented"): + print("covmat is close") + else: + print("covmat is different.") + + +if __name__ == "__main__": + main() diff --git a/nnpdf_data/nnpdf_data/commondata/DYE906_Z0_120GEV_DW/filter.py b/nnpdf_data/nnpdf_data/commondata/DYE906_Z0_120GEV_DW/filter.py index c2e27b2241..2fa38efe95 100644 --- a/nnpdf_data/nnpdf_data/commondata/DYE906_Z0_120GEV_DW/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/DYE906_Z0_120GEV_DW/filter.py @@ -1,107 +1,117 @@ -from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close -from pathlib import Path from dataclasses import dataclass +from os import PathLike +from pathlib import Path import typing from typing import List + import numpy as np import pandas as pd -from os import PathLike import yaml + +from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close + # TODO the same function might be placed differently depending on the version of the code. try: - from validphys.newcommondata_utils import covmat_to_artunc + from validphys.newcommondata_utils import covmat_to_artunc except ModuleNotFoundError: - from nnpdf_data.filter_utils.correlations import covmat_to_artunc + from nnpdf_data.filter_utils.correlations import covmat_to_artunc + def readdata() -> pd.DataFrame: - col_names = ["xtlow", "xtup", "xt", "xb", "M", "pt", "sigr", "stat", "sys", "dx"] - table_path = Path(f"./rawdata/table.csv") - df = pd.read_csv(table_path, names=col_names,header=0) - return df + col_names = ["xtlow", "xtup", "xt", "xb", "M", "pt", "sigr", "stat", "sys", "dx"] + table_path = Path(f"./rawdata/table.csv") + df = pd.read_csv(table_path, names=col_names, header=0) + return df + def nuclear_uncert_dw(tableN: PathLike, tablep: PathLike): - dfN = pd.read_table(tableN) - dfp = pd.read_table(tablep) - return dfN, dfp + dfN = pd.read_table(tableN) + dfp = pd.read_table(tablep) + return dfN, dfp + def get_covmat_list(covmat_filename: PathLike): - covmat = np.loadtxt(covmat_filename, delimiter=",") - return covmat + covmat = np.loadtxt(covmat_filename, delimiter=",") + return covmat + def is_symmetric(mat: np.ndarray) -> bool: - return np.all(np.isclose(mat, mat.T)) + return np.all(np.isclose(mat, mat.T)) + @dataclass class E906_DW_RATIO_commondata(commondata): - def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): - - self.central_values = data["sigr"].astype(float).to_numpy() - # Kinematic quantities. - y = 0.5*np.log(data["xb"]/data["xt"]) - Ebeam = 120 - MP = 0.938 - s = 2*MP*(Ebeam+MP)*y/y - - kin=pd.concat([y,data["M"]**2,s],axis=1) - kin=kin.set_axis(["y","M2","s"],axis=1) - self.kinematics = kin.astype(float).to_numpy() - self.kinematic_quantities = ["y", "M2", "sqrts"] - - # Statistical uncertainties. - self.statistical_uncertainties = np.zeros(len(data["stat"])) - - # Systematic uncertainties. - syst = pd.DataFrame(data["sys"]) - syst = syst.set_axis(["sys_unc_0"], axis=1) - systypes = [("ADD", "CORR")] - - # Compute the artificial uncertainties from the - # covariance matrix found in equation 9 of - # https://arxiv.org/abs/2103.04024 - covmat = get_covmat_list("rawdata/covmat.csv") - if(not is_symmetric(covmat)): - raise ValueError("The covariance matrix is not symmetric.") - artunc = pd.DataFrame(covmat_to_artunc(len(covmat), - covmat.flatten().tolist())) - artunc_names = [] - for i in range(1,len(artunc)+1): artunc_names.append(f"sys_unc_{i}") - artunc = artunc.set_axis(artunc_names, axis=1) - - for _, col_name in enumerate(artunc): - systypes.append(("ADD","CORR")) - syst[col_name]=artunc[col_name] - - # Compute the point-to-point uncertainties - nrep = 100 - norm = np.sqrt(nrep) - norm = 100 - dfN, dfp = nuclear_uncert_dw("rawdata/nuclear_ite/output/tables/group_result_table.csv", - "rawdata/proton_ite/output/tables/group_result_table.csv") - for rep in range(1,nrep+1): - Delta = (dfN[f"rep_{rep:05d}"]-dfp["theory_central"])/norm - syst[f"NUCLEAR{rep:03d}"]=Delta - systypes.append(("ADD", f"NUCLEAR{rep:03d}")) - - self.systematic_uncertainties = syst.astype(float).to_numpy() - self.systypes = systypes - self.process = process - self.dataset_name = dataset_name - -def main(): - data = readdata() - # First create the commondata variant without the nuclear uncertainties. - DYE906 = E906_DW_RATIO_commondata(data, "DYE906_Z0", "Z0") - DYE906.write_new_commondata(Path("data_reimplemented_PDXSECRATIO.yaml"), - Path("kinematics_reimplemented_PDXSECRATIO.yaml"), - Path("uncertainties_reimplemented_PDXSECRATIO.yaml")) - if(covmat_is_close("DYE906_Z0_120GEV_DW_PDXSECRATIO", "legacy", "reimplemented")): - print("covmat is close") - else: - print("covmat is different.") - -if __name__ == "__main__": - main() + def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): + + self.central_values = data["sigr"].astype(float).to_numpy() + # Kinematic quantities. + y = 0.5 * np.log(data["xb"] / data["xt"]) + Ebeam = 120 + MP = 0.938 + s = 2 * MP * (Ebeam + MP) * y / y + + kin = pd.concat([y, data["M"] ** 2, s], axis=1) + kin = kin.set_axis(["y", "M2", "s"], axis=1) + self.kinematics = kin.astype(float).to_numpy() + self.kinematic_quantities = ["y", "M2", "sqrts"] + + # Statistical uncertainties. + self.statistical_uncertainties = np.zeros(len(data["stat"])) + + # Systematic uncertainties. + syst = pd.DataFrame(data["sys"]) + syst = syst.set_axis(["sys_unc_0"], axis=1) + systypes = [("ADD", "CORR")] + + # Compute the artificial uncertainties from the + # covariance matrix found in equation 9 of + # https://arxiv.org/abs/2103.04024 + covmat = get_covmat_list("rawdata/covmat.csv") + if not is_symmetric(covmat): + raise ValueError("The covariance matrix is not symmetric.") + artunc = pd.DataFrame(covmat_to_artunc(len(covmat), covmat.flatten().tolist())) + artunc_names = [] + for i in range(1, len(artunc) + 1): + artunc_names.append(f"sys_unc_{i}") + artunc = artunc.set_axis(artunc_names, axis=1) + + for _, col_name in enumerate(artunc): + systypes.append(("ADD", "CORR")) + syst[col_name] = artunc[col_name] + + # Compute the point-to-point uncertainties + nrep = 100 + norm = np.sqrt(nrep) + norm = 100 + dfN, dfp = nuclear_uncert_dw( + "rawdata/nuclear_ite/output/tables/group_result_table.csv", + "rawdata/proton_ite/output/tables/group_result_table.csv", + ) + for rep in range(1, nrep + 1): + Delta = (dfN[f"rep_{rep:05d}"] - dfp["theory_central"]) / norm + syst[f"NUCLEAR{rep:03d}"] = Delta + systypes.append(("ADD", f"NUCLEAR{rep:03d}")) + + self.systematic_uncertainties = syst.astype(float).to_numpy() + self.systypes = systypes + self.process = process + self.dataset_name = dataset_name +def main(): + data = readdata() + # First create the commondata variant without the nuclear uncertainties. + DYE906 = E906_DW_RATIO_commondata(data, "DYE906_Z0", "Z0") + DYE906.write_new_commondata( + Path("data_reimplemented_PDXSECRATIO.yaml"), + Path("kinematics_reimplemented_PDXSECRATIO.yaml"), + Path("uncertainties_reimplemented_PDXSECRATIO.yaml"), + ) + if covmat_is_close("DYE906_Z0_120GEV_DW_PDXSECRATIO", "legacy", "reimplemented"): + print("covmat is close") + else: + print("covmat is different.") +if __name__ == "__main__": + main()