diff --git a/doc/sphinx/source/tutorials/hessian2mc.rst b/doc/sphinx/source/tutorials/hessian2mc.rst new file mode 100644 index 0000000000..344bece3ab --- /dev/null +++ b/doc/sphinx/source/tutorials/hessian2mc.rst @@ -0,0 +1,33 @@ +.. _hessian2mc: +How to transform a Hessian set into the corresponding Monte Carlo PDF +===================================================================== + +A Hessian PDF set can be transformed into a Monte Carlo replica set using the +method described in Eq. (4.3) of `PDF4LHC21 `_ using a runcard +like the one below. +In this example ``Neig`` are the number of basis eigenvectors of the Hessian PDF. +``mc_pdf_name`` is the name of the new MC replica set that will be created. +``watt_thorne_rnd_seed`` is the random seed used to generate the MC replicas as shown in the equation below. + +.. math:: + + f^{MC, (k)}(x,Q) = f^{H}_{0}(x,Q) + \frac{1}{2} \sum_{j=1}^{Neig} \left(f^{H}_{j,+}(x,Q) - f^{H}_{j,-}(x,Q)\right) R_j^{(k)} + +where :math:`f^{MC, (k)}(x,Q)` is the :math:`k`-th Monte Carlo replica, :math:`f^{H}_{0}(x,Q)` the central Hessian member, +:math:`f^{H}_{j,\pm}(x,Q)` the positive and negative eigenvector directions corresponding to the :math:`j`-th Hessian eigenvector, and :math:`R_j^{(k)}` +are random standard normal numbers. + +The new MC replica set will be saved in the LHAPDF folder. + +.. code:: yaml + + pdf: MSTW2008nlo68cl + + mc_pdf_name: MSTW2008nlo68cl_mc + + num_members: 100 + + watt_thorne_rnd_seed: 1 + + actions_: + - write_hessian_to_mc_watt_thorne diff --git a/doc/sphinx/source/tutorials/index.rst b/doc/sphinx/source/tutorials/index.rst index a9902db814..c6c20abf63 100644 --- a/doc/sphinx/source/tutorials/index.rst +++ b/doc/sphinx/source/tutorials/index.rst @@ -47,6 +47,7 @@ Special PDF sets ./reproduce ./bundle-pdfs.rst ./mc2hessian.rst + ./hessian2mc.rst Miscellaneous ------------- diff --git a/validphys2/examples/hessian_2mc.yaml b/validphys2/examples/hessian_2mc.yaml new file mode 100644 index 0000000000..6a3cf4c895 --- /dev/null +++ b/validphys2/examples/hessian_2mc.yaml @@ -0,0 +1,16 @@ +meta: + author: Lazy Person + title: runcard for conversion of msht20 hessian set to MC + keywords: [hessian Monte carlo] + + +pdf: MSTW2008nlo68cl + +mc_pdf_name: MSTW2008nlo68cl_mc + +num_members: 100 + +watt_thorne_rnd_seed: 1 + +actions_: + - write_hessian_to_mc_watt_thorne diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py index 8073566389..42cf0def31 100644 --- a/validphys2/src/validphys/app.py +++ b/validphys2/src/validphys/app.py @@ -52,6 +52,7 @@ "validphys.mc2hessian", "reportengine.report", "validphys.overfit_metric", + "validphys.hessian2mc", ] log = logging.getLogger(__name__) diff --git a/validphys2/src/validphys/checks.py b/validphys2/src/validphys/checks.py index 8ccecdec43..645c37bb88 100644 --- a/validphys2/src/validphys/checks.py +++ b/validphys2/src/validphys/checks.py @@ -42,6 +42,15 @@ def check_pdf_is_montecarlo_or_hessian(pdf, **kwargs): ) +@make_argcheck +def check_pdf_is_hessian(pdf, **kwargs): + etype = pdf.error_type + check( + etype in {'symmhessian', 'hessian'}, + f"Error type of PDF {pdf} must be 'symmhessian' or 'hessian' and not '{etype}'", + ) + + @make_argcheck def check_using_theory_covmat(use_theorycovmat): """Check that the `use_theorycovmat` is set to True""" diff --git a/validphys2/src/validphys/hessian2mc.py b/validphys2/src/validphys/hessian2mc.py new file mode 100644 index 0000000000..c9361c7156 --- /dev/null +++ b/validphys2/src/validphys/hessian2mc.py @@ -0,0 +1,145 @@ +""" +validphys.hessian2mc.py + +This module contains the functions that can be used to convert Hessian sets +like MSHT20 and CT18 to Monte Carlo sets. +The functions implemented here follow equations (4.3) of the paper arXiv:2203.05506 +""" + +import pathlib +import lhapdf +import os +import logging +import numpy as np + +from validphys.lhio import load_all_replicas, rep_matrix, write_replica +from validphys.checks import check_pdf_is_hessian + +log = logging.getLogger(__name__) + + +def write_new_lhapdf_info_file_from_previous_pdf( + path_old_pdfset, + name_old_pdfset, + path_new_pdfset, + name_new_pdfset, + num_members, + description_set="MC representation of hessian PDF set", + errortype="replicas", +): + """ + Writes a new LHAPDF set info file based on an existing set. + """ + + # write LHAPDF info file for a new pdf set + with open(path_old_pdfset / f"{name_old_pdfset}.info", "r") as in_stream, open( + path_new_pdfset / f"{name_new_pdfset}.info", "w" + ) as out_stream: + for l in in_stream.readlines(): + if l.find("SetDesc:") >= 0: + out_stream.write(f'SetDesc: "{description_set}"\n') + elif l.find("NumMembers:") >= 0: + out_stream.write(f"NumMembers: {num_members+1}\n") + elif l.find("ErrorType:") >= 0: + out_stream.write(f"ErrorType: {errortype}\n") + elif l.find("ErrorConfLevel") >= 0: + # remove ErrorConfLevel line + pass + else: + out_stream.write(l) + log.info(f"Info file written to {path_new_pdfset / f'{name_new_pdfset}.info'}") + + +def write_mc_watt_thorne_replicas(Rjk_std_normal, replicas_df, mc_pdf_path): + """ + Writes the Monte Carlo representation of a PDF set that is in Hessian form + using the Watt-Thorne prescription described in Eq. 4.3 of arXiv:2203.05506. + + Parameters + ---------- + Rjk_std_normal: np.ndarray + Array of shape (num_members, n_eig) containing random standard normal numbers. + + replicas_df: pd.DataFrame + DataFrame containing replicas of the hessian set at all scales. + + mc_pdf_path: pathlib.Path + Path to the new Monte Carlo PDF set. + """ + + for i, rnd_std_norm_vec in enumerate(Rjk_std_normal): + + # Odd eigenvectors: negative direction, even eigenvectors: positive direction + df_odd = replicas_df.loc[:, 2::2] + df_even = replicas_df.loc[:, 3::2] + new_column_names = range(1, len(df_even.columns) + 1) + + df_even.columns = new_column_names + df_odd.columns = new_column_names + + central_member, hess_diff_cov = replicas_df.loc[:, [1]], df_even - df_odd + + # Eq. 4.3 of arXiv:2203.05506 + mc_replica = central_member.dot([1]) + 0.5 * hess_diff_cov.dot(rnd_std_norm_vec) + + headers = f"PdfType: replica\nFormat: lhagrid1\nFromHessReplica: {i}\n" + write_replica(i + 1, mc_pdf_path, headers.encode("UTF-8"), mc_replica) + + # Write central replica from hessian set to mc set + headers = f"PdfType: replica\nFormat: lhagrid1\nFromHessReplica: {i}\n" + log.info(f"Writing central replica to {mc_pdf_path}") + write_replica(0, mc_pdf_path, headers.encode("UTF-8"), central_member) + + +@check_pdf_is_hessian +def write_hessian_to_mc_watt_thorne(pdf, mc_pdf_name, num_members, watt_thorne_rnd_seed=1): + """ + Writes the Monte Carlo representation of a PDF set that is in Hessian form + using the Watt-Thorne (MSHT20) prescription described in Eq. 4.3 of arXiv:2203.05506. + + Parameters + ---------- + pdf: validphys.core.PDF + The Hessian PDF set that is to be converted to Monte Carlo. + + mc_pdf_name: str + The name of the new Monte Carlo PDF set. + + """ + hessian_set = pdf + + lhapdf_path = pathlib.Path(lhapdf.paths()[-1]) + + # path to hessian lhapdf set + hessian_pdf_path = lhapdf_path / str(hessian_set) + + # path to new pdf set + mc_pdf_path = lhapdf_path / mc_pdf_name + + # create new pdf set folder in lhapdf path if it does not exist + if not mc_pdf_path.exists(): + os.makedirs(mc_pdf_path) + + # write LHAPDF info file for new pdf set + write_new_lhapdf_info_file_from_previous_pdf( + path_old_pdfset=hessian_pdf_path, + name_old_pdfset=str(hessian_set), + path_new_pdfset=mc_pdf_path, + name_new_pdfset=mc_pdf_name, + num_members=num_members, + description_set=f"MC representation of {str(hessian_set)}", + errortype="replicas", + ) + + # load replicas from basis set at all scales + _, grids = load_all_replicas(hessian_set) + replicas_df = rep_matrix(grids) + + # Eg for MSHT20, mem=0 => central value; mem=1-64 => 32 eigenvector sets (+/- directions) + n_eig = int((replicas_df.shape[1] - 1) / 2) + + np.random.seed(watt_thorne_rnd_seed) + Rjk_std_normal = np.random.standard_normal(size=(num_members, n_eig)) + + # write replicas to new pdf set + write_mc_watt_thorne_replicas(Rjk_std_normal, replicas_df, mc_pdf_path) diff --git a/validphys2/src/validphys/tests/test_hessian2mc.py b/validphys2/src/validphys/tests/test_hessian2mc.py new file mode 100644 index 0000000000..68f55fa695 --- /dev/null +++ b/validphys2/src/validphys/tests/test_hessian2mc.py @@ -0,0 +1,82 @@ +import numpy as np +import pandas as pd +from unittest import mock +from validphys.hessian2mc import write_mc_watt_thorne_replicas, write_hessian_to_mc_watt_thorne +import pathlib + + +@mock.patch("validphys.hessian2mc.write_replica") +@mock.patch("validphys.hessian2mc.log.info") +def test_write_mc_watt_thorne_replicas(mock_log_info, mock_write_replica): + # Set up mock inputs + n_eig = 5 + n_all_eig = 1 + 2 * n_eig # includes central member, so 1 + 2*n_eig + num_members = 50 # new MC members + num_nx_nq = 100 # xgrid and Q points + + replicas_df = pd.DataFrame( + np.random.standard_normal(size=(num_nx_nq, n_all_eig)), columns=range(1, n_all_eig + 1) + ) + + Rjk_std_normal = np.random.standard_normal(size=(num_members, n_eig)) + + # import IPython; IPython.embed() + mc_pdf_path = mock.Mock() # Mock the path + + # Call the function being tested + write_mc_watt_thorne_replicas(Rjk_std_normal, replicas_df, mc_pdf_path) + + # Verify that write_replica was called the correct number of times + assert ( + mock_write_replica.call_count == num_members + 1 + ) # for Rjk members + 1 for the central replica + + # Check if the log message was correct for the central replica + mock_log_info.assert_any_call(f"Writing central replica to {mc_pdf_path}") + + +@mock.patch("validphys.hessian2mc.write_mc_watt_thorne_replicas") +@mock.patch("validphys.hessian2mc.load_all_replicas") +@mock.patch("validphys.hessian2mc.rep_matrix") +@mock.patch("validphys.hessian2mc.write_new_lhapdf_info_file_from_previous_pdf") +@mock.patch("validphys.hessian2mc.os.makedirs") +@mock.patch("validphys.hessian2mc.lhapdf.paths") +def test_write_hessian_to_mc_watt_thorne( + mock_lhapdf_paths, + mock_makedirs, + mock_write_info_file, + mock_rep_matrix, + mock_load_all_replicas, + mock_write_mc_replicas, +): + # Set up mock inputs + msht_like_hessian_pdf = "MSHT20" + mc_pdf_name = "MSHT20_MC" + num_members = 100 + + mock_load_all_replicas.return_value = (None, None) + + mock_lhapdf_paths.return_value = [pathlib.Path("/path/to/lhapdf")] + + mock_rep_matrix.return_value = np.random.randn(5, 7) # Mocked replica matrix + + # Call the function being tested + write_hessian_to_mc_watt_thorne(msht_like_hessian_pdf, mc_pdf_name, num_members) + + # Verify that the necessary directories were created + mc_pdf_path = pathlib.Path("/path/to/lhapdf") / mc_pdf_name + mock_makedirs.assert_called_once_with(mc_pdf_path) + + # Verify that the LHAPDF info file was written + mock_write_info_file.assert_called_once_with( + path_old_pdfset=pathlib.Path("/path/to/lhapdf") / msht_like_hessian_pdf, + name_old_pdfset=msht_like_hessian_pdf, + path_new_pdfset=mc_pdf_path, + name_new_pdfset=mc_pdf_name, + num_members=num_members, + description_set=f"MC representation of {msht_like_hessian_pdf}", + errortype="replicas", + ) + + # Verify that the replicas were written + mock_write_mc_replicas.assert_called_once()