From a2ef2f0cb369e935dc424b40120502d1fb6d2b1b Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 27 Nov 2024 10:47:25 +0100
Subject: [PATCH 01/33] New feature to perform interpolation of RF predictions
 vs. cos(zenith) to avoid jumps when using RFs trained on a discrete set of
 pointings

---
 lstchain/reco/dl1_to_dl2.py             | 220 +++++++++++++++++++++++-
 lstchain/scripts/lstchain_dl1_to_dl2.py |  28 ++-
 2 files changed, 235 insertions(+), 13 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index dd4c76195d..b80687351e 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -7,6 +7,7 @@
 "import dl1_to_dl2"
 """
 
+import glob
 import os
 import logging
 
@@ -14,7 +15,7 @@
 import joblib
 import numpy as np
 import pandas as pd
-from astropy.coordinates import SkyCoord, Angle
+from astropy.coordinates import SkyCoord, Angle, angular_separation
 from astropy.time import Time
 from pathlib import Path
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
@@ -38,10 +39,13 @@
 logger = logging.getLogger(__name__)
 
 __all__ = [
+    'add_zd_interpolation_info',
     'apply_models',
     'build_models',
     'get_expected_source_pos',
     'get_source_dependent_parameters',
+    'get_training_directions',
+    'predict_with_zd_interpolation',
     'train_disp_norm',
     'train_disp_sign',
     'train_disp_vector',
@@ -52,6 +56,172 @@
 ]
 
 
+def get_training_directions(training_dir):
+    """
+    This function obtains the pointings of the telescope in the RF training
+    sample.
+
+    Parameters:
+    -----------
+    training_dir: pathlib.Path, path to the directory under which the folders
+    containing the DL1 files used in the training are found.
+    The folders' names are assumed to follow this pattern:
+
+    node_corsika_theta_34.367_az_69.537_
+
+    (theta is zenith; az is azimuth. Units are degrees, note that the values'
+    field lengths are not fixed)
+
+    Returns: training_az_deg, training_zd_deg arrays containing the azimuth and
+    zenith of the training nodes (in degrees)
+
+    """
+
+    dirs = glob.glob(str(training_dir) + '/node_corsika*')
+
+    training_zd_deg = []
+    training_az_deg = []
+
+    for dir in dirs:
+        c1 = dir.find('_theta_') + 7
+        c2 = dir.find('_az_', c1) + 4
+        c3 = dir.find('_', c2)
+        training_zd_deg.append(float(dir[c1:c2 - 4]))
+        training_az_deg.append(float(dir[c2:c3]))
+
+    training_zd_deg = np.array(training_zd_deg)
+    training_az_deg = np.array(training_az_deg)
+    # The order of the pointings is irrelevant
+
+    return training_az_deg, training_zd_deg
+
+
+def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
+    """
+    Compute necessary parameters for the interpolation of RF predictions
+    between the zenith pointings of the MC data in the training sample on
+    which the RFs were trained.
+
+
+    Parameters:
+    -----------
+    dl2table: pandas dataframe. Four columns will be added: alt0, alt1, w0, w1
+    alt0 and alt1 are the alt_tel (in rad) values (telescope elevation) for
+    the closest and second-closest training MC pointing for each event in the
+    table. The values w0 and w1 are the corresponding weights that, multiplied
+    by the RF predictions at those two pointings, provide the interpolated
+    result for each event's pointing
+
+    training_zd_deg: array containing the zenith distances (in deg) for the
+    MC training nodes
+    training_az_deg: array containing the azimuth angles (in deg) for the
+    MC training nodes (a given index in bith arrays corresponds to a given MC
+    pointing)
+
+    """
+
+    pd.options.mode.copy_on_write = True
+
+    alt_tel = dl2table['alt_tel']
+    az_tel  = dl2table['az_tel']
+
+    training_alt_rad = np.pi / 2 - np.deg2rad(training_zd_deg)
+    training_az_rad = np.deg2rad(training_az_deg)
+
+    tiled_az = np.tile(az_tel,
+                       len(training_az_rad)).reshape(len(training_az_rad),
+                                                     len(dl2table)).T
+    tiled_alt = np.tile(alt_tel,
+                       len(training_alt_rad)).reshape(len(training_alt_rad),
+                                                      len(dl2table)).T
+
+    angular_distance = angular_separation(training_az_rad, training_alt_rad,
+                                          tiled_az, tiled_alt)
+    # indices ordered in angular distance to each event:
+    sorted_indices = np.argsort(angular_distance, axis=1)
+
+    closest_alt = training_alt_rad[sorted_indices[:, 0]]
+    second_closest_alt = training_alt_rad[sorted_indices[:, 1]]
+
+    c0 = np.cos(np.pi / 2 - closest_alt)
+    c1 = np.cos(np.pi / 2 - second_closest_alt)
+    cos_tel_zd = np.cos(np.pi / 2 - alt_tel)
+
+    # Compute the weights that multiplied times the RF predictions at the
+    # closest (0) and 2nd-closest (1) nodes result in the interpolated value:
+    w0 = 1 - (cos_tel_zd - c0) / (c1 - c0)
+    w1 = (cos_tel_zd - c0) / (c1 - c0)
+
+    # Update the dataframe:
+    dl2table = dl2table.assign(alt0=pd.Series(closest_alt).values,
+                               alt1=pd.Series(second_closest_alt).values,
+                               w0=pd.Series(w0).values,
+                               w1=pd.Series(w1).values)
+
+    return dl2table, ['alt0', 'alt1', 'w0', 'w1']
+
+
+def predict_with_zd_interpolation(rf, param_array, features):
+    """
+    Obtain a RF prediction which takes into account the difference between
+    the telescope elevation (alt_tel, i.e. 90 deg - zenith) and those of the
+    MC training nodes. The dependence of image parameters (for a shower of
+    given characteristics) with zenith is strong at angles beyond ~50 deg,
+    due to the change in airmass. Given the way Random Forests work, if the
+    training is performed with a discrete distribution of pointings,
+    the prediction of the RF will be biased for pointings in between those
+    used in training. If zenith is used as one of the RF features, there will
+    be a sudden jump in the predictions halfway between the training nodes.
+
+    To solve this, we compute here two predictions for each event, one using
+    the elevation (alt_tel) of the training pointing which is closest to the
+    telescope pointing, and another one usimg the elevation of the
+    sceond-closest pointing. Then the values are interpolated (linearly in
+    cos(zenith)) to the actual zenith pointing (90 deg - alt_tel) of the event.
+
+    rf: sklearn.ensemble.RandomForestRegressor or RandomForestClassifier,
+    the random forest we want to apply (must contain alt_tel among the
+    training parameters)
+
+    param_array: pandas dataframe containing the features needed by theRF
+    It must also contain four additional columns: alt0, alt1, w0, w1, which
+    can be added with the function add_zd_interpolation_info. These are the
+    event-wise telescope elevations for the closest and 2nd-closest training
+    pointings (alt0 and alt1), and the event-wise weights (w0 and w1) which
+    must be applied to the RF prediction at the two pointings to obtain the
+    interpolated value at the actual telescope pointing. Since the weights
+    are the same (for a given event) for different RFs, it does not make
+    sense to compute them here - they are pre-calculated by
+    add_zd_interpolation_info
+
+    features: list of the names of the image features used by the RF
+
+    Return: event-wise 1d array of interpolated RF predictions (e.g. log
+    energy, or gammaness, etc depending on the RF)
+
+    """
+    # keep original alt_tel values:
+    param_array.rename(columns={"alt_tel": "original_alt_tel"}, inplace=True)
+
+    # Set alt_tel to closest MC training node's alt:
+    param_array.rename(columns={"alt0": "alt_tel"}, inplace=True)
+    prediction_0 = rf.predict(param_array[features])
+    param_array.rename(columns={"alt_tel": "alt0"}, inplace=True)
+
+    # set alt_tel value to that of second closest node:
+    param_array.rename(columns={"alt1": "alt_tel"}, inplace=True)
+    prediction_1 = rf.predict(param_array[features])
+    param_array.rename(columns={"alt_tel": "alt1"}, inplace=True)
+
+    # Put back original value of alt_tel:
+    param_array.rename(columns={"original_alt_tel": "alt_tel"}, inplace=True)
+
+    # Interpolated RF prediction:
+    prediction = (prediction_0 * param_array['w0'] +
+                  prediction_1 * param_array['w1'])
+
+    return prediction.values
+
 def train_energy(train, custom_config=None):
     """
     Train a Random Forest Regressor for the regression of the energy
@@ -602,6 +772,7 @@ def apply_models(dl1,
                  cls_disp_sign=None,
                  effective_focal_length=29.30565 * u.m,
                  custom_config=None,
+                 dl1_training_dir=None,
                  ):
     """
     Apply previously trained Random Forests to a set of data
@@ -629,6 +800,9 @@ def apply_models(dl1,
     effective_focal_length: `astropy.unit`
     custom_config: dictionary
         Modified configuration to update the standard one
+    dl1_training_dir: pathlib.Path, path to the directory under which the
+    folders containing the DL1 MC training data are kept (only folder names
+    are needed, not the actual DL1 files)
 
     Returns
     -------
@@ -643,6 +817,7 @@ def apply_models(dl1,
     classification_features = config["particle_classification_features"]
     events_filters = config["events_filters"]
 
+
     dl2 = utils.filter_events(dl1,
                               filters=events_filters,
                               finite_params=config['disp_regression_features']
@@ -660,29 +835,56 @@ def apply_models(dl1,
     is_simu = 'disp_norm' in dl2.columns
     if is_simu:
         dl2 = update_disp_with_effective_focal_length(dl2, effective_focal_length = effective_focal_length)
-    
+
+    # If dl1_training_dir was provided (containing folders for each fo the MC
+    # training pointing nodes), obtain the training pointings, and update the
+    # DL2 table with the additiona info needed for the interpolation:
+
+    coszd_interpolated_RF = False
+    if dl1_training_dir is not None:
+        coszd_interpolated_RF = True
+        training_az_deg, training_zd_deg = get_training_directions(dl1_training_dir)
+        add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
 
     # Reconstruction of Energy and disp_norm distance
     if isinstance(reg_energy, (str, bytes, Path)):
         reg_energy = joblib.load(reg_energy)
-    dl2['log_reco_energy'] = reg_energy.predict(dl2[energy_regression_features])
+    if coszd_interpolated_RF:
+        # Interpolation of RF predictions (linear in cos(zenith)):
+        dl2['log_reco_energy'] = predict_with_zd_interpolation(reg_energy, dl2,
+                                                               energy_regression_features)
+    else:
+        dl2['log_reco_energy'] = reg_energy.predict(dl2[energy_regression_features])
     del reg_energy
     dl2['reco_energy'] = 10 ** (dl2['log_reco_energy'])
 
     if config['disp_method'] == 'disp_vector':
         if isinstance(reg_disp_vector, (str, bytes, Path)):
             reg_disp_vector = joblib.load(reg_disp_vector)
-        disp_vector = reg_disp_vector.predict(dl2[disp_regression_features])
+        if coszd_interpolated_RF:
+            disp_vector = predict_with_zd_interpolation(reg_disp_vector, dl2,
+                                                        disp_regression_features)
+        else:
+            disp_vector = reg_disp_vector.predict(dl2[disp_regression_features])
         del reg_disp_vector
     elif config['disp_method'] == 'disp_norm_sign':
         if isinstance(reg_disp_norm, (str, bytes, Path)):
             reg_disp_norm = joblib.load(reg_disp_norm)
-        disp_norm = reg_disp_norm.predict(dl2[disp_regression_features])
+        if coszd_interpolated_RF:
+            disp_norm = predict_with_zd_interpolation(reg_disp_norm, dl2,
+                                                      disp_regression_features)
+        else:
+            disp_norm = reg_disp_norm.predict(dl2[disp_regression_features])
         del reg_disp_norm
 
         if isinstance(cls_disp_sign, (str, bytes, Path)):
             cls_disp_sign = joblib.load(cls_disp_sign)
-        disp_sign_proba = cls_disp_sign.predict_proba(dl2[disp_classification_features])
+        if coszd_interpolated_RF:
+            disp_sign_proba = predict_with_zd_interpolation(cls_disp_sign, dl2,
+                                                            disp_classification_features)
+        else:
+            disp_sign_proba = cls_disp_sign.predict_proba(dl2[disp_classification_features])
+
         col = list(cls_disp_sign.classes_).index(1)
         disp_sign = np.where(disp_sign_proba[:, col] > 0.5, 1, -1)
         del cls_disp_sign
@@ -748,7 +950,11 @@ def apply_models(dl1,
     
     if isinstance(classifier, (str, bytes, Path)):
         classifier = joblib.load(classifier)
-    probs = classifier.predict_proba(dl2[classification_features])
+    if coszd_interpolated_RF:
+        probs = predict_with_zd_interpolation(classifier, dl2,
+                                              classification_features)
+    else:
+        probs = classifier.predict_proba(dl2[classification_features])
 
     # This check is valid as long as we train on only two classes (gammas and protons)
     if probs.shape[1] > 2:
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 4ae1eb4b13..73f7ef45b8 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -74,8 +74,15 @@
                     default=None,
                     required=False)
 
+parser.add_argument('--dl1_training_dir', '-t',
+                    action='store',
+                    type=Path,
+                    dest='dl1_training_dir',
+                    help='Path to parent directory of DL1 training folders',
+                    default=None,
+                    required=False)
 
-def apply_to_file(filename, models_dict, output_dir, config):
+def apply_to_file(filename, models_dict, output_dir, config, dl1_training_dir):
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)
 
@@ -134,7 +141,8 @@ def apply_to_file(filename, models_dict, output_dir, config):
                                           models_dict['reg_energy'],
                                           reg_disp_vector=models_dict['reg_disp_vector'],
                                           effective_focal_length=effective_focal_length,
-                                          custom_config=config)
+                                          custom_config=config,
+                                          dl1_training_dir=dl1_training_dir)
         elif config['disp_method'] == 'disp_norm_sign':
             dl2 = dl1_to_dl2.apply_models(data,
                                           models_dict['cls_gh'],
@@ -142,7 +150,8 @@ def apply_to_file(filename, models_dict, output_dir, config):
                                           reg_disp_norm=models_dict['reg_disp_norm'],
                                           cls_disp_sign=models_dict['cls_disp_sign'],
                                           effective_focal_length=effective_focal_length,
-                                          custom_config=config)
+                                          custom_config=config,
+                                          dl1_training_dir=dl1_training_dir)
 
     # Source-dependent analysis
     if config['source_dependent']:
@@ -175,7 +184,8 @@ def apply_to_file(filename, models_dict, output_dir, config):
                                                  models_dict['reg_energy'],
                                                  reg_disp_vector=models_dict['reg_disp_vector'],
                                                  effective_focal_length=effective_focal_length,
-                                                 custom_config=config)
+                                                 custom_config=config,
+                                                 dl1_training_dir=dl1_training_dir)
             elif config['disp_method'] == 'disp_norm_sign':
                 dl2 = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                                  models_dict['cls_gh'],
@@ -183,7 +193,8 @@ def apply_to_file(filename, models_dict, output_dir, config):
                                                  reg_disp_norm=models_dict['reg_disp_norm'],
                                                  cls_disp_sign=models_dict['cls_disp_sign'],
                                                  effective_focal_length=effective_focal_length,
-                                                 custom_config=config)
+                                                 custom_config=config,
+                                                 dl1_training_dir=dl1_training_dir)
 
             dl2_srcdep = dl2.drop(srcindep_keys, axis=1)
             dl2_srcdep_dict[k] = dl2_srcdep
@@ -270,6 +281,10 @@ def main():
         except("Custom configuration could not be loaded !!!"):
             pass
 
+    if args.dl1_training_dir is not None:
+        logger.info('Cos(zenith) interpolation will be used in Random Forests')
+        logger.info('DL1 training directory:', args.dl1_training_dir)
+
     config = replace_config(standard_config, custom_config)
 
     models_keys = ['reg_energy', 'cls_gh']
@@ -291,7 +306,8 @@ def main():
             models_dict[models_key] = joblib.load(models_path)
 
     for filename in args.input_files:
-        apply_to_file(filename, models_dict, args.output_dir, config)
+        apply_to_file(filename, models_dict, args.output_dir, config,
+                      args.dl1_training_dir)
 
 
 

From 908e6cfee7b573b53c970023bc128ddd2f67e8f6 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 27 Nov 2024 10:51:34 +0100
Subject: [PATCH 02/33] Improved help message

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 73f7ef45b8..df206e9fae 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -78,7 +78,11 @@
                     action='store',
                     type=Path,
                     dest='dl1_training_dir',
-                    help='Path to parent directory of DL1 training folders',
+                    help='Path to parent directory of DL1 training folders. '
+                         'If given, the pointings of the training sample will be '
+                         'obtained, and RF predictions will be interpolated '
+                         '(lineary in cos(zenith) to the instantaneous '
+                         'telescope pointing for each event',
                     default=None,
                     required=False)
 

From 000f091de1c3a764543a550fb3a3b3bc359a52a6 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 27 Nov 2024 16:04:01 +0100
Subject: [PATCH 03/33] Fixed various errors...

---
 lstchain/reco/dl1_to_dl2.py             | 17 +++++++++++------
 lstchain/scripts/lstchain_dl1_to_dl2.py |  5 +++--
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index b80687351e..0d552fa5ab 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -102,7 +102,6 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
     between the zenith pointings of the MC data in the training sample on
     which the RFs were trained.
 
-
     Parameters:
     -----------
     dl2table: pandas dataframe. Four columns will be added: alt0, alt1, w0, w1
@@ -114,12 +113,16 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
 
     training_zd_deg: array containing the zenith distances (in deg) for the
     MC training nodes
+
     training_az_deg: array containing the azimuth angles (in deg) for the
     MC training nodes (a given index in bith arrays corresponds to a given MC
     pointing)
 
-    """
+    Returns:
+    -------
+    DL2 pandas dataframe with additional columns alt0, alt1, w0, w1
 
+    """
     pd.options.mode.copy_on_write = True
 
     alt_tel = dl2table['alt_tel']
@@ -158,10 +161,10 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
                                w0=pd.Series(w0).values,
                                w1=pd.Series(w1).values)
 
-    return dl2table, ['alt0', 'alt1', 'w0', 'w1']
+    return dl2table
 
 
-def predict_with_zd_interpolation(rf, param_array, features):
+def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
     """
     Obtain a RF prediction which takes into account the difference between
     the telescope elevation (alt_tel, i.e. 90 deg - zenith) and those of the
@@ -196,6 +199,8 @@ def predict_with_zd_interpolation(rf, param_array, features):
 
     features: list of the names of the image features used by the RF
 
+    is_proba: if True predict_proba will be called instead of predict 
+
     Return: event-wise 1d array of interpolated RF predictions (e.g. log
     energy, or gammaness, etc depending on the RF)
 
@@ -838,13 +843,13 @@ def apply_models(dl1,
 
     # If dl1_training_dir was provided (containing folders for each fo the MC
     # training pointing nodes), obtain the training pointings, and update the
-    # DL2 table with the additiona info needed for the interpolation:
+    # DL2 table with the additional info needed for the interpolation:
 
     coszd_interpolated_RF = False
     if dl1_training_dir is not None:
         coszd_interpolated_RF = True
         training_az_deg, training_zd_deg = get_training_directions(dl1_training_dir)
-        add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
+        dl2 = add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
 
     # Reconstruction of Energy and disp_norm distance
     if isinstance(reg_energy, (str, bytes, Path)):
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index df206e9fae..6ecdaead2f 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -286,8 +286,9 @@ def main():
             pass
 
     if args.dl1_training_dir is not None:
-        logger.info('Cos(zenith) interpolation will be used in Random Forests')
-        logger.info('DL1 training directory:', args.dl1_training_dir)
+        logger.warning('Cos(zenith) interpolation will be used in Random '
+                       'Forests')
+        logger.warning('DL1 training directory: ' + str(args.dl1_training_dir))
 
     config = replace_config(standard_config, custom_config)
 

From 19c6480590dfc196c21a71fabcf7396c131ba5c6 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 27 Nov 2024 16:39:05 +0100
Subject: [PATCH 04/33] Add is_proba option for RF classifiers

---
 lstchain/reco/dl1_to_dl2.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 0d552fa5ab..b5d4ec18db 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -199,7 +199,7 @@ def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
 
     features: list of the names of the image features used by the RF
 
-    is_proba: if True predict_proba will be called instead of predict 
+    is_proba: if True predict_proba will be called instead of predict
 
     Return: event-wise 1d array of interpolated RF predictions (e.g. log
     energy, or gammaness, etc depending on the RF)
@@ -210,18 +210,27 @@ def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
 
     # Set alt_tel to closest MC training node's alt:
     param_array.rename(columns={"alt0": "alt_tel"}, inplace=True)
-    prediction_0 = rf.predict(param_array[features])
+    if is_proba:
+        prediction_0 = rf.predict_proba(param_array[features])
+    else:
+        prediction_0 = rf.predict(param_array[features])
+
     param_array.rename(columns={"alt_tel": "alt0"}, inplace=True)
 
     # set alt_tel value to that of second closest node:
     param_array.rename(columns={"alt1": "alt_tel"}, inplace=True)
-    prediction_1 = rf.predict(param_array[features])
+    if is_proba:
+        prediction_1 = rf.predict_proba(param_array[features])
+    else:
+        prediction_1 = rf.predict(param_array[features])
+
     param_array.rename(columns={"alt_tel": "alt1"}, inplace=True)
 
     # Put back original value of alt_tel:
     param_array.rename(columns={"original_alt_tel": "alt_tel"}, inplace=True)
 
     # Interpolated RF prediction:
+
     prediction = (prediction_0 * param_array['w0'] +
                   prediction_1 * param_array['w1'])
 
@@ -886,7 +895,8 @@ def apply_models(dl1,
             cls_disp_sign = joblib.load(cls_disp_sign)
         if coszd_interpolated_RF:
             disp_sign_proba = predict_with_zd_interpolation(cls_disp_sign, dl2,
-                                                            disp_classification_features)
+                                                            disp_classification_features,
+                                                            is_proba=True)
         else:
             disp_sign_proba = cls_disp_sign.predict_proba(dl2[disp_classification_features])
 
@@ -957,7 +967,8 @@ def apply_models(dl1,
         classifier = joblib.load(classifier)
     if coszd_interpolated_RF:
         probs = predict_with_zd_interpolation(classifier, dl2,
-                                              classification_features)
+                                              classification_features,
+                                              is_proba=True)
     else:
         probs = classifier.predict_proba(dl2[classification_features])
 

From dba62a246ea8dbf5f0ae042648060d1d83119721 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 27 Nov 2024 17:06:01 +0100
Subject: [PATCH 05/33] Proper treatment of predict_proba output

---
 lstchain/reco/dl1_to_dl2.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index b5d4ec18db..6ca9e09ce1 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -231,8 +231,12 @@ def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
 
     # Interpolated RF prediction:
 
-    prediction = (prediction_0 * param_array['w0'] +
-                  prediction_1 * param_array['w1'])
+    if is_proba:
+        prediction = (prediction_0.T * param_array['w0'] +
+                      prediction_1.T * param_array['w1']).T
+    else:
+        prediction = (prediction_0 * param_array['w0'] +
+                      prediction_1 * param_array['w1'])
 
     return prediction.values
 

From d45ef907586b84b55298720d4eb0c0463a3baae4 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 27 Nov 2024 17:33:36 +0100
Subject: [PATCH 06/33] minor fix of matrix operations

---
 lstchain/reco/dl1_to_dl2.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 6ca9e09ce1..cb9d78305e 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -232,13 +232,13 @@ def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
     # Interpolated RF prediction:
 
     if is_proba:
-        prediction = (prediction_0.T * param_array['w0'] +
-                      prediction_1.T * param_array['w1']).T
+        prediction = (prediction_0.T * param_array['w0'].values+
+                      prediction_1.T * param_array['w1'].values).T
     else:
         prediction = (prediction_0 * param_array['w0'] +
-                      prediction_1 * param_array['w1'])
+                      prediction_1 * param_array['w1']).values
 
-    return prediction.values
+    return prediction
 
 def train_energy(train, custom_config=None):
     """

From d4a28db9f2c28d060840efa78775f50b3633c14e Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 27 Nov 2024 17:55:18 +0100
Subject: [PATCH 07/33] Check RF type automatically - remove argument

---
 lstchain/reco/dl1_to_dl2.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index cb9d78305e..1539a82821 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -164,7 +164,7 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
     return dl2table
 
 
-def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
+def predict_with_zd_interpolation(rf, param_array, features):
     """
     Obtain a RF prediction which takes into account the difference between
     the telescope elevation (alt_tel, i.e. 90 deg - zenith) and those of the
@@ -199,18 +199,20 @@ def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
 
     features: list of the names of the image features used by the RF
 
-    is_proba: if True predict_proba will be called instead of predict
-
     Return: event-wise 1d array of interpolated RF predictions (e.g. log
     energy, or gammaness, etc depending on the RF)
 
     """
+
+    # Type of RF (classifier or regressor):
+    is_classifier = isinstance(rf, RandomForestClassifier)
+
     # keep original alt_tel values:
     param_array.rename(columns={"alt_tel": "original_alt_tel"}, inplace=True)
 
     # Set alt_tel to closest MC training node's alt:
     param_array.rename(columns={"alt0": "alt_tel"}, inplace=True)
-    if is_proba:
+    if is_classifier:
         prediction_0 = rf.predict_proba(param_array[features])
     else:
         prediction_0 = rf.predict(param_array[features])
@@ -219,7 +221,7 @@ def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
 
     # set alt_tel value to that of second closest node:
     param_array.rename(columns={"alt1": "alt_tel"}, inplace=True)
-    if is_proba:
+    if is_classifier:
         prediction_1 = rf.predict_proba(param_array[features])
     else:
         prediction_1 = rf.predict(param_array[features])
@@ -230,8 +232,7 @@ def predict_with_zd_interpolation(rf, param_array, features, is_proba=False):
     param_array.rename(columns={"original_alt_tel": "alt_tel"}, inplace=True)
 
     # Interpolated RF prediction:
-
-    if is_proba:
+    if is_classifier:
         prediction = (prediction_0.T * param_array['w0'].values+
                       prediction_1.T * param_array['w1'].values).T
     else:
@@ -899,8 +900,7 @@ def apply_models(dl1,
             cls_disp_sign = joblib.load(cls_disp_sign)
         if coszd_interpolated_RF:
             disp_sign_proba = predict_with_zd_interpolation(cls_disp_sign, dl2,
-                                                            disp_classification_features,
-                                                            is_proba=True)
+                                                            disp_classification_features)
         else:
             disp_sign_proba = cls_disp_sign.predict_proba(dl2[disp_classification_features])
 
@@ -971,8 +971,7 @@ def apply_models(dl1,
         classifier = joblib.load(classifier)
     if coszd_interpolated_RF:
         probs = predict_with_zd_interpolation(classifier, dl2,
-                                              classification_features,
-                                              is_proba=True)
+                                              classification_features)
     else:
         probs = classifier.predict_proba(dl2[classification_features])
 

From 4b1c960cbd037868acd7c271eef6f00f961038b6 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 27 Nov 2024 17:58:49 +0100
Subject: [PATCH 08/33] Improved description

---
 lstchain/reco/dl1_to_dl2.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 1539a82821..8606c451e8 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -199,8 +199,8 @@ def predict_with_zd_interpolation(rf, param_array, features):
 
     features: list of the names of the image features used by the RF
 
-    Return: event-wise 1d array of interpolated RF predictions (e.g. log
-    energy, or gammaness, etc depending on the RF)
+    Return: interpolated RF predictions. 1D array for regressors (log energy,
+    or disp_norm), 2D (events, # of classes) for classifiers
 
     """
 
@@ -233,7 +233,7 @@ def predict_with_zd_interpolation(rf, param_array, features):
 
     # Interpolated RF prediction:
     if is_classifier:
-        prediction = (prediction_0.T * param_array['w0'].values+
+        prediction = (prediction_0.T * param_array['w0'].values +
                       prediction_1.T * param_array['w1'].values).T
     else:
         prediction = (prediction_0 * param_array['w0'] +

From 2f54db8e956dc6b7a35544a6e8334119b72094ad Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 11:50:50 +0100
Subject: [PATCH 09/33] Now configuration is fully made through the config
 file, no command-line options

The interpolation can be switched on for the three reconstructions
independently (energy, gammaness, direction)

By default the correction is applied. If no MC DL1 training
directory is provided, the path will be built from the path
of the RF models. If directory does not exist the option is
simply deactivated
---
 lstchain/data/lstchain_standard_config.json |  7 +++
 lstchain/reco/dl1_to_dl2.py                 | 30 ++++++---
 lstchain/scripts/lstchain_dl1_to_dl2.py     | 67 ++++++++++++++-------
 3 files changed, 74 insertions(+), 30 deletions(-)

diff --git a/lstchain/data/lstchain_standard_config.json b/lstchain/data/lstchain_standard_config.json
index bad8f978ef..19a126c5bb 100644
--- a/lstchain/data/lstchain_standard_config.json
+++ b/lstchain/data/lstchain_standard_config.json
@@ -69,6 +69,13 @@
     "pointing_wise_weights": true
   },
 
+  "random_forest_zd_interpolation": {
+    "interpolate_energy": true,
+    "interpolate_gammaness": true,
+    "interpolate_direction": true,
+    "DL1_training_dir": null
+  },
+
   "random_forest_energy_regressor_args": {
     "max_depth": 30,
     "min_samples_leaf": 10,
diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 8606c451e8..69b96de939 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -791,6 +791,9 @@ def apply_models(dl1,
                  cls_disp_sign=None,
                  effective_focal_length=29.30565 * u.m,
                  custom_config=None,
+                 interpolate_rf={'energy_regression':False,
+                                 'particle_classification':False,
+                                 'disp':False},
                  dl1_training_dir=None,
                  ):
     """
@@ -819,9 +822,13 @@ def apply_models(dl1,
     effective_focal_length: `astropy.unit`
     custom_config: dictionary
         Modified configuration to update the standard one
+    interpolate_rf: dictionary. Contains three booleans, 'energy_regression',
+        'particle_classification', 'disp', indicating which RF predictions
+        should be interpolated linearly in cos(zenith)
     dl1_training_dir: pathlib.Path, path to the directory under which the
-    folders containing the DL1 MC training data are kept (only folder names
-    are needed, not the actual DL1 files)
+        folders containing the DL1 MC training data are kept (only folder
+        names are needed, not the actual DL1 files). Needed for interpolation
+        of RF predictions, if activated by user via interpolate_rf
 
     Returns
     -------
@@ -859,16 +866,21 @@ def apply_models(dl1,
     # training pointing nodes), obtain the training pointings, and update the
     # DL2 table with the additional info needed for the interpolation:
 
-    coszd_interpolated_RF = False
     if dl1_training_dir is not None:
-        coszd_interpolated_RF = True
         training_az_deg, training_zd_deg = get_training_directions(dl1_training_dir)
         dl2 = add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
 
+    if not dl1_training_dir.is_dir():
+        logger.warning('DL1 training directory not found...')
+        logger.warning('Switching off RF interpolation with zenith!')
+        interpolate_rf['energy_regression'] = False
+        interpolate_rf['particle_classification'] = False
+        interpolate_rf['disp'] = False
+
     # Reconstruction of Energy and disp_norm distance
     if isinstance(reg_energy, (str, bytes, Path)):
         reg_energy = joblib.load(reg_energy)
-    if coszd_interpolated_RF:
+    if interpolate_rf['energy_regression']:
         # Interpolation of RF predictions (linear in cos(zenith)):
         dl2['log_reco_energy'] = predict_with_zd_interpolation(reg_energy, dl2,
                                                                energy_regression_features)
@@ -880,7 +892,7 @@ def apply_models(dl1,
     if config['disp_method'] == 'disp_vector':
         if isinstance(reg_disp_vector, (str, bytes, Path)):
             reg_disp_vector = joblib.load(reg_disp_vector)
-        if coszd_interpolated_RF:
+        if interpolate_rf['disp']:
             disp_vector = predict_with_zd_interpolation(reg_disp_vector, dl2,
                                                         disp_regression_features)
         else:
@@ -889,7 +901,7 @@ def apply_models(dl1,
     elif config['disp_method'] == 'disp_norm_sign':
         if isinstance(reg_disp_norm, (str, bytes, Path)):
             reg_disp_norm = joblib.load(reg_disp_norm)
-        if coszd_interpolated_RF:
+        if interpolate_rf['disp']:
             disp_norm = predict_with_zd_interpolation(reg_disp_norm, dl2,
                                                       disp_regression_features)
         else:
@@ -898,7 +910,7 @@ def apply_models(dl1,
 
         if isinstance(cls_disp_sign, (str, bytes, Path)):
             cls_disp_sign = joblib.load(cls_disp_sign)
-        if coszd_interpolated_RF:
+        if interpolate_rf['disp']:
             disp_sign_proba = predict_with_zd_interpolation(cls_disp_sign, dl2,
                                                             disp_classification_features)
         else:
@@ -969,7 +981,7 @@ def apply_models(dl1,
     
     if isinstance(classifier, (str, bytes, Path)):
         classifier = joblib.load(classifier)
-    if coszd_interpolated_RF:
+    if interpolate_rf['particle_classification']:
         probs = predict_with_zd_interpolation(classifier, dl2,
                                               classification_features)
     else:
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 6ecdaead2f..5cdcfde785 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -74,22 +74,50 @@
                     default=None,
                     required=False)
 
-parser.add_argument('--dl1_training_dir', '-t',
-                    action='store',
-                    type=Path,
-                    dest='dl1_training_dir',
-                    help='Path to parent directory of DL1 training folders. '
-                         'If given, the pointings of the training sample will be '
-                         'obtained, and RF predictions will be interpolated '
-                         '(lineary in cos(zenith) to the instantaneous '
-                         'telescope pointing for each event',
-                    default=None,
-                    required=False)
-
-def apply_to_file(filename, models_dict, output_dir, config, dl1_training_dir):
+def apply_to_file(filename, models_dict, output_dir, config):
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)
 
+    interpolate_energy = False
+    interpolate_gammaness = False
+    interpolate_direction = False
+    dl1_training_dir = None
+    # Read in the settings for the interpolation of Random Forest predictions
+    # in cos(zd). If activated this avoids the jumps of performance produced
+    # by the discrete set of pointings in the RF training sample.
+    if 'random_forest_zd_interpolation' in config.keys():
+        zdinter = config['random_forest_zd_interpolation']
+        if 'interpolate_energy' in zdinter.keys():
+            interpolate_energy = zdinter['interpolate_energy']
+        if 'interpolate_gammaness' in zdinter.keys():
+            interpolate_gammaness = zdinter['interpolate_gammaness']
+        if 'interpolate_direction' in zdinter.keys():
+            interpolate_direction = zdinter['interpolate_direction']
+        if 'DL1_training_dir' in zdinter.keys():
+            dl1_training_dir = zdinter['DL1_training_dir']
+
+    interpolate_rf = {'energy_regression': interpolate_energy,
+                      'particle_classification': interpolate_gammaness,
+                      'disp': interpolate_direction
+                      }
+    if dl1_training_dir is None:
+        # Build name of DL1 MC training files assuming standard pattern:
+        models_dir = models_dict['reg_energy'].parent()
+        dummy = models_dir.as_posix().replace('/data/models', '/data/mc/DL1')
+        dl1_training_dir = Path(dummy[:dummy.rfind('/dec')] +
+                                '/TrainingDataset/GammaDiffuse/' +
+                                dummy[dummy.rfind('/dec')+1:])
+
+
+    if interpolate_energy or interpolate_gammaness or interpolate_direction:
+        logger.warning('Cos(zenith) interpolation will be used in:')
+        if interpolate_energy:
+            logger.warning('   energy reconstruction Random Forest')
+        if interpolate_gammaness:
+            logger.warning('   g/h classification Random Forest')
+        if interpolate_direction:
+            logger.warning('   direction reconstruction Random Forest')
+
     if 'lh_fit_config' in config.keys():
         lhfit_data = pd.read_hdf(filename, key=dl1_likelihood_params_lstcam_key)
         if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(lhfit_data['event_id'] == data['event_id']):
@@ -146,6 +174,7 @@ def apply_to_file(filename, models_dict, output_dir, config, dl1_training_dir):
                                           reg_disp_vector=models_dict['reg_disp_vector'],
                                           effective_focal_length=effective_focal_length,
                                           custom_config=config,
+                                          interpolate_rf=interpolate_rf,
                                           dl1_training_dir=dl1_training_dir)
         elif config['disp_method'] == 'disp_norm_sign':
             dl2 = dl1_to_dl2.apply_models(data,
@@ -155,6 +184,7 @@ def apply_to_file(filename, models_dict, output_dir, config, dl1_training_dir):
                                           cls_disp_sign=models_dict['cls_disp_sign'],
                                           effective_focal_length=effective_focal_length,
                                           custom_config=config,
+                                          interpolate_rf=interpolate_rf,
                                           dl1_training_dir=dl1_training_dir)
 
     # Source-dependent analysis
@@ -189,6 +219,7 @@ def apply_to_file(filename, models_dict, output_dir, config, dl1_training_dir):
                                                  reg_disp_vector=models_dict['reg_disp_vector'],
                                                  effective_focal_length=effective_focal_length,
                                                  custom_config=config,
+                                                 interpolate_rf=interpolate_rf,
                                                  dl1_training_dir=dl1_training_dir)
             elif config['disp_method'] == 'disp_norm_sign':
                 dl2 = dl1_to_dl2.apply_models(data_with_srcdep_param,
@@ -198,6 +229,7 @@ def apply_to_file(filename, models_dict, output_dir, config, dl1_training_dir):
                                                  cls_disp_sign=models_dict['cls_disp_sign'],
                                                  effective_focal_length=effective_focal_length,
                                                  custom_config=config,
+                                                 interpolate_rf=interpolate_rf,
                                                  dl1_training_dir=dl1_training_dir)
 
             dl2_srcdep = dl2.drop(srcindep_keys, axis=1)
@@ -285,11 +317,6 @@ def main():
         except("Custom configuration could not be loaded !!!"):
             pass
 
-    if args.dl1_training_dir is not None:
-        logger.warning('Cos(zenith) interpolation will be used in Random '
-                       'Forests')
-        logger.warning('DL1 training directory: ' + str(args.dl1_training_dir))
-
     config = replace_config(standard_config, custom_config)
 
     models_keys = ['reg_energy', 'cls_gh']
@@ -311,9 +338,7 @@ def main():
             models_dict[models_key] = joblib.load(models_path)
 
     for filename in args.input_files:
-        apply_to_file(filename, models_dict, args.output_dir, config,
-                      args.dl1_training_dir)
-
+        apply_to_file(filename, models_dict, args.output_dir, config)
 
 
 if __name__ == '__main__':

From 309ab4903daeff2abd819bfee9667d9f2867e524 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 12:00:17 +0100
Subject: [PATCH 10/33] Change in treatment of missing dl1_training_dir case

---
 lstchain/reco/dl1_to_dl2.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 69b96de939..fc502e6a52 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -860,17 +860,16 @@ def apply_models(dl1,
     # taking into account of the abrration effect using effective focal length
     is_simu = 'disp_norm' in dl2.columns
     if is_simu:
-        dl2 = update_disp_with_effective_focal_length(dl2, effective_focal_length = effective_focal_length)
+        dl2 = update_disp_with_effective_focal_length(dl2,
+                                                      effective_focal_length=effective_focal_length)
 
     # If dl1_training_dir was provided (containing folders for each fo the MC
     # training pointing nodes), obtain the training pointings, and update the
     # DL2 table with the additional info needed for the interpolation:
-
-    if dl1_training_dir is not None:
+    if dl1_training_dir is not None and dl1_training_dir.is_dir():
         training_az_deg, training_zd_deg = get_training_directions(dl1_training_dir)
         dl2 = add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
-
-    if not dl1_training_dir.is_dir():
+    else:
         logger.warning('DL1 training directory not found...')
         logger.warning('Switching off RF interpolation with zenith!')
         interpolate_rf['energy_regression'] = False

From 98a90ae5dd8fe311aea31ccda9ece976dd4e966c Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 12:20:59 +0100
Subject: [PATCH 11/33] Fix building of dl1_training_dir from path to RF models

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 5cdcfde785..eabc8f8e52 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -74,7 +74,7 @@
                     default=None,
                     required=False)
 
-def apply_to_file(filename, models_dict, output_dir, config):
+def apply_to_file(filename, models_dict, output_dir, config, models_path):
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)
 
@@ -102,8 +102,7 @@ def apply_to_file(filename, models_dict, output_dir, config):
                       }
     if dl1_training_dir is None:
         # Build name of DL1 MC training files assuming standard pattern:
-        models_dir = models_dict['reg_energy'].parent()
-        dummy = models_dir.as_posix().replace('/data/models', '/data/mc/DL1')
+        dummy = models_path.as_posix().replace('/data/models', '/data/mc/DL1')
         dl1_training_dir = Path(dummy[:dummy.rfind('/dec')] +
                                 '/TrainingDataset/GammaDiffuse/' +
                                 dummy[dummy.rfind('/dec')+1:])
@@ -337,8 +336,9 @@ def main():
         else:
             models_dict[models_key] = joblib.load(models_path)
 
-    for filename in args.input_files:
-        apply_to_file(filename, models_dict, args.output_dir, config)
+    for filename in args.input_files:pply_to_file
+        apply_to_file(filename, models_dict, args.output_dir, config,
+                      args.path_models)
 
 
 if __name__ == '__main__':

From 2acdf9f4aa362337f39e29d1a422e61b9dde4b91 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 12:23:58 +0100
Subject: [PATCH 12/33] Typo

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index eabc8f8e52..b5f04935ab 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -336,7 +336,7 @@ def main():
         else:
             models_dict[models_key] = joblib.load(models_path)
 
-    for filename in args.input_files:pply_to_file
+    for filename in args.input_files:
         apply_to_file(filename, models_dict, args.output_dir, config,
                       args.path_models)
 

From 588a248606e98d85e02e5b0e81627a6c3e61d965 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 13:03:28 +0100
Subject: [PATCH 13/33] Protection against MC training nodes with identical
 zenith

---
 lstchain/reco/dl1_to_dl2.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index fc502e6a52..69c97e28be 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -151,9 +151,13 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
     cos_tel_zd = np.cos(np.pi / 2 - alt_tel)
 
     # Compute the weights that multiplied times the RF predictions at the
-    # closest (0) and 2nd-closest (1) nodes result in the interpolated value:
-    w0 = 1 - (cos_tel_zd - c0) / (c1 - c0)
-    w1 = (cos_tel_zd - c0) / (c1 - c0)
+    # closest (0) and 2nd-closest (1) nodes result in the interpolated value.
+    # Take care of cases in which the two closest nodes happen to have the
+    # same zenith (or very close)! (if so, both nodes are set to have equal
+    # weight in the interpolation)
+    w1 = np.where(np.isclose(closest_alt, second_closest_alt, atol=1e-4, rtol=0),
+                  0.5, (cos_tel_zd - c0) / (c1 - c0))
+    w0 = 1 - w1
 
     # Update the dataframe:
     dl2table = dl2table.assign(alt0=pd.Series(closest_alt).values,

From 2969a25a3d1d610b86d4d1f0df7c2b634b7ef47d Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 15:53:55 +0100
Subject: [PATCH 14/33] Preparations for better configuration of training
 pointings (for RF interpolation)

---
 lstchain/reco/dl1_to_dl2.py             | 42 +++++++++-------------
 lstchain/scripts/lstchain_dl1_to_dl2.py | 48 +++++++++++++++++--------
 2 files changed, 49 insertions(+), 41 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 69c97e28be..2834b117ff 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -56,16 +56,16 @@
 ]
 
 
-def get_training_directions(training_dir):
+def get_training_directions(training_dirs):
     """
     This function obtains the pointings of the telescope in the RF training
     sample.
 
     Parameters:
     -----------
-    training_dir: pathlib.Path, path to the directory under which the folders
-    containing the DL1 files used in the training are found.
-    The folders' names are assumed to follow this pattern:
+    training_dirs: array of strings, each element is the name of one of the
+    folders containing the DL1 files used in the training. Order is
+    irrelevant. The folders' names are assumed to follow this pattern:
 
     node_corsika_theta_34.367_az_69.537_
 
@@ -77,12 +77,10 @@ def get_training_directions(training_dir):
 
     """
 
-    dirs = glob.glob(str(training_dir) + '/node_corsika*')
-
     training_zd_deg = []
     training_az_deg = []
 
-    for dir in dirs:
+    for dir in training_dirs:
         c1 = dir.find('_theta_') + 7
         c2 = dir.find('_az_', c1) + 4
         c3 = dir.find('_', c2)
@@ -795,10 +793,10 @@ def apply_models(dl1,
                  cls_disp_sign=None,
                  effective_focal_length=29.30565 * u.m,
                  custom_config=None,
-                 interpolate_rf={'energy_regression':False,
-                                 'particle_classification':False,
-                                 'disp':False},
-                 dl1_training_dir=None,
+                 interpolate_rf={'energy_regression': False,
+                                 'particle_classification': False,
+                                 'disp': False},
+                 training_pointings=None
                  ):
     """
     Apply previously trained Random Forests to a set of data
@@ -829,10 +827,9 @@ def apply_models(dl1,
     interpolate_rf: dictionary. Contains three booleans, 'energy_regression',
         'particle_classification', 'disp', indicating which RF predictions
         should be interpolated linearly in cos(zenith)
-    dl1_training_dir: pathlib.Path, path to the directory under which the
-        folders containing the DL1 MC training data are kept (only folder
-        names are needed, not the actual DL1 files). Needed for interpolation
-        of RF predictions, if activated by user via interpolate_rf
+    training_pointings: array (# of pointings, 2) azimuth, zenith in degrees;
+         pointings of the MC sample used in the training. Needed for the
+         interpolation of RF predictions.
 
     Returns
     -------
@@ -867,18 +864,11 @@ def apply_models(dl1,
         dl2 = update_disp_with_effective_focal_length(dl2,
                                                       effective_focal_length=effective_focal_length)
 
-    # If dl1_training_dir was provided (containing folders for each fo the MC
-    # training pointing nodes), obtain the training pointings, and update the
-    # DL2 table with the additional info needed for the interpolation:
-    if dl1_training_dir is not None and dl1_training_dir.is_dir():
-        training_az_deg, training_zd_deg = get_training_directions(dl1_training_dir)
+    if True in interpolate_rf.values():
+        # Interpolation of RF predictions is switched on
+        training_az_deg = training_pointings[:, 0]
+        training_zd_deg = training_pointings[:, 1]
         dl2 = add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
-    else:
-        logger.warning('DL1 training directory not found...')
-        logger.warning('Switching off RF interpolation with zenith!')
-        interpolate_rf['energy_regression'] = False
-        interpolate_rf['particle_classification'] = False
-        interpolate_rf['disp'] = False
 
     # Reconstruction of Energy and disp_norm distance
     if isinstance(reg_energy, (str, bytes, Path)):
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index b5f04935ab..0dd92ec16a 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -81,7 +81,6 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
     interpolate_energy = False
     interpolate_gammaness = False
     interpolate_direction = False
-    dl1_training_dir = None
     # Read in the settings for the interpolation of Random Forest predictions
     # in cos(zd). If activated this avoids the jumps of performance produced
     # by the discrete set of pointings in the RF training sample.
@@ -93,22 +92,15 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
             interpolate_gammaness = zdinter['interpolate_gammaness']
         if 'interpolate_direction' in zdinter.keys():
             interpolate_direction = zdinter['interpolate_direction']
-        if 'DL1_training_dir' in zdinter.keys():
-            dl1_training_dir = zdinter['DL1_training_dir']
 
     interpolate_rf = {'energy_regression': interpolate_energy,
                       'particle_classification': interpolate_gammaness,
                       'disp': interpolate_direction
                       }
-    if dl1_training_dir is None:
-        # Build name of DL1 MC training files assuming standard pattern:
-        dummy = models_path.as_posix().replace('/data/models', '/data/mc/DL1')
-        dl1_training_dir = Path(dummy[:dummy.rfind('/dec')] +
-                                '/TrainingDataset/GammaDiffuse/' +
-                                dummy[dummy.rfind('/dec')+1:])
-
 
-    if interpolate_energy or interpolate_gammaness or interpolate_direction:
+    dl1_training_dir = None
+    training_pointings = None
+    if True in interpolate_rf.values():
         logger.warning('Cos(zenith) interpolation will be used in:')
         if interpolate_energy:
             logger.warning('   energy reconstruction Random Forest')
@@ -117,6 +109,32 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
         if interpolate_direction:
             logger.warning('   direction reconstruction Random Forest')
 
+        if 'random_forest_zd_interpolation' in config.keys():
+            zdinter = config['random_forest_zd_interpolation']
+            if 'dl1_training_dir' in zdinter.keys():
+                dl1_training_dir = zdinter['dl1_training_dir']
+
+        if dl1_training_dir is None:
+            # Build name of DL1 MC training files assuming standard pattern:
+            dummy = models_path.as_posix().replace('/data/models', '/data/mc/DL1')
+            dl1_training_dir = (Path(dummy[:dummy.rfind('/dec')] +
+                                     '/TrainingDataset/GammaDiffuse/' +
+                                     dummy[dummy.rfind('/dec') + 1:]))
+
+        # Obtain the training pointings, needed for the RF interpolation:
+        if dl1_training_dir.is_dir():
+            dirs = glob.glob(str(dl1_training_dir) + '/node_corsika*')
+            training_az_deg, training_zd_deg = get_training_directions(dirs)
+        else:
+            logger.warning('DL1 training directory not found...')
+            logger.warning('Switching off RF interpolation with zenith!')
+            interpolate_rf['energy_regression'] = False
+            interpolate_rf['particle_classification'] = False
+            interpolate_rf['disp'] = False
+
+        training_pointings = np.array([training_az_deg, training_zd_deg]).T
+
+
     if 'lh_fit_config' in config.keys():
         lhfit_data = pd.read_hdf(filename, key=dl1_likelihood_params_lstcam_key)
         if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(lhfit_data['event_id'] == data['event_id']):
@@ -174,7 +192,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                                           effective_focal_length=effective_focal_length,
                                           custom_config=config,
                                           interpolate_rf=interpolate_rf,
-                                          dl1_training_dir=dl1_training_dir)
+                                          training_pointings=training_pointings)
         elif config['disp_method'] == 'disp_norm_sign':
             dl2 = dl1_to_dl2.apply_models(data,
                                           models_dict['cls_gh'],
@@ -184,7 +202,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                                           effective_focal_length=effective_focal_length,
                                           custom_config=config,
                                           interpolate_rf=interpolate_rf,
-                                          dl1_training_dir=dl1_training_dir)
+                                          training_pointings=training_pointings)
 
     # Source-dependent analysis
     if config['source_dependent']:
@@ -219,7 +237,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                                                  effective_focal_length=effective_focal_length,
                                                  custom_config=config,
                                                  interpolate_rf=interpolate_rf,
-                                                 dl1_training_dir=dl1_training_dir)
+                                                 training_pointings=training_pointings)
             elif config['disp_method'] == 'disp_norm_sign':
                 dl2 = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                                  models_dict['cls_gh'],
@@ -229,7 +247,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                                                  effective_focal_length=effective_focal_length,
                                                  custom_config=config,
                                                  interpolate_rf=interpolate_rf,
-                                                 dl1_training_dir=dl1_training_dir)
+                                                 training_pointings=training_pointings)
 
             dl2_srcdep = dl2.drop(srcindep_keys, axis=1)
             dl2_srcdep_dict[k] = dl2_srcdep

From 32620d948f23e3ef4452c04439fb9676b12cf93e Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 15:57:25 +0100
Subject: [PATCH 15/33] Moved function

---
 lstchain/reco/dl1_to_dl2.py             | 40 -------------------------
 lstchain/scripts/lstchain_dl1_to_dl2.py | 38 +++++++++++++++++++++++
 2 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 2834b117ff..22623273a2 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -44,7 +44,6 @@
     'build_models',
     'get_expected_source_pos',
     'get_source_dependent_parameters',
-    'get_training_directions',
     'predict_with_zd_interpolation',
     'train_disp_norm',
     'train_disp_sign',
@@ -55,45 +54,6 @@
     'update_disp_with_effective_focal_length'
 ]
 
-
-def get_training_directions(training_dirs):
-    """
-    This function obtains the pointings of the telescope in the RF training
-    sample.
-
-    Parameters:
-    -----------
-    training_dirs: array of strings, each element is the name of one of the
-    folders containing the DL1 files used in the training. Order is
-    irrelevant. The folders' names are assumed to follow this pattern:
-
-    node_corsika_theta_34.367_az_69.537_
-
-    (theta is zenith; az is azimuth. Units are degrees, note that the values'
-    field lengths are not fixed)
-
-    Returns: training_az_deg, training_zd_deg arrays containing the azimuth and
-    zenith of the training nodes (in degrees)
-
-    """
-
-    training_zd_deg = []
-    training_az_deg = []
-
-    for dir in training_dirs:
-        c1 = dir.find('_theta_') + 7
-        c2 = dir.find('_az_', c1) + 4
-        c3 = dir.find('_', c2)
-        training_zd_deg.append(float(dir[c1:c2 - 4]))
-        training_az_deg.append(float(dir[c2:c3]))
-
-    training_zd_deg = np.array(training_zd_deg)
-    training_az_deg = np.array(training_az_deg)
-    # The order of the pointings is irrelevant
-
-    return training_az_deg, training_zd_deg
-
-
 def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
     """
     Compute necessary parameters for the interpolation of RF predictions
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 0dd92ec16a..840f03f169 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -74,6 +74,44 @@
                     default=None,
                     required=False)
 
+def get_training_directions(training_dirs):
+    """
+    This function obtains the pointings of the telescope in the RF training
+    sample.
+
+    Parameters:
+    -----------
+    training_dirs: array of strings, each element is the name of one of the
+    folders containing the DL1 files used in the training. Order is
+    irrelevant. The folders' names are assumed to follow this pattern:
+
+    node_corsika_theta_34.367_az_69.537_
+
+    (theta is zenith; az is azimuth. Units are degrees, note that the values'
+    field lengths are not fixed)
+
+    Returns: training_az_deg, training_zd_deg arrays containing the azimuth and
+    zenith of the training nodes (in degrees)
+
+    """
+
+    training_zd_deg = []
+    training_az_deg = []
+
+    for dir in training_dirs:
+        c1 = dir.find('_theta_') + 7
+        c2 = dir.find('_az_', c1) + 4
+        c3 = dir.find('_', c2)
+        training_zd_deg.append(float(dir[c1:c2 - 4]))
+        training_az_deg.append(float(dir[c2:c3]))
+
+    training_zd_deg = np.array(training_zd_deg)
+    training_az_deg = np.array(training_az_deg)
+    # The order of the pointings is irrelevant
+
+    return training_az_deg, training_zd_deg
+
+
 def apply_to_file(filename, models_dict, output_dir, config, models_path):
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)

From a65112bad4bed32c4f125606325aa896d7840c86 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 16:01:05 +0100
Subject: [PATCH 16/33] import glob

---
 lstchain/reco/dl1_to_dl2.py             | 1 -
 lstchain/scripts/lstchain_dl1_to_dl2.py | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 22623273a2..1d390196fa 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -7,7 +7,6 @@
 "import dl1_to_dl2"
 """
 
-import glob
 import os
 import logging
 
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 840f03f169..8cd609235d 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -7,6 +7,7 @@
 """
 
 import argparse
+import glob
 from pathlib import Path
 import joblib
 import logging

From 9e37b7ada165b65f55b110231a4a6be837fd9592 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 16:47:51 +0100
Subject: [PATCH 17/33] Wrong variable scope!

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 8cd609235d..fb63e32946 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -164,6 +164,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
         if dl1_training_dir.is_dir():
             dirs = glob.glob(str(dl1_training_dir) + '/node_corsika*')
             training_az_deg, training_zd_deg = get_training_directions(dirs)
+            training_pointings = np.array([training_az_deg, training_zd_deg]).T
         else:
             logger.warning('DL1 training directory not found...')
             logger.warning('Switching off RF interpolation with zenith!')
@@ -171,7 +172,6 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
             interpolate_rf['particle_classification'] = False
             interpolate_rf['disp'] = False
 
-        training_pointings = np.array([training_az_deg, training_zd_deg]).T
 
 
     if 'lh_fit_config' in config.keys():

From 80c0ffca41caa156ee108ae0b8cc4330ff461154 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 28 Nov 2024 17:42:56 +0100
Subject: [PATCH 18/33] Better log message

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index fb63e32946..1b985bbe20 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -165,6 +165,8 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
             dirs = glob.glob(str(dl1_training_dir) + '/node_corsika*')
             training_az_deg, training_zd_deg = get_training_directions(dirs)
             training_pointings = np.array([training_az_deg, training_zd_deg]).T
+            logger.warning('RF training pointings (az_deg, zd_deg):')
+            logger.warning(training_pointings)
         else:
             logger.warning('DL1 training directory not found...')
             logger.warning('Switching off RF interpolation with zenith!')

From b0afdac2bf4ee7bbe0041937ee7886f1ce3cc45e Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Tue, 3 Dec 2024 00:01:01 +0100
Subject: [PATCH 19/33] Changed selection of pointing nodes for interpolation.
 Instead of closest angular distance we now take the closest nodes in alt_tel
 on the same side of the culmination (same sign of sin_az_tel)

---
 lstchain/reco/dl1_to_dl2.py | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 1d390196fa..e553bbb88f 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -14,7 +14,7 @@
 import joblib
 import numpy as np
 import pandas as pd
-from astropy.coordinates import SkyCoord, Angle, angular_separation
+from astropy.coordinates import SkyCoord, Angle
 from astropy.time import Time
 from pathlib import Path
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
@@ -62,11 +62,12 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
     Parameters:
     -----------
     dl2table: pandas dataframe. Four columns will be added: alt0, alt1, w0, w1
-    alt0 and alt1 are the alt_tel (in rad) values (telescope elevation) for
-    the closest and second-closest training MC pointing for each event in the
-    table. The values w0 and w1 are the corresponding weights that, multiplied
-    by the RF predictions at those two pointings, provide the interpolated
-    result for each event's pointing
+    alt0 and alt1 are the alt_tel values (telescope elevation, in radians) of
+    the closest and second-closest training MC pointings (closest in elevation,
+    on the same side of culmination) for each event in the table. The values
+    w0 and w1 are the corresponding weights that, multiplied by the RF
+    predictions at those two pointings, provide the interpolated result for
+    each event's pointing
 
     training_zd_deg: array containing the zenith distances (in deg) for the
     MC training nodes
@@ -95,11 +96,15 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
                        len(training_alt_rad)).reshape(len(training_alt_rad),
                                                       len(dl2table)).T
 
-    angular_distance = angular_separation(training_az_rad, training_alt_rad,
-                                          tiled_az, tiled_alt)
-    # indices ordered in angular distance to each event:
-    sorted_indices = np.argsort(angular_distance, axis=1)
-
+    delta_alt = np.abs(training_alt_rad - tiled_alt)
+    # mask to select training nodes only on the same side of the source
+    # culmination as the event:
+    same_side_of_culmination = np.sign(np.sin(training_az_rad) *
+                                       np.sin(tiled_az)) > 0
+    # Just fill a large value for pointings on the other side of culmination:
+    delta_alt = np.where(same_side_of_culmination, delta_alt, np.pi/2)
+    # indices ordered according to distance in telescope elevation
+    sorted_indices = np.argsort(delta_alt, axis=1)
     closest_alt = training_alt_rad[sorted_indices[:, 0]]
     second_closest_alt = training_alt_rad[sorted_indices[:, 1]]
 
@@ -108,10 +113,10 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
     cos_tel_zd = np.cos(np.pi / 2 - alt_tel)
 
     # Compute the weights that multiplied times the RF predictions at the
-    # closest (0) and 2nd-closest (1) nodes result in the interpolated value.
-    # Take care of cases in which the two closest nodes happen to have the
-    # same zenith (or very close)! (if so, both nodes are set to have equal
-    # weight in the interpolation)
+    # closest (0) and 2nd-closest (1) nodes (in alt_tel) result in the
+    # interpolated value. Take care of cases in which the two closest nodes
+    # happen to have the same zenith (or very close)! (if so, both nodes are
+    # set to have equal weight in the interpolation)
     w1 = np.where(np.isclose(closest_alt, second_closest_alt, atol=1e-4, rtol=0),
                   0.5, (cos_tel_zd - c0) / (c1 - c0))
     w0 = 1 - w1

From 95632d0e77da622ae03ace0d1818d02aafaf32e0 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Wed, 18 Dec 2024 14:39:32 +0100
Subject: [PATCH 20/33] typo

---
 lstchain/reco/dl1_to_dl2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index e553bbb88f..5205c549bd 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -215,7 +215,7 @@ def train_energy(train, custom_config=None):
     Parameters
     ----------
     train: `pandas.DataFrame`
-    custom_config: dictionnary
+    custom_config: dictionary
         Modified configuration to update the standard one
 
     Returns

From c0d6a6bcc42fcacf10e618bcbe5e51a196661d21 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 11:36:50 +0100
Subject: [PATCH 21/33] logger.warning => logger.info

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 1b985bbe20..38cb5c95b6 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -140,13 +140,13 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
     dl1_training_dir = None
     training_pointings = None
     if True in interpolate_rf.values():
-        logger.warning('Cos(zenith) interpolation will be used in:')
+        logger.info('Cos(zenith) interpolation will be used in:')
         if interpolate_energy:
-            logger.warning('   energy reconstruction Random Forest')
+            logger.info('   energy reconstruction Random Forest')
         if interpolate_gammaness:
-            logger.warning('   g/h classification Random Forest')
+            logger.info('   g/h classification Random Forest')
         if interpolate_direction:
-            logger.warning('   direction reconstruction Random Forest')
+            logger.info('   direction reconstruction Random Forest')
 
         if 'random_forest_zd_interpolation' in config.keys():
             zdinter = config['random_forest_zd_interpolation']

From f2bfdc18b45af3bbd8aae4e2e75b091ecb338a9b Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 11:38:06 +0100
Subject: [PATCH 22/33] warning => info

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 38cb5c95b6..67cdb72cd1 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -165,8 +165,8 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
             dirs = glob.glob(str(dl1_training_dir) + '/node_corsika*')
             training_az_deg, training_zd_deg = get_training_directions(dirs)
             training_pointings = np.array([training_az_deg, training_zd_deg]).T
-            logger.warning('RF training pointings (az_deg, zd_deg):')
-            logger.warning(training_pointings)
+            logger.info('RF training pointings (az_deg, zd_deg):')
+            logger.info(training_pointings)
         else:
             logger.warning('DL1 training directory not found...')
             logger.warning('Switching off RF interpolation with zenith!')

From 5bb6e4df95f3282877a915bd751bfd7ff9b7a861 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 12:34:35 +0100
Subject: [PATCH 23/33] Changed approach to obtain the pointings used for theRF
 training Now the info is stored with the RF models in an ECVS file, and it is
 simply read from there

---
 lstchain/reco/dl1_to_dl2.py             |  6 +--
 lstchain/scripts/lstchain_dl1_to_dl2.py | 65 +++----------------------
 2 files changed, 9 insertions(+), 62 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 5205c549bd..5530c8b070 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -791,7 +791,7 @@ def apply_models(dl1,
     interpolate_rf: dictionary. Contains three booleans, 'energy_regression',
         'particle_classification', 'disp', indicating which RF predictions
         should be interpolated linearly in cos(zenith)
-    training_pointings: array (# of pointings, 2) azimuth, zenith in degrees;
+    training_pointings: astropy Table azimuth (az), zenith (zd)
          pointings of the MC sample used in the training. Needed for the
          interpolation of RF predictions.
 
@@ -830,8 +830,8 @@ def apply_models(dl1,
 
     if True in interpolate_rf.values():
         # Interpolation of RF predictions is switched on
-        training_az_deg = training_pointings[:, 0]
-        training_zd_deg = training_pointings[:, 1]
+        training_az_deg = training_pointings['az'].to(u.deg).value
+        training_zd_deg = training_pointings['zd'].to(u.deg).value
         dl2 = add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
 
     # Reconstruction of Energy and disp_norm distance
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 67cdb72cd1..e27667bf36 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -14,6 +14,7 @@
 import numpy as np
 import pandas as pd
 import astropy.units as u
+from astropy.table import Table
 from astropy.coordinates import Angle
 from ctapipe.instrument import SubarrayDescription
 from ctapipe_io_lst import OPTICS
@@ -75,44 +76,6 @@
                     default=None,
                     required=False)
 
-def get_training_directions(training_dirs):
-    """
-    This function obtains the pointings of the telescope in the RF training
-    sample.
-
-    Parameters:
-    -----------
-    training_dirs: array of strings, each element is the name of one of the
-    folders containing the DL1 files used in the training. Order is
-    irrelevant. The folders' names are assumed to follow this pattern:
-
-    node_corsika_theta_34.367_az_69.537_
-
-    (theta is zenith; az is azimuth. Units are degrees, note that the values'
-    field lengths are not fixed)
-
-    Returns: training_az_deg, training_zd_deg arrays containing the azimuth and
-    zenith of the training nodes (in degrees)
-
-    """
-
-    training_zd_deg = []
-    training_az_deg = []
-
-    for dir in training_dirs:
-        c1 = dir.find('_theta_') + 7
-        c2 = dir.find('_az_', c1) + 4
-        c3 = dir.find('_', c2)
-        training_zd_deg.append(float(dir[c1:c2 - 4]))
-        training_az_deg.append(float(dir[c2:c3]))
-
-    training_zd_deg = np.array(training_zd_deg)
-    training_az_deg = np.array(training_az_deg)
-    # The order of the pointings is irrelevant
-
-    return training_az_deg, training_zd_deg
-
-
 def apply_to_file(filename, models_dict, output_dir, config, models_path):
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)
@@ -137,7 +100,6 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                       'disp': interpolate_direction
                       }
 
-    dl1_training_dir = None
     training_pointings = None
     if True in interpolate_rf.values():
         logger.info('Cos(zenith) interpolation will be used in:')
@@ -148,34 +110,19 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
         if interpolate_direction:
             logger.info('   direction reconstruction Random Forest')
 
-        if 'random_forest_zd_interpolation' in config.keys():
-            zdinter = config['random_forest_zd_interpolation']
-            if 'dl1_training_dir' in zdinter.keys():
-                dl1_training_dir = zdinter['dl1_training_dir']
-
-        if dl1_training_dir is None:
-            # Build name of DL1 MC training files assuming standard pattern:
-            dummy = models_path.as_posix().replace('/data/models', '/data/mc/DL1')
-            dl1_training_dir = (Path(dummy[:dummy.rfind('/dec')] +
-                                     '/TrainingDataset/GammaDiffuse/' +
-                                     dummy[dummy.rfind('/dec') + 1:]))
-
         # Obtain the training pointings, needed for the RF interpolation:
-        if dl1_training_dir.is_dir():
-            dirs = glob.glob(str(dl1_training_dir) + '/node_corsika*')
-            training_az_deg, training_zd_deg = get_training_directions(dirs)
-            training_pointings = np.array([training_az_deg, training_zd_deg]).T
-            logger.info('RF training pointings (az_deg, zd_deg):')
+        training_pointings_path = Path(models_path, 'training_dirs.ecsv')
+        if training_pointings_path.is_file():
+            training_pointings = Table.read(training_pointings_path)
+            logger.info('RF training pointings:')
             logger.info(training_pointings)
         else:
-            logger.warning('DL1 training directory not found...')
+            logger.warning(f'{training_pointings_path} not found!')
             logger.warning('Switching off RF interpolation with zenith!')
             interpolate_rf['energy_regression'] = False
             interpolate_rf['particle_classification'] = False
             interpolate_rf['disp'] = False
 
-
-
     if 'lh_fit_config' in config.keys():
         lhfit_data = pd.read_hdf(filename, key=dl1_likelihood_params_lstcam_key)
         if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(lhfit_data['event_id'] == data['event_id']):

From b0dbbc70003e6a51e898d28d112bc11fc1fc15b3 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 12:36:45 +0100
Subject: [PATCH 24/33] Removed unused setting

---
 lstchain/data/lstchain_standard_config.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lstchain/data/lstchain_standard_config.json b/lstchain/data/lstchain_standard_config.json
index 19a126c5bb..dfdbb516d5 100644
--- a/lstchain/data/lstchain_standard_config.json
+++ b/lstchain/data/lstchain_standard_config.json
@@ -72,8 +72,7 @@
   "random_forest_zd_interpolation": {
     "interpolate_energy": true,
     "interpolate_gammaness": true,
-    "interpolate_direction": true,
-    "DL1_training_dir": null
+    "interpolate_direction": true
   },
 
   "random_forest_energy_regressor_args": {

From f5d519ab3090b0be9cc522a11bdbc58e7700047d Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 13:49:41 +0100
Subject: [PATCH 25/33] Some simplifications

---
 lstchain/reco/dl1_to_dl2.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 5530c8b070..70a8b2dc84 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -53,7 +53,7 @@
     'update_disp_with_effective_focal_length'
 ]
 
-def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
+def add_zd_interpolation_info(dl2table, training_pointings):
     """
     Compute necessary parameters for the interpolation of RF predictions
     between the zenith pointings of the MC data in the training sample on
@@ -69,8 +69,8 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
     predictions at those two pointings, provide the interpolated result for
     each event's pointing
 
-    training_zd_deg: array containing the zenith distances (in deg) for the
-    MC training nodes
+    training_pointings: astropy Table containing the pointings (zd,
+    az) of the MC training nodes
 
     training_az_deg: array containing the azimuth angles (in deg) for the
     MC training nodes (a given index in bith arrays corresponds to a given MC
@@ -86,8 +86,8 @@ def add_zd_interpolation_info(dl2table, training_zd_deg, training_az_deg):
     alt_tel = dl2table['alt_tel']
     az_tel  = dl2table['az_tel']
 
-    training_alt_rad = np.pi / 2 - np.deg2rad(training_zd_deg)
-    training_az_rad = np.deg2rad(training_az_deg)
+    training_alt_rad = np.pi / 2 - training_pointings['zd'].to(u.rad).value
+    training_az_rad = training_pointings['az'].to(u.rad).value
 
     tiled_az = np.tile(az_tel,
                        len(training_az_rad)).reshape(len(training_az_rad),
@@ -830,9 +830,7 @@ def apply_models(dl1,
 
     if True in interpolate_rf.values():
         # Interpolation of RF predictions is switched on
-        training_az_deg = training_pointings['az'].to(u.deg).value
-        training_zd_deg = training_pointings['zd'].to(u.deg).value
-        dl2 = add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
+        dl2 = add_zd_interpolation_info(dl2, training_pointings)
 
     # Reconstruction of Energy and disp_norm distance
     if isinstance(reg_energy, (str, bytes, Path)):

From c0d90b396d975e4b4062893548f046094f1bfb3f Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 13:51:14 +0100
Subject: [PATCH 26/33] Unused import

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index e27667bf36..79df0b5a6e 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -7,7 +7,6 @@
 """
 
 import argparse
-import glob
 from pathlib import Path
 import joblib
 import logging

From 9aabd2b8f204374dba60336a6f520d5c95a10c02 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 13:57:54 +0100
Subject: [PATCH 27/33] Simplification: use np.broadcast

---
 lstchain/reco/dl1_to_dl2.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 70a8b2dc84..d63e790065 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -89,12 +89,10 @@ def add_zd_interpolation_info(dl2table, training_pointings):
     training_alt_rad = np.pi / 2 - training_pointings['zd'].to(u.rad).value
     training_az_rad = training_pointings['az'].to(u.rad).value
 
-    tiled_az = np.tile(az_tel,
-                       len(training_az_rad)).reshape(len(training_az_rad),
-                                                     len(dl2table)).T
-    tiled_alt = np.tile(alt_tel,
-                       len(training_alt_rad)).reshape(len(training_alt_rad),
-                                                      len(dl2table)).T
+    tiled_az = np.broadcast_to(az_tel[:, np.newaxis],
+                               (len(dl2table), len(training_az_rad)))
+    tiled_alt = np.broadcast_to(alt_tel[:, np.newaxis],
+                                (len(dl2table), len(training_az_rad)))
 
     delta_alt = np.abs(training_alt_rad - tiled_alt)
     # mask to select training nodes only on the same side of the source

From 2023bcfaa337ae5f1ec6645a2a44665be8de68b4 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 14:09:28 +0100
Subject: [PATCH 28/33] Simplification

---
 lstchain/scripts/lstchain_dl1_to_dl2.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 79df0b5a6e..1af69ac3a8 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -79,20 +79,14 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
 
     data = pd.read_hdf(filename, key=dl1_params_lstcam_key)
 
-    interpolate_energy = False
-    interpolate_gammaness = False
-    interpolate_direction = False
     # Read in the settings for the interpolation of Random Forest predictions
     # in cos(zd). If activated this avoids the jumps of performance produced
     # by the discrete set of pointings in the RF training sample.
     if 'random_forest_zd_interpolation' in config.keys():
         zdinter = config['random_forest_zd_interpolation']
-        if 'interpolate_energy' in zdinter.keys():
-            interpolate_energy = zdinter['interpolate_energy']
-        if 'interpolate_gammaness' in zdinter.keys():
-            interpolate_gammaness = zdinter['interpolate_gammaness']
-        if 'interpolate_direction' in zdinter.keys():
-            interpolate_direction = zdinter['interpolate_direction']
+        interpolate_energy = zdinter.get('interpolate_energy', False)
+        interpolate_gammaness = zdinter.get('interpolate_gammaness', False)
+        interpolate_direction = zdinter.get('interpolate_direction', False)
 
     interpolate_rf = {'energy_regression': interpolate_energy,
                       'particle_classification': interpolate_gammaness,

From 3c290052b66766de2352387ca8e33aaf39b26468 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 14:24:43 +0100
Subject: [PATCH 29/33] Various simplifications

---
 lstchain/reco/dl1_to_dl2.py | 47 ++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 30 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index d63e790065..317cdcafbf 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -72,16 +72,11 @@ def add_zd_interpolation_info(dl2table, training_pointings):
     training_pointings: astropy Table containing the pointings (zd,
     az) of the MC training nodes
 
-    training_az_deg: array containing the azimuth angles (in deg) for the
-    MC training nodes (a given index in bith arrays corresponds to a given MC
-    pointing)
-
     Returns:
     -------
     DL2 pandas dataframe with additional columns alt0, alt1, w0, w1
 
     """
-    pd.options.mode.copy_on_write = True
 
     alt_tel = dl2table['alt_tel']
     az_tel  = dl2table['az_tel']
@@ -110,8 +105,8 @@ def add_zd_interpolation_info(dl2table, training_pointings):
     c1 = np.cos(np.pi / 2 - second_closest_alt)
     cos_tel_zd = np.cos(np.pi / 2 - alt_tel)
 
-    # Compute the weights that multiplied times the RF predictions at the
-    # closest (0) and 2nd-closest (1) nodes (in alt_tel) result in the
+    # Compute the weights w0, w1 that multiplied times the RF predictions at
+    # the closest (0) and 2nd-closest (1) nodes (in alt_tel) result in the
     # interpolated value. Take care of cases in which the two closest nodes
     # happen to have the same zenith (or very close)! (if so, both nodes are
     # set to have equal weight in the interpolation)
@@ -120,10 +115,11 @@ def add_zd_interpolation_info(dl2table, training_pointings):
     w0 = 1 - w1
 
     # Update the dataframe:
-    dl2table = dl2table.assign(alt0=pd.Series(closest_alt).values,
-                               alt1=pd.Series(second_closest_alt).values,
-                               w0=pd.Series(w0).values,
-                               w1=pd.Series(w1).values)
+    with pd.option_context('mode.copy_on_write', True):
+        dl2table = dl2table.assign(alt0=pd.Series(closest_alt).values,
+                                   alt1=pd.Series(second_closest_alt).values,
+                                   w0=pd.Series(w0).values,
+                                   w1=pd.Series(w1).values)
 
     return dl2table
 
@@ -171,29 +167,20 @@ def predict_with_zd_interpolation(rf, param_array, features):
     # Type of RF (classifier or regressor):
     is_classifier = isinstance(rf, RandomForestClassifier)
 
-    # keep original alt_tel values:
-    param_array.rename(columns={"alt_tel": "original_alt_tel"}, inplace=True)
-
-    # Set alt_tel to closest MC training node's alt:
-    param_array.rename(columns={"alt0": "alt_tel"}, inplace=True)
+    features_copy = features.copy()
+    alt_index_in_features = features_copy.index('alt_tel')
+    # First use alt_tel of closest MC training node:
+    features_copy[alt_index_in_features] = 'alt0'
     if is_classifier:
-        prediction_0 = rf.predict_proba(param_array[features])
+        prediction_0 = rf.predict_proba(param_array[features_copy])
     else:
-        prediction_0 = rf.predict(param_array[features])
-
-    param_array.rename(columns={"alt_tel": "alt0"}, inplace=True)
-
-    # set alt_tel value to that of second closest node:
-    param_array.rename(columns={"alt1": "alt_tel"}, inplace=True)
+        prediction_0 = rf.predict(param_array[features_copy])
+    # Now the alt_tel value of the second-closest node:
+    features_copy[alt_index_in_features] = 'alt1'
     if is_classifier:
-        prediction_1 = rf.predict_proba(param_array[features])
+        prediction_1 = rf.predict_proba(param_array[features_copy])
     else:
-        prediction_1 = rf.predict(param_array[features])
-
-    param_array.rename(columns={"alt_tel": "alt1"}, inplace=True)
-
-    # Put back original value of alt_tel:
-    param_array.rename(columns={"original_alt_tel": "alt_tel"}, inplace=True)
+        prediction_1 = rf.predict(param_array[features_copy])
 
     # Interpolated RF prediction:
     if is_classifier:

From 43bb86ee0323811b7cb22803c5b3bf68c52ee391 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 23 Jan 2025 16:22:28 +0100
Subject: [PATCH 30/33] Fix problem with RFs expecting the same feature names
 as when they were grown... Cast to numpy array of az and zd (solves error)

---
 lstchain/reco/dl1_to_dl2.py | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 317cdcafbf..1f8e00234d 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -14,6 +14,7 @@
 import joblib
 import numpy as np
 import pandas as pd
+import warnings
 from astropy.coordinates import SkyCoord, Angle
 from astropy.time import Time
 from pathlib import Path
@@ -78,8 +79,8 @@ def add_zd_interpolation_info(dl2table, training_pointings):
 
     """
 
-    alt_tel = dl2table['alt_tel']
-    az_tel  = dl2table['az_tel']
+    alt_tel = np.array(dl2table['alt_tel'])
+    az_tel  = np.array(dl2table['az_tel'])
 
     training_alt_rad = np.pi / 2 - training_pointings['zd'].to(u.rad).value
     training_az_rad = training_pointings['az'].to(u.rad).value
@@ -169,18 +170,24 @@ def predict_with_zd_interpolation(rf, param_array, features):
 
     features_copy = features.copy()
     alt_index_in_features = features_copy.index('alt_tel')
-    # First use alt_tel of closest MC training node:
-    features_copy[alt_index_in_features] = 'alt0'
-    if is_classifier:
-        prediction_0 = rf.predict_proba(param_array[features_copy])
-    else:
-        prediction_0 = rf.predict(param_array[features_copy])
-    # Now the alt_tel value of the second-closest node:
-    features_copy[alt_index_in_features] = 'alt1'
-    if is_classifier:
-        prediction_1 = rf.predict_proba(param_array[features_copy])
-    else:
-        prediction_1 = rf.predict(param_array[features_copy])
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        # This is just to avoid the RFs to warn about the features
+        # unnamed (passed as an array). We do this because we want to replace
+        # alt_tel by alt0, then by alt1...
+        # First use alt_tel of closest MC training node:
+        features_copy[alt_index_in_features] = 'alt0'
+        if is_classifier:
+            prediction_0 = rf.predict_proba(param_array[features_copy].to_numpy())
+        else:
+            prediction_0 = rf.predict(param_array[features_copy].to_numpy())
+        # Now the alt_tel value of the second-closest node:
+        features_copy[alt_index_in_features] = 'alt1'
+        if is_classifier:
+            prediction_1 = rf.predict_proba(param_array[features_copy].to_numpy())
+        else:
+            prediction_1 = rf.predict(param_array[features_copy].to_numpy())
 
     # Interpolated RF prediction:
     if is_classifier:

From 9d1c1d3e975e8424079b26ed2b1e78dfbe0d8460 Mon Sep 17 00:00:00 2001
From: moralejo <moralejo@ifae.es>
Date: Fri, 24 Jan 2025 08:58:52 +0000
Subject: [PATCH 31/33] Removed unneeded conversion to Series

---
 lstchain/reco/dl1_to_dl2.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 1f8e00234d..499cc97766 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -117,10 +117,10 @@ def add_zd_interpolation_info(dl2table, training_pointings):
 
     # Update the dataframe:
     with pd.option_context('mode.copy_on_write', True):
-        dl2table = dl2table.assign(alt0=pd.Series(closest_alt).values,
-                                   alt1=pd.Series(second_closest_alt).values,
-                                   w0=pd.Series(w0).values,
-                                   w1=pd.Series(w1).values)
+        dl2table = dl2table.assign(alt0=closest_alt,
+                                   alt1=second_closest_alt,
+                                   w0=w0,
+                                   w1=w1)
 
     return dl2table
 

From 12f875dcecc529915220093c242854eb0419848c Mon Sep 17 00:00:00 2001
From: Daniel Morcuende <dmorcuende@iaa.es>
Date: Fri, 24 Jan 2025 12:39:25 +0100
Subject: [PATCH 32/33] [skip ci] Apply suggestions from code review (use
 proper docstrings format)

---
 lstchain/reco/dl1_to_dl2.py             | 89 ++++++++++++++-----------
 lstchain/scripts/lstchain_dl1_to_dl2.py |  2 +-
 2 files changed, 50 insertions(+), 41 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index 499cc97766..a7eda5c926 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -60,22 +60,25 @@ def add_zd_interpolation_info(dl2table, training_pointings):
     between the zenith pointings of the MC data in the training sample on
     which the RFs were trained.
 
-    Parameters:
-    -----------
-    dl2table: pandas dataframe. Four columns will be added: alt0, alt1, w0, w1
-    alt0 and alt1 are the alt_tel values (telescope elevation, in radians) of
-    the closest and second-closest training MC pointings (closest in elevation,
-    on the same side of culmination) for each event in the table. The values
-    w0 and w1 are the corresponding weights that, multiplied by the RF
-    predictions at those two pointings, provide the interpolated result for
-    each event's pointing
-
-    training_pointings: astropy Table containing the pointings (zd,
-    az) of the MC training nodes
-
-    Returns:
+    Parameters
+    ----------
+    dl2table : pandas.DataFrame
+        DataFrame containing DL2 information, including 'alt_tel' and 'az_tel'.
+        Four columns will be added: alt0, alt1, w0, w1.
+        alt0 and alt1 are the alt_tel values (telescope elevation, in radians) of
+        the closest and second-closest training MC pointings (closest in elevation,
+        on the same side of culmination) for each event in the table. The values
+        w0 and w1 are the corresponding weights that, multiplied by the RF
+        predictions at those two pointings, provide the interpolated result for
+        each event's pointing.
+
+    training_pointings : astropy.table.Table 
+        Table containing the pointings (zd, az) of the MC training nodes.
+
+    Returns
     -------
-    DL2 pandas dataframe with additional columns alt0, alt1, w0, w1
+    pandas.DataFrame
+        Updated DL2 pandas dataframe with additional columns alt0, alt1, w0, w1.
 
     """
 
@@ -143,25 +146,30 @@ def predict_with_zd_interpolation(rf, param_array, features):
     sceond-closest pointing. Then the values are interpolated (linearly in
     cos(zenith)) to the actual zenith pointing (90 deg - alt_tel) of the event.
 
-    rf: sklearn.ensemble.RandomForestRegressor or RandomForestClassifier,
-    the random forest we want to apply (must contain alt_tel among the
-    training parameters)
-
-    param_array: pandas dataframe containing the features needed by theRF
-    It must also contain four additional columns: alt0, alt1, w0, w1, which
-    can be added with the function add_zd_interpolation_info. These are the
-    event-wise telescope elevations for the closest and 2nd-closest training
-    pointings (alt0 and alt1), and the event-wise weights (w0 and w1) which
-    must be applied to the RF prediction at the two pointings to obtain the
-    interpolated value at the actual telescope pointing. Since the weights
-    are the same (for a given event) for different RFs, it does not make
-    sense to compute them here - they are pre-calculated by
-    add_zd_interpolation_info
-
-    features: list of the names of the image features used by the RF
-
-    Return: interpolated RF predictions. 1D array for regressors (log energy,
-    or disp_norm), 2D (events, # of classes) for classifiers
+    Parameters
+    ----------
+    rf : sklearn.ensemble.RandomForestRegressor or RandomForestClassifier,
+        The random forest we want to apply (must contain alt_tel among the
+        training parameters).
+    param_array : pandas.DataFrame
+        Dataframe containing the features needed by the RF.
+        It must also contain four additional columns: alt0, alt1, w0, w1, which
+        can be added with the function add_zd_interpolation_info. These are the
+        event-wise telescope elevations for the closest and 2nd-closest training
+        pointings (alt0 and alt1), and the event-wise weights (w0 and w1) which
+        must be applied to the RF prediction at the two pointings to obtain the
+        interpolated value at the actual telescope pointing. Since the weights
+        are the same (for a given event) for different RFs, it does not make
+        sense to compute them here - they are pre-calculated by
+        `add_zd_interpolation_info`.
+    features : list of str
+        List of the names of the image features used by the RF.
+
+    Return
+    ------
+    numpy.ndarray
+        Interpolated RF predictions. 1D array for regressors (log energy,
+        or disp_norm), 2D (events, # of classes) for classifiers.
 
     """
 
@@ -207,7 +215,7 @@ def train_energy(train, custom_config=None):
     Parameters
     ----------
     train: `pandas.DataFrame`
-    custom_config: dictionary
+    custom_config : dict
         Modified configuration to update the standard one
 
     Returns
@@ -780,12 +788,13 @@ def apply_models(dl1,
     effective_focal_length: `astropy.unit`
     custom_config: dictionary
         Modified configuration to update the standard one
-    interpolate_rf: dictionary. Contains three booleans, 'energy_regression',
+    interpolate_rf : dict
+        Contains three booleans, 'energy_regression',
         'particle_classification', 'disp', indicating which RF predictions
-        should be interpolated linearly in cos(zenith)
-    training_pointings: astropy Table azimuth (az), zenith (zd)
-         pointings of the MC sample used in the training. Needed for the
-         interpolation of RF predictions.
+        should be interpolated linearly in cos(zenith).
+    training_pointings : astropy.table.Table 
+        Table with azimuth (az), zenith (zd) pointings of the MC sample used
+        in the training. Needed for the interpolation of RF predictions.
 
     Returns
     -------
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index 1af69ac3a8..c54b2f81dc 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -82,7 +82,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
     # Read in the settings for the interpolation of Random Forest predictions
     # in cos(zd). If activated this avoids the jumps of performance produced
     # by the discrete set of pointings in the RF training sample.
-    if 'random_forest_zd_interpolation' in config.keys():
+    if 'random_forest_zd_interpolation' in config:
         zdinter = config['random_forest_zd_interpolation']
         interpolate_energy = zdinter.get('interpolate_energy', False)
         interpolate_gammaness = zdinter.get('interpolate_gammaness', False)

From 324a8c6633aa5cc1919c68583acdec5860fc38e9 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Mon, 27 Jan 2025 15:21:56 +0100
Subject: [PATCH 33/33] Remove default mutable argument

---
 lstchain/reco/dl1_to_dl2.py             |  9 +++--
 lstchain/scripts/lstchain_dl1_to_dl2.py | 54 ++++++++++++-------------
 2 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
index a7eda5c926..41529bb90d 100644
--- a/lstchain/reco/dl1_to_dl2.py
+++ b/lstchain/reco/dl1_to_dl2.py
@@ -757,9 +757,7 @@ def apply_models(dl1,
                  cls_disp_sign=None,
                  effective_focal_length=29.30565 * u.m,
                  custom_config=None,
-                 interpolate_rf={'energy_regression': False,
-                                 'particle_classification': False,
-                                 'disp': False},
+                 interpolate_rf=None,
                  training_pointings=None
                  ):
     """
@@ -809,6 +807,11 @@ def apply_models(dl1,
     classification_features = config["particle_classification_features"]
     events_filters = config["events_filters"]
 
+    # If no settings are provided for RF interpolation, it is switched off:
+    if interpolate_rf is None:
+        interpolate_rf = {'energy_regression': False,
+                          'particle_classification': False,
+                          'disp': False}
 
     dl2 = utils.filter_events(dl1,
                               filters=events_filters,
diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
index c54b2f81dc..d7b64b108e 100644
--- a/lstchain/scripts/lstchain_dl1_to_dl2.py
+++ b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -82,39 +82,37 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
     # Read in the settings for the interpolation of Random Forest predictions
     # in cos(zd). If activated this avoids the jumps of performance produced
     # by the discrete set of pointings in the RF training sample.
+
+    interpolate_rf = None     # Default, no interpolation
+    training_pointings = None # Default, no interpolation
     if 'random_forest_zd_interpolation' in config:
         zdinter = config['random_forest_zd_interpolation']
         interpolate_energy = zdinter.get('interpolate_energy', False)
         interpolate_gammaness = zdinter.get('interpolate_gammaness', False)
         interpolate_direction = zdinter.get('interpolate_direction', False)
-
-    interpolate_rf = {'energy_regression': interpolate_energy,
-                      'particle_classification': interpolate_gammaness,
-                      'disp': interpolate_direction
-                      }
-
-    training_pointings = None
-    if True in interpolate_rf.values():
-        logger.info('Cos(zenith) interpolation will be used in:')
-        if interpolate_energy:
-            logger.info('   energy reconstruction Random Forest')
-        if interpolate_gammaness:
-            logger.info('   g/h classification Random Forest')
-        if interpolate_direction:
-            logger.info('   direction reconstruction Random Forest')
-
-        # Obtain the training pointings, needed for the RF interpolation:
-        training_pointings_path = Path(models_path, 'training_dirs.ecsv')
-        if training_pointings_path.is_file():
-            training_pointings = Table.read(training_pointings_path)
-            logger.info('RF training pointings:')
-            logger.info(training_pointings)
-        else:
-            logger.warning(f'{training_pointings_path} not found!')
-            logger.warning('Switching off RF interpolation with zenith!')
-            interpolate_rf['energy_regression'] = False
-            interpolate_rf['particle_classification'] = False
-            interpolate_rf['disp'] = False
+        interpolate_rf = {'energy_regression': interpolate_energy,
+                          'particle_classification': interpolate_gammaness,
+                          'disp': interpolate_direction
+                          }
+        if True in interpolate_rf.values():
+            logger.info('Cos(zenith) interpolation will be used in:')
+            if interpolate_energy:
+                logger.info('   energy reconstruction Random Forest')
+            if interpolate_gammaness:
+                logger.info('   g/h classification Random Forest')
+            if interpolate_direction:
+                logger.info('   direction reconstruction Random Forest')
+
+            # Obtain the training pointings, needed for the RF interpolation:
+            training_pointings_path = Path(models_path, 'training_dirs.ecsv')
+            if training_pointings_path.is_file():
+                training_pointings = Table.read(training_pointings_path)
+                logger.info('RF training pointings:')
+                logger.info(training_pointings)
+            else:
+                logger.warning(f'{training_pointings_path} not found!')
+                logger.warning('Switching off RF interpolation with zenith!')
+                interpolate_rf = None
 
     if 'lh_fit_config' in config.keys():
         lhfit_data = pd.read_hdf(filename, key=dl1_likelihood_params_lstcam_key)