From 9f698ddaf0f08e7099b24285d5ebe3307cf9af7b Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Fri, 13 Dec 2024 18:55:18 +0100 Subject: [PATCH 01/45] Script to "compute" tailcuts and NSB level to be added to the MC Just based on mean pedestal charge in interleaved pedestals, and using tabulated data (and a parametrization of NSB level vs ) --- lstchain/scripts/lstchain_find_tailcuts.py | 184 +++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 lstchain/scripts/lstchain_find_tailcuts.py diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py new file mode 100644 index 0000000000..5437322672 --- /dev/null +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python + +""" +This script uses DL1 files to determine tailcuts which are adequate for the +bulk of the pixels in a given run. It does so simply based on the median (for +the whole camera) of the average pixel charge for pedestal events. + +For reasons of stability & simplicity of analysis, we cannot decide the +cleaning levels on a subrun-by-subrun basis. We select values which are ok +for the whole run. + +The script also returns the suggested NSB adjustment needed in the "dark-sky" MC +to match the data. + +The script will process a maximum of 10 subruns of those provided as input, +distributed uniformly through the run - just for speed reasons: the result +would hardly change if all subruns are used. + +lstchain_find_tailcuts -f "/.../dl1_LST-1.Run12469.????.h5" + +The program creates in the output directory a .json file containing the +cleaning configuration. 
+ +""" + +import argparse +import logging +from pathlib import Path +import tables +import glob +import os +import numpy as np +import time +import sys + +from lstchain.paths import parse_dl1_filename +from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key +from lstchain.io.io import dl1_params_tel_mon_cal_key +from lstchain.io.config import get_standard_config, dump_config + +from ctapipe.io import read_table +from ctapipe.core import Container, Field +from ctapipe.io import HDF5TableWriter +from ctapipe.containers import EventType +from ctapipe_io_lst import LSTEventSource + +parser = argparse.ArgumentParser(description="Tailcut finder") + +parser.add_argument('-f', '--dl1-files', dest='dl1_files', + type=str, default='', + help='Input DL1 file names') +parser.add_argument('-o', '--output-dir', dest='output_dir', + type=Path, default='./', + help='Path to the output directory (default: %(default)s)') +parser.add_argument('--log', dest='log_file', + type=str, default=None, + help='Log file name') + +log = logging.getLogger(__name__) + + +def main(): + args = parser.parse_args() + log.setLevel(logging.INFO) + logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) + + output_dir = args.output_dir.absolute() + output_dir.mkdir(exist_ok=True, parents=True) + + if args.dl1_files == '': + log.error('Please use --dl1-files to provide a valid set of DL1 files!') + sys.exit(1) + + all_dl1_files = glob.glob(args.dl1_files) + all_dl1_files.sort() + + log_file = args.log_file + if log_file is not None: + handler = logging.FileHandler(log_file, mode='w') + logging.getLogger().addHandler(handler) + + # Number of subruns (uniformly distributed through the run) to be processed: + max_number_of_processed_subruns = 10 + + # process at most max_number_of_processed_subruns for each run: + dl1_files = get_input_files(all_dl1_files, max_number_of_processed_subruns) + number_of_pedestals = [] + usable_pixels = [] + median_ped_mean_pix_charge = [] + + for 
dl1_file in dl1_files: + log.info('\nInput file: %s', dl1_file) + + run_info = parse_dl1_filename(dl1_file) + run_id, subrun_id = run_info.run, run_info.subrun + summary_info.run_id = run_id + summary_info.subrun_id = subrun_id + + data_parameters = read_table(dl1_file, dl1_params_lstcam_key) + event_type_data = data_parameters['event_type'].data + pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value + + number_of_pedestals.append(pedestal_mask.sum()) + data_images = read_table(dl1_file, dl1_images_lstcam_key) + data_calib = read_table(dl1_file, dl1_params_tel_mon_cal_key) + # data_calib['unusable_pixels'] , indices: (Gain Calib_id Pixel) + + # Get the "unusable" flags from the pedcal file: + unusable_HG = data_calib['unusable_pixels'][0][0] + unusable_LG = data_calib['unusable_pixels'][0][1] + + reliable_pixels = ~(unusable_HG | unusable_LG) + usable_pixels.append(reliable_pixels) + + charges_data = data_images['image'] + charges_pedestals = charges_data[pedestal_mask] + mean_ped_charge = np.mean(charges_pedestals, axis=0) + median_ped_mean_pix_charge.append(np.median(mean_ped_charge[ + reliable_pixels])) + + median_ped_mean_pix_charge = np.array(median_ped_mean_pix_charge) + number_of_pedestals = np.array(number_of_pedestals) + + # Now compute the mean for all processed subruns (weighted with the + # number of pedestals in each subrun) + + qped = np.sum(number_of_pedestals * + median_ped_mean_pix_charge) / np.sum(number_of_pedestals) + + picture_threshold = pic_th(qped) + boundary_threshold = picture_threshold / 2 + + # We now create a .json files with recommended image cleaning + # settings for lstchain_dl1ab. 
+ newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold'] + newconfig['picture_thresh'] = picture_threshold + newconfig['boundary_thresh'] = boundary_threshold + run = int(file.name[file.name.find('Run')+3:-3]) + json_filename = Path(output_dir, f'dl1ab_Run{run:05d}.json') + dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, + 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, + json_filename, overwrite=True) + log.info(json_filename) + log.info('lstchain_find_tailcuts finished successfully!') + + +def get_input_files(all_dl1_files, max_number_of_processed_subruns): + """ + Reduce the number of DL1 files in all_dl1_files to a maximum of + max_number_of_processed_subruns per run + """ + + runlist = np.unique([parse_dl1_filename(f).run for f in all_dl1_files]) + + dl1_files = [] + for run in runlist: + file_list = [f for f in all_dl1_files + if f.find(f'dl1_LST-1.Run{run:05d}')>0] + if len(file_list) <= max_number_of_processed_subruns: + dl1_files.extend(file_list) + continue + step = len(file_list) / max_number_of_processed_subruns + k = 0 + while np.round(k) < len(file_list): + dl1_files.append(file_list[int(np.round(k))]) + k += step + + return dl1_files + + +def pic_th(mean_ped): + """ + mean_ped: mean pixel charge in pedestal events (for the standard + LocalPeakWindowSearch algo & settings in lstchain) + + Returns: + recommended picture threshold for image cleaning (from a table) + """ + mp_edges = np.array([2.4, 3.1, 3.8, 4.5, 5.2]) + picture_threshold = np.array([8, 10, 12, 14, 16, 18]) + + if mean_ped >= mp_edges[-1]: + return picture_threshold[-1] + return picture_threshold[np.where(mp_edges>mean_ped)[0][0]] From 7a707e377d5099d6f64807f6e06c77c7ce55cf3c Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Fri, 13 Dec 2024 19:03:55 +0100 Subject: [PATCH 02/45] Minor fix --- lstchain/scripts/lstchain_find_tailcuts.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git 
a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 5437322672..691b67d7c3 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -91,11 +91,6 @@ def main(): for dl1_file in dl1_files: log.info('\nInput file: %s', dl1_file) - run_info = parse_dl1_filename(dl1_file) - run_id, subrun_id = run_info.run, run_info.subrun - summary_info.run_id = run_id - summary_info.subrun_id = subrun_id - data_parameters = read_table(dl1_file, dl1_params_lstcam_key) event_type_data = data_parameters['event_type'].data pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value @@ -135,8 +130,11 @@ def main(): newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold'] newconfig['picture_thresh'] = picture_threshold newconfig['boundary_thresh'] = boundary_threshold - run = int(file.name[file.name.find('Run')+3:-3]) - json_filename = Path(output_dir, f'dl1ab_Run{run:05d}.json') + + run_info = parse_dl1_filename(dl1_files[0]) + run_id = run_info.run + + json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json') dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, json_filename, overwrite=True) From afd9ba6ef8badfab80e0d5e2bfe132792b505046 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Fri, 13 Dec 2024 19:14:19 +0100 Subject: [PATCH 03/45] mean => median --- lstchain/scripts/lstchain_find_tailcuts.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 691b67d7c3..6719994678 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -116,11 +116,11 @@ def main(): median_ped_mean_pix_charge = np.array(median_ped_mean_pix_charge) number_of_pedestals = np.array(number_of_pedestals) - # Now compute the mean for all 
processed subruns (weighted with the - # number of pedestals in each subrun) - - qped = np.sum(number_of_pedestals * - median_ped_mean_pix_charge) / np.sum(number_of_pedestals) + # Now compute the median for all processed subruns, which is more robust + # against e.g. subruns affected by car flashes. We also exclude subruns + # which have less than half of the median statistics per subrun. + good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals) + qped = np.median(median_ped_mean_pix_charge[good_stats]) picture_threshold = pic_th(qped) boundary_threshold = picture_threshold / 2 From 47115f0822f48b4514055c225aa397328b7ff5a6 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Fri, 13 Dec 2024 19:21:26 +0100 Subject: [PATCH 04/45] cast to int --- lstchain/scripts/lstchain_find_tailcuts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 6719994678..337c85961a 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -128,8 +128,9 @@ def main(): # We now create a .json files with recommended image cleaning # settings for lstchain_dl1ab. 
newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold'] - newconfig['picture_thresh'] = picture_threshold - newconfig['boundary_thresh'] = boundary_threshold + # casts below are needed, json does not like numpy's int64: + newconfig['picture_thresh'] = int(picture_threshold) + newconfig['boundary_thresh'] = int(boundary_threshold) run_info = parse_dl1_filename(dl1_files[0]) run_id = run_info.run From 2336cdb2d1baf8cd4efc203ba4ba8b9a193d79ef Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 10:00:44 +0100 Subject: [PATCH 05/45] Changed arguments --- lstchain/scripts/lstchain_find_tailcuts.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 337c85961a..f7fdc95304 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -33,7 +33,7 @@ import time import sys -from lstchain.paths import parse_dl1_filename +from lstchain.paths import parse_dl1_filename, run_to_dl1_filename from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key from lstchain.io.io import dl1_params_tel_mon_cal_key from lstchain.io.config import get_standard_config, dump_config @@ -46,9 +46,11 @@ parser = argparse.ArgumentParser(description="Tailcut finder") -parser.add_argument('-f', '--dl1-files', dest='dl1_files', - type=str, default='', - help='Input DL1 file names') +parser.add_argument('-d', '--input-dir', dest='input_dir', + type=Path, default='./', + help='Input DL1 directory') +parser.add_argument('-r', '--run', dest='run_number', + type=int, help='Run number') parser.add_argument('-o', '--output-dir', dest='output_dir', type=Path, default='./', help='Path to the output directory (default: %(default)s)') @@ -67,11 +69,12 @@ def main(): output_dir = args.output_dir.absolute() output_dir.mkdir(exist_ok=True, parents=True) - if args.dl1_files == '': - 
log.error('Please use --dl1-files to provide a valid set of DL1 files!') - sys.exit(1) - - all_dl1_files = glob.glob(args.dl1_files) + input_dir = args.input_dir.absolute() + # subrun-wise dl1 file names: + dl1_filenames = Path(input_dir, + run_to_dl1_filename(1, args.run_number, 0).replace( + '.0000.h5', '.????.h5')) + all_dl1_files = glob.glob(dl1_filenames) all_dl1_files.sort() log_file = args.log_file From be287733ca1d948e24d57e51654d8369c0136461 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 10:04:24 +0100 Subject: [PATCH 06/45] Minor fix --- lstchain/scripts/lstchain_find_tailcuts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index f7fdc95304..57a6c0f2ce 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -74,7 +74,7 @@ def main(): dl1_filenames = Path(input_dir, run_to_dl1_filename(1, args.run_number, 0).replace( '.0000.h5', '.????.h5')) - all_dl1_files = glob.glob(dl1_filenames) + all_dl1_files = glob.glob(dl1_filenames.absolute()) all_dl1_files.sort() log_file = args.log_file From 1c8aa49f83a9d5304074594f877dedac1c389fd5 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 10:12:27 +0100 Subject: [PATCH 07/45] Path to str in glob argument --- lstchain/scripts/lstchain_find_tailcuts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 57a6c0f2ce..8be668a43f 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -74,7 +74,7 @@ def main(): dl1_filenames = Path(input_dir, run_to_dl1_filename(1, args.run_number, 0).replace( '.0000.h5', '.????.h5')) - all_dl1_files = glob.glob(dl1_filenames.absolute()) + all_dl1_files = glob.glob(str(dl1_filenames)) all_dl1_files.sort() 
log_file = args.log_file From fcaad2f8ec9fe10e93c5ebb068d34754a38e3428 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 10:26:44 +0100 Subject: [PATCH 08/45] Simplification on the selection of the subsample of subruns --- lstchain/scripts/lstchain_find_tailcuts.py | 37 ++++------------------ 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 8be668a43f..6588e2e844 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -74,6 +74,13 @@ def main(): dl1_filenames = Path(input_dir, run_to_dl1_filename(1, args.run_number, 0).replace( '.0000.h5', '.????.h5')) + # Aprox. maximum number of subruns (uniformly distributed through the + # run) to be processed: + max_number_of_processed_subruns = 10 + # Keep only ~max_number_of_processed_subruns subruns, distributed + # along the run: + dl1_filenames = dl1_filenames[::int(1+max_number_of_processed_subruns)] + all_dl1_files = glob.glob(str(dl1_filenames)) all_dl1_files.sort() @@ -82,11 +89,6 @@ def main(): handler = logging.FileHandler(log_file, mode='w') logging.getLogger().addHandler(handler) - # Number of subruns (uniformly distributed through the run) to be processed: - max_number_of_processed_subruns = 10 - - # process at most max_number_of_processed_subruns for each run: - dl1_files = get_input_files(all_dl1_files, max_number_of_processed_subruns) number_of_pedestals = [] usable_pixels = [] median_ped_mean_pix_charge = [] @@ -145,31 +147,6 @@ def main(): log.info(json_filename) log.info('lstchain_find_tailcuts finished successfully!') - -def get_input_files(all_dl1_files, max_number_of_processed_subruns): - """ - Reduce the number of DL1 files in all_dl1_files to a maximum of - max_number_of_processed_subruns per run - """ - - runlist = np.unique([parse_dl1_filename(f).run for f in all_dl1_files]) - - dl1_files = [] - for run in 
runlist: - file_list = [f for f in all_dl1_files - if f.find(f'dl1_LST-1.Run{run:05d}')>0] - if len(file_list) <= max_number_of_processed_subruns: - dl1_files.extend(file_list) - continue - step = len(file_list) / max_number_of_processed_subruns - k = 0 - while np.round(k) < len(file_list): - dl1_files.append(file_list[int(np.round(k))]) - k += step - - return dl1_files - - def pic_th(mean_ped): """ mean_ped: mean pixel charge in pedestal events (for the standard From c359bf6494970756ad6fd68c47dc91809b4eb178 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 10:29:46 +0100 Subject: [PATCH 09/45] Fix in subrun selection --- lstchain/scripts/lstchain_find_tailcuts.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 6588e2e844..8c116de0c2 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -74,15 +74,16 @@ def main(): dl1_filenames = Path(input_dir, run_to_dl1_filename(1, args.run_number, 0).replace( '.0000.h5', '.????.h5')) + all_dl1_files = glob.glob(str(dl1_filenames)) + all_dl1_files.sort() + # Aprox. 
maximum number of subruns (uniformly distributed through the # run) to be processed: max_number_of_processed_subruns = 10 # Keep only ~max_number_of_processed_subruns subruns, distributed # along the run: - dl1_filenames = dl1_filenames[::int(1+max_number_of_processed_subruns)] - - all_dl1_files = glob.glob(str(dl1_filenames)) - all_dl1_files.sort() + dl1_files = all_dl1_files[::int(1+len(all_dl1_files) / + max_number_of_processed_subruns)] log_file = args.log_file if log_file is not None: From 0733892afa1f8bf40f89be0001984866f9bd712f Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 10:58:21 +0100 Subject: [PATCH 10/45] Refactoring --- lstchain/image/cleaning.py | 99 +++++++++++++++++++- lstchain/scripts/lstchain_find_tailcuts.py | 104 ++------------------- 2 files changed, 104 insertions(+), 99 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 6fa9da82f7..527613eef7 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -1,6 +1,19 @@ +import glob +import logging import numpy as np +from pathlib import Path +from lstchain.paths import run_to_dl1_filename +from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key +from lstchain.io.io import dl1_params_tel_mon_cal_key +from lstchain.io.config import get_standard_config +from ctapipe.containers import EventType -__all__ = ['apply_dynamic_cleaning'] +from ctapipe.io import read_table +__all__ = ['apply_dynamic_cleaning', + 'find_tailcuts', + 'pic_th'] + +log = logging.getLogger(__name__) def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction): @@ -36,3 +49,87 @@ def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction): mask_dynamic_cleaning = (image >= dynamic_threshold) & signal_pixels return mask_dynamic_cleaning + + +def find_tailcuts(input_dir, run_number): + + # subrun-wise dl1 file names: + dl1_filenames = Path(input_dir, + run_to_dl1_filename(1, run_number, 0).replace( + '.0000.h5', 
'.????.h5')) + all_dl1_files = glob.glob(str(dl1_filenames)) + all_dl1_files.sort() + + # Aprox. maximum number of subruns (uniformly distributed through the + # run) to be processed: + max_number_of_processed_subruns = 10 + # Keep only ~max_number_of_processed_subruns subruns, distributed + # along the run: + dl1_files = all_dl1_files[::int(1+len(all_dl1_files) / + max_number_of_processed_subruns)] + + number_of_pedestals = [] + usable_pixels = [] + median_ped_mean_pix_charge = [] + + for dl1_file in dl1_files: + log.info('\nInput file: %s', dl1_file) + + data_parameters = read_table(dl1_file, dl1_params_lstcam_key) + event_type_data = data_parameters['event_type'].data + pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value + + number_of_pedestals.append(pedestal_mask.sum()) + data_images = read_table(dl1_file, dl1_images_lstcam_key) + data_calib = read_table(dl1_file, dl1_params_tel_mon_cal_key) + # data_calib['unusable_pixels'] , indices: (Gain Calib_id Pixel) + + # Get the "unusable" flags from the pedcal file: + unusable_hg = data_calib['unusable_pixels'][0][0] + unusable_lg = data_calib['unusable_pixels'][0][1] + + reliable_pixels = ~(unusable_hg | unusable_lg) + usable_pixels.append(reliable_pixels) + + charges_data = data_images['image'] + charges_pedestals = charges_data[pedestal_mask] + mean_ped_charge = np.mean(charges_pedestals, axis=0) + median_ped_mean_pix_charge.append(np.median(mean_ped_charge[ + reliable_pixels])) + + median_ped_mean_pix_charge = np.array(median_ped_mean_pix_charge) + number_of_pedestals = np.array(number_of_pedestals) + + # Now compute the median for all processed subruns, which is more robust + # against e.g. subruns affected by car flashes. We also exclude subruns + # which have less than half of the median statistics per subrun. 
+ good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals) + qped = np.median(median_ped_mean_pix_charge[good_stats]) + + picture_threshold = pic_th(qped) + boundary_threshold = picture_threshold / 2 + + # We now create a .json files with recommended image cleaning + # settings for lstchain_dl1ab. + newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold'] + # casts below are needed, json does not like numpy's int64: + newconfig['picture_thresh'] = int(picture_threshold) + newconfig['boundary_thresh'] = int(boundary_threshold) + + return newconfig + + +def pic_th(mean_ped): + """ + mean_ped: mean pixel charge in pedestal events (for the standard + LocalPeakWindowSearch algo & settings in lstchain) + + Returns: + recommended picture threshold for image cleaning (from a table) + """ + mp_edges = np.array([2.4, 3.1, 3.8, 4.5, 5.2]) + picture_threshold = np.array([8, 10, 12, 14, 16, 18]) + + if mean_ped >= mp_edges[-1]: + return picture_threshold[-1] + return picture_threshold[np.where(mp_edges > mean_ped)[0][0]] diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 8c116de0c2..3c6a0e34ef 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -26,23 +26,10 @@ import argparse import logging from pathlib import Path -import tables -import glob -import os -import numpy as np -import time import sys -from lstchain.paths import parse_dl1_filename, run_to_dl1_filename -from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key -from lstchain.io.io import dl1_params_tel_mon_cal_key from lstchain.io.config import get_standard_config, dump_config - -from ctapipe.io import read_table -from ctapipe.core import Container, Field -from ctapipe.io import HDF5TableWriter -from ctapipe.containers import EventType -from ctapipe_io_lst import LSTEventSource +from lstchain.image.cleaning import find_tailcuts parser = 
argparse.ArgumentParser(description="Tailcut finder") @@ -60,86 +47,22 @@ log = logging.getLogger(__name__) - def main(): args = parser.parse_args() log.setLevel(logging.INFO) logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) - output_dir = args.output_dir.absolute() - output_dir.mkdir(exist_ok=True, parents=True) - - input_dir = args.input_dir.absolute() - # subrun-wise dl1 file names: - dl1_filenames = Path(input_dir, - run_to_dl1_filename(1, args.run_number, 0).replace( - '.0000.h5', '.????.h5')) - all_dl1_files = glob.glob(str(dl1_filenames)) - all_dl1_files.sort() - - # Aprox. maximum number of subruns (uniformly distributed through the - # run) to be processed: - max_number_of_processed_subruns = 10 - # Keep only ~max_number_of_processed_subruns subruns, distributed - # along the run: - dl1_files = all_dl1_files[::int(1+len(all_dl1_files) / - max_number_of_processed_subruns)] - log_file = args.log_file if log_file is not None: handler = logging.FileHandler(log_file, mode='w') logging.getLogger().addHandler(handler) - number_of_pedestals = [] - usable_pixels = [] - median_ped_mean_pix_charge = [] - - for dl1_file in dl1_files: - log.info('\nInput file: %s', dl1_file) - - data_parameters = read_table(dl1_file, dl1_params_lstcam_key) - event_type_data = data_parameters['event_type'].data - pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value - - number_of_pedestals.append(pedestal_mask.sum()) - data_images = read_table(dl1_file, dl1_images_lstcam_key) - data_calib = read_table(dl1_file, dl1_params_tel_mon_cal_key) - # data_calib['unusable_pixels'] , indices: (Gain Calib_id Pixel) - - # Get the "unusable" flags from the pedcal file: - unusable_HG = data_calib['unusable_pixels'][0][0] - unusable_LG = data_calib['unusable_pixels'][0][1] - - reliable_pixels = ~(unusable_HG | unusable_LG) - usable_pixels.append(reliable_pixels) - - charges_data = data_images['image'] - charges_pedestals = charges_data[pedestal_mask] - mean_ped_charge = 
np.mean(charges_pedestals, axis=0) - median_ped_mean_pix_charge.append(np.median(mean_ped_charge[ - reliable_pixels])) - - median_ped_mean_pix_charge = np.array(median_ped_mean_pix_charge) - number_of_pedestals = np.array(number_of_pedestals) - - # Now compute the median for all processed subruns, which is more robust - # against e.g. subruns affected by car flashes. We also exclude subruns - # which have less than half of the median statistics per subrun. - good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals) - qped = np.median(median_ped_mean_pix_charge[good_stats]) - - picture_threshold = pic_th(qped) - boundary_threshold = picture_threshold / 2 - - # We now create a .json files with recommended image cleaning - # settings for lstchain_dl1ab. - newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold'] - # casts below are needed, json does not like numpy's int64: - newconfig['picture_thresh'] = int(picture_threshold) - newconfig['boundary_thresh'] = int(boundary_threshold) + output_dir = args.output_dir.absolute() + output_dir.mkdir(exist_ok=True, parents=True) + input_dir = args.input_dir.absolute() - run_info = parse_dl1_filename(dl1_files[0]) - run_id = run_info.run + run_id = args.run_number + newconfig = find_tailcuts(input_dir, run_id) json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json') dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, @@ -147,18 +70,3 @@ def main(): json_filename, overwrite=True) log.info(json_filename) log.info('lstchain_find_tailcuts finished successfully!') - -def pic_th(mean_ped): - """ - mean_ped: mean pixel charge in pedestal events (for the standard - LocalPeakWindowSearch algo & settings in lstchain) - - Returns: - recommended picture threshold for image cleaning (from a table) - """ - mp_edges = np.array([2.4, 3.1, 3.8, 4.5, 5.2]) - picture_threshold = np.array([8, 10, 12, 14, 16, 18]) - - if mean_ped >= mp_edges[-1]: - return picture_threshold[-1] - return 
picture_threshold[np.where(mp_edges>mean_ped)[0][0]] From ee6f29fd4762efe38dd1ed5399956a52604a43a5 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 12:45:24 +0100 Subject: [PATCH 11/45] set logging.INFO --- lstchain/image/cleaning.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 527613eef7..b1c839ca42 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -53,6 +53,8 @@ def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction): def find_tailcuts(input_dir, run_number): + log.setLevel(logging.INFO) + # subrun-wise dl1 file names: dl1_filenames = Path(input_dir, run_to_dl1_filename(1, run_number, 0).replace( From fb1ca4dc5bbb19061ae051e157dcf5e009557d8e Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 15:36:12 +0100 Subject: [PATCH 12/45] Additional NSB computation Improved docstrings --- lstchain/image/cleaning.py | 66 ++++++++++++++++++++-- lstchain/scripts/lstchain_find_tailcuts.py | 7 ++- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index b1c839ca42..6b707ad586 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -7,6 +7,7 @@ from lstchain.io.io import dl1_params_tel_mon_cal_key from lstchain.io.config import get_standard_config from ctapipe.containers import EventType +from scipy.stats import median_abs_deviation from ctapipe.io import read_table __all__ = ['apply_dynamic_cleaning', @@ -52,6 +53,34 @@ def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction): def find_tailcuts(input_dir, run_number): + """ + This functions uses DL1 files to determine tailcuts which are adequate + for the bulk of the pixels in a given run. It does so simply based on the + median (for the whole camera) of the median pixel charge for pedestal + events. 
+ For reasons of stability & simplicity of analysis, we cannot decide the + cleaning levels on a subrun-by-subrun basis. We select values which are + valid for the whole run. + The script also returns the suggested NSB adjustment needed in the + "dark-sky" MC to match the data. + The script will process a subset of the subruns (~10, hardcoded) of the run, + distributed uniformly through it. + + Parameters + ---------- + input_dir: `Path` + directory where the DL1 files (subrun-wise, i,e, including + DL1a) are stored + + run_number: ìnt` + run number to be processed + + Returns + ------- + additional_nsb_rate: p.e./ns rate of NSB to be added to "dark MC" to + match the noise in the data + newconfig (dict): cleaning configuration for running the DL1ab stage + """ log.setLevel(logging.INFO) @@ -118,16 +147,24 @@ def find_tailcuts(input_dir, run_number): newconfig['picture_thresh'] = int(picture_threshold) newconfig['boundary_thresh'] = int(boundary_threshold) - return newconfig + additional_nsb_rate = get_nsb(qped) + + return additional_nsb_rate, newconfig def pic_th(mean_ped): """ - mean_ped: mean pixel charge in pedestal events (for the standard - LocalPeakWindowSearch algo & settings in lstchain) + Parameters + ---------- + mean_ped: `float` + mean pixel charge in pedestal events (for the standard + LocalPeakWindowSearch algo & settings in lstchain) - Returns: + Returns + ------- + `int` recommended picture threshold for image cleaning (from a table) + """ mp_edges = np.array([2.4, 3.1, 3.8, 4.5, 5.2]) picture_threshold = np.array([8, 10, 12, 14, 16, 18]) @@ -135,3 +172,24 @@ def pic_th(mean_ped): if mean_ped >= mp_edges[-1]: return picture_threshold[-1] return picture_threshold[np.where(mp_edges > mean_ped)[0][0]] + + +def get_nsb(median_ped): + """ + Parameters + ---------- + median_ped: `float` + median pixel charge in pedestal events + + Returns + ------- + `float` + (from a parabolic parametrization) the recommended additional NSB + (in p.e. 
/ ns) that has to be added to the "dark MC" waveforms in + order to match the data for which the median pedestal charge is + median_ped + + """ + params = [1.63991237, 0.3130943, 0.07311759] + return (params[1] * (median_ped - params[0]) + + params[2] * (median_ped - params[0]) ** 2) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 3c6a0e34ef..e572e19569 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -62,11 +62,16 @@ def main(): input_dir = args.input_dir.absolute() run_id = args.run_number - newconfig = find_tailcuts(input_dir, run_id) + additional_nsb_rate, newconfig = find_tailcuts(input_dir, run_id) json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json') dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, json_filename, overwrite=True) log.info(json_filename) + log.info(f'Additional NSB rate (over dark MC): {additional_nsb_rate:.3f} ' + f'p.e./s') + log.info('lstchain_find_tailcuts finished successfully!') + + return additional_nsb_rate From ef812ceb18daf169d344140b7b83b4415d9db832 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 15:44:27 +0100 Subject: [PATCH 13/45] Better log message --- lstchain/scripts/lstchain_find_tailcuts.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index e572e19569..14cf30a21c 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -68,10 +68,11 @@ def main(): dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, json_filename, overwrite=True) + log.info('\nCleaning settings:') + log.info(newconfig) + log.info('\nWritten to:') log.info(json_filename) - 
log.info(f'Additional NSB rate (over dark MC): {additional_nsb_rate:.3f} ' + log.info(f'\nAdditional NSB rate (over dark MC): {additional_nsb_rate:.4f} ' f'p.e./s') log.info('lstchain_find_tailcuts finished successfully!') - - return additional_nsb_rate From 17a15c5fdeea06b594be55be108bc2fa3451e407 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 16:50:28 +0100 Subject: [PATCH 14/45] Outlier detection --- lstchain/image/cleaning.py | 54 ++++++++++++++++++---- lstchain/scripts/lstchain_find_tailcuts.py | 18 +++----- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 6b707ad586..277a98fd8a 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -91,6 +91,15 @@ def find_tailcuts(input_dir, run_number): all_dl1_files = glob.glob(str(dl1_filenames)) all_dl1_files.sort() + # Number of median absolute deviations (mad) away from the median that a + # pixel value has to be to be considered an outlier: + mad_pixels = 5 # would exclude 8e-4 of the pdf for a gaussian + + # Number of median absolute deviations (mad) away from the median that a + # subrun value has to be to be considered an outlier: + mad_subruns = 3 # would exclude 0.043 of the pdf for a gaussian + + # Aprox. 
maximum number of subruns (uniformly distributed through the # run) to be processed: max_number_of_processed_subruns = 10 @@ -101,7 +110,7 @@ def find_tailcuts(input_dir, run_number): number_of_pedestals = [] usable_pixels = [] - median_ped_mean_pix_charge = [] + median_ped_median_pix_charge = [] for dl1_file in dl1_files: log.info('\nInput file: %s', dl1_file) @@ -124,18 +133,47 @@ def find_tailcuts(input_dir, run_number): charges_data = data_images['image'] charges_pedestals = charges_data[pedestal_mask] - mean_ped_charge = np.mean(charges_pedestals, axis=0) - median_ped_mean_pix_charge.append(np.median(mean_ped_charge[ - reliable_pixels])) - - median_ped_mean_pix_charge = np.array(median_ped_mean_pix_charge) + # pixel-wise median charge through the subrun: + median_pix_charge = np.nanmedian(charges_pedestals, axis=0) + median_pix_charge_dev = median_abs_deviation(charges_pedestals, + axis=0, + nan_policy='omit') + # Just a cut to remove outliers: + outliers = (np.abs(charges_pedestals - median_pix_charge) / + median_pix_charge_dev) > mad_pixels + if outliers.sum() > 0: + log.info(f' Removed {outliers.sum()} outlier pixels from ' + f'pedestal median calculation') + # Replace outliers by nans: + charges_pedestals = np.where(outliers, np.nan, charges_pedestals) + # Recompute the pixel-wise medians ignoring the outliers: + median_pix_charge = np.nanmedian(charges_pedestals, axis=0) + + # Now compute the median (for the whole camera) of the medians (for + # each pixel) of the charges in pedestal events. Use only reliable + # pixels for this: + median_ped_median_pix_charge.append(np.nanmedian(median_pix_charge[ + reliable_pixels])) + + # convert to ndarray: + median_ped_median_pix_charge = np.array(median_ped_median_pix_charge) number_of_pedestals = np.array(number_of_pedestals) # Now compute the median for all processed subruns, which is more robust - # against e.g. subruns affected by car flashes. We also exclude subruns + # against e.g. 
subruns affected by car flashes. We exclude subruns # which have less than half of the median statistics per subrun. good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals) - qped = np.median(median_ped_mean_pix_charge[good_stats]) + qped = np.nanmedian(median_ped_median_pix_charge[good_stats]) + # Now we also remove outliers if any: + qped_dev = median_abs_deviation(median_ped_median_pix_charge[good_stats]) + not_outlier = (np.abs(median_ped_median_pix_charge - qped) / + qped_dev) < mad_subruns + + if (~not_outlier).sum() > 0: + log.info(f' Removed {(~not_outlier).sum()} outlier subruns from ' + f'pedestal median calculation') + # recompute without outliers: + qped = np.nanmedian(median_ped_median_pix_charge[good_stats & not_outlier]) picture_threshold = pic_th(qped) boundary_threshold = picture_threshold / 2 diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 14cf30a21c..5055adc22d 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -3,23 +3,19 @@ """ This script uses DL1 files to determine tailcuts which are adequate for the bulk of the pixels in a given run. It does so simply based on the median (for -the whole camera) of the average pixel charge for pedestal events. +the whole camera) of the median pixel charge for pedestal events. For reasons of stability & simplicity of analysis, we cannot decide the cleaning levels on a subrun-by-subrun basis. We select values which are ok for the whole run. -The script also returns the suggested NSB adjustment needed in the "dark-sky" MC -to match the data. +The script writes out the cleaning settings to a json file, +e.g. dl1ab_Run13181.json +It also returns the suggested NSB adjustment needed in the "dark-sky" MC +to match the data, in units of p.e./ns. 
-The script will process a maximum of 10 subruns of those provided as input, -distributed uniformly through the run - just for speed reasons: the result -would hardly change if all subruns are used. - -lstchain_find_tailcuts -f "/.../dl1_LST-1.Run12469.????.h5" - -The program creates in the output directory a .json file containing the -cleaning configuration. +lstchain_find_tailcuts -d "/..../DL1/YYYYMMDD/v0.10/tailcut84 -r 13181 --log +out.log " """ From 2a677b464c2e3cc05e3145276c4e255e03177255 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 17:16:13 +0100 Subject: [PATCH 15/45] Changed outlier detection --- lstchain/image/cleaning.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 277a98fd8a..cc364cd2c5 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -92,13 +92,8 @@ def find_tailcuts(input_dir, run_number): all_dl1_files.sort() # Number of median absolute deviations (mad) away from the median that a - # pixel value has to be to be considered an outlier: - mad_pixels = 5 # would exclude 8e-4 of the pdf for a gaussian - - # Number of median absolute deviations (mad) away from the median that a - # subrun value has to be to be considered an outlier: - mad_subruns = 3 # would exclude 0.043 of the pdf for a gaussian - + # value has to be to be considered an outlier: + mad_max = 5 # would exclude 8e-4 of the pdf for a gaussian # Aprox. 
maximum number of subruns (uniformly distributed through the # run) to be processed: @@ -140,10 +135,11 @@ def find_tailcuts(input_dir, run_number): nan_policy='omit') # Just a cut to remove outliers: outliers = (np.abs(charges_pedestals - median_pix_charge) / - median_pix_charge_dev) > mad_pixels + median_pix_charge_dev) > mad_max if outliers.sum() > 0: - log.info(f' Removed {outliers.sum()} outlier pixels from ' - f'pedestal median calculation') + removed_fraction = outliers.sum() / outliers.size + log.info(f' Removed {removed_fraction:.2%} of pixels (outliers) ' + f'from pedestal median calculation') # Replace outliers by nans: charges_pedestals = np.where(outliers, np.nan, charges_pedestals) # Recompute the pixel-wise medians ignoring the outliers: @@ -167,11 +163,12 @@ def find_tailcuts(input_dir, run_number): # Now we also remove outliers if any: qped_dev = median_abs_deviation(median_ped_median_pix_charge[good_stats]) not_outlier = (np.abs(median_ped_median_pix_charge - qped) / - qped_dev) < mad_subruns + qped_dev) < mad_max if (~not_outlier).sum() > 0: - log.info(f' Removed {(~not_outlier).sum()} outlier subruns from ' - f'pedestal median calculation') + log.info(f' Removed {(~not_outlier).sum()} outlier subruns ' + f'(out of {not_outlier.size}) from pedestal median ' + f'calculation') # recompute without outliers: qped = np.nanmedian(median_ped_median_pix_charge[good_stats & not_outlier]) From 16bffee2b4da29d4e4714def8754d6f4bacb871a Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 17:23:25 +0100 Subject: [PATCH 16/45] Improved log message --- lstchain/image/cleaning.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index cc364cd2c5..de394a0866 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -133,7 +133,7 @@ def find_tailcuts(input_dir, run_number): median_pix_charge_dev = median_abs_deviation(charges_pedestals, 
axis=0, nan_policy='omit') - # Just a cut to remove outliers: + # Just a cut to remove outliers (pixels): outliers = (np.abs(charges_pedestals - median_pix_charge) / median_pix_charge_dev) > mad_max if outliers.sum() > 0: @@ -160,13 +160,13 @@ def find_tailcuts(input_dir, run_number): # which have less than half of the median statistics per subrun. good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals) qped = np.nanmedian(median_ped_median_pix_charge[good_stats]) - # Now we also remove outliers if any: + # Now we also remove outliers (subruns) if any: qped_dev = median_abs_deviation(median_ped_median_pix_charge[good_stats]) not_outlier = (np.abs(median_ped_median_pix_charge - qped) / qped_dev) < mad_max if (~not_outlier).sum() > 0: - log.info(f' Removed {(~not_outlier).sum()} outlier subruns ' + log.info(f'\nRemoved {(~not_outlier).sum()} outlier subruns ' f'(out of {not_outlier.size}) from pedestal median ' f'calculation') # recompute without outliers: From c115c11dca175493b9b4269a32178c298624676c Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Mon, 16 Dec 2024 17:31:55 +0100 Subject: [PATCH 17/45] Better log message --- lstchain/image/cleaning.py | 4 ++++ lstchain/scripts/lstchain_find_tailcuts.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index de394a0866..20ea903993 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -165,6 +165,10 @@ def find_tailcuts(input_dir, run_number): not_outlier = (np.abs(median_ped_median_pix_charge - qped) / qped_dev) < mad_max + if (~good_stats).sum() > 0: + log.info(f'\nNumber of subruns with low statistics: ' + f'{(~good_stats).sum()} - removed from pedestal median ' + f'calculation') if (~not_outlier).sum() > 0: log.info(f'\nRemoved {(~not_outlier).sum()} outlier subruns ' f'(out of {not_outlier.size}) from pedestal median ' diff --git a/lstchain/scripts/lstchain_find_tailcuts.py 
b/lstchain/scripts/lstchain_find_tailcuts.py index 5055adc22d..4fb4cc7533 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -69,6 +69,6 @@ def main(): log.info('\nWritten to:') log.info(json_filename) log.info(f'\nAdditional NSB rate (over dark MC): {additional_nsb_rate:.4f} ' - f'p.e./s') + f'p.e./ns') log.info('lstchain_find_tailcuts finished successfully!') From 9269f2c4fc6ef232e8aae2004341c7bf4ae16b2f Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Tue, 17 Dec 2024 09:36:16 +0100 Subject: [PATCH 18/45] Added the median pedestal charge among the returns of find_tailcuts --- lstchain/image/cleaning.py | 2 +- lstchain/scripts/lstchain_find_tailcuts.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 20ea903993..4ec17d42a1 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -188,7 +188,7 @@ def find_tailcuts(input_dir, run_number): additional_nsb_rate = get_nsb(qped) - return additional_nsb_rate, newconfig + return qped, additional_nsb_rate, newconfig def pic_th(mean_ped): diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 4fb4cc7533..e97591fba6 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -58,12 +58,14 @@ def main(): input_dir = args.input_dir.absolute() run_id = args.run_number - additional_nsb_rate, newconfig = find_tailcuts(input_dir, run_id) + median_qped, additional_nsb_rate, newconfig = find_tailcuts(input_dir, + run_id) json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json') dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, json_filename, overwrite=True) + log.info('\nMedian pedestal charge: {median_qped:.3f} p.e.') log.info('\nCleaning settings:') log.info(newconfig) 
log.info('\nWritten to:') From 3fd7b3a92b7b3905a87a49d0c7ee48565add0ec1 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Tue, 17 Dec 2024 10:02:05 +0100 Subject: [PATCH 19/45] string formatting --- lstchain/scripts/lstchain_find_tailcuts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index e97591fba6..c66a82e0c4 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -65,7 +65,7 @@ def main(): dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, json_filename, overwrite=True) - log.info('\nMedian pedestal charge: {median_qped:.3f} p.e.') + log.info(f'\nMedian pedestal charge: {median_qped:.3f} p.e.') log.info('\nCleaning settings:') log.info(newconfig) log.info('\nWritten to:') From cf54e8189dfd0c01f3709a143cd0082a28fdc323 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Tue, 17 Dec 2024 16:41:40 +0100 Subject: [PATCH 20/45] New parameters for function to obtain (from pedestal charges) the additional NSB needed in the MC. Previous ones were based on the mean pedestal charges, with no exclusion of outliers (pixels or subruns). 
New ones are based on the medians computed in the current code (& excluding outliers) --- lstchain/image/cleaning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 4ec17d42a1..584b11c4f3 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -229,6 +229,6 @@ def get_nsb(median_ped): median_ped """ - params = [1.63991237, 0.3130943, 0.07311759] + params = [1.39498756, 0.28761336, 0.0858798 ] return (params[1] * (median_ped - params[0]) + params[2] * (median_ped - params[0]) ** 2) From 794f992b9c7a63e3b8c530c2a9533d726e4b29cb Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Wed, 18 Dec 2024 09:50:06 +0100 Subject: [PATCH 21/45] Removed from lstchain_dvr_pixselector.py the creation of a json file with cleaning settings. This is now done by the new script lstchain_find_tailcuts.py --- lstchain/scripts/lstchain_dvr_pixselector.py | 35 +------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/lstchain/scripts/lstchain_dvr_pixselector.py b/lstchain/scripts/lstchain_dvr_pixselector.py index 41adf403db..ea8d92037c 100644 --- a/lstchain/scripts/lstchain_dvr_pixselector.py +++ b/lstchain/scripts/lstchain_dvr_pixselector.py @@ -27,12 +27,7 @@ The program creates in the output directory (which is the current by default) a file DVR_settings_LST-1.Run12469.h5 which contains a table "run_summary" which includes the DVR algorithm parameters determined for each processed -subrun. It also creates a file with recommended cleaning settings for running -DL1ab, based on the NSB level measured in the processed runs. We use -as picture threshold the closest even number not smaller than the charge -"min_charge_for_certain_selection" (averaged for all subruns and rounded) -which is the value from which a pixel will be certainly kept by the Data Volume -Reduction. 
Then we run again the script over all subruns, and using the option to create the pixel maks (this can also be done subrun by subrun, to parallelize the @@ -506,34 +501,6 @@ def main(): time.sleep(300) continue - - # We now create also .json files with recommended image cleaning - # settings for lstchain_dl1ab. We determine the picture threshold - # from the values of min_charge_for_certain_selection: - - if not write_pixel_masks: - log.info('Output files:') - for file in list_of_output_files: - log.info(file) - dvrtable = read_table(file, "/run_summary") - picture_threshold = get_typical_dvr_min_charge(dvrtable) - - # we round it to an even number of p.e., just to limit the amount - # of different settings in the analysis (i.e. we change - # picture_threshold in steps of 2 p.e.): - if picture_threshold % 2 != 0: - picture_threshold += 1 - boundary_threshold = picture_threshold / 2 - newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold'] - newconfig['picture_thresh'] = picture_threshold - newconfig['boundary_thresh'] = boundary_threshold - run = int(file.name[file.name.find('Run')+3:-3]) - json_filename = Path(output_dir, f'dl1ab_Run{run:05d}.json') - dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, - 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, - json_filename, overwrite=True) - log.info(json_filename) - log.info('lstchain_dvr_pixselector finished successfully!') From 353fb1865c0c35237d7dc29bb3a176711ed2b4c6 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Wed, 18 Dec 2024 09:53:14 +0100 Subject: [PATCH 22/45] Remove unused imports --- lstchain/scripts/lstchain_dvr_pixselector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lstchain/scripts/lstchain_dvr_pixselector.py b/lstchain/scripts/lstchain_dvr_pixselector.py index ea8d92037c..e6f85d4fb7 100644 --- a/lstchain/scripts/lstchain_dvr_pixselector.py +++ b/lstchain/scripts/lstchain_dvr_pixselector.py @@ -63,7 +63,6 @@ from lstchain.paths 
import parse_dl1_filename from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key from lstchain.io.io import dl1_params_tel_mon_cal_key -from lstchain.io.config import get_standard_config, dump_config from ctapipe.io import read_table from ctapipe.core import Container, Field From b78758c9fea5ff5d3fd7fc9f524f6bdb6f6f4903 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Wed, 18 Dec 2024 10:07:58 +0100 Subject: [PATCH 23/45] Check possible wrong input --- lstchain/scripts/lstchain_find_tailcuts.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index c66a82e0c4..a8bff41cf2 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -57,6 +57,14 @@ def main(): output_dir.mkdir(exist_ok=True, parents=True) input_dir = args.input_dir.absolute() + if not input_dir.exists(): + logging.error('Input directory does not exist') + sys.exit(1) + + if not input_dir.is_dir(): + logging.error('Input directory is not a directory!') + sys.exit(1) + run_id = args.run_number median_qped, additional_nsb_rate, newconfig = find_tailcuts(input_dir, run_id) From 9d88a860fe58c4997f22e0296fe860d1365d61fb Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Wed, 18 Dec 2024 10:46:44 +0100 Subject: [PATCH 24/45] Check for number of valid pixels --- lstchain/image/cleaning.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 584b11c4f3..b5fc20f48b 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -95,6 +95,10 @@ def find_tailcuts(input_dir, run_number): # value has to be to be considered an outlier: mad_max = 5 # would exclude 8e-4 of the pdf for a gaussian + # minimum number of valid pixels to consider the calculation of NSB level + # acceptable: + min_number_of_valid_pixels = 1000 + 
# Aprox. maximum number of subruns (uniformly distributed through the # run) to be processed: max_number_of_processed_subruns = 10 @@ -148,8 +152,14 @@ def find_tailcuts(input_dir, run_number): # Now compute the median (for the whole camera) of the medians (for # each pixel) of the charges in pedestal events. Use only reliable # pixels for this: - median_ped_median_pix_charge.append(np.nanmedian(median_pix_charge[ - reliable_pixels])) + n_valid_pixels = np.isfinite(median_pix_charge[reliable_pixels]).sum() + if n_valid_pixels < min_number_of_valid_pixels: + logging.warning(f' Too few valid pixels ({n_valid_pixels}) for ' + f'calculation!') + median_ped_median_pix_charge.append(np.nan) + else: + median_ped_median_pix_charge.append(np.nanmedian( + median_pix_charge[reliable_pixels])) # convert to ndarray: median_ped_median_pix_charge = np.array(median_ped_median_pix_charge) @@ -173,8 +183,11 @@ def find_tailcuts(input_dir, run_number): log.info(f'\nRemoved {(~not_outlier).sum()} outlier subruns ' f'(out of {not_outlier.size}) from pedestal median ' f'calculation') - # recompute without outliers: - qped = np.nanmedian(median_ped_median_pix_charge[good_stats & not_outlier]) + + # recompute with good-statistics and well-behaving runs: + qped = np.nanmedian(median_ped_median_pix_charge[good_stats & not_outlier]) + log.info(f'\nNumber of subruns used in calculations: ' + f'{(good_stats & not_outlier).sum()}') picture_threshold = pic_th(qped) boundary_threshold = picture_threshold / 2 From ac610813902ec481e2706995ee27c0516043eb0e Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Wed, 18 Dec 2024 10:58:09 +0100 Subject: [PATCH 25/45] Check for number of pedestals in subrun --- lstchain/image/cleaning.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index b5fc20f48b..1bac7c5a06 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -95,7 +95,11 @@ def 
find_tailcuts(input_dir, run_number): # value has to be to be considered an outlier: mad_max = 5 # would exclude 8e-4 of the pdf for a gaussian - # minimum number of valid pixels to consider the calculation of NSB level + # Minimum number of interleaved pedestals in subrun to proceed with + # calculation: + min_number_of_ped_events = 10 + + # Minimum number of valid pixels to consider the calculation of NSB level # acceptable: min_number_of_valid_pixels = 1000 @@ -118,6 +122,11 @@ def find_tailcuts(input_dir, run_number): event_type_data = data_parameters['event_type'].data pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value + num_pedestals = pedestal_mask.sum() + if num_pedestals < min_number_of_ped_events: + log.warning(f' Too few interleaved pedestals (' + f'{num_pedestals}) - skipped subrun!') + number_of_pedestals.append(pedestal_mask.sum()) data_images = read_table(dl1_file, dl1_images_lstcam_key) data_calib = read_table(dl1_file, dl1_params_tel_mon_cal_key) From c8c47de3c2cb6006609440f3a81df4911892ab8e Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Wed, 18 Dec 2024 11:00:56 +0100 Subject: [PATCH 26/45] missing continue --- lstchain/image/cleaning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 1bac7c5a06..37aafd5863 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -126,6 +126,7 @@ def find_tailcuts(input_dir, run_number): if num_pedestals < min_number_of_ped_events: log.warning(f' Too few interleaved pedestals (' f'{num_pedestals}) - skipped subrun!') + continue number_of_pedestals.append(pedestal_mask.sum()) data_images = read_table(dl1_file, dl1_images_lstcam_key) From 2913357d984eba3f624ef8da2a0b361fc4c25ecb Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Wed, 18 Dec 2024 12:05:09 +0100 Subject: [PATCH 27/45] Avoid useless warning from rare empty pixels --- lstchain/image/cleaning.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 37aafd5863..9db70e1287 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -147,6 +147,9 @@ def find_tailcuts(input_dir, run_number): median_pix_charge_dev = median_abs_deviation(charges_pedestals, axis=0, nan_policy='omit') + # Avoid later warnings from empty pixels: + median_pix_charge_dev = np.where(median_pix_charge_dev > 0, + median_pix_charge_dev, np.nan) # Just a cut to remove outliers (pixels): outliers = (np.abs(charges_pedestals - median_pix_charge) / median_pix_charge_dev) > mad_max From 7486271d5197500b35e9956d75b60104ac953877 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Thu, 19 Dec 2024 09:35:01 +0100 Subject: [PATCH 28/45] Modified limits for cleaning level changes. The former ones referred to the old version using mean pedestal charges (and including outliers). New ones are adapted to the new definitions (medians, no outliers) and consistent with the MC productions --- lstchain/image/cleaning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 9db70e1287..e01ce346e6 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -231,7 +231,7 @@ def pic_th(mean_ped): recommended picture threshold for image cleaning (from a table) """ - mp_edges = np.array([2.4, 3.1, 3.8, 4.5, 5.2]) + mp_edges = [2.23, 2.88, 3.53, 4.19, 4.84] picture_threshold = np.array([8, 10, 12, 14, 16, 18]) if mean_ped >= mp_edges[-1]: From c02bf8e4a37715e83a29b868138968b8937d111c Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Thu, 19 Dec 2024 11:08:48 +0100 Subject: [PATCH 29/45] Improved comment --- lstchain/scripts/lstchain_find_tailcuts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index a8bff41cf2..36bf625eb2 100644 --- 
a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -12,7 +12,8 @@ The script writes out the cleaning settings to a json file, e.g. dl1ab_Run13181.json It also returns the suggested NSB adjustment needed in the "dark-sky" MC -to match the data, in units of p.e./ns. +to match the data, in units of p.e./ns, to be applied at the waveforms level, +i.e with lstchain_tune_nsb_waveform.py lstchain_find_tailcuts -d "/..../DL1/YYYYMMDD/v0.10/tailcut84 -r 13181 --log out.log " From 45a0cfaa5b641fcc2382dbc4d80f03918a2b2965 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Fri, 20 Dec 2024 09:33:14 +0100 Subject: [PATCH 30/45] Fixed buggy computation of outlier pixels. Previous one was not removing pixels, but truncating the charge distribution of all pixels (hence biasing the result) --- lstchain/image/cleaning.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index e01ce346e6..58737cac32 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -142,29 +142,29 @@ def find_tailcuts(input_dir, run_number): charges_data = data_images['image'] charges_pedestals = charges_data[pedestal_mask] - # pixel-wise median charge through the subrun: + # pixel-wise median charge through the subrun (#pixels): median_pix_charge = np.nanmedian(charges_pedestals, axis=0) - median_pix_charge_dev = median_abs_deviation(charges_pedestals, - axis=0, + # ignore pixels with 0 signal: + median_pix_charge = np.where(median_pix_charge > 0, + median_pix_charge, np.nan) + # median of medians accross camera: + median_median_pix_charge = np.nanmedian(median_pix_charge) + # mean abs deviation of pixel medians: + median_pix_charge_dev = median_abs_deviation(median_pix_charge, nan_policy='omit') - # Avoid later warnings from empty pixels: - median_pix_charge_dev = np.where(median_pix_charge_dev > 0, - median_pix_charge_dev, np.nan) 
+ # Just a cut to remove outliers (pixels): - outliers = (np.abs(charges_pedestals - median_pix_charge) / + outliers = (np.abs(median_pix_charge - median_median_pix_charge) / median_pix_charge_dev) > mad_max + if outliers.sum() > 0: removed_fraction = outliers.sum() / outliers.size log.info(f' Removed {removed_fraction:.2%} of pixels (outliers) ' f'from pedestal median calculation') - # Replace outliers by nans: - charges_pedestals = np.where(outliers, np.nan, charges_pedestals) - # Recompute the pixel-wise medians ignoring the outliers: - median_pix_charge = np.nanmedian(charges_pedestals, axis=0) # Now compute the median (for the whole camera) of the medians (for # each pixel) of the charges in pedestal events. Use only reliable - # pixels for this: + # pixels for this, and exclude outliers: n_valid_pixels = np.isfinite(median_pix_charge[reliable_pixels]).sum() if n_valid_pixels < min_number_of_valid_pixels: logging.warning(f' Too few valid pixels ({n_valid_pixels}) for ' @@ -172,7 +172,7 @@ def find_tailcuts(input_dir, run_number): median_ped_median_pix_charge.append(np.nan) else: median_ped_median_pix_charge.append(np.nanmedian( - median_pix_charge[reliable_pixels])) + median_pix_charge[reliable_pixels & ~outliers])) # convert to ndarray: median_ped_median_pix_charge = np.array(median_ped_median_pix_charge) From 8f801354d0e28952ae34458354eb76e9d5a84425 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Fri, 20 Dec 2024 10:18:16 +0100 Subject: [PATCH 31/45] Updated parameters of additional_nsb vs. 
median_qped parametrization (to those obtained with the proper pixel outlier exclusion in MC) --- lstchain/image/cleaning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 58737cac32..cfaea257c3 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -255,6 +255,6 @@ def get_nsb(median_ped): median_ped """ - params = [1.39498756, 0.28761336, 0.0858798 ] + params = [1.43121978, 0.31553277, 0.07626393] return (params[1] * (median_ped - params[0]) + params[2] * (median_ped - params[0]) ** 2) From 3f0749ef233d3f59a48ffdc8b2db8460815b69be Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Sat, 21 Dec 2024 00:02:45 +0100 Subject: [PATCH 32/45] Replaced median of pixel charge in pedestal events by the 95% quantile, which characterizes better the right-side tail of the distribution (more relevant for the analysis) --- lstchain/image/cleaning.py | 96 +++++++++++----------- lstchain/scripts/lstchain_find_tailcuts.py | 8 +- 2 files changed, 55 insertions(+), 49 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index cfaea257c3..9f5fc80187 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -54,15 +54,19 @@ def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction): def find_tailcuts(input_dir, run_number): """ - This functions uses DL1 files to determine tailcuts which are adequate - for the bulk of the pixels in a given run. It does so simply based on the - median (for the whole camera) of the median pixel charge for pedestal - events. + This function uses DL1 files to determine tailcuts which are adequate + for the bulk of the pixels in a given run. The script also returns the + suggested NSB adjustment needed in the "dark-sky" MC to match the data. 
+ The function uses the median (for the whole camera, excluding outliers) + of the 95% quantile of the pixel charge for pedestal events to deduce the + NSB level. It is good to use a large quantile of the pixel charge + distribution (vs. e.g. using the median) because what matters for having + a realistic noise simulation is the tail on the right-side, i.e. for + large pulses. For reasons of stability & simplicity of analysis, we cannot decide the - cleaning levels on a subrun-by-subrun basis. We select values which are - valid for the whole run. - The script also returns the suggested NSB adjustment needed in the - "dark-sky" MC to match the data. + cleaning levels (or the NSB tuning) on a subrun-by-subrun basis. We select + values which are more or less valid for the whole run. + The script will process a subset of the subruns (~10, hardcoded) of the run, distributed uniformly through it. @@ -97,7 +101,7 @@ def find_tailcuts(input_dir, run_number): # Minimum number of interleaved pedestals in subrun to proceed with # calculation: - min_number_of_ped_events = 10 + min_number_of_ped_events = 100 # Minimum number of valid pixels to consider the calculation of NSB level # acceptable: @@ -113,7 +117,7 @@ def find_tailcuts(input_dir, run_number): number_of_pedestals = [] usable_pixels = [] - median_ped_median_pix_charge = [] + median_ped_qt95_pix_charge = [] for dl1_file in dl1_files: log.info('\nInput file: %s', dl1_file) @@ -123,7 +127,7 @@ def find_tailcuts(input_dir, run_number): pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value num_pedestals = pedestal_mask.sum() - if num_pedestals < min_number_of_ped_events: + if num_pedestals < min_number_of_ped_events: log.warning(f' Too few interleaved pedestals (' f'{num_pedestals}) - skipped subrun!') continue @@ -142,50 +146,50 @@ def find_tailcuts(input_dir, run_number): charges_data = data_images['image'] charges_pedestals = charges_data[pedestal_mask] - # pixel-wise median charge through the subrun 
(#pixels): - median_pix_charge = np.nanmedian(charges_pedestals, axis=0) + # pixel-wise 95% quantile of ped pix charge through the subrun + # (#pixels): + qt95_pix_charge = np.nanquantile(charges_pedestals, 0.95, axis=0) # ignore pixels with 0 signal: - median_pix_charge = np.where(median_pix_charge > 0, - median_pix_charge, np.nan) + qt95_pix_charge = np.where(qt95_pix_charge > 0, qt95_pix_charge, np.nan) # median of medians accross camera: - median_median_pix_charge = np.nanmedian(median_pix_charge) - # mean abs deviation of pixel medians: - median_pix_charge_dev = median_abs_deviation(median_pix_charge, - nan_policy='omit') + median_qt95_pix_charge = np.nanmedian(qt95_pix_charge) + # mean abs deviation of pixel qt95 values: + qt95_pix_charge_dev = median_abs_deviation(qt95_pix_charge, + nan_policy='omit') # Just a cut to remove outliers (pixels): - outliers = (np.abs(median_pix_charge - median_median_pix_charge) / - median_pix_charge_dev) > mad_max + outliers = (np.abs(qt95_pix_charge - median_qt95_pix_charge) / + qt95_pix_charge_dev) > mad_max if outliers.sum() > 0: removed_fraction = outliers.sum() / outliers.size log.info(f' Removed {removed_fraction:.2%} of pixels (outliers) ' f'from pedestal median calculation') - # Now compute the median (for the whole camera) of the medians (for + # Now compute the median (for the whole camera) of the qt95's (for # each pixel) of the charges in pedestal events. 
Use only reliable # pixels for this, and exclude outliers: - n_valid_pixels = np.isfinite(median_pix_charge[reliable_pixels]).sum() + n_valid_pixels = np.isfinite(qt95_pix_charge[reliable_pixels]).sum() if n_valid_pixels < min_number_of_valid_pixels: logging.warning(f' Too few valid pixels ({n_valid_pixels}) for ' f'calculation!') - median_ped_median_pix_charge.append(np.nan) + median_ped_qt95_pix_charge.append(np.nan) else: - median_ped_median_pix_charge.append(np.nanmedian( - median_pix_charge[reliable_pixels & ~outliers])) - + median_ped_qt95_pix_charge.append(np.nanmedian(qt95_pix_charge[ + reliable_pixels & + ~outliers])) # convert to ndarray: - median_ped_median_pix_charge = np.array(median_ped_median_pix_charge) + median_ped_qt95_pix_charge = np.array(median_ped_qt95_pix_charge) number_of_pedestals = np.array(number_of_pedestals) # Now compute the median for all processed subruns, which is more robust # against e.g. subruns affected by car flashes. We exclude subruns # which have less than half of the median statistics per subrun. 
good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals) - qped = np.nanmedian(median_ped_median_pix_charge[good_stats]) + qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats]) # Now we also remove outliers (subruns) if any: - qped_dev = median_abs_deviation(median_ped_median_pix_charge[good_stats]) - not_outlier = (np.abs(median_ped_median_pix_charge - qped) / + qped_dev = median_abs_deviation(median_ped_qt95_pix_charge[good_stats]) + not_outlier = (np.abs(median_ped_qt95_pix_charge - qped) / qped_dev) < mad_max if (~good_stats).sum() > 0: @@ -198,7 +202,7 @@ def find_tailcuts(input_dir, run_number): f'calculation') # recompute with good-statistics and well-behaving runs: - qped = np.nanmedian(median_ped_median_pix_charge[good_stats & not_outlier]) + qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats & not_outlier]) log.info(f'\nNumber of subruns used in calculations: ' f'{(good_stats & not_outlier).sum()}') @@ -217,12 +221,12 @@ def find_tailcuts(input_dir, run_number): return qped, additional_nsb_rate, newconfig -def pic_th(mean_ped): +def pic_th(qt95_ped): """ Parameters ---------- - mean_ped: `float` - mean pixel charge in pedestal events (for the standard + qt95_ped: `float` + 95% quantile of pixel charge in pedestal events (for the standard LocalPeakWindowSearch algo & settings in lstchain) Returns @@ -231,30 +235,30 @@ def pic_th(mean_ped): recommended picture threshold for image cleaning (from a table) """ - mp_edges = [2.23, 2.88, 3.53, 4.19, 4.84] + mp_edges = np.array([5.85, 7.25, 8.75, 10.3, 11.67]) picture_threshold = np.array([8, 10, 12, 14, 16, 18]) - if mean_ped >= mp_edges[-1]: + if qt95_ped >= mp_edges[-1]: return picture_threshold[-1] - return picture_threshold[np.where(mp_edges > mean_ped)[0][0]] + return picture_threshold[np.where(mp_edges > qt95_ped)[0][0]] -def get_nsb(median_ped): +def get_nsb(qt95_ped): """ Parameters ---------- - median_ped: `float` - median pixel charge in pedestal events + qt95_ped: `float` + 
95% quantile of pixel charge in pedestal events Returns ------- `float` (from a parabolic parametrization) the recommended additional NSB (in p.e. / ns) that has to be added to the "dark MC" waveforms in - order to match the data for which the median pedestal charge is - median_ped + order to match the data for which the 95% quantile of pedestal pixel + charge is qt95_ped """ - params = [1.43121978, 0.31553277, 0.07626393] - return (params[1] * (median_ped - params[0]) + - params[2] * (median_ped - params[0]) ** 2) + params = [3.95147396, 0.12642504, 0.01718627] + return (params[1] * (qt95_ped - params[0]) + + params[2] * (qt95_ped - params[0]) ** 2) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 36bf625eb2..c5149cbce5 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -44,6 +44,7 @@ log = logging.getLogger(__name__) + def main(): args = parser.parse_args() log.setLevel(logging.INFO) @@ -67,14 +68,15 @@ def main(): sys.exit(1) run_id = args.run_number - median_qped, additional_nsb_rate, newconfig = find_tailcuts(input_dir, - run_id) + median_qt95_qped, additional_nsb_rate, newconfig = find_tailcuts(input_dir, + run_id) json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json') dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, json_filename, overwrite=True) - log.info(f'\nMedian pedestal charge: {median_qped:.3f} p.e.') + log.info(f'\nMedian of 95% quantile of pedestal charge:' + f' {median_qt95_qped:.3f} p.e.') log.info('\nCleaning settings:') log.info(newconfig) log.info('\nWritten to:') From 9f344d409859e70213ea36c0d260fe595d57eaec Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Olaizola Date: Sun, 22 Dec 2024 20:09:58 +0100 Subject: [PATCH 33/45] Automatic log file name if not provided --- lstchain/scripts/lstchain_find_tailcuts.py | 16 ++++++++++------ 1 file 
changed, 10 insertions(+), 6 deletions(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index c5149cbce5..9b4a2dc4a1 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -50,13 +50,18 @@ def main(): log.setLevel(logging.INFO) logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) - log_file = args.log_file - if log_file is not None: - handler = logging.FileHandler(log_file, mode='w') - logging.getLogger().addHandler(handler) - + run_id = args.run_number output_dir = args.output_dir.absolute() output_dir.mkdir(exist_ok=True, parents=True) + + log_file = args.log_file + if log_file is None: + log_file = f'log_find_tailcuts_Run{run_id:05d}.log' + + log_file = Path(output_dir, log_file) + handler = logging.FileHandler(log_file, mode='w') + logging.getLogger().addHandler(handler) + input_dir = args.input_dir.absolute() if not input_dir.exists(): @@ -67,7 +72,6 @@ def main(): logging.error('Input directory is not a directory!') sys.exit(1) - run_id = args.run_number median_qt95_qped, additional_nsb_rate, newconfig = find_tailcuts(input_dir, run_id) From 66add8b7158c59363eb4336b5ad5b3721f81292e Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Wed, 15 Jan 2025 10:38:39 +0100 Subject: [PATCH 34/45] Better treatment of pathological runs --- lstchain/image/cleaning.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 9f5fc80187..891a057378 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -186,7 +186,17 @@ def find_tailcuts(input_dir, run_number): # against e.g. subruns affected by car flashes. We exclude subruns # which have less than half of the median statistics per subrun. 
good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals) + + # First check if we have any valid subruns at all: + if np.isfinite(median_ped_qt95_pix_charge[good_stats]).sum() == 0: + qped = np.nan + qped_dev = np.nan + additional_nsb_rate = np.nan + log.error('No valid computation was possible with any of the processed subruns!') + return qped, additional_nsb_rate, None + qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats]) + not_outlier = np.zeros(len(median_ped_qt95_pix_charge), dtype='bool') # Now we also remove outliers (subruns) if any: qped_dev = median_abs_deviation(median_ped_qt95_pix_charge[good_stats]) not_outlier = (np.abs(median_ped_qt95_pix_charge - qped) / From 57725a7ce9149d460ca0e86426fe049208c3e7a3 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Wed, 15 Jan 2025 10:40:58 +0100 Subject: [PATCH 35/45] Update lstchain_find_tailcuts.py Proper error message if calculation failed --- lstchain/scripts/lstchain_find_tailcuts.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 9b4a2dc4a1..58722771d3 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -74,7 +74,10 @@ def main(): median_qt95_qped, additional_nsb_rate, newconfig = find_tailcuts(input_dir, run_id) - + if newconfig is None: + logging.error('lstchain_find_tailcuts failed!') + sys.exit(1) + json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json') dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, From 684d2ea61b11dca4ee14a79051f813af0eb21d79 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Wed, 15 Jan 2025 11:29:54 +0100 Subject: [PATCH 36/45] Report fraction of unreliable pixels --- lstchain/image/cleaning.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lstchain/image/cleaning.py 
b/lstchain/image/cleaning.py index 891a057378..1882a6b2e6 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -116,7 +116,6 @@ def find_tailcuts(input_dir, run_number): max_number_of_processed_subruns)] number_of_pedestals = [] - usable_pixels = [] median_ped_qt95_pix_charge = [] for dl1_file in dl1_files: @@ -141,8 +140,12 @@ def find_tailcuts(input_dir, run_number): unusable_hg = data_calib['unusable_pixels'][0][0] unusable_lg = data_calib['unusable_pixels'][0][1] - reliable_pixels = ~(unusable_hg | unusable_lg) - usable_pixels.append(reliable_pixels) + unreliable_pixels = unusable_hg | unusable_lg + if unreliable_pixels.sum() > 0: + log.info(f' Removed {unreliable_pixels.sum()/unreliable_pixels.size:.2%} of pixels ' + f' due to unreliable calibration!') + + reliable_pixels = ~unreliable_pixels charges_data = data_images['image'] charges_pedestals = charges_data[pedestal_mask] From 9425c835dd57f9a946efc1e5e3b9f42c0d5b3ec2 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Wed, 15 Jan 2025 11:32:59 +0100 Subject: [PATCH 37/45] Remove spaces from logging message --- lstchain/image/cleaning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 1882a6b2e6..ba5b023a64 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -143,7 +143,7 @@ def find_tailcuts(input_dir, run_number): unreliable_pixels = unusable_hg | unusable_lg if unreliable_pixels.sum() > 0: log.info(f' Removed {unreliable_pixels.sum()/unreliable_pixels.size:.2%} of pixels ' - f' due to unreliable calibration!') + f'due to unreliable calibration!') reliable_pixels = ~unreliable_pixels From 9259b25b8bc0f181ed8db1d9ef00f433378d7b2b Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Wed, 15 Jan 2025 11:35:32 +0100 Subject: [PATCH 38/45] Include run number in message --- lstchain/image/cleaning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index ba5b023a64..9acf34b77f 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -195,7 +195,7 @@ def find_tailcuts(input_dir, run_number): qped = np.nan qped_dev = np.nan additional_nsb_rate = np.nan - log.error('No valid computation was possible with any of the processed subruns!') + log.error('No valid computation was possible for run {run_number} with any of the processed subruns!') return qped, additional_nsb_rate, None qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats]) From 69cef5c9684693dac4408ca8446d6c0b6d74bc56 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Wed, 15 Jan 2025 11:36:57 +0100 Subject: [PATCH 39/45] Fix log message --- lstchain/image/cleaning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 9acf34b77f..005f23c476 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -195,7 +195,7 @@ def find_tailcuts(input_dir, run_number): qped = np.nan qped_dev = np.nan additional_nsb_rate = np.nan - log.error('No valid computation was possible for run {run_number} with any of the processed subruns!') + log.error(f'No valid computation was possible for run {run_number} with any of the processed subruns!') return qped, additional_nsb_rate, None qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats]) From 0b0eebb84d4acc90ed4e90022e9fe636b4278015 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Thu, 23 Jan 2025 09:35:04 +0100 Subject: [PATCH 40/45] Remove unused "tailcut" settings To avoid confusion, we set the "tailcut" setting in the json file (which is not used later) to {} (empty), rather than the default 8,4 (picture, boundary). The default appeared in the attrs of the resulting DL1 file, which may lead to confusion. 
The actually used settings are those in tailcuts_clean_with_pedestal_threshold --- lstchain/scripts/lstchain_find_tailcuts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 58722771d3..0e11bf5110 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -80,7 +80,8 @@ def main(): json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json') dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, - 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']}, + 'dynamic_cleaning': get_standard_config()['dynamic_cleaning'], + 'tailcut': {}}, json_filename, overwrite=True) log.info(f'\nMedian of 95% quantile of pedestal charge:' f' {median_qt95_qped:.3f} p.e.') From ea232832c79c626b328d5f14d45d0c926883acb4 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Thu, 23 Jan 2025 11:24:34 +0100 Subject: [PATCH 41/45] Require that at least half of subruns are ok --- lstchain/image/cleaning.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index 005f23c476..b02b41ebbf 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -214,6 +214,15 @@ def find_tailcuts(input_dir, run_number): f'(out of {not_outlier.size}) from pedestal median ' f'calculation') + # If less than half of the processed files are valid for the final calculation, + # we declare it unsuccessful (data probably have problems): + if (good_stats & not_outlier).sum() < 0.5 * len(dl1_files): + qped = np.nan + additional_nsb_rate = np.nan + log.error(f'Calculation failed for more than half of the processed subruns!') + return qped, additional_nsb_rate, None + + # recompute with good-statistics and well-behaving runs: qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats & not_outlier]) log.info(f'\nNumber of subruns used in calculations: ' From 
fbc5cd19702929702166b57ff7230deee6af3754 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Thu, 23 Jan 2025 11:27:08 +0100 Subject: [PATCH 42/45] Update cleaning.py --- lstchain/image/cleaning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index b02b41ebbf..d10c7a9663 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -219,7 +219,7 @@ def find_tailcuts(input_dir, run_number): if (good_stats & not_outlier).sum() < 0.5 * len(dl1_files): qped = np.nan additional_nsb_rate = np.nan - log.error(f'Calculation failed for more than half of the processed subruns!') + log.error(f'Calculation failed for more than half of the processed subruns of run {run_number}!') return qped, additional_nsb_rate, None From 24aa61fd2a8f9bf9d97ff8ac1e635e51a191de40 Mon Sep 17 00:00:00 2001 From: Daniel Morcuende Date: Mon, 27 Jan 2025 15:38:22 +0100 Subject: [PATCH 43/45] [skip-ci] Suggestions from code review (docstring cleanup, typos) --- lstchain/image/cleaning.py | 23 +++++++++++----------- lstchain/scripts/lstchain_find_tailcuts.py | 9 +++++---- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index d10c7a9663..d419f36184 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -61,7 +61,7 @@ def find_tailcuts(input_dir, run_number): of the 95% quantile of the pixel charge for pedestal events to deduce the NSB level. It is good to use a large quantile of the pixel charge distribution (vs. e.g. using the median) because what matters for having - a relaistic noise simulation is the tail on the right-side, i.e. for + a realistic noise simulation is the tail on the right-side, i.e. for large pulses. For reasons of stability & simplicity of analysis, we cannot decide the cleaning levels (or the NSB tuning) on a subrun-by-subrun basis. 
We select @@ -73,17 +73,18 @@ def find_tailcuts(input_dir, run_number): Parameters ---------- input_dir: `Path` - directory where the DL1 files (subrun-wise, i,e, including + directory where the DL1 files (subrun-wise, i.e., including DL1a) are stored - run_number: ìnt` + run_number : int run number to be processed Returns ------- - additional_nsb_rate: p.e./ns rate of NSB to be added to "dark MC" to - match the noise in the data - newconfig (dict): cleaning configuration for running the DL1ab stage + additional_nsb_rate : float + p.e./ns rate of NSB to be added to "dark MC" to match the noise in the data + newconfig : dict + cleaning configuration for running the DL1ab stage """ log.setLevel(logging.INFO) @@ -107,7 +108,7 @@ def find_tailcuts(input_dir, run_number): # acceptable: min_number_of_valid_pixels = 1000 - # Aprox. maximum number of subruns (uniformly distributed through the + # Approx. maximum number of subruns (uniformly distributed through the # run) to be processed: max_number_of_processed_subruns = 10 # Keep only ~max_number_of_processed_subruns subruns, distributed @@ -154,7 +155,7 @@ def find_tailcuts(input_dir, run_number): qt95_pix_charge = np.nanquantile(charges_pedestals, 0.95, axis=0) # ignore pixels with 0 signal: qt95_pix_charge = np.where(qt95_pix_charge > 0, qt95_pix_charge, np.nan) - # median of medians accross camera: + # median of medians across camera: median_qt95_pix_charge = np.nanmedian(qt95_pix_charge) # mean abs deviation of pixel qt95 values: qt95_pix_charge_dev = median_abs_deviation(qt95_pix_charge, @@ -247,13 +248,13 @@ def pic_th(qt95_ped): """ Parameters ---------- - qt95_ped: `float` + qt95_ped : float 95% quantile of pixel charge in pedestal events (for the standard LocalPeakWindowSearch algo & settings in lstchain) Returns ------- - `int` + int recommended picture threshold for image cleaning (from a table) """ @@ -274,7 +275,7 @@ def get_nsb(qt95_ped): Returns ------- - `float` + float (from a parabolic parametrization) 
the recommended additional NSB (in p.e. / ns) that has to be added to the "dark MC" waveforms in order to match the data for which the 95% quantile of pedestal pixel diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index 0e11bf5110..a085772675 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -15,8 +15,9 @@ to match the data, in units of p.e./ns, to be applied at the waveforms level, i.e with lstchain_tune_nsb_waveform.py -lstchain_find_tailcuts -d "/..../DL1/YYYYMMDD/v0.10/tailcut84 -r 13181 --log -out.log " +Example +------- +lstchain_find_tailcuts -d /.../DL1/YYYYMMDD/v0.10/tailcut84 -r 13181 --log out.log """ @@ -30,12 +31,12 @@ parser = argparse.ArgumentParser(description="Tailcut finder") -parser.add_argument('-d', '--input-dir', dest='input_dir', +parser.add_argument('-d', '--input-dir', type=Path, default='./', help='Input DL1 directory') parser.add_argument('-r', '--run', dest='run_number', type=int, help='Run number') -parser.add_argument('-o', '--output-dir', dest='output_dir', +parser.add_argument('-o', '--output-dir', type=Path, default='./', help='Path to the output directory (default: %(default)s)') parser.add_argument('--log', dest='log_file', From 9f5df1db79f30ba63b04a9169a6c541226007f06 Mon Sep 17 00:00:00 2001 From: Daniel Morcuende Date: Mon, 27 Jan 2025 15:42:05 +0100 Subject: [PATCH 44/45] Apply suggestions from code review (paths cleanup) --- lstchain/scripts/lstchain_find_tailcuts.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py index a085772675..2fe345cd66 100644 --- a/lstchain/scripts/lstchain_find_tailcuts.py +++ b/lstchain/scripts/lstchain_find_tailcuts.py @@ -55,11 +55,9 @@ def main(): output_dir = args.output_dir.absolute() output_dir.mkdir(exist_ok=True, parents=True) - log_file = args.log_file - if log_file is 
None: - log_file = f'log_find_tailcuts_Run{run_id:05d}.log' + log_file = args.log_file or f'log_find_tailcuts_Run{run_id:05d}.log' - log_file = Path(output_dir, log_file) + log_file = output_dir / log_file handler = logging.FileHandler(log_file, mode='w') logging.getLogger().addHandler(handler) @@ -79,7 +77,7 @@ def main(): logging.error('lstchain_find_tailcuts failed!') sys.exit(1) - json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json') + json_filename = output_dir / f'dl1ab_Run{run_id:05d}.json' dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig, 'dynamic_cleaning': get_standard_config()['dynamic_cleaning'], 'tailcut': {}}, From 563bc2660aaca05cc719a8ddd120207c08e76123 Mon Sep 17 00:00:00 2001 From: Abelardo Moralejo Date: Mon, 27 Jan 2025 16:02:15 +0100 Subject: [PATCH 45/45] Removed logging.SetLevel Set in the script calling this function --- lstchain/image/cleaning.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py index d419f36184..35044bd475 100644 --- a/lstchain/image/cleaning.py +++ b/lstchain/image/cleaning.py @@ -87,8 +87,6 @@ def find_tailcuts(input_dir, run_number): cleaning configuration for running the DL1ab stage """ - log.setLevel(logging.INFO) - # subrun-wise dl1 file names: dl1_filenames = Path(input_dir, run_to_dl1_filename(1, run_number, 0).replace(