Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tailcut finder function and script #1325

Open
wants to merge 39 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
9f698dd
Script to "compute" tailcuts and NSB level to be added to the MC
moralejo Dec 13, 2024
7a707e3
Minor fix
moralejo Dec 13, 2024
afd9ba6
mean => median
moralejo Dec 13, 2024
47115f0
cast to int
moralejo Dec 13, 2024
2336cdb
Changed arguments
moralejo Dec 16, 2024
be28773
Minor fix
moralejo Dec 16, 2024
1c8aa49
Path to str in glob argument
moralejo Dec 16, 2024
fcaad2f
Simplification on the selection of the subsample of subruns
moralejo Dec 16, 2024
c359bf6
Fix in subrun selection
moralejo Dec 16, 2024
0733892
Refactoring
moralejo Dec 16, 2024
ee6f29f
set logging.INFO
moralejo Dec 16, 2024
fb1ca4d
Additional NSB computation
moralejo Dec 16, 2024
ef812ce
Better log message
moralejo Dec 16, 2024
17a15c5
Outlier detection
moralejo Dec 16, 2024
2a677b4
Changed outlier detection
moralejo Dec 16, 2024
16bffee
Improved log message
moralejo Dec 16, 2024
c115c11
Better log message
moralejo Dec 16, 2024
9269f2c
Added the median pedestal charge among the returns of find_tailcuts
moralejo Dec 17, 2024
3fd7b3a
string formatting
moralejo Dec 17, 2024
cf54e81
New parameters for function to obtain (from pedestal charges) the add…
moralejo Dec 17, 2024
794f992
Removed from lstchain_dvr_pixselector.py the creation of a json file …
moralejo Dec 18, 2024
353fb18
Remove unused imports
moralejo Dec 18, 2024
b78758c
Check possible wrong input
moralejo Dec 18, 2024
9d88a86
Check for number of valid pixels
moralejo Dec 18, 2024
ac61081
Check for number of pedestals in subrun
moralejo Dec 18, 2024
c8c47de
missing continue
moralejo Dec 18, 2024
2913357
Avoid useless warning from rare empty pixels
moralejo Dec 18, 2024
7486271
Modified limits for cleaning level changes. The former ones referred …
moralejo Dec 19, 2024
c02bf8e
Improved comment
moralejo Dec 19, 2024
45a0cfa
Fixed buggy computation of outlier pixels. Previous one was not removing
moralejo Dec 20, 2024
8f80135
Updated parameters of additional_nsb vs. median_qped parametrization
moralejo Dec 20, 2024
3f0749e
Replaced median of pixel charge in pedestal events by the 95% quantil…
moralejo Dec 20, 2024
9f344d4
Automatic log file name if not provided
moralejo Dec 22, 2024
66add8b
Better treatment of pathological runs
moralejo Jan 15, 2025
57725a7
Update lstchain_find_tailcuts.py
moralejo Jan 15, 2025
684d2ea
Report fraction of unreliable pixels
moralejo Jan 15, 2025
9425c83
Remove spaces from logging message
moralejo Jan 15, 2025
9259b25
Include run number in message
moralejo Jan 15, 2025
69cef5c
Fix log message
moralejo Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 223 additions & 1 deletion lstchain/image/cleaning.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
import glob
import logging
import numpy as np
from pathlib import Path
from lstchain.paths import run_to_dl1_filename
from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key
from lstchain.io.io import dl1_params_tel_mon_cal_key
from lstchain.io.config import get_standard_config
from ctapipe.containers import EventType
from scipy.stats import median_abs_deviation

__all__ = ['apply_dynamic_cleaning']
from ctapipe.io import read_table
__all__ = ['apply_dynamic_cleaning',
'find_tailcuts',
'pic_th']

log = logging.getLogger(__name__)


def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction):
Expand Down Expand Up @@ -36,3 +50,211 @@ def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction):
mask_dynamic_cleaning = (image >= dynamic_threshold) & signal_pixels

return mask_dynamic_cleaning


def find_tailcuts(input_dir, run_number):
    """
    Determine image-cleaning tailcuts adequate for the bulk of the pixels
    in a given run, using its DL1 files. The decision is based simply on
    the median (for the whole camera) of the median pixel charge in
    interleaved pedestal events.

    For reasons of stability & simplicity of analysis, we cannot decide the
    cleaning levels on a subrun-by-subrun basis, so values valid for the
    whole run are selected. Only a subset of the subruns (~10, hardcoded)
    distributed uniformly through the run is processed.

    Parameters
    ----------
    input_dir: `Path`
        directory where the DL1 files (subrun-wise, i.e. including
        DL1a) are stored

    run_number: `int`
        run number to be processed

    Returns
    -------
    qped: `float`
        median (over the processed subruns) of the median pedestal pixel
        charge; np.nan if no subrun yielded a valid value
    additional_nsb_rate: `float`
        p.e./ns rate of NSB to be added to "dark MC" to match the noise
        in the data
    newconfig: `dict`
        cleaning configuration for running the DL1ab stage
    """

    log.setLevel(logging.INFO)

    # subrun-wise dl1 file names (build the .0000 name, then glob over the
    # subrun index):
    dl1_filenames = Path(input_dir,
                         run_to_dl1_filename(1, run_number, 0).replace(
                             '.0000.h5', '.????.h5'))
    all_dl1_files = glob.glob(str(dl1_filenames))
    all_dl1_files.sort()

    # Number of median absolute deviations (mad) away from the median that a
    # value has to be to be considered an outlier:
    mad_max = 5  # would exclude 8e-4 of the pdf for a gaussian

    # Minimum number of interleaved pedestals in subrun to proceed with
    # calculation:
    min_number_of_ped_events = 10

    # Minimum number of valid pixels to consider the calculation of NSB level
    # acceptable:
    min_number_of_valid_pixels = 1000

    # Approx. maximum number of subruns (uniformly distributed through the
    # run) to be processed:
    max_number_of_processed_subruns = 10
    # Keep only ~max_number_of_processed_subruns subruns, distributed
    # along the run:
    dl1_files = all_dl1_files[::int(1 + len(all_dl1_files) /
                                    max_number_of_processed_subruns)]

    number_of_pedestals = []
    median_ped_median_pix_charge = []

    for dl1_file in dl1_files:
        log.info('\nInput file: %s', dl1_file)

        data_parameters = read_table(dl1_file, dl1_params_lstcam_key)
        event_type_data = data_parameters['event_type'].data
        pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value

        num_pedestals = pedestal_mask.sum()
        if num_pedestals < min_number_of_ped_events:
            # Too-low statistics would make the median unreliable:
            log.warning(f' Too few interleaved pedestals ('
                        f'{num_pedestals}) - skipped subrun!')
            continue

        number_of_pedestals.append(num_pedestals)
        data_images = read_table(dl1_file, dl1_images_lstcam_key)
        data_calib = read_table(dl1_file, dl1_params_tel_mon_cal_key)
        # data_calib['unusable_pixels'] , indices: (Gain Calib_id Pixel)

        # Get the "unusable" flags from the pedcal file:
        unusable_hg = data_calib['unusable_pixels'][0][0]
        unusable_lg = data_calib['unusable_pixels'][0][1]

        # A pixel is considered reliable only if usable in both gains:
        reliable_pixels = ~(unusable_hg | unusable_lg)

        charges_data = data_images['image']
        charges_pedestals = charges_data[pedestal_mask]
        # pixel-wise median charge through the subrun:
        median_pix_charge = np.nanmedian(charges_pedestals, axis=0)
        median_pix_charge_dev = median_abs_deviation(charges_pedestals,
                                                     axis=0,
                                                     nan_policy='omit')
        # Avoid later warnings from empty pixels:
        median_pix_charge_dev = np.where(median_pix_charge_dev > 0,
                                         median_pix_charge_dev, np.nan)
        # Just a cut to remove outliers (pixels):
        outliers = (np.abs(charges_pedestals - median_pix_charge) /
                    median_pix_charge_dev) > mad_max
        if outliers.sum() > 0:
            removed_fraction = outliers.sum() / outliers.size
            log.info(f' Removed {removed_fraction:.2%} of pixels (outliers) '
                     f'from pedestal median calculation')
            # Replace outliers by nans:
            charges_pedestals = np.where(outliers, np.nan, charges_pedestals)
            # Recompute the pixel-wise medians ignoring the outliers:
            median_pix_charge = np.nanmedian(charges_pedestals, axis=0)

        # Now compute the median (for the whole camera) of the medians (for
        # each pixel) of the charges in pedestal events. Use only reliable
        # pixels for this:
        n_valid_pixels = np.isfinite(median_pix_charge[reliable_pixels]).sum()
        if n_valid_pixels < min_number_of_valid_pixels:
            # Use the module logger (not the root logger) so the message
            # follows the configured handlers and level:
            log.warning(f' Too few valid pixels ({n_valid_pixels}) for '
                        f'calculation!')
            median_ped_median_pix_charge.append(np.nan)
        else:
            median_ped_median_pix_charge.append(np.nanmedian(
                median_pix_charge[reliable_pixels]))

    # convert to ndarray:
    median_ped_median_pix_charge = np.array(median_ped_median_pix_charge)
    number_of_pedestals = np.array(number_of_pedestals)

    # Now compute the median for all processed subruns, which is more robust
    # against e.g. subruns affected by car flashes. We exclude subruns
    # which have less than half of the median statistics per subrun.
    good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals)
    qped = np.nanmedian(median_ped_median_pix_charge[good_stats])
    # Now we also remove outliers (subruns) if any:
    qped_dev = median_abs_deviation(median_ped_median_pix_charge[good_stats])
    not_outlier = (np.abs(median_ped_median_pix_charge - qped) /
                   qped_dev) < mad_max

    if (~good_stats).sum() > 0:
        log.info(f'\nNumber of subruns with low statistics: '
                 f'{(~good_stats).sum()} - removed from pedestal median '
                 f'calculation')
    if (~not_outlier).sum() > 0:
        log.info(f'\nRemoved {(~not_outlier).sum()} outlier subruns '
                 f'(out of {not_outlier.size}) from pedestal median '
                 f'calculation')

    # recompute with good-statistics and well-behaving runs:
    qped = np.nanmedian(median_ped_median_pix_charge[good_stats & not_outlier])
    log.info(f'\nNumber of subruns used in calculations: '
             f'{(good_stats & not_outlier).sum()}')

    picture_threshold = pic_th(qped)
    boundary_threshold = picture_threshold / 2

    # We now create a .json files with recommended image cleaning
    # settings for lstchain_dl1ab.
    newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold']
    # casts below are needed, json does not like numpy's int64:
    newconfig['picture_thresh'] = int(picture_threshold)
    newconfig['boundary_thresh'] = int(boundary_threshold)

    additional_nsb_rate = get_nsb(qped)

    return qped, additional_nsb_rate, newconfig


def pic_th(mean_ped):
    """
    Look up the recommended picture threshold for image cleaning, as a
    function of the mean pedestal pixel charge.

    Parameters
    ----------
    mean_ped: `float`
        mean pixel charge in pedestal events (for the standard
        LocalPeakWindowSearch algo & settings in lstchain)

    Returns
    -------
    `int`
        recommended picture threshold for image cleaning (from a table)

    """
    # Upper edges (in p.e.) of the mean-pedestal-charge intervals that map
    # to each of the picture thresholds below. Must be an ndarray: a plain
    # list would raise TypeError in the element-wise comparison below when
    # mean_ped is a plain Python float (it only worked for numpy scalars):
    mp_edges = np.array([2.23, 2.88, 3.53, 4.19, 4.84])
    picture_threshold = np.array([8, 10, 12, 14, 16, 18])

    # Above the last edge, use the highest tabulated threshold:
    if mean_ped >= mp_edges[-1]:
        return picture_threshold[-1]
    # Otherwise pick the threshold of the first interval whose upper edge
    # exceeds mean_ped:
    return picture_threshold[np.where(mp_edges > mean_ped)[0][0]]


def get_nsb(median_ped):
    """
    Compute, from a parabolic parametrization, the recommended additional
    NSB rate (in p.e./ns) that has to be added to the "dark MC" waveforms
    in order to match data whose median pedestal pixel charge is
    median_ped.

    Parameters
    ----------
    median_ped: `float`
        median pixel charge in pedestal events

    Returns
    -------
    `float`
        recommended additional NSB rate (p.e./ns)

    """
    # Fit parameters of the parametrization: charge offset, linear and
    # quadratic coefficients.
    offset = 1.39498756
    linear_coeff = 0.28761336
    quadratic_coeff = 0.0858798

    delta = median_ped - offset
    return linear_coeff * delta + quadratic_coeff * delta ** 2
36 changes: 1 addition & 35 deletions lstchain/scripts/lstchain_dvr_pixselector.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,7 @@
The program creates in the output directory (which is the current by default)
a file DVR_settings_LST-1.Run12469.h5 which contains a table "run_summary"
which includes the DVR algorithm parameters determined for each processed
subrun. It also creates a file with recommended cleaning settings for running
DL1ab, based on the NSB level measured in the processed runs. We use
as picture threshold the closest even number not smaller than the charge
"min_charge_for_certain_selection" (averaged for all subruns and rounded)
which is the value from which a pixel will be certainly kept by the Data Volume
Reduction.
subrun.

Then we run again the script over all subruns, and using the option to create
the pixel maks (this can also be done subrun by subrun, to parallelize the
Expand Down Expand Up @@ -68,7 +63,6 @@
from lstchain.paths import parse_dl1_filename
from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key
from lstchain.io.io import dl1_params_tel_mon_cal_key
from lstchain.io.config import get_standard_config, dump_config

from ctapipe.io import read_table
from ctapipe.core import Container, Field
Expand Down Expand Up @@ -506,34 +500,6 @@ def main():
time.sleep(300)
continue


# We now create also .json files with recommended image cleaning
# settings for lstchain_dl1ab. We determine the picture threshold
# from the values of min_charge_for_certain_selection:

if not write_pixel_masks:
log.info('Output files:')
for file in list_of_output_files:
log.info(file)
dvrtable = read_table(file, "/run_summary")
picture_threshold = get_typical_dvr_min_charge(dvrtable)

# we round it to an even number of p.e., just to limit the amount
# of different settings in the analysis (i.e. we change
# picture_threshold in steps of 2 p.e.):
if picture_threshold % 2 != 0:
picture_threshold += 1
boundary_threshold = picture_threshold / 2
newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold']
newconfig['picture_thresh'] = picture_threshold
newconfig['boundary_thresh'] = boundary_threshold
run = int(file.name[file.name.find('Run')+3:-3])
json_filename = Path(output_dir, f'dl1ab_Run{run:05d}.json')
dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig,
'dynamic_cleaning': get_standard_config()['dynamic_cleaning']},
json_filename, overwrite=True)
log.info(json_filename)

log.info('lstchain_dvr_pixselector finished successfully!')


Expand Down
84 changes: 84 additions & 0 deletions lstchain/scripts/lstchain_find_tailcuts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python

"""
This script uses DL1 files to determine tailcuts which are adequate for the
bulk of the pixels in a given run. It does so simply based on the median (for
the whole camera) of the median pixel charge for pedestal events.

For reasons of stability & simplicity of analysis, we cannot decide the
cleaning levels on a subrun-by-subrun basis. We select values which are ok
for the whole run.

The script writes out the cleaning settings to a json file,
e.g. dl1ab_Run13181.json
It also returns the suggested NSB adjustment needed in the "dark-sky" MC
to match the data, in units of p.e./ns.

lstchain_find_tailcuts -d "/..../DL1/YYYYMMDD/v0.10/tailcut84" -r 13181 \
--log out.log

"""

import argparse
import logging
from pathlib import Path
import sys

from lstchain.io.config import get_standard_config, dump_config
from lstchain.image.cleaning import find_tailcuts

parser = argparse.ArgumentParser(description="Tailcut finder")

# Directory containing the subrun-wise DL1 files of the run:
parser.add_argument('-d', '--input-dir', dest='input_dir',
                    type=Path, default='./',
                    help='Input DL1 directory')
# Run to be processed. NOTE(review): not marked required=True, so it can
# arrive as None in main() — confirm intended handling.
parser.add_argument('-r', '--run', dest='run_number',
                    type=int, help='Run number')
parser.add_argument('-o', '--output-dir', dest='output_dir',
                    type=Path, default='./',
                    help='Path to the output directory (default: %(default)s)')
# Optional log file; messages are also sent to stdout (handler added in
# main()):
parser.add_argument('--log', dest='log_file',
                    type=str, default=None,
                    help='Log file name')

# Module-level logger; handlers are attached to the root logger in main():
log = logging.getLogger(__name__)

def main():
    """
    Entry point of lstchain_find_tailcuts: parse the command line, run
    find_tailcuts for the requested run and write the recommended cleaning
    settings to a json file (e.g. dl1ab_Run13181.json) in the output
    directory.
    """
    args = parser.parse_args()
    log.setLevel(logging.INFO)
    # Attach handlers to the root logger so messages from library modules
    # (e.g. lstchain.image.cleaning) are captured too:
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    log_file = args.log_file
    if log_file is not None:
        handler = logging.FileHandler(log_file, mode='w')
        logging.getLogger().addHandler(handler)

    output_dir = args.output_dir.absolute()
    output_dir.mkdir(exist_ok=True, parents=True)
    input_dir = args.input_dir.absolute()

    if not input_dir.exists():
        log.error('Input directory does not exist')
        sys.exit(1)

    if not input_dir.is_dir():
        log.error('Input directory is not a directory!')
        sys.exit(1)

    run_id = args.run_number
    if run_id is None:
        # -r/--run is not declared required by argparse; fail here with a
        # clear message instead of an opaque TypeError when formatting the
        # output file name below:
        log.error('Please provide a run number (-r / --run)')
        sys.exit(1)

    median_qped, additional_nsb_rate, newconfig = find_tailcuts(input_dir,
                                                                run_id)

    json_filename = Path(output_dir, f'dl1ab_Run{run_id:05d}.json')
    dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig,
                 'dynamic_cleaning': get_standard_config()['dynamic_cleaning']},
                json_filename, overwrite=True)
    log.info(f'\nMedian pedestal charge: {median_qped:.3f} p.e.')
    log.info('\nCleaning settings:')
    log.info(newconfig)
    log.info('\nWritten to:')
    log.info(json_filename)
    log.info(f'\nAdditional NSB rate (over dark MC): {additional_nsb_rate:.4f} '
             f'p.e./ns')

    log.info('lstchain_find_tailcuts finished successfully!')
Loading