diff --git a/lstchain/image/cleaning.py b/lstchain/image/cleaning.py
index 6fa9da82f7..35044bd475 100644
--- a/lstchain/image/cleaning.py
+++ b/lstchain/image/cleaning.py
@@ -1,6 +1,20 @@
+import glob
+import logging
 import numpy as np
+from pathlib import Path
+from lstchain.paths import run_to_dl1_filename
+from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key
+from lstchain.io.io import dl1_params_tel_mon_cal_key
+from lstchain.io.config import get_standard_config
+from ctapipe.containers import EventType
+from scipy.stats import median_abs_deviation
 
-__all__ = ['apply_dynamic_cleaning']
+from ctapipe.io import read_table
 
+__all__ = ['apply_dynamic_cleaning',
+           'find_tailcuts',
+           'pic_th']
+
+log = logging.getLogger(__name__)
 
 
 def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction):
@@ -36,3 +50,236 @@ def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction):
     mask_dynamic_cleaning = (image >= dynamic_threshold) & signal_pixels
 
     return mask_dynamic_cleaning
+
+
+def find_tailcuts(input_dir, run_number):
+    """
+    This function uses DL1 files to determine tailcuts which are adequate
+    for the bulk of the pixels in a given run. It also returns the
+    suggested NSB adjustment needed in the "dark-sky" MC to match the data.
+    The function uses the median (for the whole camera, excluding outliers)
+    of the 95% quantile of the pixel charge for pedestal events to deduce
+    the NSB level. A large quantile of the pixel charge distribution is
+    preferable to e.g. the median, because what matters for a realistic
+    noise simulation is the right-side tail of the distribution, i.e.
+    large pulses.
+    For reasons of stability and simplicity of the analysis, we do not
+    decide the cleaning levels (or the NSB tuning) on a subrun-by-subrun
+    basis; we select values which are approximately valid for the whole run.
+
+    The function processes a subset of the run's subruns (~10, hardcoded),
+    distributed uniformly through it.
+
+    Parameters
+    ----------
+    input_dir : `Path`
+        directory where the DL1 files (subrun-wise, i.e., including
+        DL1a) are stored
+
+    run_number : int
+        run number to be processed
+
+    Returns
+    -------
+    qped : float
+        median (across the camera) of the 95% quantile of the pedestal
+        pixel charge, over the processed subruns
+    additional_nsb_rate : float
+        p.e./ns rate of NSB to be added to "dark MC" to match the noise in
+        the data
+    newconfig : dict
+        cleaning configuration for running the DL1ab stage
+    """
+
+    # subrun-wise dl1 file names:
+    dl1_filenames = Path(input_dir,
+                         run_to_dl1_filename(1, run_number, 0).replace(
+                             '.0000.h5', '.????.h5'))
+    all_dl1_files = glob.glob(str(dl1_filenames))
+    all_dl1_files.sort()
+
+    # Number of median absolute deviations (MAD) away from the median that
+    # a value has to be to be considered an outlier:
+    mad_max = 5  # would exclude ~8e-4 of the pdf for a Gaussian
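+    # (illustrative: for a Gaussian, 1 MAD ~ 0.6745 sigma, so a 5-MAD cut
+    # corresponds to ~3.4 sigma, i.e. a two-sided tail of ~7.5e-4)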
+
+    # Minimum number of interleaved pedestals in a subrun to proceed with
+    # the calculation:
+    min_number_of_ped_events = 100
+
+    # Minimum number of valid pixels to consider the calculation of the
+    # NSB level acceptable:
+    min_number_of_valid_pixels = 1000
+
+    # Approx. maximum number of subruns (uniformly distributed through
+    # the run) to be processed:
+    max_number_of_processed_subruns = 10
+    # Keep only ~max_number_of_processed_subruns subruns, distributed
+    # along the run:
+    dl1_files = all_dl1_files[::int(1 + len(all_dl1_files) /
+                                    max_number_of_processed_subruns)]
+
+    number_of_pedestals = []
+    median_ped_qt95_pix_charge = []
+
+    for dl1_file in dl1_files:
+        log.info('\nInput file: %s', dl1_file)
+
+        data_parameters = read_table(dl1_file, dl1_params_lstcam_key)
+        event_type_data = data_parameters['event_type'].data
+        pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value
+
+        num_pedestals = pedestal_mask.sum()
+        if num_pedestals < min_number_of_ped_events:
+            log.warning(f'  Too few interleaved pedestals ('
+                        f'{num_pedestals}) - skipped subrun!')
+            continue
+
+        number_of_pedestals.append(num_pedestals)
+        data_images = read_table(dl1_file, dl1_images_lstcam_key)
+        data_calib = read_table(dl1_file, dl1_params_tel_mon_cal_key)
+        # data_calib['unusable_pixels'] indices: (gain, calib_id, pixel)
+
+        # Get the "unusable" flags from the pedcal file:
+        unusable_hg = data_calib['unusable_pixels'][0][0]
+        unusable_lg = data_calib['unusable_pixels'][0][1]
+
+        unreliable_pixels = unusable_hg | unusable_lg
+        if unreliable_pixels.sum() > 0:
+            log.info(f'  Removed '
+                     f'{unreliable_pixels.sum()/unreliable_pixels.size:.2%} '
+                     f'of pixels due to unreliable calibration!')
+
+        reliable_pixels = ~unreliable_pixels
+
+        charges_data = data_images['image']
+        charges_pedestals = charges_data[pedestal_mask]
+        # pixel-wise 95% quantile of the pedestal pixel charge through
+        # the subrun (one value per pixel):
+        qt95_pix_charge = np.nanquantile(charges_pedestals, 0.95, axis=0)
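+        # (illustrative: with the >= 100 pedestal events required above,
+        # this quantile is determined by the ~5 largest pedestal charges
+        # seen by each pixel in the subrun)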
+        # ignore pixels with 0 signal:
+        qt95_pix_charge = np.where(qt95_pix_charge > 0, qt95_pix_charge,
+                                   np.nan)
+        # median across the camera of the pixel qt95 values:
+        median_qt95_pix_charge = np.nanmedian(qt95_pix_charge)
+        # median absolute deviation of the pixel qt95 values:
+        qt95_pix_charge_dev = median_abs_deviation(qt95_pix_charge,
+                                                   nan_policy='omit')
+
+        # Just a cut to remove outliers (pixels):
+        outliers = (np.abs(qt95_pix_charge - median_qt95_pix_charge) /
+                    qt95_pix_charge_dev) > mad_max
+
+        if outliers.sum() > 0:
+            removed_fraction = outliers.sum() / outliers.size
+            log.info(f'  Removed {removed_fraction:.2%} of pixels (outliers) '
+                     f'from pedestal median calculation')
+
+        # Now compute the median (for the whole camera) of the qt95's (for
+        # each pixel) of the charges in pedestal events. Use only reliable
+        # pixels for this, and exclude outliers:
+        n_valid_pixels = np.isfinite(qt95_pix_charge[reliable_pixels]).sum()
+        if n_valid_pixels < min_number_of_valid_pixels:
+            log.warning(f'  Too few valid pixels ({n_valid_pixels}) for '
+                        f'calculation!')
+            median_ped_qt95_pix_charge.append(np.nan)
+        else:
+            median_ped_qt95_pix_charge.append(
+                np.nanmedian(qt95_pix_charge[reliable_pixels & ~outliers]))
+
+    # convert to ndarray:
+    median_ped_qt95_pix_charge = np.array(median_ped_qt95_pix_charge)
+    number_of_pedestals = np.array(number_of_pedestals)
+
+    # Now compute the median over all processed subruns, which is more
+    # robust against e.g. subruns affected by car flashes. We exclude
+    # subruns which have less than half of the median statistics per
+    # subrun:
+    good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals)
+
+    # First check if we have any valid subruns at all:
+    if np.isfinite(median_ped_qt95_pix_charge[good_stats]).sum() == 0:
+        qped = np.nan
+        additional_nsb_rate = np.nan
+        log.error(f'No valid computation was possible for run {run_number} '
+                  f'with any of the processed subruns!')
+        return qped, additional_nsb_rate, None
+
+    qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats])
+    # Now we also remove outliers (subruns), if any:
+    qped_dev = median_abs_deviation(median_ped_qt95_pix_charge[good_stats])
+    not_outlier = (np.abs(median_ped_qt95_pix_charge - qped) /
+                   qped_dev) < mad_max
+
+    if (~good_stats).sum() > 0:
+        log.info(f'\nNumber of subruns with low statistics: '
+                 f'{(~good_stats).sum()} - removed from pedestal median '
+                 f'calculation')
+    if (~not_outlier).sum() > 0:
+        log.info(f'\nRemoved {(~not_outlier).sum()} outlier subruns '
+                 f'(out of {not_outlier.size}) from pedestal median '
+                 f'calculation')
+
+    # If less than half of the processed subruns are valid for the final
+    # calculation, we declare it unsuccessful (the data probably have
+    # problems):
+    if (good_stats & not_outlier).sum() < 0.5 * len(dl1_files):
+        qped = np.nan
+        additional_nsb_rate = np.nan
+        log.error(f'Calculation failed for more than half of the processed '
+                  f'subruns of run {run_number}!')
+        return qped, additional_nsb_rate, None
+
+    # Recompute with good-statistics, well-behaved subruns only:
+    qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats & not_outlier])
+    log.info(f'\nNumber of subruns used in calculations: '
+             f'{(good_stats & not_outlier).sum()}')
+
+    picture_threshold = pic_th(qped)
+    boundary_threshold = picture_threshold / 2
+
+    # We now create a .json file with recommended image cleaning
+    # settings for lstchain_dl1ab:
+    newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold']
+    # casts below are needed, json does not like numpy's int64:
+    newconfig['picture_thresh'] = int(picture_threshold)
+    newconfig['boundary_thresh'] = int(boundary_threshold)
+
+    additional_nsb_rate = get_nsb(qped)
+
+    return qped, additional_nsb_rate, newconfig
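+
+# For illustration (assumed numbers): a run with qped ~ 8 p.e. falls in the
+# 7.25-8.75 p.e. bin of the pic_th table below, so find_tailcuts would
+# suggest picture_thresh = 12 and boundary_thresh = 6 on top of the
+# standard 'tailcuts_clean_with_pedestal_threshold' settings.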
+
+
+def pic_th(qt95_ped):
+    """
+    Parameters
+    ----------
+    qt95_ped : float
+        95% quantile of pixel charge in pedestal events (for the standard
+        LocalPeakWindowSum algo & settings in lstchain)
+
+    Returns
+    -------
+    int
+        recommended picture threshold for image cleaning (from a table)
+
+    """
+    mp_edges = np.array([5.85, 7.25, 8.75, 10.3, 11.67])
+    picture_threshold = np.array([8, 10, 12, 14, 16, 18])
+
+    if qt95_ped >= mp_edges[-1]:
+        return picture_threshold[-1]
+    return picture_threshold[np.where(mp_edges > qt95_ped)[0][0]]
+
+
+def get_nsb(qt95_ped):
+    """
+    Parameters
+    ----------
+    qt95_ped : `float`
+        95% quantile of pixel charge in pedestal events
+
+    Returns
+    -------
+    float
+        (from a parabolic parametrization) the recommended additional NSB
+        (in p.e. / ns) that has to be added to the "dark MC" waveforms in
+        order to match the data for which the 95% quantile of pedestal
+        pixel charge is qt95_ped
+
+    """
+    params = [3.95147396, 0.12642504, 0.01718627]
+    return (params[1] * (qt95_ped - params[0]) +
+            params[2] * (qt95_ped - params[0]) ** 2)
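+
+# For illustration (assumed inputs): with the parametrization above,
+# get_nsb(3.95) ~ 0 (dark-sky-like data need no extra noise), while
+# get_nsb(8.0) ~ 0.79 p.e./ns.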
diff --git a/lstchain/scripts/lstchain_dvr_pixselector.py b/lstchain/scripts/lstchain_dvr_pixselector.py
index 41adf403db..e6f85d4fb7 100644
--- a/lstchain/scripts/lstchain_dvr_pixselector.py
+++ b/lstchain/scripts/lstchain_dvr_pixselector.py
@@ -27,12 +27,7 @@
 The program creates in the output directory (which is the current by default)
 a file DVR_settings_LST-1.Run12469.h5 which contains a table "run_summary"
 which includes the DVR algorithm parameters determined for each processed
-subrun. It also creates a file with recommended cleaning settings for running
-DL1ab, based on the NSB level measured in the processed runs. We use
-as picture threshold the closest even number not smaller than the charge
-"min_charge_for_certain_selection" (averaged for all subruns and rounded)
-which is the value from which a pixel will be certainly kept by the Data Volume
-Reduction.
+subrun.
 
 Then we run again the script over all subruns, and using the option to create
 the pixel masks (this can also be done subrun by subrun, to parallelize the
@@ -68,7 +63,6 @@
 from lstchain.paths import parse_dl1_filename
 from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key
 from lstchain.io.io import dl1_params_tel_mon_cal_key
-from lstchain.io.config import get_standard_config, dump_config
 from ctapipe.io import read_table
 from ctapipe.core import Container, Field
@@ -506,34 +500,6 @@ def main():
                 time.sleep(300)
                 continue
 
-
-    # We now create also .json files with recommended image cleaning
-    # settings for lstchain_dl1ab. We determine the picture threshold
-    # from the values of min_charge_for_certain_selection:
-
-    if not write_pixel_masks:
-        log.info('Output files:')
-        for file in list_of_output_files:
-            log.info(file)
-            dvrtable = read_table(file, "/run_summary")
-            picture_threshold = get_typical_dvr_min_charge(dvrtable)
-
-            # we round it to an even number of p.e., just to limit the amount
-            # of different settings in the analysis (i.e. we change
-            # picture_threshold in steps of 2 p.e.):
-            if picture_threshold % 2 != 0:
-                picture_threshold += 1
-            boundary_threshold = picture_threshold / 2
-            newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold']
-            newconfig['picture_thresh'] = picture_threshold
-            newconfig['boundary_thresh'] = boundary_threshold
-            run = int(file.name[file.name.find('Run')+3:-3])
-            json_filename = Path(output_dir, f'dl1ab_Run{run:05d}.json')
-            dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig,
-                         'dynamic_cleaning': get_standard_config()['dynamic_cleaning']},
-                        json_filename, overwrite=True)
-            log.info(json_filename)
-
     log.info('lstchain_dvr_pixselector finished successfully!')
diff --git a/lstchain/scripts/lstchain_find_tailcuts.py b/lstchain/scripts/lstchain_find_tailcuts.py
new file mode 100644
index 0000000000..2fe345cd66
--- /dev/null
+++ b/lstchain/scripts/lstchain_find_tailcuts.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+
+"""
+This script uses DL1 files to determine tailcuts which are adequate for the
+bulk of the pixels in a given run. It does so based on the median (for the
+whole camera) of the 95% quantile of the pixel charge for pedestal events.
+
+For reasons of stability and simplicity of the analysis, we do not decide
+the cleaning levels on a subrun-by-subrun basis; we select values which
+are valid for the whole run.
+
+The script writes out the cleaning settings to a json file,
+e.g. dl1ab_Run13181.json
+It also returns the suggested NSB adjustment needed in the "dark-sky" MC
+to match the data, in units of p.e./ns, to be applied at the waveform
+level, i.e. with lstchain_tune_nsb_waveform.py
+
+Example
+-------
+lstchain_find_tailcuts -d /.../DL1/YYYYMMDD/v0.10/tailcut84 -r 13181 --log out.log
+
+"""
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+from lstchain.io.config import get_standard_config, dump_config
+from lstchain.image.cleaning import find_tailcuts
+
+parser = argparse.ArgumentParser(description="Tailcut finder")
+
+parser.add_argument('-d', '--input-dir',
+                    type=Path, default='./',
+                    help='Input DL1 directory')
+parser.add_argument('-r', '--run', dest='run_number',
+                    type=int, help='Run number')
+parser.add_argument('-o', '--output-dir',
+                    type=Path, default='./',
+                    help='Path to the output directory (default: %(default)s)')
+parser.add_argument('--log', dest='log_file',
+                    type=str, default=None,
+                    help='Log file name')
+
+log = logging.getLogger(__name__)
+
+
+def main():
+    args = parser.parse_args()
+    log.setLevel(logging.INFO)
+    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
+
+    run_id = args.run_number
+    output_dir = args.output_dir.absolute()
+    output_dir.mkdir(exist_ok=True, parents=True)
+
+    log_file = args.log_file or f'log_find_tailcuts_Run{run_id:05d}.log'
+
+    log_file = output_dir / log_file
+    handler = logging.FileHandler(log_file, mode='w')
+    logging.getLogger().addHandler(handler)
+
+    input_dir = args.input_dir.absolute()
+
+    if not input_dir.exists():
+        log.error('Input directory does not exist!')
+        sys.exit(1)
+
+    if not input_dir.is_dir():
+        log.error('Input path is not a directory!')
+        sys.exit(1)
+
+    median_qt95_qped, additional_nsb_rate, newconfig = find_tailcuts(input_dir,
+                                                                     run_id)
+    if newconfig is None:
+        log.error('lstchain_find_tailcuts failed!')
+        sys.exit(1)
+
+    json_filename = output_dir / f'dl1ab_Run{run_id:05d}.json'
+    dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig,
+                 'dynamic_cleaning': get_standard_config()['dynamic_cleaning'],
+                 'tailcut': {}},
+                json_filename, overwrite=True)
+    log.info(f'\nMedian of 95% quantile of pedestal charge:'
+             f' {median_qt95_qped:.3f} p.e.')
+    log.info('\nCleaning settings:')
+    log.info(newconfig)
+    log.info('\nWritten to:')
+    log.info(json_filename)
+    log.info(f'\nAdditional NSB rate (over dark MC): {additional_nsb_rate:.4f} '
+             f'p.e./ns')
+
+    log.info('lstchain_find_tailcuts finished successfully!')
+
+
+if __name__ == '__main__':
+    main()