diff --git a/docs/api-reference/irfs/index.rst b/docs/api-reference/irfs/index.rst deleted file mode 100644 index 933122e7..00000000 --- a/docs/api-reference/irfs/index.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. _irfs: - -==================================================== -Instrument Response Functions (`~magicctapipe.irfs`) -==================================================== - -.. currentmodule:: magicctapipe.irfs - -Reference/API -============= - -.. automodapi:: magicctapipe.irfs - :no-inheritance-diagram: diff --git a/download_data.sh b/download_data.sh deleted file mode 100644 index d3224959..00000000 --- a/download_data.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -set -e - -echo "http://data.magic.pic.es/Users/ctapipe_io_magic/test_data/real/calibrated/20210314_M1_05095172.001_Y_CrabNebula-W0.40+035.root" > test_data_real.txt -echo "http://data.magic.pic.es/Users/ctapipe_io_magic/test_data/real/calibrated/20210314_M1_05095172.002_Y_CrabNebula-W0.40+035.root" >> test_data_real.txt -echo "http://data.magic.pic.es/Users/ctapipe_io_magic/test_data/real/calibrated/20210314_M2_05095172.001_Y_CrabNebula-W0.40+035.root" >> test_data_real.txt -echo "http://data.magic.pic.es/Users/ctapipe_io_magic/test_data/real/calibrated/20210314_M2_05095172.002_Y_CrabNebula-W0.40+035.root" >> test_data_real.txt - -echo "http://data.magic.pic.es/Users/ctapipe_io_magic/test_data/simulated/calibrated/GA_M1_za35to50_8_824318_Y_w0.root" > test_data_simulated.txt -echo "http://data.magic.pic.es/Users/ctapipe_io_magic/test_data/simulated/calibrated/GA_M1_za35to50_8_824319_Y_w0.root" >> test_data_simulated.txt -echo "http://data.magic.pic.es/Users/ctapipe_io_magic/test_data/simulated/calibrated/GA_M2_za35to50_8_824318_Y_w0.root" >> test_data_simulated.txt -echo "http://data.magic.pic.es/Users/ctapipe_io_magic/test_data/simulated/calibrated/GA_M2_za35to50_8_824319_Y_w0.root" >> test_data_simulated.txt - -if [ -z "$TEST_DATA_USER" ]; then - echo -n "Username: " - read TEST_DATA_USER - echo -fi - -if [ -z "$TEST_DATA_PASSWORD" ]; then - echo -n "Password: " - read -s TEST_DATA_PASSWORD - echo -fi - -if ! wget \ - -i test_data_real.txt \ - --user="$TEST_DATA_USER" \ - --password="$TEST_DATA_PASSWORD" \ - --no-verbose \ - --timestamping \ - --directory-prefix=test_data/real/calibrated; then - echo "Problem in downloading the test data set for real data." -fi - -if ! wget \ - -i test_data_simulated.txt \ - --user="$TEST_DATA_USER" \ - --password="$TEST_DATA_PASSWORD" \ - --no-verbose \ - --timestamping \ - --directory-prefix=test_data/simulated/calibrated; then - echo "Problem in downloading the test data set for simulated data." -fi - -rm -f test_data_real.txt test_data_simulated.txt diff --git a/magicctapipe/config/config_MAGIC_LST.yaml b/magicctapipe/config/config_MAGIC_LST.yaml deleted file mode 100644 index 2267e08e..00000000 --- a/magicctapipe/config/config_MAGIC_LST.yaml +++ /dev/null @@ -1,150 +0,0 @@ -# --- DATA FILES --------------------------------------------------------------- -# NOTE: -# - stereo_reco will store files in "DL1" folder ("_Link" is removed from hillas_h5) -# - train_rfs and train_rfs will look in "DL1_Link" folder instead -# - train_rfs is run on all files, so so "_run${run}_" is removed from hillas_h5 -data_files: - mc: - train_sample: - mask_sim: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/DL0_Link/gamma_diffuse/*_run${run}_*.simtel.gz' - hillas_h5: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL1_Link/gamma_diffuse_train/*_run${run}_*.h5' - test_sample: - mask_sim: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/DL0_Link/gamma_00deg/*_run${run}_*.simtel.gz' - hillas_h5: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL1_Link/gamma_00deg_test/*_run${run}_*.h5' - reco_h5: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL2/gamma_00deg_test_reco/*_reco.h5' - data: - train_sample: - mask_sim: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/DL0_Link/proton/*_run${run}_*.simtel.gz' - hillas_h5: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL1_Link/proton_train/*_run${run}_*.h5' - test_sample: - mask_sim: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/DL0_Link/proton/*_run${run}_*.simtel.gz' - hillas_h5: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL1_Link/proton_test/*_run${run}_*.h5' - reco_h5: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL2/proton_test_reco/*_reco.h5' - -# --- TELESCOPES --------------------------------------------------------------- -all_tels: - tel_ids: [1, 2, 3, 4] - tel_n: ['LST', 'MAGIC'] - tel_n_short: ['LST', 'M'] - stereo_id: -1 - -LST: - tel_ids: [1, 2, 3, 4] - cleaning_config: - picture_thresh: 8 - boundary_thresh: 4 - keep_isolated_pixels: false - min_number_picture_neighbors: 1 - camera_name: 'LSTCam' - min_pixel: 5 - -MAGIC: - tel_ids: [5, 6] - use_MARS_cleaning: true - cleaning_config: - picture_thresh: 6 - boundary_thresh: 3.5 - findhotpixels: false - max_time_diff: 2.46 - max_time_off: 7.38 - usesum: true - usetime: true - bad_pixel_config: - pedestalLevel: 400 - destalLevelVariance: 4.5 - pedestalType: 'FromExtractorRndm' - camera_name: 'MAGICCam' - min_pixel: 5 - position: # MAGIC telescope positions in m wrt. to the center of CTA simulations - 5: [-27.24, -146.66, 50.0] - 6: [-96.44, -96.77, 51.0] - -# --- GLOBAL ------------------------------------------------------------------- -global: - wrong_alt: 'tel_alt < 1.5707963267948966' - -# --- TRAIN CLASSIFIER RF ------------------------------------------------------ -classifier_rf: - check_train_test: False - save_dir: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/RFs/' - joblib_name: 'classifier_rf.joblib' - cuts: "(event_id > 0)" - settings: - n_estimators: 100 - min_samples_leaf: 10 - n_jobs: 3 - features: ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'r', 'psi', 'num_islands', 'h_max', - 'core_x', 'core_y', 'impact'] - fig_name: 'classifier_rf_gammaness' - fig_size: [20, 10] - test_file_n: 0 - -# --- TRAIN DIRECTION RF ------------------------------------------------------- -direction_rf: - check_train_test: False - save_dir: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/RFs/' - joblib_name: 'direction_rf.joblib' - cuts: "(event_id > 0)" - settings: - n_estimators: 100 - min_samples_leaf: 10 - n_jobs: 3 - features: - 'disp': ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'r', 'psi', 'h_max', 'core_x', - 'core_y', 'impact'] - 'pos_angle_shift': ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'r', 'psi', 'h_max', - 'core_x', 'core_y', 'impact'] - fig_name_theta2: 'Direction_RF_theta2' - fig_name_PSF_energy: 'Direction_RF_PSF_energy' - fig_name_PSF_offset: 'Direction_RF_PSF_offset' - fig_size: [12, 12] - test_file_n: 20 - -# --- TRAIN ENERGY RF ---------------------------------------------------------- -energy_rf: - check_train_test: False - save_dir: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/RFs/' - joblib_name: 'energy_rf.joblib' - cuts: "(event_id > 0)" - settings: - n_estimators: 100 - min_samples_leaf: 10 - n_jobs: 3 - features: ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'r', 'psi', 'intensity_width_1', - 'intensity_width_2', 'h_max', 'core_x', 'core_y', 'impact'] - fig_name: 'Energy_RF_migmatrix' - fig_size: [30, 10] - test_file_n: 0 - -# --- IRFs --------------------------------------------------------------------- -irfs: - T_OBS: 50 # u.hour - ALPHA: 0.2 # scaling between on and off region - MAX_BG_RADIUS: 1.0 # u.deg - cut_on_multiplicity: 2 - INITIAL_GH_CUT_EFFICENCY: 0.4 - MAX_GH_CUT_EFFICIENCY: 0.99 - GH_CUT_EFFICIENCY_STEP: 0.01 - MIN_GH_CUT_EFFICIENCY: 0.01 - theta_bins_exp_low: -1.9 - theta_bins_exp_high: 2.3005 - theta_bins_num: 50 - sensitivity_bins_exp_low: -1.9 - sensitivity_bins_exp_high: 2.31 - sensitivity_bins_num: 5 - max_files_gamma: 0 - max_files_proton: 0 - verbose: False - INTENSITY_CUT: 50 - LEAKAGE1_CUT: 0.15 - gamma_dl2: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL2/gamma_00deg_test_reco_merged.h5' - proton_dl2: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL2/proton_test_reco_merged.h5' - consider_electron: True - electron_dl2: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/DL2/electron_test_reco_merged.h5' - save_dir: '/fefs/aswg/workspace/davide.depaoli/CTA-MC/Prod5/Analysis/4LST/8-4_nocuts/IRFs/' - useless_cols: ['pixels_width_1', 'pixels_width_2', 'alt', 'alt_uncert', 'az', 'az_uncert', 'core_x', 'core_y', 'impact', - 'core_uncert', 'h_max', 'h_max_uncert', 'is_valid', 'tel_ids', 'average_intensity', 'goodness_of_fit', - 'disp_reco', 'disp_reco_err', 'az_reco_mean', 'alt_reco_mean', 'energy_reco_err', - 'energy_reco_mean', 'event_class_1', 'event_class_0_mean', 'event_class_1_mean', 'r', 'r', 'phi', - 'length', 'width', 'psi', 'skewness', 'kurtosis', 'slope', 'slope_err', 'intercept', 'intercept_err', - 'deviation', 'num_islands'] \ No newline at end of file diff --git a/magicctapipe/conftest.py b/magicctapipe/conftest.py index 0fcce33e..cbf40fb9 100644 --- a/magicctapipe/conftest.py +++ b/magicctapipe/conftest.py @@ -9,7 +9,7 @@ from astropy.table import Table from ctapipe.utils.download import download_file_cached -from magicctapipe.utils import resource_file +from magicctapipe.io import resource_file maxjoint = 13000 maxmonly = 500 diff --git a/magicctapipe/image/__init__.py b/magicctapipe/image/__init__.py index c9856823..d5aefe2a 100644 --- a/magicctapipe/image/__init__.py +++ b/magicctapipe/image/__init__.py @@ -1,10 +1,5 @@ from .calib import calibrate -from .cleaning import ( - MAGICClean, - PixelTreatment, - clean_image_params, - get_num_islands_MAGIC, -) +from .cleaning import MAGICClean, PixelTreatment, get_num_islands_MAGIC from .leakage import get_leakage __all__ = [ @@ -12,6 +7,5 @@ "PixelTreatment", "get_num_islands_MAGIC", "calibrate", - "clean_image_params", "get_leakage", ] diff --git a/magicctapipe/image/cleaning.py b/magicctapipe/image/cleaning.py index bee57a0f..6fce2a72 100644 --- a/magicctapipe/image/cleaning.py +++ b/magicctapipe/image/cleaning.py @@ -2,14 +2,12 @@ import itertools import numpy as np -from ctapipe.image import hillas_parameters, leakage_parameters, timing_parameters from scipy.sparse.csgraph import connected_components __all__ = [ "MAGICClean", "PixelTreatment", "get_num_islands_MAGIC", - "clean_image_params", ] @@ -672,41 +670,3 @@ def get_num_islands_MAGIC(camera, clean_mask, event_image): clean_neighbors = neighbors[clean_mask][:, clean_mask] num_islands, labels = connected_components(clean_neighbors, directed=False) return num_islands - - -def clean_image_params(geom, image, clean, peakpos): - """Evaluate cleaned image parameters - - Parameters - ---------- - geom : ctapipe.instrument.camera.geometry.CameraGeometry - camera geometry - image : numpy.ndarray - image - clean : numpy.ndarray - clean mask - peakpos : numpy.ndarray - peakpos - - Returns - ------- - tuple - tuple of three containers for hillas, leakage and timing parameters - """ - # Hillas parameters, same for LST and MAGIC. From ctapipe - hillas_p = hillas_parameters(geom=geom[clean], image=image[clean]) - # Leakage, same for LST and MAGIC. From ctapipe - leakage_p = leakage_parameters(geom=geom, image=image, cleaning_mask=clean) - # Timing parameters, same for LST and MAGIC. From ctapipe - timing_p = timing_parameters( - geom=geom[clean], - image=image[clean], - peak_time=peakpos[clean], - hillas_parameters=hillas_p, - ) - - # Make sure each telescope get's an arrow - # if abs(time_grad[tel_id]) < 0.2: - # time_grad[tel_id] = 1 - - return hillas_p, leakage_p, timing_p diff --git a/magicctapipe/io/__init__.py b/magicctapipe/io/__init__.py index dfa139b5..e1cf9fcc 100644 --- a/magicctapipe/io/__init__.py +++ b/magicctapipe/io/__init__.py @@ -25,6 +25,7 @@ load_mc_dl2_data_file, load_train_data_files, load_train_data_files_tel, + resource_file, save_pandas_data_in_table, telescope_combinations, ) @@ -51,4 +52,5 @@ "load_train_data_files_tel", "save_pandas_data_in_table", "telescope_combinations", + "resource_file", ] diff --git a/magicctapipe/io/io.py b/magicctapipe/io/io.py index 5c6745dc..3cf66daf 100644 --- a/magicctapipe/io/io.py +++ b/magicctapipe/io/io.py @@ -21,6 +21,11 @@ from pyirf.simulations import SimulatedEventsInfo from pyirf.utils import calculate_source_fov_offset, calculate_theta +try: + from importlib.resources import files +except ImportError: + from importlib_resources import files + from ..utils import calculate_mean_direction, transform_altaz_to_radec __all__ = [ @@ -38,6 +43,7 @@ "load_train_data_files_tel", "save_pandas_data_in_table", "telescope_combinations", + "resource_file", ] logger = logging.getLogger(__name__) @@ -1349,3 +1355,19 @@ def save_pandas_data_in_table( with tables.open_file(output_file, mode=mode) as f_out: f_out.create_table(group_name, table_name, createparents=True, obj=data_array) + + +def resource_file(filename): + """Get the absoulte path of magicctapipe resource files. + + Parameters + ---------- + filename : str + Input filename + + Returns + ------- + str + Absolute path of the input filename + """ + return files("magicctapipe").joinpath("resources", filename) diff --git a/magicctapipe/irfs/__init__.py b/magicctapipe/irfs/__init__.py deleted file mode 100644 index 02b5f509..00000000 --- a/magicctapipe/irfs/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from .utils import ( - convert_simu_info_mcp_to_pyirf, - plot_ang_res, - plot_effective_area, - plot_en_res_bias, - plot_en_res_resolution, - plot_gamma_eff_gh, - plot_irfs_MAGIC_LST, - plot_MAGIC_reference_sensitivity, - plot_MARS_sensitivity, - plot_sensitivity, - read_dl2_mcp_to_pyirf_MAGIC_LST_list, - read_simu_info_mcp_sum_num_showers, -) - -__all__ = [ - "read_simu_info_mcp_sum_num_showers", - "convert_simu_info_mcp_to_pyirf", - "read_dl2_mcp_to_pyirf_MAGIC_LST_list", - "plot_sensitivity", - "plot_en_res_bias", - "plot_en_res_resolution", - "plot_ang_res", - "plot_effective_area", - "plot_gamma_eff_gh", - "plot_irfs_MAGIC_LST", - "plot_MARS_sensitivity", - "plot_MAGIC_reference_sensitivity", -] diff --git a/magicctapipe/irfs/utils.py b/magicctapipe/irfs/utils.py deleted file mode 100644 index 65cc7903..00000000 --- a/magicctapipe/irfs/utils.py +++ /dev/null @@ -1,533 +0,0 @@ -import glob -import os - -import astropy.units as u -import matplotlib.pylab as plt -import numpy as np -import pandas as pd -from astropy import table -from astropy.io import fits -from astropy.table import QTable -from pyirf.simulations import SimulatedEventsInfo - -from ..utils import load_default_plot_settings, print_title, read_mc_header, save_plt - -__all__ = [ - "read_simu_info_mcp_sum_num_showers", - "convert_simu_info_mcp_to_pyirf", - "read_dl2_mcp_to_pyirf_MAGIC_LST_list", - "plot_sensitivity", - "plot_en_res_bias", - "plot_en_res_resolution", - "plot_ang_res", - "plot_effective_area", - "plot_gamma_eff_gh", - "plot_irfs_MAGIC_LST", - "plot_MARS_sensitivity", - "plot_MAGIC_reference_sensitivity", -] - - -def read_simu_info_mcp_sum_num_showers(file_list, mc_header_key="dl2/mc_header"): - """Function to read simulation information from DL2 files and sum the simultated - showers. Assumes that all the mc_headers are equal - - Parameters - ---------- - file_list : list - magic-cta-pipe DL2 file list - mc_header_key : str - mc_header key, by default "dl2/mc_header" - - Returns - ------- - pandas.DataFrame - mc_header with sum num showers - """ - d = read_mc_header(file_list[0], mc_header_key) - num_showers = 0 - if len(file_list) > 1: - for i, file in enumerate(file_list): - num_showers += int(read_mc_header(file, mc_header_key)["num_showers"]) - else: - num_showers = int(d["num_showers"].sum()) - d["num_showers"] = num_showers - # In the case of merged DL2 now num_showers is a list where each value is the sum - # of the showers... not very smart - return d - - -def convert_simu_info_mcp_to_pyirf(file_list, mc_header_key="dl2/mc_header"): - """Function to convert simulation information from magic-cta-pipe DL2 files to - pyirf format - - Parameters - ---------- - file_list : list - magic-cta-pipe DL2 file list - mc_header_key : str - mc_header key, by default "dl2/mc_header" - - Returns - ------- - pyirf.simulations.SimulatedEventsInfo - pyirf_simu_info - """ - simu_info = read_simu_info_mcp_sum_num_showers(file_list, mc_header_key) - # very bad way way to separate file_list and merged file - if len(file_list) > 1: - pyirf_simu_info = SimulatedEventsInfo( - n_showers=int(simu_info.num_showers) * int(simu_info.shower_reuse), - energy_min=float(simu_info.energy_range_min) * u.TeV, - energy_max=float(simu_info.energy_range_max) * u.TeV, - max_impact=float(simu_info.max_scatter_range) * u.m, - spectral_index=float(simu_info.spectral_index), - viewcone=float(simu_info.max_viewcone_radius) * u.deg, - ) - else: - pyirf_simu_info = SimulatedEventsInfo( - n_showers=int(simu_info.num_showers.iloc[0]) - * int(simu_info.shower_reuse.iloc[0]), - energy_min=float(simu_info.energy_range_min.iloc[0]) * u.TeV, - energy_max=float(simu_info.energy_range_max.iloc[0]) * u.TeV, - max_impact=float(simu_info.max_scatter_range.iloc[0]) * u.m, - spectral_index=float(simu_info.spectral_index.iloc[0]), - viewcone=float(simu_info.max_viewcone_radius.iloc[0]) * u.deg, - ) - # Regarding the max_impact, in pyirf is used for: - # A = np.pi * simulated_event_info.max_impact ** 2 - return pyirf_simu_info - - -def read_dl2_mcp_to_pyirf_MAGIC_LST_list( - file_mask, - reco_key="dl2/reco", - mc_header_key="dl2/mc_header", - useless_cols=[], - cuts="", - max_files=0, - eval_mean_events=False, - verbose=False, -): - """Function to read dl2 file and convert to pyirf format - - Parameters - ---------- - file_mask : str - file mask for magic-cta-pipe DL2 files - reco_key : str - key for DL2 reco files, by default "dl2/reco" - mc_header_key : str - mc_header key, by default "dl2/mc_header" - useless_cols : list - columns not used, by default [] - cuts : str - cuts on dl2 events, by default "" - max_files : int - max number of files to be processed, 0 to process all of them, by default 0 - eval_mean_events : bool - evaluate mean of event, to get single row per obs_id, by default False - verbose : bool - verbose mode, by default False - - Returns - ------- - tuple - events, pyirf_simu_info - """ - # Map column names (DL2 -> pyirf) - name_mapping = { - "tel_alt": "pointing_alt", - "tel_az": "pointing_az", - "energy_reco": "reco_energy", - "alt_reco": "reco_alt", - "az_reco": "reco_az", - # "intensity_width_1": "leakage_intensity_width_1", - # "intensity_width_2": "leakage_intensity_width_2", - "event_class_0": "gh_score", - # "pos_angle_shift_reco": "reco_source_fov_offset", # ??? - } - - # Map units - unit_mapping = { - "true_energy": u.TeV, - "reco_energy": u.TeV, - "pointing_alt": u.rad, - "pointing_az": u.rad, - "true_alt": u.rad, - "true_az": u.rad, - "reco_alt": u.rad, - "reco_az": u.rad, - } - - file_list = glob.glob(file_mask) - - if (max_files > 0) and (max_files < len(file_list)): - file_list = file_list[:max_files] - - pyirf_simu_info = convert_simu_info_mcp_to_pyirf(file_list, mc_header_key) - - first_time = True - for i, file in enumerate(file_list): - if verbose: - print(f"Analizing file: {file}") - try: - events_ = pd.read_hdf(file, key=reco_key) - if cuts != "": - print(f"Applying cuts: {cuts}") - print(len(events_)) - events_ = events_.query(cuts) - # l_ = ["obs_id", "event_id"] - # events_["multiplicity"] = events_["intensity"].groupby(level=l_).count() - # events_ = events_.query(cuts) - print(len(events_)) - events_ = events_.rename(columns=name_mapping) - if useless_cols != []: - events_ = events_.drop(useless_cols, axis=1, errors="ignore") - if eval_mean_events: - events_ = events_.groupby(["obs_id", "event_id"]).mean() - # events_ = events_.mean(level=1) - if first_time: - events = events_ - first_time = False - else: - events = events.append(events_) - except Exception as e: - print(f"ERROR: skipping file {file}\n{e}") - - events = table.QTable.from_pandas(events) - for k, v in unit_mapping.items(): - events[k] *= v - - return events, pyirf_simu_info - - -def plot_sensitivity(data, unit, label, ax=None, **kwargs): - """Plot sensitivity - - Parameters - ---------- - data : astropy.table.QTable - sensitivity data - unit : str - sensitivity unit - label : str - label for plot - ax : matplotlib.axes, optional - give it if you want to specify the axis, by default None - **kwargs : dict, optional - """ - e = data["reco_energy_center"] - s_mc = e**2 * data["flux_sensitivity"] - e_low, e_high = data["reco_energy_low"], data["reco_energy_high"] - ax_ = ax if ax is not None else plt - plt_ = ax_.errorbar( - e.to_value(u.GeV), - s_mc.to_value(unit), - xerr=[(e - e_low).to_value(u.GeV), (e_high - e).to_value(u.GeV)], - label=label, - **kwargs, - ) - return plt_ - - -def plot_en_res_bias(data, label, **kwargs): - """Plot energy resolution bias - - Parameters - ---------- - data : astropy.table.QTable - angular resolution data - label : str - label for plot - **kwargs : dict - """ - e = data["reco_energy_center"] - e_low, e_high = data["reco_energy_low"], data["reco_energy_high"] - plt.errorbar( - e.to_value(u.GeV), - data["bias"], - xerr=[(e - e_low).to_value(u.GeV), (e_high - e).to_value(u.GeV)], - label=label, - **kwargs, - ) - - -def plot_en_res_resolution(data, label, **kwargs): - """Plot energy resolution resolution - - Parameters - ---------- - data : astropy.table.QTable - angular resolution data - label : str - label for plot - **kwargs : dict - """ - e = data["reco_energy_center"] - e_low, e_high = data["reco_energy_low"], data["reco_energy_high"] - plt.errorbar( - e.to_value(u.GeV), - data["resolution"], - xerr=[(e - e_low).to_value(u.GeV), (e_high - e).to_value(u.GeV)], - label=label, - **kwargs, - ) - - -def plot_ang_res(data, label, **kwargs): - """Plot angular resolution - - Parameters - ---------- - data : astropy.table.QTable - angular resolution data - label : str - label for plot - **kwargs : dict - """ - e = data["reco_energy_center"] - e_low, e_high = data["reco_energy_low"], data["reco_energy_high"] - plt.errorbar( - e.to_value(u.GeV), - data["angular_resolution"].to_value(u.deg), - xerr=[(e - e_low).to_value(u.GeV), (e_high - e).to_value(u.GeV)], - label=label, - **kwargs, - ) - - -def plot_effective_area(data, label, **kwargs): - """Plot effective area - - Parameters - ---------- - data : astropy.table.QTable - effective area data - label : str - label for plot - **kwargs : dict - """ - e_low, e_high = data["ENERG_LO"][0], data["ENERG_HI"][0] - e = (e_low + e_high) / 2 - a = data["EFFAREA"][0, 0] - e = e_high - m2 = u.m * u.m - plt.errorbar( - e.to_value(u.GeV), - a.to_value(m2), - xerr=(e - e_low).to_value(u.GeV), - label=label, - **kwargs, - ) - - -def plot_gamma_eff_gh(gamma_efficiency, gh_cuts, sensitivity): - """Plot gamma efficiency and gh cuts - - Parameters - ---------- - gamma_efficiency : astropy.table.QTable - gamma efficiency - gh_cuts : astropy.table.QTable - gh cuts - sensitivity : astropy.table.QTable - sensitivity - """ - fig, ax = plt.subplots() - ax.set_xlabel("Energy (GeV)") - ax.set_xscale("log") - # Consider only energy where the sensitivity is estimated - m_ = sensitivity["n_signal"] > 0 - plt.plot(gh_cuts["center"][m_].to(u.GeV), gh_cuts["cut"][m_], "-o", label="GH cut") - plt.plot( - gamma_efficiency["center"][m_].to(u.GeV), - gamma_efficiency["eff_gh"][m_], - "-o", - label="Gamma Efficiency GH", - ) - plt.plot( - gamma_efficiency["center"][m_].to(u.GeV), - gamma_efficiency["eff"][m_], - "-o", - label="Gamma Efficiency", - ) - plt.legend() - - -def plot_irfs_MAGIC_LST(config_file, irfs_dir): - """Plot IRFs for MAGIC and/or LST array using pyirf - - Parameters - ---------- - config_file : str - configuration file - """ - print_title("Plot IRFs") - - load_default_plot_settings() - - # --- Open file --- - # Open fits - hdu_open = fits.open(os.path.join(irfs_dir, "pyirf.fits.gz")) - - # --- Plot Sensitivity --- - sensitivity = QTable.read(hdu_open, hdu="SENSITIVITY") - fig, ax = plt.subplots(figsize=(12, 8)) - ax.set_title(r"Minimal Flux Needed for 5$\mathrm{\sigma}$ Detection in 50 hours") - ax.set_xscale("log") - ax.set_yscale("log") - ax.set_xlabel("Reconstructed energy (GeV)") - unit = u.Unit("TeV cm-2 s-1") - ax.set_ylabel( - rf"$(E^2 \cdot \mathrm{{Flux Sensitivity}}) /$ ({unit.to_string('latex')})" - ) - ax.grid(which="both") - - plot_sensitivity(data=sensitivity, unit=unit, label="MC") - - # Plot magic sensitivity - plot_MAGIC_reference_sensitivity(ax) - - # Plot Crab SED - # plot_utils.plot_Crab_SED( - # ax, 100, 5 * u.GeV, 1e4 * u.GeV, label="100% Crab" - # ) # Energy in GeV - # plot_utils.plot_Crab_SED( - # ax, 10, 5 * u.GeV, 1e4 * u.GeV, linestyle="--", label="10% Crab" - # ) # Energy in GeV - # plot_utils.plot_Crab_SED( - # ax, 1, 5 * u.GeV, 1e4 * u.GeV, linestyle=":", label="1% Crab" - # ) # Energy in GeV - - plt.legend() - - save_plt( - n="Sensitivity", - rdir=irfs_dir, - vect="pdf", - ) - - # --- Plot Angular Resolution --- - ang_res = QTable.read(hdu_open, hdu="ANGULAR_RESOLUTION") - fig, ax = plt.subplots() - ax.set_xscale("log") - ax.set_xlabel("Reconstructed energy (GeV)") - ax.set_ylabel("Angular resolution (deg)") - - plot_ang_res(data=ang_res, label="Angular Resolution") - save_plt( - n="Angular_Resolution", - rdir=irfs_dir, - vect="pdf", - ) - - # --- Effective Area --- - effective_area = QTable.read(hdu_open, hdu="EFFECTIVE_AREA") - fig, ax = plt.subplots() - ax.set_xscale("log") - ax.set_yscale("log") - ax.set_xlabel("True energy (GeV)") - ax.set_ylabel(r"Effective Area ($\mathrm{m^2}$)") - plot_effective_area(data=effective_area, label="Effective Area") - save_plt( - n="Effective_Area", - rdir=irfs_dir, - vect="pdf", - ) - - # --- GH cuts and Gamma Efficiency --- - gh_cuts = QTable.read(hdu_open, hdu="GH_CUTS") - gamma_efficiency = QTable.read(hdu_open, hdu="GAMMA_EFFICIENCY") - plot_gamma_eff_gh(gamma_efficiency, gh_cuts, sensitivity) - save_plt( - n="Gamma_Efficiency", - rdir=irfs_dir, - vect="pdf", - ) - - -def plot_MARS_sensitivity(array="4LST", label="", print_data=False, **kwargs): - """Plot Sensitivity from MARS - - Parameters - ---------- - array : str - telescope array, by default "4LST" - - Possibilities: - - * "4LST": file = "magic-cta-pipe/data/MARS_4LST.txt" - * "MAGIC": file = "magic-cta-pipe/data/MARS_MAGIC.txt" - * "MAGIC_LST1": file = "magic-cta-pipe/data/MARS_MAGIC_LST1.txt" - - label : str - custom plot label, by default "" - print_data : bool - print data, by default False - """ - available_arrays = ["4LST", "MAGIC", "MAGIC_LST1"] - - if array in available_arrays: - f_ = f"MARS_{array}.txt" - else: - print("Invalid array") - return - if label == "": - label = f"{array} Di Pierro et al. ICRC2019" - - # Load data - file = os.path.join(os.path.dirname(os.path.realpath(__file__)), f"../../data/{f_}") - d = np.loadtxt(file, unpack=True) - e_mars_, s_mars_, err_e_mars_, err_s_mars_ = [d_[:-1] for d_ in d] - - # Units - unit_file = u.Unit("erg cm-2 s-1") - unit = u.Unit("TeV cm-2 s-1") - - # Convert sensitivity from erg/(s cm**2) to TeV/(s cm**2) - s_mars = (s_mars_ * unit_file).to(unit).value - err_s_mars = (err_s_mars_ * unit_file).to(unit).value - - # Convert energy from log(E/TeV) to GeV - e_mars = ((10 ** (e_mars_)) * u.TeV).to(u.GeV).value - e_low = ((10 ** (e_mars_ - err_e_mars_)) * u.TeV).to(u.GeV).value - e_high = ((10 ** (e_mars_ + err_e_mars_)) * u.TeV).to(u.GeV).value - err_e_mars = [(e_mars - e_low), (e_high - e_mars)] - - # Set default values in kwargs - if "linestyle" not in kwargs.keys(): - kwargs["linestyle"] = "--" - - # Plot - plt.errorbar( - e_mars, s_mars, xerr=err_e_mars, yerr=err_s_mars, label=label, **kwargs - ) - if print_data: - print("Energy\t\tDirection") - [print(f"{l_[0]}\t{l_[1]}") for l_ in list(map(list, zip(*[e_mars, s_mars])))] - - -def plot_MAGIC_reference_sensitivity(ax, **kwargs): - """Plot MAGIC reference sensitivity - - Parameters - ---------- - ax : matplotlib.axes - ax where you want to plot the sensitivity - **kwargs : dict - """ - d = np.loadtxt( - os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "../../data/MAGIC_Sensitivity_magicmpp.txt", - ), - unpack=True, - ) - e, e_low, e_high = d[0], d[1], d[2] - s, err_s = d[5], d[6] - if "label" not in kwargs.keys(): - kwargs["label"] = "MAGIC Reference magic.mpp.mpg.de" - if "color" not in kwargs.keys(): - kwargs["color"] = "k" - ax.errorbar(e, s, xerr=[(e - e_low), (e_high - e)], yerr=err_s, **kwargs) diff --git a/magicctapipe/reco/__init__.py b/magicctapipe/reco/__init__.py index 3b8f2f92..b19cf46f 100644 --- a/magicctapipe/reco/__init__.py +++ b/magicctapipe/reco/__init__.py @@ -1,70 +1,7 @@ -from .classifier_utils import ( - GetHist_classifier, - check_train_test_intersections_classifier, - evaluate_performance_classifier, - get_weights_classifier, - load_init_data_classifier, - print_par_imp_classifier, -) -from .direction_utils import compute_separation_angle_direction -from .energy_utils import GetHist2D_energy, evaluate_performance_energy, plot_migmatrix from .estimators import DispRegressor, EnergyRegressor, EventClassifier -from .event_processing import ( # EnergyRegressor, - DirectionEstimatorPandas, - DirectionStereoEstimatorPandas, - EnergyEstimator, - EnergyEstimatorPandas, - EventClassifierPandas, - EventFeatureSelector, - EventFeatureTargetSelector, - EventProcessor, - HillasFeatureSelector, - RegressorClassifierBase, -) -from .global_utils import ( - check_train_test_intersections, - compute_event_weights, - get_weights_mc_dir_class, -) -from .stereo import ( - StereoInfoContainer, - check_stereo, - check_write_stereo, - write_hillas, - write_stereo, -) __all__ = [ - "GetHist_classifier", - "evaluate_performance_classifier", - "get_weights_classifier", - "print_par_imp_classifier", - "load_init_data_classifier", - "check_train_test_intersections_classifier", - "compute_separation_angle_direction", - "GetHist2D_energy", - "evaluate_performance_energy", - "plot_migmatrix", - "RegressorClassifierBase", - # "EnergyRegressor", - "HillasFeatureSelector", - "EventFeatureSelector", - "EventFeatureTargetSelector", - "EventProcessor", - "EnergyEstimator", - "EnergyEstimatorPandas", - "DirectionEstimatorPandas", - "EventClassifierPandas", - "DirectionStereoEstimatorPandas", - "compute_event_weights", - "get_weights_mc_dir_class", - "check_train_test_intersections", "DispRegressor", "EnergyRegressor", "EventClassifier", - "write_hillas", - "check_write_stereo", - "check_stereo", - "write_stereo", - "StereoInfoContainer", ] diff --git a/magicctapipe/reco/classifier_utils.py b/magicctapipe/reco/classifier_utils.py deleted file mode 100644 index 1bebc48a..00000000 --- a/magicctapipe/reco/classifier_utils.py +++ /dev/null @@ -1,233 +0,0 @@ -import glob -import os - -import numpy as np -import pandas as pd -import sklearn.metrics - -from ..utils import ( - info_message, - load_dl1_data_stereo_list, - load_dl1_data_stereo_list_selected, -) -from .global_utils import check_train_test_intersections - -__all__ = [ - "GetHist_classifier", - "evaluate_performance_classifier", - "get_weights_classifier", - "print_par_imp_classifier", - "load_init_data_classifier", - "check_train_test_intersections_classifier", -] - - -def GetHist_classifier(data, bins=30, range=None, weights=None): - hs, edges = np.histogram(data, bins=bins, range=range, weights=weights) - loc = (edges[1:] + edges[:-1]) / 2 - - hist = {} - hist["Hist"] = hs - hist["X"] = loc - hist["XEdges"] = edges - - return hist - - -def evaluate_performance_classifier(data, class0_name="event_class_0", drop_na=True): - if drop_na: - data = data.dropna() - - report = {"gammaness": dict(), "metrics": dict()} - - for event_class in data["true_event_class"].unique(): - events = data.query(f"true_event_class == {event_class}") - hist = GetHist_classifier(events[class0_name], bins=100, range=(0, 1)) - hist["Hist"] = hist["Hist"] / hist["Hist"].sum() - hist["Cumsum"] = 1 - np.cumsum(hist["Hist"]) - - report["gammaness"][event_class] = hist - - if "mean" in class0_name: - class_names = list( - filter( - lambda name: "event_class_" in name and "_mean" in name, data.columns - ) - ) - else: - class_names = list( - filter( - lambda name: "event_class_" in name and "_mean" not in name, - data.columns, - ) - ) - - proba = data[class_names].values - predicted_class = proba.argmax(axis=1) - print(data) - print(class_names) - print(proba) - - report["metrics"]["acc"] = sklearn.metrics.accuracy_score( - data["true_event_class"], predicted_class - ) - - true_class = np.clip(data["true_event_class"], 0, 1) - true_class = 1 - true_class - - try: - report["metrics"]["auc_roc"] = sklearn.metrics.roc_auc_score( - true_class, proba[:, 0] - ) - except Exception as e: - print(f"ERROR: {e}") - - return report - - -def get_weights_classifier(mc_data, bkg_data, alt_edges, intensity_edges): - mc_hist, _, _ = np.histogram2d( - mc_data["tel_alt"], mc_data["intensity"], bins=[alt_edges, intensity_edges] - ) - bkg_hist, _, _ = np.histogram2d( - bkg_data["tel_alt"], bkg_data["intensity"], bins=[alt_edges, intensity_edges] - ) - - availability_hist = np.clip(mc_hist, 0, 1) * np.clip(bkg_hist, 0, 1) - - # --- MC weights --- - mc_alt_bins = np.digitize(mc_data["tel_alt"], alt_edges) - 1 - mc_intensity_bins = np.digitize(mc_data["intensity"], intensity_edges) - 1 - - # Treating the out-of-range events - mc_alt_bins[mc_alt_bins == len(alt_edges) - 1] = len(alt_edges) - 2 - mc_intensity_bins[mc_intensity_bins == len(intensity_edges) - 1] = ( - len(intensity_edges) - 2 - ) - - mc_weights = 1 / mc_hist[mc_alt_bins, mc_intensity_bins] - mc_weights *= availability_hist[mc_alt_bins, mc_intensity_bins] - - # --- Bkg weights --- - bkg_alt_bins = np.digitize(bkg_data["tel_alt"], alt_edges) - 1 - bkg_intensity_bins = np.digitize(bkg_data["intensity"], intensity_edges) - 1 - - # Treating the out-of-range events - bkg_alt_bins[bkg_alt_bins == len(alt_edges) - 1] = len(alt_edges) - 2 - bkg_intensity_bins[bkg_intensity_bins == len(intensity_edges) - 1] = ( - len(intensity_edges) - 2 - ) - - bkg_weights = 1 / bkg_hist[bkg_alt_bins, bkg_intensity_bins] - bkg_weights *= availability_hist[bkg_alt_bins, bkg_intensity_bins] - - # --- Storing to a data frame --- - mc_weight_df = pd.DataFrame(data={"event_weight": mc_weights}, index=mc_data.index) - bkg_weight_df = pd.DataFrame( - data={"event_weight": bkg_weights}, index=bkg_data.index - ) - - return mc_weight_df, bkg_weight_df - - -def print_par_imp_classifier(class_estimator): - for tel_id in class_estimator.telescope_classifiers: - feature_importances = class_estimator.telescope_classifiers[ - tel_id - ].feature_importances_ - - print(f" tel_id: {tel_id}") - z_ = zip(class_estimator.feature_names, feature_importances) - for feature, importance in z_: - print(f" {feature:.<15s}: {importance:.4f}") - print("") - - -def load_init_data_classifier(cfg, mode="train"): - """Load and init data for classifier train RFs - - Parameters - ---------- - cfg : dict - configurations loaded from configuration file - mode : str, optional - train or test, by default "train" - - Returns - ------- - tuple - mc_data, bkg_data - """ - mono_mode = len(cfg["all_tels"]["tel_ids"]) == 1 - f_ = cfg["data_files"]["mc"][f"{mode}_sample"]["hillas_h5"] - f_ = f"{os.path.dirname(f_)}/*{os.path.splitext(f_)[1]}" - fl_ = glob.glob(f_) - info_message(f"Loading MC {mode} data...", prefix="ClassifierRF") - if mode == "test": - mc_data = load_dl1_data_stereo_list_selected( - file_list=fl_, - sub_dict=cfg["classifier_rf"], - file_n_key="test_file_n", - drop=True, - mono_mode=mono_mode, - ) - else: - mc_data = load_dl1_data_stereo_list(fl_, drop=True, mono_mode=mono_mode) - - f_ = cfg["data_files"]["data"][f"{mode}_sample"]["hillas_h5"] - f_ = f"{os.path.dirname(f_)}/*{os.path.splitext(f_)[1]}" - fl_ = glob.glob(f_) - info_message(f'Loading "off" {mode} data...', prefix="ClassifierRF") - if mode == "test": - bkg_data = load_dl1_data_stereo_list_selected( - file_list=fl_, - sub_dict=cfg["classifier_rf"], - file_n_key="test_file_n", - drop=True, - mono_mode=mono_mode, - ) - else: - bkg_data = load_dl1_data_stereo_list(fl_, drop=True, mono_mode=mono_mode) - - # True event classes - mc_data["true_event_class"] = 0 - bkg_data["true_event_class"] = 1 - - # Dropping data with the wrong altitude - bkg_data = bkg_data.query(cfg["global"]["wrong_alt"]) - - return mc_data, bkg_data - - -def check_train_test_intersections_classifier( - mc_data_train, bkg_data_train, mc_data_test, bkg_data_test -): - """Function to check if there are same events in train and test samples, for - train rfs classifier - - Parameters - ---------- - mc_data_train : pandas.DataFrame - mc_data_train - bkg_data_train : pandas.DataFrame - bkg_data_train - mc_data_test : pandas.DataFrame - mc_data_test - bkg_data_test : pandas.DataFrame - bkg_data_test - - Returns - ------- - bool - test_passed - """ - tests = { - "data": [mc_data_train, mc_data_test], - "bkg": [bkg_data_train, bkg_data_test], - } - test_passed = True - for k in tests.keys(): - print(f"Analizing {k} test and train") - test_passed_ = check_train_test_intersections(*tests[k]) - test_passed = test_passed and test_passed_ - return test_passed diff --git a/magicctapipe/reco/direction_utils.py b/magicctapipe/reco/direction_utils.py deleted file mode 100644 index 6d65dcb5..00000000 --- a/magicctapipe/reco/direction_utils.py +++ /dev/null @@ -1,70 +0,0 @@ -import pandas as pd -from astropy import units as u -from astropy.coordinates import AltAz, SkyCoord - -from ..utils import get_tel_ids_dl1 - -__all__ = ["compute_separation_angle_direction"] - - -def compute_separation_angle_direction(shower_data_test): - separation = dict() - - # ??? - shower_data_test.dropna(subset=["az_reco_mean", "alt_reco_mean"], inplace=True) - - tel_ids = get_tel_ids_dl1(shower_data_test) - - for tel_id in tel_ids: - event_coord_true = SkyCoord( - shower_data_test.loc[(slice(None), slice(None), tel_id), "true_az"].values - * u.rad, - shower_data_test.loc[(slice(None), slice(None), tel_id), "true_alt"].values - * u.rad, - frame=AltAz(), - ) - - event_coord_reco = SkyCoord( - shower_data_test.loc[(slice(None), slice(None), tel_id), "az_reco"].values - * u.rad, - shower_data_test.loc[(slice(None), slice(None), tel_id), "alt_reco"].values - * u.rad, - frame=AltAz(), - ) - - separation[tel_id] = event_coord_true.separation(event_coord_reco) - - event_coord_true = SkyCoord( - shower_data_test["true_az"].values * u.rad, - shower_data_test["true_alt"].values * u.rad, - frame=AltAz(), - ) - - event_coord_reco = SkyCoord( - shower_data_test["az_reco_mean"].values * u.rad, - shower_data_test["alt_reco_mean"].values * u.rad, - frame=AltAz(), - ) - - separation[0] = event_coord_true.separation(event_coord_reco) - - # Converting to a data frame - separation_df = pd.DataFrame( - data={"sep_0": separation[0]}, index=shower_data_test.index - ) - - for tel_id in tel_ids: - df = pd.DataFrame( - data={f"sep_{tel_id:d}": separation[tel_id]}, - index=shower_data_test.loc[ - (slice(None), slice(None), tel_id), "true_az" - ].index, - ) - separation_df = separation_df.join(df) - - separation_df = separation_df.join(shower_data_test) - - for tel_id in [0] + tel_ids: - print(f" Tel {tel_id} scatter: ", f"{separation[tel_id].to(u.deg).std():.2f}") - - return separation_df diff --git a/magicctapipe/reco/energy_utils.py b/magicctapipe/reco/energy_utils.py deleted file mode 100644 index 8cebc7b2..00000000 --- a/magicctapipe/reco/energy_utils.py +++ /dev/null @@ -1,149 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np -from matplotlib import colors - -__all__ = [ - "GetHist2D_energy", - "evaluate_performance_energy", - "plot_migmatrix", -] - - -def GetHist2D_energy(x, y, bins=30, range=None, weights=None): - hs, xedges, yedges = np.histogram2d(x, y, bins=bins, range=range, weights=weights) - xloc = (xedges[1:] + xedges[:-1]) / 2 - yloc = (yedges[1:] + yedges[:-1]) / 2 - - xxloc, yyloc = np.meshgrid(xloc, yloc, indexing="ij") - - hist = {} - hist["Hist"] = hs - hist["X"] = xloc - hist["Y"] = yloc - hist["XX"] = xxloc - hist["YY"] = yyloc - hist["XEdges"] = xedges - hist["YEdges"] = yedges - - return hist - - -def evaluate_performance_energy(data, energy_name): - valid_data = data.dropna(subset=[energy_name]) - migmatrix = GetHist2D_energy( - np.lib.scimath.log10(valid_data["true_energy"]), - np.lib.scimath.log10(valid_data[energy_name]), - range=((-1.5, 1.5), (-1.5, 1.5)), - bins=30, - ) - - matrix_norms = migmatrix["Hist"].sum(axis=1) - for i in range(0, migmatrix["Hist"].shape[0]): - if matrix_norms[i] > 0: - migmatrix["Hist"][i, :] /= matrix_norms[i] - - true_energies = valid_data["true_energy"].values - estimated_energies = valid_data[energy_name].values - - for confidence in (68, 95): - name = "{:d}%".format(confidence) - - migmatrix[name] = dict() - migmatrix[name]["upper"] = np.zeros_like(migmatrix["X"]) - migmatrix[name]["mean"] = np.zeros_like(migmatrix["X"]) - migmatrix[name]["lower"] = np.zeros_like(migmatrix["X"]) - migmatrix[name]["rms"] = np.zeros_like(migmatrix["X"]) - - for i in range(0, len(migmatrix["X"])): - true_energies_log = np.lib.scimath.log10(true_energies) - wh = np.where( - (true_energies_log >= migmatrix["XEdges"][i]) - & (true_energies_log < migmatrix["XEdges"][i + 1]) - ) - - if len(wh[0]) > 0: - rel_diff_ = estimated_energies[wh] - true_energies[wh] - rel_diff = rel_diff_ / true_energies[wh] - quantiles = np.percentile( - rel_diff, [50 - confidence / 2.0, 50, 50 + confidence / 2.0] - ) - migmatrix[name]["upper"][i] = quantiles[2] - migmatrix[name]["mean"][i] = quantiles[1] - migmatrix[name]["lower"][i] = quantiles[0] - migmatrix[name]["rms"][i] = rel_diff.std() - else: - migmatrix[name]["upper"][i] = 0 - migmatrix[name]["mean"][i] = 0 - migmatrix[name]["lower"][i] = 0 - migmatrix[name]["rms"][i] = 0 - - return migmatrix - - -def plot_migmatrix(index, name, matrix, grid_shape): - """Plot migration matrix - - Parameters - ---------- - index : int - plot index (different from tel_id) - name : str - telescope name (use short names) - matrix : dict - migration matrix to be plotted - grid_shape : tuple - grid shape - """ - plt.subplot2grid(grid_shape, (0, index)) - plt.loglog() - plt.title("%s estimation" % name) - plt.xlabel("E$_{true}$, TeV") - plt.ylabel("E$_{est}$, TeV") - - plt.pcolormesh( - 10 ** matrix["XEdges"], - 10 ** matrix["YEdges"], - matrix["Hist"].transpose(), - cmap="jet", - norm=colors.LogNorm(vmin=1e-3, vmax=1), - ) - plt.colorbar() - - plt.subplot2grid(grid_shape, (1, index)) - plt.semilogx() - plt.title("%s estimation" % name) - plt.xlabel("E$_{true}$, TeV") - plt.ylim(-1, 1) - - plt.plot( - 10 ** matrix["X"], - matrix["68%"]["mean"], - linestyle="-", - color="C0", - label="Bias", - ) - - plt.plot( - 10 ** matrix["X"], matrix["68%"]["rms"], linestyle=":", color="red", label="RMS" - ) - - plt.plot( - 10 ** matrix["X"], - matrix["68%"]["upper"], - linestyle="--", - color="C1", - label="68% containment", - ) - plt.plot(10 ** matrix["X"], matrix["68%"]["lower"], linestyle="--", color="C1") - - plt.plot( - 10 ** matrix["X"], - matrix["95%"]["upper"], - linestyle=":", - color="C2", - label="95% containment", - ) - plt.plot(10 ** matrix["X"], matrix["95%"]["lower"], linestyle=":", color="C2") - - plt.grid(linestyle=":") - plt.legend() diff --git a/magicctapipe/reco/event_processing.py b/magicctapipe/reco/event_processing.py deleted file mode 100644 index 88a3dc76..00000000 --- a/magicctapipe/reco/event_processing.py +++ /dev/null @@ -1,2699 +0,0 @@ -import itertools -import re -from abc import ABC, abstractmethod -from copy import deepcopy - -import joblib -import numpy as np -import pandas as pd -import sklearn -import sklearn.ensemble -from astropy import units as u -from astropy.coordinates import AltAz, SkyCoord -from astropy.coordinates.angle_utilities import angular_separation -from ctapipe.containers import ReconstructedContainer, ReconstructedEnergyContainer -from ctapipe.coordinates import CameraFrame, TelescopeFrame -from ctapipe.image import hillas_parameters, tailcuts_clean -from ctapipe.io import EventSource -from ctapipe.reco.reco_algorithms import TooFewTelescopesException -from sklearn.ensemble import RandomForestRegressor -from sklearn.preprocessing import StandardScaler - -__all__ = [ - "RegressorClassifierBase", - "EnergyRegressor", - "HillasFeatureSelector", - "EventFeatureSelector", - "EventFeatureTargetSelector", - "EventProcessor", - "EnergyEstimator", - "EnergyEstimatorPandas", - "DirectionEstimatorPandas", - "EventClassifierPandas", - "DirectionStereoEstimatorPandas", -] - - -class RegressorClassifierBase: - """This class collects one model for every camera type -- given by - `cam_id_list` -- to get an estimate for the energy of an - air-shower. The class interfaces with `scikit-learn` in that it - relays all function calls not exeplicitly defined here through - `.__getattr__` to any model in its collection. This gives us the - full power and convenience of scikit-learn and we only have to - bother about the high-level interface. - - The fitting approach is to train on single images; then separately - predict an image's energy and combine the different predictions - for all the telscopes in each event in one single energy estimate. - - TODO: come up with a better merging approach to combine the - estimators of the various camera types - - Parameters - ---------- - - model: scikit-learn model - the model you want to use to estimate the shower energies - cam_id_list: list - list of identifiers to differentiate the various sources of - the images; could be the camera IDs or even telescope IDs. We - will train one model for each of the identifiers. - unit: str - scikit-learn regressors don't work with astropy unit. so, tell - in advance in which unit we want to deal here in case we need - one. (default: 1) - kwargs: dict - arguments to be passed on to the constructor of the regressors - - """ - - def __init__(self, model, cam_id_list, unit=1, **kwargs): - self.model_dict = {} - self.input_features_dict = {} - self.output_features_dict = {} - self.unit = unit - for cam_id in cam_id_list or []: - self.model_dict[cam_id] = model(**deepcopy(kwargs)) - - def __getattr__(self, attr): - """We interface this class with the "first" model in `.model_dict` - and relay all function calls over to it. This gives access to - all the fancy implementations in the scikit-learn classes - right from this class and we only have to overwrite the - high-level interface functions we want to adapt to our - use-case. - - """ - return getattr(next(iter(self.model_dict.values())), attr) - - def __str__(self): - """return the class name of the used model""" - return str(next(iter(self.model_dict.values()))).split("(")[0] - - def reshuffle_event_list(self, X, y): - """I collect the data event-wise as dictionaries of the different - camera identifiers but for the training it is more convenient - to have a dictionary containing flat arrays of the - feature lists. This function flattens the `X` and `y` arrays - accordingly. - - This is only for convenience during the training and testing - phase, later on, we won't collect data first event- and then - telescope-wise. That's why this is a separate function and not - implemented in `.fit`. - - Parameters - ---------- - X : list - collection of training features see Notes section for a sketch of - the container's expected shape - y : list - list of the training targets (i.e. shower energies) for all events - - Returns - ------- - trainFeatures, trainTarget : dict - flattened containers to be handed over to `.fit` - - Raises - ------ - KeyError: - in case `X` contains keys that were not provided with - `cam_id_list` during `.__init__` or `.load`. - - Notes - ----- - `X` and `y` are expected to be lists of events: - `X = [event_1, event_2, ..., event_n]` - `y = [energy_1, energy_2, ..., energy_n]` - - with `event_i` being a dictionary: - `event_i = {tel_type_1: list_of_tels_1, ...}` - and `energy_i` is the energy of `event_i` - - `list_of_tels_i` being the list of all telescope of type - `tel_type_1` containing the features to fit and predict the - shower energy: `list_of_tels_i = [[tel_1_feature_1, - tel_1_feature_2, ...], [tel_2_feature_1, tel_2_feature_2, - ...], ...]` - - after reshuffling, the resulting lists will look like this:: - - trainFeatures = {tel_type_1: [features_event_1_tel_1, - features_event_1_tel_2, - features_event_2_tel_1, ...], - tel_type_2: [features_event_1_tel_3, - features_event_2_tel_3, - features_event_2_tel_4, ...], - ...} - - - `trainTarget` will be a dictionary with the same keys and a - lists of energies corresponding to the features in the - `trainFeatures` lists. - - """ - - trainFeatures = {a: [] for a in self.model_dict} - trainTarget = {a: [] for a in self.model_dict} - - for evt, target in zip(X, y): - for cam_id, tels in evt.items(): - try: - # append the features-lists of the current event - # and telescope type to the flat list of - # features-lists for this telescope type - trainFeatures[cam_id] += tels - except KeyError: - raise KeyError( - "cam_id '{}' in X but no model defined: {}".format( - cam_id, [k for k in self.model_dict] - ) - ) - - try: - # add a target-entry for every feature-list - trainTarget[cam_id] += [target.to(self.unit).value] * len(tels) - except AttributeError: - # in case the target is not given as an astropy - # quantity let's hope that the user keeps proper - # track of the unit themself (might be just `1` anyway) - trainTarget[cam_id] += [target] * len(tels) - return trainFeatures, trainTarget - - def fit(self, X, y, sample_weight=None): - """This function fits a model against the collected features; - separately for every telescope identifier. - - Parameters - ---------- - X : dict - Dictionary that maps the telescope identifiers to lists of - feature-lists. The values of the dictionary are the lists - `scikit-learn` regressors train on and are supposed to - comply to their format requirements e.g. each featuer-list - has to contain the same features at the same position - y : dict - the energies corresponding to all the feature-lists of `X` - sample_weight : dict - lists of weights for the various telescope identifiers - Note: Not all models support this; e.g. RandomForests do - (Regressor and Classifier) - - Returns - ------- - RegressorClassifierBase - - Raises - ------ - KeyError: - in case `X` contains keys that are either not in `y` or - were not provided before with `cam_id_list`. - - """ - - sample_weight = sample_weight or {} - - for cam_id in X: - if cam_id not in y: - raise KeyError( - "cam_id '{}' in X but not in y: {}".format(cam_id, [k for k in y]) - ) - - if cam_id not in self.model_dict: - raise KeyError( - "cam_id '{}' in X but no model defined: {}".format( - cam_id, [k for k in self.model_dict] - ) - ) - - # add a `None` entry in the weights dictionary in case there is no entry yet - if cam_id not in sample_weight: - sample_weight[cam_id] = None - - # for every `cam_id` train one model (as long as there are events in `X`) - if len(X[cam_id]): - try: - self.model_dict[cam_id].fit( - X[cam_id], y[cam_id], sample_weight=sample_weight[cam_id] - ) - except (TypeError, ValueError): - # some models do not like `sample_weight` in the `fit` call... - # catch the exception and try again without the weights - self.model_dict[cam_id].fit(X[cam_id], y[cam_id]) - - return self - - # def predict(self, X, cam_id=None): - # """ - # In the tradition of scikit-learn, `.predict` takes a "list of feature-lists" and - # returns an estimate for targeted quantity for every set of features. - # - # Parameters - # ---------- - # X : list of lists of floats - # the list of feature-lists - # cam_id : any (e.g. string, int), optional - # identifier of the singular camera type to consider here - # if not set, `.reg_dict` is assumed to have a single key which is used in place - # - # Returns - # ------- - # predict : list of floats - # predictions for the target quantity for every set of features given in `X` - # - # Raises - # ------ - # ValueError - # if `cam_id is None` and the number of registered models is not 1 - # """ - # - # if cam_id is None: - # if len(self.model_dict) == 1: - # cam_id = next(iter(self.model_dict.keys())) - # else: - # raise ValueError("you need to provide a cam_id") - # - # return self.model_dict[cam_id].predict(X)*self.energy_unit - - def save(self, path): - """saves the models in `.reg_dict` each in a separate pickle to disk - - TODO: investigate more stable containers to write out models - than joblib dumps - - Parameters - ---------- - path : str - Path to store the different models. Expects to contain - `{cam_id}` or at least an empty `{}` to replace it with - the keys in `.reg_dict`. - - """ - - import joblib - - for cam_id, model in self.model_dict.items(): - try: - # assume that there is a `{cam_id}` keyword to replace - # in the string - joblib.dump(model, path.format(cam_id=cam_id)) - except IndexError: - # if not, assume there is a naked `{}` somewhere left - # if not, format won't do anything, so it doesn't - # break but will overwrite every pickle with the - # following one - joblib.dump(model, path.format(cam_id)) - - @classmethod - def load(cls, path, cam_id_list, unit=1): - """Load the pickled dictionary of model from disk, create a husk - `cls` instance and fill the model dictionary. - - Parameters - ---------- - path : str - the path where the pre-trained, pickled regressors are - stored `path` is assumed to contain a `{cam_id}` keyword - to be replaced by each camera identifier in `cam_id_list` - (or at least a naked `{}`). - cam_id_list : list - list of camera identifiers like telescope ID or camera ID - and the assumed distinguishing feature in the filenames of - the various pickled regressors. - unit : str, optional - scikit-learn regressor/classifier do not work with - units. so append this one to the predictions in case you - deal with unified targets (like energy). assuming that - the models where trained with consistent units. clf - self : RegressorClassifierBase - in derived classes, this will return a ready-to-use - instance of that class to predict any problem you have - trained for - - """ - import joblib - - # need to get an instance of this class `cam_id_list=None` - # prevents `.__init__` to initialise `.model_dict` itself, - # since we are going to set it with the pickled models - # manually - self = cls(cam_id_list=None, unit=unit) - for key in cam_id_list: - try: - # assume that there is a `{cam_id}` keyword to replace - # in the string - self.model_dict[key] = joblib.load(path.format(cam_id=key)) - except IndexError: - # if not, assume there is a naked `{}` somewhere left - # if not, format won't do anything, so it doesn't matter - # though this will load the same model for every `key` - self.model_dict[key] = joblib.load(path.format(key)) - - return self - - @staticmethod - def scale_features(cam_id_list, feature_list): - """Scales features before training with any ML method. - - Parameters - ---------- - cam_id_list : list - list of camera identifiers like telescope ID or camera ID - and the assumed distinguishing feature in the filenames of - the various pickled regressors. - feature_list : dict - Dictionary that maps the telescope identifiers to lists of - feature-lists. The values of the dictionary are the lists - `scikit-learn` regressors train on and are supposed to - comply to their format requirements e.g. each feature-list - has to contain the same features at the same position - - Returns - ------- - f_dict : dict - a copy of feature_list input, with scaled values - - """ - f_dict = {} - scaler = {} - - for cam_id in cam_id_list or []: - scaler[cam_id] = StandardScaler() - scaler[cam_id].fit(feature_list[cam_id]) - f_dict[cam_id] = scaler[cam_id].transform(feature_list[cam_id]) - - return f_dict, scaler - - def show_importances(self): - """Creates a matplotlib figure that shows the importances of the - different features for the various trained models in a grid of - barplots. The features are sorted by descending importance. - - Parameters - ---------- - feature_labels : list, optional - a list of the feature names in proper order - to be used as x-axis tick-labels - - Returns - ------- - fig : matplotlib.figure - the figure holding the different bar plots - - """ - - import matplotlib.pyplot as plt - - n_tel_types = len(self.model_dict) - n_cols = np.ceil(np.sqrt(n_tel_types)).astype(int) - n_rows = np.ceil(n_tel_types / n_cols).astype(int) - - fig, axs = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False) - plt.suptitle("Feature Importances") - for i, (cam_id, model) in enumerate(self.model_dict.items()): - plt.sca(axs.ravel()[i]) - plt.title(cam_id) - try: - importances = model.feature_importances_ - except Exception: - plt.gca().axis("off") - continue - bins = range(importances.shape[0]) - - if cam_id in self.input_features_dict and ( - len(self.input_features_dict[cam_id]) == len(bins) - ): - feature_labels = self.input_features_dict[cam_id] - importances, s_feature_labels = zip( - *sorted(zip(importances, feature_labels), reverse=True) - ) - plt.xticks(bins, s_feature_labels, rotation=17) - plt.bar(bins, importances, color="r", align="center") - - # switch off superfluous axes - for j in range(i + 1, n_rows * n_cols): - axs.ravel()[j].axis("off") - - return fig - - -class EnergyRegressor(RegressorClassifierBase): - """This class collects one regressor for every camera type -- given - by `cam_id_list` -- to get an estimate for the energy of an - air-shower. The class interfaces with `scikit-learn` in that it - relays all function calls not exeplicitly defined here through - `.__getattr__` to any regressor in its collection. This gives us - the full power and convenience of scikit-learn and we only have to - bother about the high-level interface. - - The fitting approach is to train on single images; then separately - predict an image's energy and combine the different predictions - for all the telscopes in each event in one single energy estimate. - - TODO: come up with a better merging approach to combine the - estimators of the various camera types - - Parameters - ---------- - regressor : scikit-learn regressor - the regressor you want to use to estimate the shower energies - cam_id_list : list - list of identifiers to differentiate the various sources of - the images; could be the camera IDs or even telescope IDs. We - will train one regressor for each of the identifiers. - unit : astropy.Quantity - scikit-learn regressors don't work with astropy unit. so, tell - in advance in which unit we want to deal here. - kwargs - arguments to be passed on to the constructor of the regressors - - """ - - def __init__( - self, regressor=RandomForestRegressor, cam_id_list="cam", unit=u.TeV, **kwargs - ): - super().__init__(model=regressor, cam_id_list=cam_id_list, unit=unit, **kwargs) - - def predict_by_event(self, event_list): - """expects a list of events where every "event" is a dictionary - mapping telescope identifiers to the list of feature-lists by - the telescopes of that type in this event. Refer to the Note - section in `.reshuffle_event_list` for an description how `event_list` - is supposed to look like. The singular estimate for the event - is simply the mean of the various estimators of the event. - - event_list : list - cf. `.reshuffle_event_list` under Notes - - Returns - ------- - dict : - dictionary that contains various statistical modes (mean, - median, standard deviation) of the predicted quantity of - every telescope for all events. - - Raises - ------ - KeyError: - if there is a telescope identifier in `event_list` that is not a - key in the regressor dictionary - - """ - - predict_mean = [] - predict_median = [] - predict_std = [] - for evt in event_list: - predicts = [] - weights = [] - for cam_id, tels in evt.items(): - try: - t_res = self.model_dict[cam_id].predict(tels).tolist() - predicts += t_res - except KeyError: - # QUESTION if there is no trained classifier for - # `cam_id`, raise an error or just pass this - # camera type? - raise KeyError( - "cam_id '{}' in event_list but no model defined: {}".format( - cam_id, [k for k in self.model_dict] - ) - ) - - try: - # if a `namedtuple` is provided, we can weight the different images - # using some of the provided features - weights += [t.sum_signal_cam / t.impact_dist for t in tels] - except AttributeError: - # otherwise give every image the same weight - weights += [1] * len(tels) - - predict_mean.append(np.average(predicts, weights=weights)) - predict_median.append(np.median(predicts)) - predict_std.append(np.std(predicts)) - - return { - "mean": np.array(predict_mean) * self.unit, - "median": np.array(predict_median) * self.unit, - "std": np.array(predict_std) * self.unit, - } - - def predict_by_telescope_type(self, event_list): - """same as `predict_dict` only that it returns a list of dictionaries - with an estimate for the target quantity for every telescope - type separately. - - more for testing- and performance-measuring-purpouses -- to - see how the different telescope types behave throughout the - energy ranges and if a better (energy-dependant) combination - of the separate telescope-wise estimators (compared to the - mean) can be achieved. - - """ - - predict_list_dict = [] - for evt in event_list: - res_dict = {} - for cam_id, tels in evt.items(): - t_res = self.model_dict[cam_id].predict(tels).tolist() - res_dict[cam_id] = np.mean(t_res) * self.unit - predict_list_dict.append(res_dict) - - return predict_list_dict - - @classmethod - def load(cls, path, cam_id_list, unit=u.TeV): - """this is only here to overwrite the unit argument with an astropy - quantity - - Parameters - ---------- - path : str - the path where the pre-trained, pickled regressors are - stored `path` is assumed to contain a `{cam_id}` keyword - to be replaced by each camera identifier in `cam_id_list` - (or at least a naked `{}`). - cam_id_list : list - list of camera identifiers like telescope ID or camera ID - and the assumed distinguishing feature in the filenames of - the various pickled regressors. - unit : astropy.Quantity - scikit-learn regressor do not work with units. so append - this one to the predictions. assuming that the models - where trained with consistent units. (default: u.TeV) - - Returns - ------- - EnergyRegressor: - a ready-to-use instance of this class to predict any - quantity you have trained for - - """ - return super().load(path, cam_id_list, unit) - - -class HillasFeatureSelector(ABC): - """ - The base class that handles the event Hillas parameter extraction - for future use with the random forest energy and classification pipelines. - """ - - def __init__(self, hillas_params_to_use, hillas_reco_params_to_use, telescopes): - """ - Constructor. Stores the settings that will be used during the parameter - extraction. - - Parameters - ---------- - hillas_params_to_use: list - A list of Hillas parameter names that should be extracted. - hillas_reco_params_to_use: list - A list of the Hillas "stereo" parameters (after HillasReconstructor), - that should also be extracted. - telescopes: list - List of telescope identifiers. Only events triggering these will be processed. - """ - - self.hillas_params_to_use = hillas_params_to_use - self.hillas_reco_params_to_use = hillas_reco_params_to_use - self.telescopes = telescopes - - n_features_per_telescope = len(hillas_params_to_use) + len( - hillas_reco_params_to_use - ) - self.n_features = n_features_per_telescope * len(telescopes) - - @staticmethod - def _get_param_value(param): - """ - An internal method that extracts the parameter value from both - float and Quantity instances. - - Parameters - ---------- - param: float or astropy.unit.Quantity - A parameter whos value should be extracted. - - Returns - ------- - float: - An extracted value. For float the param itself is returned, - for Quantity the Quantity.value is taken. - - """ - - if isinstance(param, u.Quantity): - return param.value - else: - return param - - @abstractmethod - def fill_event(self, event, hillas_reco_result, target): - """ - A dummy function to process an event. - - Parameters - ---------- - event: DataContainer - Container instances, holding DL1 event data. - hillas_reco_result: ReconstructedShowerContainer - A container with the computed shower direction properties. - target: float - A target variable for future regression/classification model. - - Returns - ------- - - """ - - pass - - -class EventFeatureSelector(HillasFeatureSelector): - """ - A class that performs the selects event features for further reconstruction with - ctapipe.reco.RegressorClassifierBase. - """ - - def __init__(self, hillas_params_to_use, hillas_reco_params_to_use, telescopes): - """ - Constructor. Stores the settings that will be used during the parameter - extraction. - - Parameters - ---------- - hillas_params_to_use: list - A list of Hillas parameter names that should be extracted. - hillas_reco_params_to_use: list - A list of the Hillas "stereo" parameters (after HillasReconstructor), - that should also be extracted. - telescopes: list - List of telescope identifiers. Only events triggering these will be processed. - """ - - super(EventFeatureSelector, self).__init__( - hillas_params_to_use, hillas_reco_params_to_use, telescopes - ) - - self.events = [] - self.event_targets = [] - - def fill_event(self, event, hillas_reco_result, target=None): - """ - This method fills the event features to the feature list. - Optionally it can add a "target" value, which can be used - to check the accuracy of the reconstruction. - - Parameters - ---------- - event: DataContainer - Container instances, holding DL1 event data. - hillas_reco_result: ReconstructedShowerContainer - A container with the computed shower direction properties. - target: float - A target variable for future regression/classification model. - - Returns - ------- - - """ - - event_record = dict() - for tel_id in self.telescopes: - feature_entry = [] - - for param_name in self.hillas_params_to_use: - param = event.dl1.tel[tel_id].hillas_params[param_name] - - feature_entry.append(self._get_param_value(param)) - - for param_name in self.hillas_reco_params_to_use: - param = hillas_reco_result[param_name] - - feature_entry.append(self._get_param_value(param)) - - if np.all(np.isfinite(feature_entry)): - event_record[tel_id] = [feature_entry] - - self.events.append(event_record) - self.event_targets.append(target) - - -class EventFeatureTargetSelector(HillasFeatureSelector): - """ - A class that performs the selects event features for further training of the - ctapipe.reco.RegressorClassifierBase model. - """ - - def __init__(self, hillas_params_to_use, hillas_reco_params_to_use, telescopes): - """ - Constructor. Stores the settings that will be used during the parameter - extraction. - - Parameters - ---------- - hillas_params_to_use: list - A list of Hillas parameter names that should be extracted. - hillas_reco_params_to_use: list - A list of the Hillas "stereo" parameters (after HillasReconstructor), - that should also be extracted. - telescopes: list - List of telescope identifiers. Only events triggering these will be processed. - """ - - super(EventFeatureTargetSelector, self).__init__( - hillas_params_to_use, hillas_reco_params_to_use, telescopes - ) - - self.features = dict() - self.targets = dict() - self.events = [] - - for tel_id in self.telescopes: - self.features[tel_id] = [] - self.targets[tel_id] = [] - - def fill_event(self, event, hillas_reco_result, target): - """ - This method fills the event features to the feature list; - "target" values are added to their own "target" list. - - Parameters - ---------- - event: DataContainer - Container instances, holding DL1 event data. - hillas_reco_result: ReconstructedShowerContainer - A container with the computed shower direction properties. - target: float - A target variable for future regression/classification model. - - Returns - ------- - - """ - - event_record = dict() - for tel_id in self.telescopes: - feature_entry = [] - - for param_name in self.hillas_params_to_use: - param = event.dl1.tel[tel_id].hillas_params[param_name] - - feature_entry.append(self._get_param_value(param)) - - for param_name in self.hillas_reco_params_to_use: - param = hillas_reco_result[param_name] - - feature_entry.append(self._get_param_value(param)) - - if np.all(np.isfinite(feature_entry)): - self.features[tel_id].append(feature_entry) - self.targets[tel_id].append(self._get_param_value(target)) - - event_record[tel_id] = [feature_entry] - self.events.append(event_record) - - -class EventProcessor: - """ - This class is meant to represents the DL0->DL2 analysis pipeline. - It handles event loading, Hillas parameter estimation and storage - (DL0->DL1), stereo (with >=2 telescopes) event direction/impact etc. - reconstruction and RF energy estimation. - """ - - def __init__(self, calibrator, hillas_reconstructor, min_survived_pixels=10): - """ - Constructor. Sets the calibration / Hillas processing workers. - - Parameters - ---------- - calibrator: ctapipe.calib.CameraCalibrator - A desired camera calibrator instance. - hillas_reconstructor: ctapipe.reco.HillasReconstructor - A "stereo" (with >=2 telescopes) Hillas reconstructor instance that - will be used to determine the event direction/impact etc. - min_survived_pixels: int, optional - Minimal number of pixels in the shower image, that should survive - image cleaning. Hillas parameters are not computed for events falling - below this threshold. - Defaults to 10. - """ - - self.calibrator = calibrator - self.hillas_reconstructor = hillas_reconstructor - self.min_survived_pixels = min_survived_pixels - - self.events = [] - self.reconstruction_results = [] - - def _dl1_process(self, event): - """ - Internal method that performs DL0->DL1 event processing. - This involves image cleaning and Hillas parameter calculation. - - Parameters - ---------- - event: DataContainer - Container instances, holding DL0 event data. - - Returns - ------- - DataContainer: - Event with computed Hillas parameters. - - """ - - tels_with_data = list(event.r1.tels_with_data) - - for tel_id in tels_with_data: - subarray = event.inst.subarray - camera = subarray.tel[tel_id].camera - - self.calibrator.calibrate(event) - - event_image = event.dl1.tel[tel_id].image[1] - - mask = tailcuts_clean( - camera, event_image, picture_thresh=6, boundary_thresh=6 - ) - event_image_cleaned = event_image.copy() - event_image_cleaned[~mask] = 0 - - n_survived_pixels = event_image_cleaned[mask].size - - # Calculate hillas parameters - # It fails for empty images, so we apply a cut on the number - # of survived pixels - if n_survived_pixels > self.min_survived_pixels: - try: - event.dl1.tel[tel_id].hillas_params = hillas_parameters( - camera, event_image_cleaned - ) - except Exception: - print("Failed") - pass - - return event - - def _update_event_direction(self, event, reco_container): - """ - Internal method used to compute the shower direction/impact etc. from - intersection of the per-telescope image planes (from Hillas parameters) - and store them to the provided reconstruction container. - - Parameters - ---------- - event: DataContainer - Container instances, holding DL1 event data. - reco_container: ReconstructedContainer - A container that will hold the computed shower properties. - - Returns - ------- - ReconstructedContainer: - Updated shower reconstruction container. - - """ - - # Performing a geometrical direction reconstruction - try: - reco_container.shower[ - "hillas" - ] = self.hillas_reconstructor.predict_from_dl1(event) - except TooFewTelescopesException: - # No reconstruction possible. Resetting to defaults - reco_container.shower["hillas"].reset() - - return reco_container - - def _update_event_energy(self, event, reco_container): - """ - Internal method used to compute the shower energy from a pre-trained RF - and store it to the provided reconstruction container. - - Parameters - ---------- - event: DataContainer - Container instances, holding DL1 event data. - reco_container: ReconstructedContainer - A container that will hold the computed shower energy. - - Returns - ------- - ReconstructedContainer: - Updated shower reconstruction container. - - """ - - return reco_container - - def _update_event_classification(self, event, reco_container): - """ - Internal method used to compute the classify the using the pre-trained RF - and store it to the provided reconstruction container. - - Parameters - ---------- - event: DataContainer - Container instances, holding DL1 event data. - reco_container: ReconstructedContainer - A container that will hold the computed shower class. - - Returns - ------- - ReconstructedContainer: - Updated shower reconstruction container. - - """ - - return reco_container - - def _load_events(self, file_name, append_to_existing_events): - """ - Internal method that takes care of the event loading from the specified file - and DL1 processing. The DL1 events are also stored in the self.events list - for future usage; their R0/R1/DL0 containers are reset to save memory. - - Parameters - ---------- - file_name: str - A file name from which to read the events. - append_to_existing_events: bool - Defines whether the previously filled event list should be cleared - or if the new events should be appended to it. - - Returns - ------- - - """ - - with EventSource(input_url=file_name) as source: - event_generator = source._generator() - - if not append_to_existing_events: - self.events = [] - - # Running the parameter computation over the event list - for event in event_generator: - event = self._dl1_process(event) - - event.r0.reset() - event.r1.reset() - event.dl0.reset() - - self.events.append(deepcopy(event)) - - def process_file( - self, - file_name, - append_to_existing_events=True, - do_direction_reconstruction=True, - do_energy_reconstruction=True, - do_classification=True, - ): - """ - This method represents the file processing pipeline: data loading, - DL0->DL1 processing and the subsequent direction/energy/classification. - - Parameters - ---------- - file_name: str - A file name from which to read the events. - append_to_existing_events: bool - Defines whether the previously filled event list should be cleared - or if the new events should be appended to it. - do_direction_reconstruction: bool, optional - Sets whether the direction reconstruction should be performed. - Defaults to True. - do_energy_reconstruction: bool, optional - Sets whether the energy reconstruction should be performed. - Requires a trained energy random forest. - Defaults to False. NOT YES IMPLEMENTED - do_classification: bool, optional - Sets whether the event classification should be performed. - Requires a trained classifier random forest. - Defaults to False. NOT YES IMPLEMENTED - - Returns - ------- - - """ - - self._load_events( - file_name, append_to_existing_events=append_to_existing_events - ) - - self.reconstruction_results = [] - - # Running the parameter computation over the event list - for event in self.events: - # Run the event properties reconstruction - reco_results = ReconstructedContainer() - - if do_direction_reconstruction: - reco_results = self._update_event_direction(event, reco_results) - - if do_energy_reconstruction: - reco_results = self._update_event_energy(event, reco_results) - - if do_classification: - reco_results = self._update_event_classification(event, reco_results) - - self.reconstruction_results.append(reco_results) - - -class EnergyEstimator: - def __init__(self, hillas_params_to_use, hillas_reco_params_to_use, telescopes): - """ - Constructor. Stores the settings that will be used during the parameter - extraction. - - Parameters - ---------- - hillas_params_to_use: list - A list of Hillas parameter names that should be extracted. - hillas_reco_params_to_use: list - A list of the Hillas "stereo" parameters (after HillasReconstructor), - that should also be extracted. - telescopes: list - List of telescope identifiers. Only events triggering these will be processed. - """ - - self.hillas_params_to_use = hillas_params_to_use - self.hillas_reco_params_to_use = hillas_reco_params_to_use - self.telescopes = telescopes - - n_features_per_telescope = len(hillas_params_to_use) + len( - hillas_reco_params_to_use - ) - self.n_features = n_features_per_telescope * len(telescopes) - - self.train_features = dict() - self.train_targets = dict() - self.train_events = [] - - for tel_id in self.telescopes: - self.train_features[tel_id] = [] - self.train_targets[tel_id] = [] - - self.regressor = EnergyRegressor(cam_id_list=self.telescopes) - - @staticmethod - def _get_param_value(param): - """ - An internal method that extracts the parameter value from both - float and Quantity instances. - - Parameters - ---------- - param: float or astropy.unit.Quantity - A parameter whose value should be extracted. - - Returns - ------- - float: - An extracted value. For float the param itself is returned, - for Quantity the Quantity.value is taken. - - """ - - if isinstance(param, u.Quantity): - return param.value - else: - return param - - def add_train_event(self, event, reco_result): - """ - This method fills the event features to the feature list; - "target" values are added to their own "target" list. - - Parameters - ---------- - event: DataContainer - Container instances, holding DL1 event data. - reco_result: ReconstructedContainer - A container with the already reconstructed event properties. - Its 'shower' part must have the 'hillas' key. - - Returns - ------- - - """ - - event_record = dict() - - for tel_id in self.telescopes: - feature_entry = [] - - for param_name in self.hillas_params_to_use: - param = event.dl1.tel[tel_id].hillas_params[param_name] - - feature_entry.append(self._get_param_value(param)) - - for param_name in self.hillas_reco_params_to_use: - param = reco_result.shower["hillas"][param_name] - - feature_entry.append(self._get_param_value(param)) - - if np.all(np.isfinite(feature_entry)): - event_energy = event.mc.energy.to(u.TeV).value - self.train_features[tel_id].append(feature_entry) - self.train_targets[tel_id].append(np.log10(event_energy)) - - event_record[tel_id] = [feature_entry] - - self.train_events.append(event_record) - - def process_event(self, event, reco_result): - event_record = dict() - for tel_id in self.telescopes: - feature_entry = [] - - for param_name in self.hillas_params_to_use: - param = event.dl1.tel[tel_id].hillas_params[param_name] - - feature_entry.append(self._get_param_value(param)) - - for param_name in self.hillas_reco_params_to_use: - param = reco_result.shower["hillas"][param_name] - - feature_entry.append(self._get_param_value(param)) - - if np.all(np.isfinite(feature_entry)): - event_record[tel_id] = [feature_entry] - - predicted_energy_dict = self.regressor.predict_by_event([event_record]) - - reconstructed_energy = 10 ** predicted_energy_dict["mean"].value * u.TeV - std = predicted_energy_dict["std"].value - rel_uncert = 0.5 * (10**std - 1 / 10**std) - - energy_container = ReconstructedEnergyContainer() - energy_container.energy = reconstructed_energy - energy_container.energy_uncert = energy_container.energy * rel_uncert - energy_container.is_valid = True - energy_container.tel_ids = list(event_record.keys()) - - return energy_container - - def fit_model(self): - _ = self.regressor.fit(self.train_features, self.train_targets) - - def save_model(self): - pass - - def load_model(self): - pass - - -class EnergyEstimatorPandas: - """ - This class trains/applies the random forest regressor for event energy, - using as the input Hillas and stereo parameters, stored in a Pandas data frame. - It trains a separate regressor for each telescope. Further another "consolidating" - regressor is applied to combine the per-telescope predictions. - """ - - def __init__(self, feature_names, **rf_settings): - """ - Constructor. Gets basic settings. - - Parameters - ---------- - feature_names: list - Feature names (str type) to be used by the regressor. Must correspond to the - columns of the data frames that will be processed. - rf_settings: dict - The settings to be passed to the random forest regressor. - """ - - self.feature_names = feature_names - self.rf_settings = rf_settings - - self.telescope_regressors = dict() - self.consolidating_regressor = None - - def fit(self, shower_data): - """ - Fits the regressor model. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called self.feature_names. - - Returns - ------- - None - - """ - - self.train_per_telescope_rf(shower_data) - - # shower_data_with_energy = self.apply_per_telescope_rf(shower_data) - # - # features = shower_data_with_energy['energy_reco'] - # features = features.fillna(0).groupby(['obs_id', 'event_id']).sum() - # features = features.values - # - # target = shower_data_with_energy['mc_energy'].groupby(['obs_id', 'event_id']).mean().values - # - # self.consolidating_regressor = sklearn.ensemble.RandomForestRegressor(self.rf_settings) - # self.consolidating_regressor.fit(features, target) - - def predict(self, shower_data): - """ - Applies the trained regressor to the data. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called self.feature_names. - - Returns - ------- - pandas.DataFrame: - Updated data frame with the computed shower energies. - - """ - - shower_data_with_energy = self.apply_per_telescope_rf(shower_data) - - # Selecting and log-scaling the reconstructed energies - energy_reco = shower_data_with_energy["energy_reco"].apply(np.log10) - - # Getting the weights - weights = shower_data_with_energy["energy_reco_err"] - - # Weighted energies - weighted = energy_reco * weights - - # Grouping per-event data - weighted_group = weighted.groupby(level=["obs_id", "event_id"]) - weight_group = weights.groupby(level=["obs_id", "event_id"]) - - # Weighted mean log-energy - log_energy = weighted_group.sum() / weight_group.sum() - - shower_data_with_energy["energy_reco_mean"] = 10**log_energy - - return shower_data_with_energy - - def train_per_telescope_rf(self, shower_data): - """ - Trains the energy regressors for each of the available telescopes. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called self.feature_names. - - Returns - ------- - None - - """ - - idx = pd.IndexSlice - - tel_ids = shower_data.index.levels[2] - print("Selected tels:", tel_ids) - - self.telescope_regressors = dict() - - for tel_id in tel_ids: - input_data = shower_data.loc[ - idx[:, :, tel_id], self.feature_names + ["event_weight", "mc_energy"] - ] - input_data.dropna(inplace=True) - - x_train = input_data[list(self.feature_names)].values - y_train = np.log10(input_data["mc_energy"].values) - weight = input_data["event_weight"].values - - regressor = sklearn.ensemble.RandomForestRegressor(**self.rf_settings) - regressor.fit(x_train, y_train, sample_weight=weight) - - self.telescope_regressors[tel_id] = regressor - - def apply_per_telescope_rf(self, shower_data): - """ - Applies the regressors, trained per each telescope. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called self.feature_names. - - Returns - ------- - pandas.DataFrame: - Updated data frame with the computed shower energies. - - """ - - tel_ids = shower_data.index.levels[2] - print("Selected tels:", tel_ids) - - energy_reco = pd.DataFrame() - - for tel_id in tel_ids: - # Selecting data - this_telescope = shower_data.loc[ - (slice(None), slice(None), tel_id), self.feature_names - ] - this_telescope = this_telescope.dropna() - features = this_telescope.values - - # Getting the RF response - response = 10 ** self.telescope_regressors[tel_id].predict(features) - - per_tree_responses = [] - for tree in self.telescope_regressors[tel_id].estimators_: - per_tree_responses.append(10 ** tree.predict(features)) - response_err = np.std(per_tree_responses, axis=0) - - # Storing to a data frame - result = {"energy_reco": response, "energy_reco_err": response_err} - df = pd.DataFrame(data=result, index=this_telescope.index) - - energy_reco = energy_reco.append(df) - - energy_reco.sort_index(inplace=True) - - return energy_reco - - def save(self, file_name): - """ - Saves trained regressors to the specified joblib file. - - Parameters - ---------- - file_name: str - Output file name. - - Returns - ------- - None - - """ - - output = dict() - output["feature_names"] = self.feature_names - output["telescope_regressors"] = self.telescope_regressors - output["consolidating_regressor"] = self.consolidating_regressor - - joblib.dump(output, file_name) - - def load(self, file_name): - """ - Loads pre-trained regressors to the specified joblib file. - - Parameters - ---------- - file_name: str - Output file name. - - Returns - ------- - None - - """ - - data = joblib.load(file_name) - - self.feature_names = data["feature_names"] - self.telescope_regressors = data["telescope_regressors"] - self.consolidating_regressor = data["consolidating_regressor"] - - -class DirectionEstimatorPandas: - """ - This class trains/applies the random forest regressor for event energy, - using as the input Hillas and stereo parameters, stored in a Pandas data frame. - It trains a separate regressor for each telescope. Further another "consolidating" - regressor is applied to combine the per-telescope predictions. - """ - - def __init__(self, feature_names, tel_descriptions, **rf_settings): - """ - Constructor. Gets basic settings. - - Parameters - ---------- - feature_names: dict - Feature names (str type) to be used by the regressor. Must correspond to the - columns of the data frames that will be processed. Must be a dict with the keys - "disp" and "pos_angle", each containing the corresponding feature lists. - tel_descriptions: dict - A dictionary of the ctapipe.instrument.TelescopeDescription instances, covering - the telescopes used to record the data. - rf_settings: dict - A dictionary of the parameters to be transferred to regressors - (sklearn.ensemble.RandomForestRegressor). - """ - - self.feature_names = feature_names - self.telescope_descriptions = tel_descriptions - - self.rf_settings = rf_settings - - self.telescope_rfs = dict(disp={}, pos_angle_shift={}) - - def _get_disp_and_position_angle(self, shower_data): - """ - Computes the displacement and its position angle between the event - shower image and its original coordinates. - - Parameters - ---------- - shower_data: pd.DataFrame - A data frame with the event properties. - - Returns - ------- - pandas.DataFrame: - - "disp_true" column: - Computed displacement in radians. - - "pos_angle_true" column: - Computed displacement position angles in radians. - - """ - - tel_ids = shower_data.index.levels[2] - print("Selected tels:", tel_ids) - - result = pd.DataFrame() - - for tel_id in tel_ids: - optics = self.telescope_descriptions[tel_id].optics - camera = self.telescope_descriptions[tel_id].camera - - values = {"disp_true": {}, "pos_angle_shift_true": {}} - - this_telescope = shower_data.loc[ - (slice(None), slice(None), tel_id), shower_data.columns - ] - - tel_pointing = AltAz( - alt=this_telescope["alt_tel"].values * u.rad, - az=this_telescope["az_tel"].values * u.rad, - ) - - camera_frame = CameraFrame( - focal_length=optics.equivalent_focal_length, - rotation=camera.geometry.cam_rotation, - ) - - telescope_frame = TelescopeFrame(telescope_pointing=tel_pointing) - - camera_coord = SkyCoord( - this_telescope["x"].values * u.m, - this_telescope["y"].values * u.m, - frame=camera_frame, - ) - shower_coord_in_telescope = camera_coord.transform_to(telescope_frame) - - event_coord = SkyCoord( - this_telescope["mc_az"].values * u.rad, - this_telescope["mc_alt"].values * u.rad, - frame=AltAz(), - ) - event_coord_in_telescope = event_coord.transform_to(telescope_frame) - - disp = angular_separation( - shower_coord_in_telescope.altaz.az, - shower_coord_in_telescope.altaz.alt, - event_coord_in_telescope.altaz.az, - event_coord_in_telescope.altaz.alt, - ) - - pos_angle = shower_coord_in_telescope.position_angle(event_coord) - psi = this_telescope["psi"].values * u.deg - - toshift_by_pi = abs(pos_angle.value - 3.14159 - psi.to(u.rad).value) < 1 - toshift_by_pi = np.int16(toshift_by_pi) - - values["disp_true"] = disp.to(u.rad).value - values["pos_angle_shift_true"] = toshift_by_pi - - df = pd.DataFrame.from_dict(values) - df.index = this_telescope.index - - result = result.append(df) - - result.sort_index(inplace=True) - - return result - - def fit(self, shower_data): - """ - Fits the regressor model. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called - self.feature_names and self.target_name. - - Returns - ------- - None - - """ - - disp_pos_angle = self._get_disp_and_position_angle(shower_data) - shower_data = shower_data.join(disp_pos_angle) - - print('Training "disp" RFs...') - self.telescope_rfs["disp"] = self._train_per_telescope_rf(shower_data, "disp") - - print('Training "PA" RFs...') - self.telescope_rfs["pos_angle_shift"] = self._train_per_telescope_rf( - shower_data, "pos_angle_shift" - ) - - @staticmethod - def _set_flip(bit_mask, tel_id): - flip_code = 2**tel_id - - out_mask = np.bitwise_or(bit_mask, flip_code) - - return out_mask - - @staticmethod - def _get_flip(bit_mask, tel_id): - flip_code = 2**tel_id - - out_mask = np.bitwise_and(bit_mask, flip_code) - out_mask = np.int8(out_mask == flip_code) - - return out_mask - - @staticmethod - def _get_flip_combinations(df): - tel_ids = df.index.levels[2] - flip_combinations = list(itertools.product([0, 1], repeat=len(tel_ids))) - - return flip_combinations - - @staticmethod - def _get_telescope_combinations(df): - tel_ids = df.index.levels[2] - - telescope_combinations = set(itertools.product(tel_ids, repeat=2)) - self_repetitions = set(zip(tel_ids, tel_ids)) - telescope_combinations = telescope_combinations - self_repetitions - - return telescope_combinations - - def _get_directions_with_flips(self, reco_df): - tel_ids = reco_df.index.levels[2] - - direction_with_flips = pd.DataFrame() - - for tel_id in tel_ids: - optics = self.telescope_descriptions[tel_id].optics - camera = self.telescope_descriptions[tel_id].camera - - # Selecting events from this telescope - this_telescope = reco_df.loc[ - (slice(None), slice(None), tel_id), reco_df.columns - ] - - # Definining the coordinate systems - tel_pointing = AltAz( - alt=this_telescope["alt_tel"].values * u.rad, - az=this_telescope["az_tel"].values * u.rad, - ) - - camera_frame = CameraFrame( - focal_length=optics.equivalent_focal_length, - rotation=camera.geometry.cam_rotation, - ) - - telescope_frame = TelescopeFrame(telescope_pointing=tel_pointing) - - camera_coord = SkyCoord( - this_telescope["x"].values * u.m, - this_telescope["y"].values * u.m, - frame=camera_frame, - ) - - # Shower image coordinates on the sky - shower_coord_in_telescope = camera_coord.transform_to(telescope_frame) - - disp_reco = this_telescope["disp_reco"].values * u.rad - position_angle_reco = this_telescope["psi"].values * u.deg - - # Shower direction coordinates on the sky - for flip in [0, 1]: - event_coord_reco = shower_coord_in_telescope.directional_offset_by( - position_angle_reco + flip * u.rad * np.pi, disp_reco - ) - - # Saving to a data frame - alt = event_coord_reco.altaz.alt.to(u.rad).value - az = event_coord_reco.altaz.az.to(u.rad).value - - df = pd.DataFrame( - data={ - "alt_reco": alt, - "az_reco": az, - "disp_reco": this_telescope["disp_reco"].values, - "psi": this_telescope["psi"].values, - }, - index=this_telescope.index, - ) - - # Adding the "flip" index - df = pd.concat([df], keys=[flip] * len(df), names=["flip"]) - df = df.reset_index() - df = df.set_index(["obs_id", "event_id", "tel_id", "flip"]) - - direction_with_flips = direction_with_flips.append(df) - - return direction_with_flips - - def _get_total_pairwise_dist_with_flips(self, df_with_flips): - telescope_combinations = self._get_telescope_combinations(df_with_flips) - flip_combinations = self._get_flip_combinations(df_with_flips) - - tel_ids = df_with_flips.index.levels[2] - tel_ids = tel_ids.sort_values() - - dist2 = pd.DataFrame() - - for flip_comb in flip_combinations: - flip_code = 0 - for flip, tel_id in zip(flip_comb, tel_ids): - if flip: - flip_code = self._set_flip(flip_code, tel_id) - - for tel_comb in telescope_combinations: - tel_id1, tel_id2 = tel_comb - - tel_df1 = df_with_flips.xs(tel_id1, level="tel_id") - tel_df2 = df_with_flips.xs(tel_id2, level="tel_id") - flip1 = self._get_flip(flip_code, tel_id1) - flip2 = self._get_flip(flip_code, tel_id2) - - az1 = tel_df1.xs(flip1, level="flip")["az_reco"] - az2 = tel_df2.xs(flip2, level="flip")["az_reco"] - alt1 = tel_df1.xs(flip1, level="flip")["alt_reco"] - alt2 = tel_df2.xs(flip2, level="flip")["alt_reco"] - - dist = angular_separation(az1, alt1, az2, alt2) - dist.fillna(0, inplace=True) - - dist2_ = dist.apply(np.square) - - dist2_ = pd.DataFrame(data={"dist2": dist2_}) - dist2_["tel_comb"] = [tel_comb] * len(dist2_) - dist2_["flip_code"] = [flip_code] * len(dist2_) - dist2_.reset_index(inplace=True) - dist2_.set_index( - ["obs_id", "event_id", "flip_code", "tel_comb"], inplace=True - ) - - dist2 = dist2.append(dist2_) - - _group = dist2.groupby(level=["obs_id", "event_id", "flip_code"]) - total_dist2 = _group.sum() - - return total_dist2 - - def _get_flip_choice_from_pairwise_dist2(self, dist2_df, tel_ids): - _group = dist2_df.groupby(level=["obs_id", "event_id"]) - min_dist2 = _group.min() - min_dist2.head() - - min_dist2_ = min_dist2.reindex(dist2_df.index) - result = dist2_df.join(min_dist2_, rsuffix="_") - result = result.query("(dist2 == dist2_)") - - result.reset_index(inplace=True) - - flip_choice = pd.DataFrame() - for tel_id in tel_ids: - df_ = result[["obs_id", "event_id", "flip_code"]] - df_["tel_id"] = tel_id - df_["flip"] = self._get_flip(df_["flip_code"], tel_id) - df_.head() - - flip_choice = flip_choice.append(df_) - - flip_choice = flip_choice.drop("flip_code", axis=1, errors="raise") - flip_choice.set_index(["obs_id", "event_id", "tel_id", "flip"], inplace=True) - - return flip_choice - - def _get_average_direction(self, df): - # Stereo estimate - arithmetical mean - # First getting cartensian XYZ - _x = np.cos(df["az_reco"]) * np.cos(df["alt_reco"]) - _y = np.sin(df["az_reco"]) * np.cos(df["alt_reco"]) - _z = np.sin(df["alt_reco"]) - - # Getting the weights - weights = df["weight"] - - # Weighted XYZ - _x = _x * weights - _y = _y * weights - _z = _z * weights - - # Grouping per-event data - x_group = _x.groupby(level=["obs_id", "event_id"]) - y_group = _y.groupby(level=["obs_id", "event_id"]) - z_group = _z.groupby(level=["obs_id", "event_id"]) - - # Averaging: weighted mean - x_mean = x_group.sum() / weights.sum() - y_mean = y_group.sum() / weights.sum() - z_mean = z_group.sum() / weights.sum() - - # Computing the averaged spherical coordinates - coord_mean = SkyCoord( - representation_type="cartesian", - x=x_mean.values, - y=y_mean.values, - z=z_mean.values, - ) - - # Converting to a data frame - coord_mean_df = pd.DataFrame( - data={ - "az_reco_mean": coord_mean.spherical.lon.to(u.rad), - "alt_reco_mean": coord_mean.spherical.lat.to(u.rad), - }, - index=x_mean.index, - ) - - return coord_mean_df - - def predict(self, shower_data): - """ - Applies the trained regressor to the data. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called - self.feature_names and self.target_name. - - Returns - ------- - pandas.DataFrame: - Updated data frame with the computed shower energies. - - """ - - # Computing the estimates from individual telescopes - # This is the "mono" estimate - direction_reco = self._apply_per_telescope_rf(shower_data) - - shower_data_new = shower_data.join(direction_reco) - shower_data_new["multiplicity"] = ( - shower_data_new["intensity"].groupby(level=["obs_id", "event_id"]).count() - ) - - tel_ids = shower_data_new.index.levels[2] - - # Selecting "stereo" (multi-telescope) events only - multi_events = shower_data_new.query("multiplicity > 1") - - # Computing direction of all possible head-tail flips - direction_with_flips = self._get_directions_with_flips(multi_events) - # Computing the total distance between the per-telescope positions in all flip combinations - pairwise_dist = self._get_total_pairwise_dist_with_flips(direction_with_flips) - # Selecting the flip combination with the smallest distance - flip_choice = self._get_flip_choice_from_pairwise_dist2(pairwise_dist, tel_ids) - - # Filtering the reconstructed directions for the "min distance" flip combination - common_idx = flip_choice.index.intersection(direction_with_flips.index) - direction_with_selected_flips = direction_with_flips.loc[common_idx] - - # Assigning weights to average out per-telescope estimates - direction_with_selected_flips["weight"] = multi_events["disp_reco_err"] - # Getting the final direction prediction - multi_events_reco = self._get_average_direction(direction_with_selected_flips) - - # Merging the "multi-telescope" and "mono" estimates - direction_reco = direction_reco.join( - multi_events_reco.reindex(direction_reco.index) - ) - - return direction_reco - - def _train_per_telescope_rf(self, shower_data, kind): - """ - Trains the energy regressors for each of the available telescopes. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called - self.feature_names and self.target_name. - kind: str - RF kind. Can be "disp" (regressor is used) or 'pos_angle_shift' (classifier is used). - - Returns - ------- - dict: - Regressors/classifiers for each of the telescopes. Keys - telescope IDs. - - """ - - idx = pd.IndexSlice - - tel_ids = shower_data.index.levels[2] - print("Selected tels:", tel_ids) - - target_name = kind + "_true" - - telescope_rfs = dict() - - for tel_id in tel_ids: - print(f"Training telescope {tel_id}...") - - input_data = shower_data.loc[ - idx[:, :, tel_id], - self.feature_names[kind] + ["event_weight", target_name], - ] - input_data.dropna(inplace=True) - - x_train = input_data[self.feature_names[kind]].values - y_train = input_data[target_name].values - weight = input_data["event_weight"].values - - if kind == "pos_angle_shift": - # This is a binary classification problem - to shift or not to shift - rf = sklearn.ensemble.RandomForestClassifier(**self.rf_settings) - else: - # Regression is needed for "disp" - rf = sklearn.ensemble.RandomForestRegressor(**self.rf_settings) - - rf.fit(x_train, y_train, sample_weight=weight) - - telescope_rfs[tel_id] = rf - - return telescope_rfs - - def _apply_per_telescope_rf(self, shower_data): - """ - Applies the regressors, trained per each telescope. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns from self.feature_names. - - Returns - ------- - pandas.DataFrame: - Data frame with the computed shower directions. It also contains the "displacement" - ("disp_reco") of this direction w.r.t. the shower image center and its position angle - ("pos_angle_reco"). - - """ - - tel_ids = shower_data.index.levels[2] - print("Selected tels:", tel_ids) - - # ----------------------------------------- - # *** Computing disp and position angle *** - predictions = pd.DataFrame() - - prediction_list = [] - - for kind in ["disp", "pos_angle_shift"]: - pred_ = pd.DataFrame() - - for tel_id in tel_ids: - # Selecting data - this_telescope = shower_data.loc[ - (slice(None), slice(None), tel_id), self.feature_names[kind] - ] - this_telescope = this_telescope.dropna() - features = this_telescope.values - - # Getting the RF response - if kind == "pos_angle_shift": - response = self.telescope_rfs[kind][tel_id].predict_proba(features) - response = response[:, 1] - - # Storing to a data frame - name = f"{kind:s}_reco" - df = pd.DataFrame(data={name: response}, index=this_telescope.index) - else: - response = self.telescope_rfs[kind][tel_id].predict(features) - - per_tree_responses = [] - for tree in self.telescope_rfs[kind][tel_id].estimators_: - per_tree_responses.append(tree.predict(features)) - response_err = np.std(per_tree_responses, axis=0) - - # Storing to a data frame - val_name = f"{kind:s}_reco" - err_name = f"{kind:s}_reco_err" - data_ = {val_name: response, err_name: response_err} - df = pd.DataFrame(data=data_, index=this_telescope.index) - - if pred_.empty: - pred_ = predictions.append(df) - else: - pred_ = pred_.append(df) - - pred_.sort_index(inplace=True) - prediction_list.append(pred_) - - # Merging the resulting data frames - for pred_ in prediction_list: - if predictions.empty: - predictions = predictions.append(pred_) - else: - predictions = predictions.join(pred_) - - # Merging with input for an easier reference - shower_data = shower_data.join(predictions) - # ----------------------------------------- - - # ----------------------------- - # Computing the sky coordinates - direction_reco = pd.DataFrame() - - for tel_id in tel_ids: - optics = self.telescope_descriptions[tel_id].optics - camera = self.telescope_descriptions[tel_id].camera - - # Selecting events from this telescope - this_telescope = shower_data.loc[ - (slice(None), slice(None), tel_id), shower_data.columns - ] - - # Definining the coordinate systems - tel_pointing = AltAz( - alt=this_telescope["alt_tel"].values * u.rad, - az=this_telescope["az_tel"].values * u.rad, - ) - - camera_frame = CameraFrame( - focal_length=optics.equivalent_focal_length, - rotation=camera.geometry.cam_rotation, - ) - - telescope_frame = TelescopeFrame(telescope_pointing=tel_pointing) - - camera_coord = SkyCoord( - this_telescope["x"].values * u.m, - this_telescope["y"].values * u.m, - frame=camera_frame, - ) - - # Shower image coordinates on the sky - shower_coord_in_telescope = camera_coord.transform_to(telescope_frame) - - disp_reco = this_telescope["disp_reco"].values * u.rad - position_angle_reco = this_telescope["psi"].values * u.deg - # In some cases the position angle should be flipped by pi - shift = ( - u.rad * np.pi * np.round(this_telescope["pos_angle_shift_reco"].values) - ) - position_angle_reco += shift - - # Shower direction coordinates on the sky - event_coord_reco = shower_coord_in_telescope.directional_offset_by( - position_angle_reco, disp_reco - ) - - # Saving to a data frame - alt = event_coord_reco.altaz.alt.to(u.rad).value - az = event_coord_reco.altaz.az.to(u.rad).value - df = pd.DataFrame( - data={"alt_reco": alt, "az_reco": az}, index=this_telescope.index - ) - - direction_reco = direction_reco.append(df) - - direction_reco = direction_reco.join(predictions) - direction_reco.sort_index(inplace=True) - # ----------------------------- - - return direction_reco - - def save(self, file_name): - """ - Saves trained regressors to the specified joblib file. - - Parameters - ---------- - file_name: str - Output file name. - - Returns - ------- - None - - """ - - output = dict() - output["rf_settings"] = self.rf_settings - output["feature_names"] = self.feature_names - output["telescope_regressors"] = self.telescope_rfs - output["telescope_descriptions"] = self.telescope_descriptions - - joblib.dump(output, file_name) - - def load(self, file_name): - """ - Loads pre-trained regressors to the specified joblib file. - - Parameters - ---------- - file_name: str - Output file name. - - Returns - ------- - None - - """ - - data = joblib.load(file_name) - - self.rf_settings = data["rf_settings"] - self.feature_names = data["feature_names"] - self.telescope_rfs = data["telescope_regressors"] - self.telescope_descriptions = data["telescope_descriptions"] - - -class EventClassifierPandas: - """ - This class trains/applies the random forest classifier for event types (e.g. gamma / proton), - using as the input Hillas and stereo parameters, stored in a Pandas data frame. - It trains a separate classifier for each telescope. Further their outputs are combined - to give the final answer. - """ - - def __init__(self, feature_names, **rf_settings): - """ - Constructor. Gets basic settings. - - Parameters - ---------- - feature_names: list - Feature names (str type) to be used by the classifier. Must correspond to the - columns of the data frames that will be processed. - rf_settings: dict - The settings to be passed to the random forest classifier. - """ - - self.feature_names = feature_names - self.rf_settings = rf_settings - - self.telescope_classifiers = dict() - - def fit(self, shower_data): - """ - Fits the classification model. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called self.feature_names. - - Returns - ------- - None - - """ - - self.train_per_telescope_rf(shower_data) - - # shower_data_with_energy = self.apply_per_telescope_rf(shower_data) - # - # features = shower_data_with_energy['energy_reco'] - # features = features.fillna(0).groupby(['obs_id', 'event_id']).sum() - # features = features.values - # - # target = shower_data_with_energy['mc_energy'].groupby(['obs_id', 'event_id']).mean().values - # - # self.consolidating_regressor = sklearn.ensemble.RandomForestRegressor(self.rf_settings) - # self.consolidating_regressor.fit(features, target) - - def predict(self, shower_data): - """ - Applies the trained classifiers to the data. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called self.feature_names. - - Returns - ------- - pandas.DataFrame: - Updated data frame with the computed shower classes. - - """ - - shower_class = self.apply_per_telescope_rf(shower_data) - - # Grouping per-event data - class_group = shower_class.groupby(level=["obs_id", "event_id"]) - - # Averaging - class_mean = class_group.mean() - - for class_name in class_mean.columns: - shower_class[class_name + "_mean"] = class_mean[class_name] - - return shower_class - - def train_per_telescope_rf(self, shower_data): - """ - Trains the event classifiers for each of the available telescopes. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called self.feature_names. - - Returns - ------- - None - - """ - - idx = pd.IndexSlice - - tel_ids = shower_data.index.levels[2] - print("Selected tels:", tel_ids) - - self.telescope_classifiers = dict() - - for tel_id in tel_ids: - input_data = shower_data.loc[ - idx[:, :, tel_id], - self.feature_names + ["event_weight", "true_event_class"], - ] - input_data.dropna(inplace=True) - - x_train = input_data[list(self.feature_names)].values - y_train = input_data["true_event_class"].values - weight = input_data["event_weight"].values - - classifier = sklearn.ensemble.RandomForestClassifier(**self.rf_settings) - classifier.fit(x_train, y_train, sample_weight=weight) - - self.telescope_classifiers[tel_id] = classifier - - def apply_per_telescope_rf(self, shower_data): - """ - Applies the classifiers, trained per each telescope. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. Must contain columns called self.feature_names. - - Returns - ------- - pandas.DataFrame: - Updated data frame with the computed shower classes. - - """ - - tel_ids = shower_data.index.levels[2] - print("Selected tels:", tel_ids) - - event_class_reco = pd.DataFrame() - - for tel_id in tel_ids: - # Selecting data - this_telescope = shower_data.loc[ - (slice(None), slice(None), tel_id), self.feature_names - ] - this_telescope = this_telescope.dropna() - features = this_telescope.values - - # Getting the RF response - response = self.telescope_classifiers[tel_id].predict_proba(features) - - # Storing to a data frame - response_data = dict() - for class_i in range(response.shape[1]): - name = f"event_class_{class_i}" - response_data[name] = response[:, class_i] - df = pd.DataFrame(response_data, index=this_telescope.index) - - event_class_reco = event_class_reco.append(df) - - event_class_reco.sort_index(inplace=True) - - return event_class_reco - - def save(self, file_name): - """ - Saves trained regressors to the specified joblib file. - - Parameters - ---------- - file_name: str - Output file name. - - Returns - ------- - None - - """ - - output = dict() - output["feature_names"] = self.feature_names - output["telescope_classifiers"] = self.telescope_classifiers - - joblib.dump(output, file_name) - - def load(self, file_name): - """ - Loads pre-trained regressors to the specified joblib file. - - Parameters - ---------- - file_name: str - Output file name. - - Returns - ------- - None - - """ - - data = joblib.load(file_name) - - self.feature_names = data["feature_names"] - self.telescope_classifiers = data["telescope_classifiers"] - - -class DirectionStereoEstimatorPandas: - """ - This class trains/applies the random forest regressor for event direction, - using the event parameters, stored in a Pandas data frame. - It trains a separate regressor for each telescope. Further outputs of these - regressors are combined to deliver the average predictions. - """ - - def __init__(self, feature_names, tel_descriptions, **rf_settings): - """ - Constructor. Gets basic settings. - - Parameters - ---------- - feature_names: dict - Feature names (str type) to be used by the regressor. Must correspond to the - columns of the data frames that will be processed. Must be a dict with the keys - "disp" and "pos_angle", each containing the corresponding feature lists. - tel_descriptions: dict - A dictionary of the ctapipe.instrument.TelescopeDescription instances, covering - the telescopes used to record the data. - rf_settings: dict - A dictionary of the parameters to be transferred to regressors - (sklearn.ensemble.RandomForestRegressor). - """ - - self.feature_names = feature_names - self.telescope_descriptions = tel_descriptions - - self.rf_settings = rf_settings - - self.telescope_regressors = dict(disp={}, pos_angle={}) - - @staticmethod - def _get_tel_ids(shower_data): - """ - Retrieves the telescope IDs from the input data frame. - - Parameters - ---------- - shower_data: pd.DataFrame - A data frame with the event properties. - - Returns - ------- - set: - Telescope IDs. - - """ - - tel_ids = set() - - for column in shower_data: - parse = re.findall(r".*_(\d)", column) - if parse: - tel_ids.add(int(parse[0])) - - return tel_ids - - def _get_disp_and_position_angle(self, shower_data): - """ - Computes the displacement and its position angle between the event - shower image and its original coordinates. - - Parameters - ---------- - shower_data: pd.DataFrame - A data frame with the event properties. - - Returns - ------- - dict: - - disp: dict - Computed displacement in radians. Keys - telescope IDs. - - pos_angle: dict - Computed displacement position angles in radians. Keys - telescope IDs. - - """ - - result = {"disp": {}, "pos_angle": {}} - - for tel_id in self._get_tel_ids(shower_data): - optics = self.telescope_descriptions[tel_id].optics - camera = self.telescope_descriptions[tel_id].camera - - tel_pointing = AltAz( - alt=shower_data[f"alt_tel_{tel_id:d}"].values * u.rad, - az=shower_data[f"az_tel_{tel_id:d}"].values * u.rad, - ) - - camera_frame = CameraFrame( - focal_length=optics.equivalent_focal_length, - rotation=camera.geometry.cam_rotation, - ) - - telescope_frame = TelescopeFrame(telescope_pointing=tel_pointing) - - camera_coord = SkyCoord( - shower_data[f"x_{tel_id:d}"].values * u.m, - shower_data[f"y_{tel_id:d}"].values * u.m, - frame=camera_frame, - ) - shower_coord_in_telescope = camera_coord.transform_to(telescope_frame) - - event_coord = SkyCoord( - shower_data[f"mc_az_{tel_id:d}"].values * u.rad, - shower_data[f"mc_alt_{tel_id:d}"].values * u.rad, - frame=AltAz(), - ) - event_coord_in_telescope = event_coord.transform_to(telescope_frame) - - disp = angular_separation( - shower_coord_in_telescope.altaz.az, - shower_coord_in_telescope.altaz.alt, - event_coord_in_telescope.altaz.az, - event_coord_in_telescope.altaz.alt, - ) - - pos_angle = shower_coord_in_telescope.position_angle(event_coord) - - result["disp"][tel_id] = disp.to(u.rad).value - result["pos_angle"][tel_id] = pos_angle.value - - return result - - def _get_per_telescope_features(self, shower_data, feature_names): - """ - Extracts the shower features specific to each telescope of - the available ones. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. - - Returns - ------- - output: dict - Shower features for each telescope (keys - telescope IDs). - - """ - - tel_ids = self._get_tel_ids(shower_data) - - features = dict() - - for tel_id in tel_ids: - tel_feature_names = [name + f"_{tel_id:d}" for name in feature_names] - - this_telescope = shower_data[tel_feature_names] - this_telescope = this_telescope.dropna() - - features[tel_id] = this_telescope.values - - return features - - def _train_per_telescope_rf(self, shower_data, disp_pa, target_name): - """ - Trains the regressors for each of the available telescopes. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. - disp_pa: dict - A dictionary with the keys "disp" and "pos_angle", containing - the target disp and pos_angle values. - target_name: str - The name of the target variable. Must one of the keys of disp_pa. - - Returns - ------- - dict: - A dictionary of sklearn.ensemble.RandomForestRegressor instances. - Keys - telescope IDs. - - """ - - features = self._get_per_telescope_features( - shower_data, self.feature_names[target_name] - ) - targets = disp_pa[target_name] - - tel_ids = features.keys() - - telescope_regressors = dict() - - for tel_id in tel_ids: - print(f"Working on telescope {tel_id:d}") - x_train = features[tel_id] - y_train = targets[tel_id] - - regressor = sklearn.ensemble.RandomForestRegressor(**self.rf_settings) - regressor.fit(x_train, y_train) - - telescope_regressors[tel_id] = regressor - - return telescope_regressors - - def _apply_per_telescope_rf(self, shower_data): - """ - Applies the regressors, trained per each telescope. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. - - Returns - ------- - dict: - Dictionary with predictions for "disp" and "pos_angle". - Internal keys - telescope IDs. - - """ - - predictions = dict(disp={}, pos_angle={}) - - for kind in predictions: - features = self._get_per_telescope_features( - shower_data, self.feature_names[kind] - ) - tel_ids = features.keys() - - for tel_id in tel_ids: - predictions[kind][tel_id] = self.telescope_regressors[kind][ - tel_id - ].predict(features[tel_id]) - - return predictions - - def fit(self, shower_data): - """ - Fits the regressor model. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. - - Returns - ------- - None - - """ - - disp_pa = self._get_disp_and_position_angle(shower_data) - - print('Training "disp" RFs...') - self.telescope_regressors["disp"] = self._train_per_telescope_rf( - shower_data, disp_pa, "disp" - ) - print('Training "PA" RFs...') - self.telescope_regressors["pos_angle"] = self._train_per_telescope_rf( - shower_data, disp_pa, "pos_angle" - ) - - def predict(self, shower_data, output_suffix="reco"): - """ - Applies the trained regressor to the data. - - Parameters - ---------- - shower_data: pandas.DataFrame - Data frame with the shower parameters. - output_suffix: str, optional - Suffix to use with the data frame columns, that will host the regressors - predictions. Columns will have names "{param_name}_{tel_id}_{output_suffix}". - Defaults to 'reco'. - - Returns - ------- - pandas.DataFrame: - Data frame with the computed shower coordinates. - - """ - - disp_pa_reco = self._apply_per_telescope_rf(shower_data) - - coords_reco = dict() - event_coord_reco = dict() - - tel_ids = list(self._get_tel_ids(shower_data)) - - for tel_id in tel_ids: - optics = self.telescope_descriptions[tel_id].optics - camera = self.telescope_descriptions[tel_id].camera - - tel_pointing = AltAz( - alt=shower_data[f"alt_tel_{tel_id:d}"].values * u.rad, - az=shower_data[f"az_tel_{tel_id:d}"].values * u.rad, - ) - - camera_frame = CameraFrame( - focal_length=optics.equivalent_focal_length, - rotation=camera.geometry.cam_rotation, - ) - - telescope_frame = TelescopeFrame(telescope_pointing=tel_pointing) - - camera_coord = SkyCoord( - shower_data[f"x_{tel_id:d}"].values * u.m, - shower_data[f"y_{tel_id:d}"].values * u.m, - frame=camera_frame, - ) - shower_coord_in_telescope = camera_coord.transform_to(telescope_frame) - - disp_reco = disp_pa_reco["disp"][tel_id] * u.rad - position_angle_reco = disp_pa_reco["pos_angle"][tel_id] * u.rad - - event_coord_reco[tel_id] = shower_coord_in_telescope.directional_offset_by( - position_angle_reco, disp_reco - ) - - coords_reco[f"alt_{tel_id:d}_{output_suffix:s}"] = ( - event_coord_reco[tel_id].altaz.alt.to(u.rad).value - ) - coords_reco[f"az_{tel_id:d}_{output_suffix:s}"] = ( - event_coord_reco[tel_id].altaz.az.to(u.rad).value - ) - - # ------------------------------ - # *** Computing the midpoint *** - for tel_id in tel_ids: - event_coord_reco[tel_id] = SkyCoord( - coords_reco[f"az_{tel_id:d}_{output_suffix:s}"] * u.rad, - coords_reco[f"alt_{tel_id:d}_{output_suffix:s}"] * u.rad, - frame=AltAz(), - ) - - data = [event_coord_reco[tel_id].data for tel_id in tel_ids] - - midpoint_data = data[0] - for d in data[1:]: - midpoint_data += d - midpoint_data /= len(data) - - event_coord_reco[-1] = SkyCoord( - midpoint_data, representation_type="unitspherical", frame=AltAz() - ) - - coords_reco[f"alt_{output_suffix:s}"] = ( - event_coord_reco[-1].altaz.alt.to(u.rad).value - ) - coords_reco[f"az_{output_suffix:s}"] = ( - event_coord_reco[-1].altaz.az.to(u.rad).value - ) - # ------------------------------ - - coord_df = pd.DataFrame.from_dict(coords_reco) - coord_df.index = shower_data.index - - return coord_df - - def save(self, file_name): - """ - Saves trained regressors to the specified joblib file. - - Parameters - ---------- - file_name: str - Output file name. - - Returns - ------- - None - - """ - - output = dict() - output["rf_settings"] = self.rf_settings - output["feature_names"] = self.feature_names - output["telescope_descriptions"] = self.telescope_descriptions - output["telescope_regressors"] = self.telescope_regressors - - joblib.dump(output, file_name) - - def load(self, file_name): - """ - Loads pre-trained regressors to the specified joblib file. - - Parameters - ---------- - file_name: str - Output file name. - - Returns - ------- - None - - """ - - data = joblib.load(file_name) - - self.rf_settings = data["rf_settings"] - self.feature_names = data["feature_names"] - self.telescope_regressors = data["telescope_regressors"] - self.telescope_descriptions = data["telescope_descriptions"] diff --git a/magicctapipe/reco/global_utils.py b/magicctapipe/reco/global_utils.py deleted file mode 100644 index b269c57b..00000000 --- a/magicctapipe/reco/global_utils.py +++ /dev/null @@ -1,77 +0,0 @@ -import numpy as np -import pandas as pd - -__all__ = [ - "compute_event_weights", - "get_weights_mc_dir_class", - "check_train_test_intersections", -] - - -def compute_event_weights(): - """Compute event weights for train scripts - - Returns - ------- - tuple - - alt_edges - - intensity_edges - """ - sin_edges = np.linspace(0, 1, num=51) - alt_edges = np.lib.scimath.arcsin(sin_edges) - intensity_edges = np.logspace(1, 5, num=51) - return alt_edges, intensity_edges - - -def get_weights_mc_dir_class(mc_data, alt_edges, intensity_edges): - mc_hist, _, _ = np.histogram2d( - mc_data["tel_alt"], mc_data["intensity"], bins=[alt_edges, intensity_edges] - ) - - availability_hist = np.clip(mc_hist, 0, 1) - - # --- MC weights --- - mc_alt_bins = np.digitize(mc_data["tel_alt"], alt_edges) - 1 - mc_intensity_bins = np.digitize(mc_data["intensity"], intensity_edges) - 1 - - # Treating the out-of-range events - mc_alt_bins[mc_alt_bins == len(alt_edges) - 1] = len(alt_edges) - 2 - mc_intensity_bins[mc_intensity_bins == len(intensity_edges) - 1] = ( - len(intensity_edges) - 2 - ) - - mc_weights = 1 / mc_hist[mc_alt_bins, mc_intensity_bins] - mc_weights *= availability_hist[mc_alt_bins, mc_intensity_bins] - - # --- Storing to a data frame --- - mc_weight_df = pd.DataFrame(data={"event_weight": mc_weights}, index=mc_data.index) - - return mc_weight_df - - -def check_train_test_intersections(train, test): - """Function to check if there are same events in train and test samples - - Parameters - ---------- - train : pandas.DataFrame - train dataframe - test : pandas.DataFrame - test dataframe - - Returns - ------- - bool - test_passed - """ - test_passed = True - cols = list(train.columns) - df_merge = pd.merge(train, test, on=cols, how="inner") - if df_merge.empty: - print("PASS: test and train are different") - else: - print("********** WARNING **********") - print("Same entries for test and train") - test_passed = False - print(df_merge) - return test_passed diff --git a/magicctapipe/reco/stereo.py b/magicctapipe/reco/stereo.py deleted file mode 100644 index 6b8095fb..00000000 --- a/magicctapipe/reco/stereo.py +++ /dev/null @@ -1,179 +0,0 @@ -import astropy.units as u -import numpy as np -from ctapipe.core.container import Container, Field - -from ..utils import tel_ids_2_num - -__all__ = [ - "write_hillas", - "check_write_stereo", - "check_stereo", - "write_stereo", - "StereoInfoContainer", -] - - -def write_hillas(writer, event_info, hillas_p, leakage_p, timing_p, impact_p): - """Write - - Parameters - ---------- - writer : ctapipe.io.hdf5tableio.HDF5TableWriter - HDF5TableWriter object - event_info : magicctapipe.reco.stereo.StereoInfoContainer - event info, StereoInfoContainer object - hillas_p : ctapipe.containers.HillasParametersContainer - hillas parameters - leakage_p : ctapipe.containers.HillasParametersContainer - leakage parameters - timing_p : ctapipe.containers.TimingParametersContainer - timing parameters - impact_p : astropy.units.quantity.Quantity - impact parameters - """ - writer.write("hillas_params", (event_info, hillas_p, leakage_p, timing_p, impact_p)) - - -def check_write_stereo( - event, - tel_id, - stereo_id, - hillas_p, - hillas_reco, - subarray, - array_pointing, - telescope_pointings, - event_info, - writer, -): - """Check hillas parameters and write stero parameters - - Parameters - ---------- - event : ctapipe.containers.ArrayEventContainer - event - tel_id : int - telescope id - hillas_p : dict - computed hillas parameters - hillas_reco : ctapipe.reco.HillasReconstructor - HillasReconstructor - subarray : ctapipe.instrument.subarray.SubarrayDescription - source.subarray - array_pointing : astropy.coordinates.sky_coordinate.SkyCoord - array_pointing - telescope_pointings : dict - telescope_pointings - event_info : magicctapipe.reco.stereo.StereoInfoContainer - StereoInfoContainer object - writer : ctapipe.io.hdf5tableio.HDF5TableWriter - HDF5TableWriter object - - Returns - ------- - ctapipe.containers.ReconstructedContainer - stereo_params - """ - if len(hillas_p.keys()) > 1: - err_str = ( - "Event ID %d (obs ID: %d) has an ellipse with width = %s: " - "stereo parameters calculation skipped." - ) - stereo_params = None - if any([hillas_p[tel_id]["width"].value == 0 for tel_id in hillas_p]): - print(err_str % (event.index.event_id, event.index.obs_id, "0")) - elif any([np.isnan(hillas_p[tel_id]["width"].value) for tel_id in hillas_p]): - print(err_str % (event.index.event_id, event.index.obs_id, "NaN")) - else: - # Reconstruct stereo event. From ctapipe - stereo_params = hillas_reco.predict( - hillas_dict=hillas_p, - subarray=subarray, - array_pointing=array_pointing, - telescopes_pointings=telescope_pointings, - ) - event_info.tel_id = stereo_id - stereo_params.tel_ids = tel_ids_2_num(stereo_params.tel_ids) - # How to go back - # n = stereo_params.tel_ids - # np.where(np.array(list(bin(n)[2:][::-1]))=='1')[0] - writer.write("stereo_params", (event_info, stereo_params)) - return stereo_params - - -def check_stereo(event, tel_id, hillas_p): - """Check hillas parameters for stereo reconstruction - - Parameters - ---------- - event : ctapipe.containers.ArrayEventContainer - event - tel_id : int - telescope id - hillas_p : dict - computed hillas parameters - - Returns - ------- - bool - check_passed - """ - check_passed = False - if len(hillas_p.keys()) > 1: - err_str = ( - "Event ID %d (obs ID: %d) has an ellipse with width = %s: " - "stereo parameters calculation skipped." - ) - - if any([hillas_p[tel_id]["width"].value == 0 for tel_id in hillas_p]): - print(err_str % (event.index.event_id, event.index.obs_id, "0")) - elif any([np.isnan(hillas_p[tel_id]["width"].value) for tel_id in hillas_p]): - print(err_str % (event.index.event_id, event.index.obs_id, "NaN")) - else: - check_passed = True - return check_passed - - -def write_stereo( - stereo_params, - stereo_id, - event_info, - writer, -): - """Check hillas parameters and write stero parameters - - Parameters - ---------- - stereo_params : ctapipe.containers.ReconstructedContainer - stereo parameters - event_info : magicctapipe.reco.stereo.StereoInfoContainer - StereoInfoContainer object - writer : ctapipe.io.hdf5tableio.HDF5TableWriter - HDF5TableWriter object - - Returns - ------- - ctapipe.containers.ReconstructedContainer - stereo_params - """ - - event_info.tel_id = stereo_id - # How to go back - # n = stereo_params.tel_ids - # np.where(np.array(list(bin(n)[2:][::-1]))=='1')[0] - writer.write("stereo_params", (event_info, stereo_params)) - return stereo_params - - -class StereoInfoContainer(Container): - """InfoContainer for stereo""" - - obs_id = Field(-1, "Observation ID") - event_id = Field(-1, "Event ID") - tel_id = Field(-1, "Telescope ID") - true_energy = Field(-1, "MC event energy", unit=u.TeV) - true_alt = Field(-1, "MC event altitude", unit=u.rad) - true_az = Field(-1, "MC event azimuth", unit=u.rad) - tel_alt = Field(-1, "MC telescope altitude", unit=u.rad) - tel_az = Field(-1, "MC telescope azimuth", unit=u.rad) - num_islands = Field(-1, "Number of image islands") diff --git a/magicctapipe/scripts/comparison/access_pixel_info_file.py b/magicctapipe/scripts/comparison/access_pixel_info_file.py deleted file mode 100644 index 176cbcff..00000000 --- a/magicctapipe/scripts/comparison/access_pixel_info_file.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import h5py -import pandas as pd - -# ### Pixel differences - -pixel_diff_file = "Files_Alessio/Skript_Test/5086952_1962_M2_pixel_diff.h5" -store = h5py.File(pixel_diff_file) -print(store.keys()) -print(store["pixel_differences"].keys()) -store.close() -df = pd.read_hdf(pixel_diff_file, key="pixel_differences") -print(df) - -# ### All pixel information - -pixel_info_file = "Files_Alessio/Skript_Test/5086952_1962_M2_pixel_info.h5" -store = h5py.File(pixel_info_file) -print(store.keys()) -print(store["pixel_information"].keys()) -store.close() -df = pd.read_hdf(pixel_info_file, key="pixel_information") -print(df) diff --git a/magicctapipe/scripts/comparison/cleaning_analysis.py b/magicctapipe/scripts/comparison/cleaning_analysis.py deleted file mode 100644 index b5c71360..00000000 --- a/magicctapipe/scripts/comparison/cleaning_analysis.py +++ /dev/null @@ -1,128 +0,0 @@ -import argparse -import glob -import logging -import sys - -import numpy as np -import scipy -from ctapipe.image import hillas_parameters -from ctapipe.instrument import CameraGeometry -from ctapipe_io_magic import MAGICEventSource - -from magicctapipe.utils import MAGIC_Badpixels, MAGIC_Cleaning - -np.set_printoptions(threshold=sys.maxsize) - -logger = logging.getLogger(__name__) - - -def main(): - usage = "usage: %(prog)s [options] " - description = "Run gtselect and gtmktime on one or more FT1 files. " - "Note that gtmktime will be skipped if no FT2 file is provided." - parser = argparse.ArgumentParser(usage=usage, description=description) - parser.add_argument("-m", "--mc", dest="mc", action="store_true") - - args = parser.parse_args() - - tel_id = 1 - - # when comparing to MARS images - mars_camera = CameraGeometry.from_name("MAGICCamMars") - - # for consistent usage within ctapipe/with LST - image is mirrored on x=-y axis - # ars_camera = CameraGeometry.from_name("MAGICCam") - - neighbors = mars_camera.neighbor_matrix_sparse - outermost = [] - for pix in range(mars_camera.n_pixels): - if neighbors[pix].getnnz() < 5: - outermost.append(pix) - - cleaning_config = dict( - picture_thresh=6, - boundary_thresh=3.5, - max_time_off=4.5 * 1.64, - max_time_diff=1.5 * 1.64, - usetime=True, - usesum=True, - findhotpixels=True, - ) - - bad_pixels_config = dict( - pedestalLevel=400, pedestalLevelVariance=4.5, pedestalType="FromExtractorRndm" - ) - - magic_clean = MAGIC_Cleaning.magic_clean(mars_camera, cleaning_config) - badpixel_calculator = MAGIC_Badpixels.MAGICBadPixelsCalc(config=bad_pixels_config) - - all_events = [] - - if args.mc: - # MC FILES - mars_file_mask = "/remote/ceph/group/magic/MAGIC-LST/MCs/MAGIC/ST.03.07/za05to35/Train_sample/1.Calibrated/GA_M1*root" - mars_file_list = glob.glob( - mars_file_mask - ) # Here makes array which contains files matching the input condition (GA_M1*root). - file_list = list(filter(lambda name: "123" in name, mars_file_list)) - print("Number of files : %s" % len(file_list)) - else: - # DATA FILES - mars_file_mask = ( - "/Users/moritz/Documents/MAGIC-data/Crab/Data/Sorcerer/2020*.root" - ) - mars_file_list = glob.glob( - mars_file_mask - ) # Here makes array which contains files matching the input condition (GA_M1*root). - file_list = list(filter(lambda name: "0" in name, mars_file_list)) - print("Number of files : %s" % len(file_list)) - - for ix, file_name in enumerate(file_list): - magic_event_source = MAGICEventSource(input_url=file_name) - event_generator = magic_event_source._mono_event_generator("M1") - - for i, event in enumerate(event_generator): - if i > 1e30: - break - if i % 1000 == 0: - print("Event %s" % i) - - if args.mc: - all_events.append(event.mc.energy.value) - - badrmspixel_mask = badpixel_calculator.get_badrmspixel_mask(event) - deadpixel_mask = badpixel_calculator.get_deadpixel_mask(event) - unsuitable_mask = np.logical_or(badrmspixel_mask[0], deadpixel_mask[0]) - - event_image = event.dl1.tel[tel_id].image - event_pulse_time = event.dl1.tel[tel_id].peak_time - - clean_mask, event_image, event_pulse_time = magic_clean.clean_image( - event_image, event_pulse_time, unsuitable_mask=unsuitable_mask - ) - - event_image_cleaned = event_image.copy() - event_image_cleaned[~clean_mask] = 0 - - event_pulse_time_cleaned = event_pulse_time.copy() - event_pulse_time_cleaned[~clean_mask] = 0 - - if scipy.any(event_image_cleaned): - try: - hillas_params = hillas_parameters(mars_camera, event_image_cleaned) - except Exception: - continue - - logger.debug( - "EvtNum : %i, Size : %f, Core : %i, Used %i" - % ( - event.dl0.event_id, - hillas_params.intensity, - magic_clean.core_pix, - magic_clean.used_pix, - ) - ) - - -if __name__ == "__main__": - main() diff --git a/magicctapipe/scripts/comparison/cleaning_comp.py b/magicctapipe/scripts/comparison/cleaning_comp.py deleted file mode 100644 index 43bd3221..00000000 --- a/magicctapipe/scripts/comparison/cleaning_comp.py +++ /dev/null @@ -1,571 +0,0 @@ -import argparse -import copy -import csv -import glob -import sys - -import numpy as np -import pylab as plt -import uproot -from astropy import units as u -from ctapipe.image import hillas_parameters -from ctapipe.instrument import CameraGeometry -from ctapipe_io_magic import MAGICEventSource -from texttable import Texttable - -from magicctapipe.utils import MAGIC_Badpixels, MAGIC_Cleaning - -np.set_printoptions(threshold=sys.maxsize) - - -class bcolors: - HEADER = "\033[95m" - OKBLUE = "\033[94m" - OKGREEN = "\033[92m" - WARNING = "\033[93m" - FAIL = "\033[91m" - ENDC = "\033[0m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" - - -def file_reader(filename, max_row=1e30): - datafile = open(filename, "r") - datareader = csv.reader(datafile) - data = [] - for ix, row in enumerate(datareader): - if ix >= max_row: - break - if len(row) == 0: - data.append(row) - else: - data.append(list(map(int, row[0][:-1].split(" ")))) - - datafile.close() - return data - - -def main(): - usage = "usage: %(prog)s [options] " - description = "Run gtselect and gtmktime on one or more FT1 files. " - "Note that gtmktime will be skipped if no FT2 file is provided." - parser = argparse.ArgumentParser(usage=usage, description=description) - parser.add_argument("-m", "--mc", dest="mc", action="store_true") - - parser.add_argument("--base", dest="base", action="store_true") - parser.add_argument("--nopix", dest="nopix", action="store_true") - - parser.add_argument("--test", dest="test", action="store_true") - # parser.add_argument('--testnopix', dest='testnopix', action='store_true') - - args = parser.parse_args() - - tel_id = 1 - max_events = 1e40 - - mars_camera = CameraGeometry.from_name("MAGICCamMars") - - neighbors = mars_camera.neighbor_matrix_sparse - outermost = [] - for pix in range(mars_camera.n_pixels): - if neighbors[pix].getnnz() < 5: - outermost.append(pix) - - cleaning_config = dict( - picture_thresh=6, - boundary_thresh=3.5, - max_time_off=4.5 * 1.64, - max_time_diff=1.5 * 1.64, - usetime=True, - usesum=True, - findhotpixels=True, - test=False, - ) - - if args.base: - base_dir = "/home/iwsatlas1/damgreen/CTA/MAGIC_Cleaning/star/Star_Base/" - - star_file = "%s/20191127_M1_05086952.001_I_CrabNebula-W0.40+035.root" % base_dir - event_images_stardump = np.genfromtxt( - "%s/event_images.txt" % base_dir, dtype=float, max_rows=max_events - ) - event_times_stardump = np.genfromtxt( - "%s/event_times.txt" % base_dir, dtype=float, max_rows=max_events - ) - event_unsuitables_stardump = np.genfromtxt( - "%s/event_unsuitable.txt" % base_dir, dtype=bool, max_rows=max_events - ) - event_unmappeds_stardump = np.genfromtxt( - "%s/event_unmapped.txt" % base_dir, dtype=bool, max_rows=max_events - ) - - event_cleanstep1 = file_reader( - "%s/event_cleanstep1.txt" % base_dir, max_row=max_events - ) - event_cleanstep2 = file_reader( - "%s/event_cleanstep2.txt" % base_dir, max_row=max_events - ) - event_cleanstep3 = file_reader( - "%s/event_cleanstep3.txt" % base_dir, max_row=max_events - ) - - cleaning_config["findhotpixels"] = True - - if args.nopix: - base_dir = "/home/iwsatlas1/damgreen/CTA/MAGIC_Cleaning/star/Star_No_HotPix/" - - star_file = "%s/20191127_M1_05086952.001_I_CrabNebula-W0.40+035.root" % base_dir - event_images_stardump = np.genfromtxt( - "%s/event_images.txt" % base_dir, dtype=float, max_rows=max_events - ) - event_times_stardump = np.genfromtxt( - "%s/event_times.txt" % base_dir, dtype=float, max_rows=max_events - ) - event_unsuitables_stardump = np.genfromtxt( - "%s/event_unsuitable.txt" % base_dir, dtype=bool, max_rows=max_events - ) - event_unmappeds_stardump = np.genfromtxt( - "%s/event_unmapped.txt" % base_dir, dtype=bool, max_rows=max_events - ) - - event_cleanstep1 = file_reader( - "%s/event_cleanstep1.txt" % base_dir, max_row=max_events - ) - event_cleanstep2 = file_reader( - "%s/event_cleanstep2.txt" % base_dir, max_row=max_events - ) - event_cleanstep3 = file_reader( - "%s/event_cleanstep3.txt" % base_dir, max_row=max_events - ) - - cleaning_config["findhotpixels"] = False - - bad_pixels_config = dict( - pedestalLevel=400, pedestalLevelVariance=4.5, pedestalType="FromExtractorRndm" - ) - - # Grab the processed star information - hillas_array_list = [ - "MHillas.fSize", - "MHillas.fWidth", - "MHillas.fLength", - "MRawEvtHeader.fStereoEvtNumber", - "MNewImagePar.fNumCorePixels", - "MNewImagePar.fNumUsedPixels", - ] - - input_file = uproot.open(star_file) - events = input_file["Events"].arrays(hillas_array_list) - - evtnum_arr = events[b"MRawEvtHeader.fStereoEvtNumber"] - size_arr = events[b"MHillas.fSize"] - width_arr = events[b"MHillas.fWidth"] - length_arr = events[b"MHillas.fLength"] - core_arr = events[b"MNewImagePar.fNumCorePixels"] - used_arr = events[b"MNewImagePar.fNumUsedPixels"] - - star_size_arr = [] - ctapipe_size_arr = [] - - star_length_arr = [] - ctapipe_length_arr = [] - - star_width_arr = [] - ctapipe_width_arr = [] - - magic_clean = MAGIC_Cleaning.magic_clean(mars_camera, cleaning_config) - badpixel_calculator = MAGIC_Badpixels.MAGICBadPixelsCalc(config=bad_pixels_config) - - size_cut = 0 - AberrationCorrection = 1.0713 - - if args.mc: - # MC FILES - mars_file_mask = "/remote/ceph/group/magic/MAGIC-LST/MCs/MAGIC/ST.03.07/za05to35/Train_sample/1.Calibrated/GA_M1*root" - mars_file_list = glob.glob( - mars_file_mask - ) # Here makes array which contains files matching the input condition (GA_M1*root). - file_list = list(filter(lambda name: "123" in name, mars_file_list)) - print("Number of files : %s" % len(file_list)) - else: - # DATA FILES - mars_file_mask = ( - "/home/iwsatlas1/damgreen/CTA/MAGIC_Cleaning/datafile/Calib/*.root" - ) - mars_file_list = glob.glob( - mars_file_mask - ) # Here makes array which contains files matching the input condition (GA_M1*root). - file_list = list(filter(lambda name: "0" in name, mars_file_list)) - print("Number of files : %s" % len(file_list)) - - tolerance = 0.5 - - offset = -1 - for ix, file_name in enumerate(file_list): - magic_event_source = MAGICEventSource(input_url=file_name) - event_generator = magic_event_source._pedestal_event_generator(telescope="M1") - - for i, event in enumerate(event_generator): - # This is used to make sure the star and ctapipe events are lined up - if i == 0: - continue - - if i == 9399: - offset -= 1 - continue - - if i > max_events: - break - - # Basic ctapipe image processing and cleaning - - badrmspixel_mask = badpixel_calculator.get_badrmspixel_mask(event) - deadpixel_mask = badpixel_calculator.get_deadpixel_mask(event) - unsuitable_mask = np.logical_or(badrmspixel_mask[0], deadpixel_mask[0]) - - event_image = event.dl1.tel[tel_id].image - event_pulse_time = event.dl1.tel[tel_id].pulse_time - - clean_mask, event_image, event_pulse_time = magic_clean.clean_image( - event_image, event_pulse_time, unsuitable_mask=unsuitable_mask - ) - - event_image_cleaned = event_image.copy() - event_image_cleaned[~clean_mask] = 0 - - try: - hillas_params = hillas_parameters(mars_camera, event_image_cleaned) - except Exception: - continue - - # All of the comparisons between star and ctapipe - event_image_stardump = event_images_stardump[i + offset, :1039] - event_time_stardump = event_times_stardump[i + offset, :1039] - - image_diff = ( - 100 * np.abs(event_image_stardump - event_image) / event_image_stardump - ) - image_diff_selection = image_diff > tolerance - - time_diff = ( - 100 - * np.abs(event_time_stardump - event_pulse_time) - / event_time_stardump - ) - time_diff_selection = time_diff > tolerance - - ctapipe_unsuitable_mask = copy.copy(unsuitable_mask) - star_unsuitable_mask = copy.copy( - event_unsuitables_stardump[i + offset, :1039] - ) - ctapipe_unsuitable_pixels = np.where(ctapipe_unsuitable_mask)[0] - star_unsuitable_pixels = np.where(star_unsuitable_mask)[0] - - ctapipe_unmapped_mask = copy.copy(magic_clean.unmapped_mask) - star_unmapped_mask = copy.copy(event_unmappeds_stardump[i + offset, :1039]) - ctapipe_unmapped_pixels = np.where(ctapipe_unmapped_mask)[0] - star_unmapped_pixels = np.where(star_unmapped_mask)[0] - - diff_unsuitable = np.setdiff1d( - np.union1d(ctapipe_unsuitable_pixels, star_unsuitable_pixels), - np.intersect1d(ctapipe_unsuitable_pixels, star_unsuitable_pixels), - ) - diff_unmapped = np.setdiff1d( - np.union1d(ctapipe_unmapped_pixels, star_unmapped_pixels), - np.intersect1d(ctapipe_unmapped_pixels, star_unmapped_pixels), - ) - - ctapipe_length = hillas_params.length.to("mm") / AberrationCorrection - ctapipe_width = hillas_params.width.to("mm") / AberrationCorrection - ctapipe_size = hillas_params.intensity - - star_idx = np.where(evtnum_arr == event.dl0.event_id)[0] - if len(star_idx) != 1: - continue - - star_length = length_arr[star_idx[0]] * u.mm - star_width = width_arr[star_idx[0]] * u.mm - star_size = size_arr[star_idx[0]] - - if star_length.value < 1e-1 or star_width.value < 1e-1: - continue - - length_ratio = np.fabs((ctapipe_length - star_length) / star_length) * 100 - width_ratio = np.fabs((ctapipe_width - star_width) / star_width) * 100 - size_ratio = np.fabs((ctapipe_size - star_size) / star_size) * 100 - - ctapipe_step1 = np.where(magic_clean.mask_step1)[0] - star_step1 = np.asarray(event_cleanstep1[i + offset]) - - ctapipe_step2 = np.where(magic_clean.mask_step2)[0] - star_step2 = np.asarray(event_cleanstep2[i + offset]) - - ctapipe_step3 = np.where(magic_clean.mask_step3)[0] - star_step3 = np.asarray(event_cleanstep3[i + offset]) - - diff_step1 = np.setdiff1d( - np.union1d(ctapipe_step1, star_step1), - np.intersect1d(ctapipe_step1, star_step1), - ) - diff_step2 = np.setdiff1d( - np.union1d(ctapipe_step2, star_step2), - np.intersect1d(ctapipe_step2, star_step2), - ) - diff_step3 = np.setdiff1d( - np.union1d(ctapipe_step3, star_step3), - np.intersect1d(ctapipe_step3, star_step3), - ) - - if ( - size_ratio > tolerance - or length_ratio > tolerance - or width_ratio > tolerance - ): - # if len(diff_step3) > 0 or len(diff_unsuitable) > 0 or len(diff_unmapped) > 0:# or np.sum(image_diff_selection) > 0 or np.sum(time_diff_selection) > 0: - print("*" * 50) - print( - bcolors.OKGREEN - + "Count : %i, Stereo Event Number : %s" % (i, event.dl0.event_id) - + bcolors.ENDC - ) - - if len(diff_unsuitable) > 0: - print("+" * 50) - print(bcolors.FAIL + "Unsuitable Diff : " + bcolors.ENDC) - print("diff:\t", diff_unsuitable.tolist()) - - if len(diff_unmapped) > 0: - print("+" * 50) - print(bcolors.FAIL + "Unmapped Diff : " + bcolors.ENDC) - print("diff:\t", diff_unmapped.tolist()) - - if np.sum(image_diff_selection) > 0: - print("+" * 50) - print(bcolors.FAIL + "Image Diff : " + bcolors.ENDC) - print( - "image ctapipe : ", event_image[image_diff_selection].tolist() - ) - print( - "image dump : ", - event_image_stardump[image_diff_selection].tolist(), - ) - print( - "image diff (%) : ", image_diff[image_diff_selection].tolist() - ) - - if np.sum(time_diff_selection) > 0: - print("+" * 50) - print(bcolors.FAIL + "Time Diff : " + bcolors.ENDC) - print( - "time ctapipe : ", - event_pulse_time[time_diff_selection].tolist(), - ) - print( - "time dump : ", - event_time_stardump[time_diff_selection].tolist(), - ) - print("time diff : ", time_diff[time_diff_selection].tolist()) - - string = "" - if len(diff_step1) > 0: - string += "STEP1 DIFF Size %s, " % len(diff_step1) - if len(diff_step2) > 0: - string += "STEP2 DIFF Size %s, " % len(diff_step2) - if len(diff_step3) > 0: - string += "STEP3 DIFF Size %s" % len(diff_step3) - - if len(string) > 0: - print(bcolors.FAIL + string + bcolors.ENDC) - - string = "" - if size_ratio > tolerance: - string += "SIZE RATIO : %.2f, " % size_ratio - if length_ratio > tolerance: - string += " LENGTH RATIO : %.2f, " % length_ratio - if width_ratio > tolerance: - string += " WIDTH RATIO : %.2f" % width_ratio - - if len(string) > 0: - print(bcolors.WARNING + string + bcolors.ENDC) - t = Texttable() - t.add_rows( - [ - ["var", "ctapipe", "star", "ratio"], - [ - "Size", - hillas_params.intensity, - size_arr[star_idx[0]], - size_ratio, - ], - [ - "Width", - ctapipe_width.value, - star_width.value, - width_ratio, - ], - [ - "Length", - ctapipe_length.value, - star_length.value, - length_ratio, - ], - [ - "CorePix", - magic_clean.core_pix, - core_arr[star_idx[0]], - np.nan, - ], - [ - "UsedPix", - magic_clean.used_pix, - used_arr[star_idx[0]], - np.nan, - ], - ] - ) - print(bcolors.BOLD + bcolors.OKBLUE + t.draw() + bcolors.ENDC) - - # print("cta mask :\t" , np.where(clean_mask)[0].tolist()) - # print("star mask :\t" , star_step3.tolist()) - - if len(diff_step1) > 0: - print(bcolors.FAIL + "Clean Step 1 Diff : " + bcolors.ENDC, end="") - # print("star:\t", star_step1.tolist()) - # print("cta:\t", ctapipe_step1.tolist()) - if len(star_step1) > len(ctapipe_step1): - print("Pixel not found in ctapipe : \t", diff_step1.tolist()) - else: - print("Extra pixel found in ctapipe : \t", diff_step1.tolist()) - - if len(diff_step2) > 0: - print(bcolors.FAIL + "Clean Step 2 Diff : " + bcolors.ENDC, end="") - # print("star:\t", star_step2.tolist()) - # print("cta:\t", ctapipe_step2.tolist()) - if len(star_step2) > len(ctapipe_step2): - print("Pixel not found in ctapipe : \t", diff_step2.tolist()) - else: - print("Extra pixel found in ctapipe : \t", diff_step2.tolist()) - - if len(diff_step3) > 0: - print(bcolors.FAIL + "Clean Step 3 Diff : " + bcolors.ENDC, end="") - # print("star:\t", star_step3.tolist()) - # print("cta:\t", ctapipe_step3.tolist()) - if len(star_step3) > len(ctapipe_step3): - print("Pixel not found in ctapipe : \t", diff_step3.tolist()) - else: - print("Extra pixel found in ctapipe : \t", diff_step3.tolist()) - - if star_size > size_cut: - star_size_arr.append(star_size) - ctapipe_size_arr.append(ctapipe_size) - - star_length_arr.append(star_length.value) - ctapipe_length_arr.append(ctapipe_length.value) - - star_width_arr.append(star_width.value) - ctapipe_width_arr.append(ctapipe_width.value) - - star_size_arr = np.asarray(star_size_arr) - star_length_arr = np.asarray(star_length_arr) - star_width_arr = np.asarray(star_width_arr) - - ctapipe_size_arr = np.asarray(ctapipe_size_arr) - ctapipe_length_arr = np.asarray(ctapipe_length_arr) - ctapipe_width_arr = np.asarray(ctapipe_width_arr) - - plt.clf() - fig, axs = plt.subplots(2, 3, figsize=(15, 10)) - bins = 101 - - axs[0, 0].scatter(np.log10(star_size_arr), np.log10(ctapipe_size_arr)) - axs[0, 0].set_title("Size") - axs[0, 0].set_xlim([1.0, 5.5]) - axs[0, 0].set_ylim([1.0, 5.5]) - axs[0, 0].set_xlabel("Mars - log10(size)") - axs[0, 0].set_ylabel("ctapipe - log10(size)") - - axs[0, 1].scatter(np.log10(star_length_arr), np.log10(ctapipe_length_arr)) - axs[0, 1].set_title("Length") - axs[0, 1].set_xlim([0.0, 3.0]) - axs[0, 1].set_ylim([0.0, 3.0]) - axs[0, 1].set_xlabel("Mars - log10(length)") - axs[0, 1].set_ylabel("ctapipe - log10(length)") - - axs[0, 2].scatter(np.log10(star_width_arr), np.log10(ctapipe_width_arr)) - axs[0, 2].set_title("Width") - axs[0, 2].set_xlim([0.0, 3.0]) - axs[0, 2].set_ylim([0.0, 3.0]) - axs[0, 2].set_xlabel("Mars - log10(width)") - axs[0, 2].set_ylabel("ctapipe - log10(width)") - - size_ratio = (ctapipe_size_arr - star_size_arr) / star_size_arr - axs[1, 0].hist(size_ratio, bins=bins) - plt.text( - 0.15, - 0.9, - "mean : %.4f" % np.nanmean(size_ratio), - horizontalalignment="left", - verticalalignment="center", - transform=axs[1, 0].transAxes, - ) - plt.text( - 0.15, - 0.85, - "rms : %.4f" % np.nanstd(size_ratio), - horizontalalignment="left", - verticalalignment="center", - transform=axs[1, 0].transAxes, - ) - axs[1, 0].set_xlabel("Size Ratio [(ctapipe - star)/star]") - axs[1, 0].set_yscale("log", nonposy="clip") - - length_ratio = (ctapipe_length_arr - star_length_arr) / star_length_arr - axs[1, 1].hist(length_ratio, bins=bins) - plt.text( - 0.15, - 0.9, - "mean : %.4f" % np.nanmean(length_ratio), - horizontalalignment="left", - verticalalignment="center", - transform=axs[1, 1].transAxes, - ) - plt.text( - 0.15, - 0.85, - "rms : %.4f" % np.nanstd(length_ratio), - horizontalalignment="left", - verticalalignment="center", - transform=axs[1, 1].transAxes, - ) - axs[1, 1].set_xlabel("Length Ratio [(ctapipe - star)/star]") - axs[1, 1].set_yscale("log", nonposy="clip") - - width_ratio = (ctapipe_width_arr - star_width_arr) / star_width_arr - axs[1, 2].hist(width_ratio, bins=bins) - plt.text( - 0.15, - 0.9, - "mean : %.4f" % np.nanmean(width_ratio), - horizontalalignment="left", - verticalalignment="center", - transform=axs[1, 2].transAxes, - ) - plt.text( - 0.15, - 0.85, - "rms : %.4f" % np.nanstd(width_ratio), - horizontalalignment="left", - verticalalignment="center", - transform=axs[1, 2].transAxes, - ) - axs[1, 2].set_xlabel("Width Ratio [(ctapipe - star)/star]") - axs[1, 2].set_yscale("log", nonposy="clip") - - if args.base: - plt.savefig("comp_M1_base.pdf") - elif args.nopix: - plt.savefig("comp_M1_base_nopix.pdf") - - -if __name__ == "__main__": - main() diff --git a/magicctapipe/scripts/comparison/get_images_and_pixel_info.py b/magicctapipe/scripts/comparison/get_images_and_pixel_info.py deleted file mode 100644 index bb749e99..00000000 --- a/magicctapipe/scripts/comparison/get_images_and_pixel_info.py +++ /dev/null @@ -1,537 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import argparse - -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import uproot -import yaml -from ctapipe.image import hillas_parameters -from ctapipe.image.morphology import number_of_islands -from ctapipe.image.timing import timing_parameters -from ctapipe.instrument import CameraDescription, CameraGeometry -from ctapipe.visualization import CameraDisplay -from ctapipe_io_magic import MAGICEventSource -from mars_images_to_hdf5 import read_images -from utils import MAGIC_Badpixels, MAGIC_Cleaning - -# ---------------------------------------------------------------- - -# create a list of the events you want to be compared -ids_to_compare = [] - -# add argparser arguments -arg_parser = argparse.ArgumentParser( - description=""" -This tool compares the images of events processed by MARS and the magic-cta-pipeline. -The output is a png file with the camera images and a hdf5 file that contains the pixel information. -""" -) - -arg_parser.add_argument( - "--config", - default="config.yaml", - help="Configuration file to steer the code execution.", -) -arg_parser.add_argument( - "--useall", - help="Compare images for all events in the input file", - action="store_true", -) -arg_parser.add_argument( - "--use_ids_config", help="Compare images for certain events", action="store_true" -) - -parsed_args = arg_parser.parse_args() - -file_not_found_message = """ -Error: can not load the configuration file {:s}. -Please check that the file exists and is of YAML or JSON format. -Exiting. -""" - - -# load get_images_config file -try: - config = yaml.safe_load(open(parsed_args.config, "r")) -except IOError: - print(file_not_found_message.format(parsed_args.config)) - exit() -# check for the important keys -if "input_files" not in config: - print( - 'Error: the configuration file is missing the "input_files" section. Exiting.' - ) - exit() - -if "output_files" not in config: - print( - 'Error: the configuration file is missing the "output_files" section. Exiting.' - ) - exit() - - -# --------------------------------------------------------- -# Compare images for all events in the given file -# --------------------------------------------------------- -if parsed_args.useall: - mcp_input = config["input_files"]["magic-cta-pipe"] - tel_id = config["information"]["tel_id"] - # does only work for stereo files - mcp_file = uproot.open(config["input_files"]["magic-cta-pipe"]) - # print(mcp_file.keys()) - if "Events" not in mcp_file.keys(): - print("Missing key: Events. Unacceptable file format. Exiting.") - exit() - else: - if ( - "MRawEvtHeader./MRawEvtHeader.fStereoEvtNumber" - not in mcp_file["Events"].keys() - ): - print( - f"Missing key: MRawEvtHeader_{tel_id}.fStereoEvtNumber. Unacceptable file format. Exiting." - ) - exit() - else: - df_mcp_data = pd.DataFrame() - df_mcp_data["event_id"] = ( - mcp_file["Events"]["MRawEvtHeader./MRawEvtHeader.fStereoEvtNumber"] - .array() - .to_numpy() - ) - ids_to_compare = df_mcp_data["event_id"].tolist() - # the keys depend on the file, there may be a case where they have to be changed -# --------------------------------------------------------- -# Compare images for only certain events -# --------------------------------------------------------- - -elif parsed_args.use_ids_config: - ids_to_compare = config["event_list"] - -else: - print("No comparing argument specified. Exiting.") - exit() - - -# print the list -if len(ids_to_compare) >= 21: - print("EVENTS:", ids_to_compare[:10], "...", ids_to_compare[-10:]) -else: - print("EVENTS:", ids_to_compare) - -print(len(ids_to_compare)) - - -# ------------------------------------------------------------ -# define camera geometry -def new_camera_geometry(camera_geom): - """Scale given camera geometry of a given (constant) factor - - Parameters - ---------- - camera : CameraGeometry - Camera geometry - factor : float - Scale factor - - Returns - ------- - CameraGeometry - Scaled camera geometry - """ - - return CameraGeometry( - camera_name="MAGICCam", - pix_id=camera_geom.pix_id, - pix_x=-1.0 * camera_geom.pix_y, - pix_y=-1.0 * camera_geom.pix_x, - pix_area=camera_geom.guess_pixel_area( - camera_geom.pix_x, camera_geom.pix_y, camera_geom.pix_type - ), - pix_type=camera_geom.pix_type, - pix_rotation=camera_geom.pix_rotation, - cam_rotation=camera_geom.cam_rotation, - ) - - -# get information from the config file -telescope_id = config["information"]["tel_id"] -run_num = config["information"]["run_number"] -out_path = config["output_files"]["file_path"] - - -# compare events one at a time -ids = [] -for id_to_compare in ids_to_compare: - ids.append(id_to_compare) - print("EVENT ID:", ids) - for id_event in ids: - # ------------------ - # MARS - # ----------------- - - mars_input = config["input_files"]["mars"] - - image = [] # pixel charges - events = [] # event ids - telescope = [] # telescope id - runnum = [] # run number - - for image_container in read_images(mars_input): - events.append(image_container.event_id) - telescope.append(image_container.tel_id) - runnum.append(image_container.obs_id) - image.append(image_container.image) - - image = image.to_numpy() - stereo_id = events.to_numpy() - - MAGICCAM = CameraDescription.from_name("MAGICCam") - GEOM = MAGICCAM.geometry - geometry_mars = new_camera_geometry(GEOM) - - # uncomment this list to compare different events - # ids = [1962] - - for id in range(len(image)): - if stereo_id[id] not in ids: - continue - event_image = np.array(image[id][:1039]) - clean_mask = event_image != 0 - print("Event ID:", stereo_id[id]) - print(event_image[clean_mask]) - # print(event_image) - event_image_mars = event_image.copy() - - # compare number of islands - num_islands_mars, island_labels_mars = number_of_islands( - geometry_mars, (np.array(event_image[:1039]) > 0) - ) - # print(num_islands_mars) - - # ----------------------------- - # magic-cta-pipe - # ----------------------------- - - source = MAGICEventSource(input_url=config["input_files"]["mcp_source"]) - - cleaning_config = dict( - picture_thresh=6, - boundary_thresh=3.5, - max_time_off=4.5 * 1.64, - max_time_diff=1.5 * 1.64, - usetime=True, - usesum=True, - findhotpixels=False, - ) - - bad_pixels_config = dict( - pedestalLevel=400, - pedestalLevelVariance=4.5, - pedestalType="FromExtractorRndm", - ) - - # uncomment this list to compare different events - # ids = [1962] - - for event in source._mono_event_generator(telescope=f"{telescope_id}"): - if event.index.event_id not in ids: - continue - print(f"Event ID: {event.index.event_id}") - tel = source.subarray.tel - for tel_id in [telescope_id]: - print("Telescope ID:", tel_id) - r0tel = event.r0.tel[tel_id] - geometry_old = tel[tel_id].camera.geometry - geometry_mcp = new_camera_geometry(geometry_old) - magic_clean = MAGIC_Cleaning.magic_clean(geometry_mcp, cleaning_config) - badpixel_calculator = MAGIC_Badpixels.MAGICBadPixelsCalc( - config=bad_pixels_config - ) - event_image = event.dl1.tel[tel_id].image - event_pulse_time = event.dl1.tel[tel_id].peak_time - - """ - find different catgories of pixels - ------ - bad: pixel that is unsuitable, defined subrunwise - hot: too high charge due to star or not working correctly - badrms: the pedestal rms value is over a certain threshhold; this is bad and hot pixels combined - get badrms pixels via badrmspixel_mask - get badrms pixel indices via bad_pixel_indices - dead: this pixels is not working at all - get dead pixels via deadpixel_mask - get dead pixel indices via dead_pixel_indices - unsuitable: union of badrms and dead pixels - get unsuitable pixels via unsuitable_mask - --------- - """ - - badrmspixel_indices = [[None], [None]] - - badrmspixel_mask = badpixel_calculator.get_badrmspixel_mask(event) - - deadpixel_mask = badpixel_calculator.get_deadpixel_mask(event) - unsuitable_mask = np.logical_or( - badrmspixel_mask[tel_id - 1], deadpixel_mask[tel_id - 1] - ) - - bad_pixel_indices = [ - i for i, x in enumerate(badrmspixel_mask[telescope_id - 1]) if x - ] - # print("badpixel_indices:", bad_pixel_indices) - dead_pixel_indices = [ - i for i, x in enumerate(deadpixel_mask[telescope_id - 1]) if x - ] - # print("deadpixel_indices:", dead_pixel_indices) - bad_not_dead_pixels = [] - for i in bad_pixel_indices: - if i not in dead_pixel_indices: - bad_not_dead_pixels.append(i) - # print("bad but not dead pixels:", bad_not_dead_pixels) - # print(len(bad_pixel_indices)) - - clean_mask, event_image, event_pulse_time = magic_clean.clean_image( - event_image, event_pulse_time, unsuitable_mask=unsuitable_mask - ) - - event_image_cleaned = event_image.copy() - event_image_cleaned[~clean_mask] = 0 - # print(event_image_cleaned[clean_mask]) - - event_pulse_time_cleaned = event_pulse_time.copy() - event_pulse_time_cleaned[~clean_mask] = 0 - - if np.any(event_image_cleaned): - hillas_params = hillas_parameters(geometry_mcp, event_image_cleaned) - image_mask = event_image_cleaned > 0 - timing_params = timing_parameters( - geometry_mcp, - event_image_cleaned, - event_pulse_time_cleaned, - hillas_params, - image_mask, - ) - print(timing_params.slope) - else: - print("Cleaning failed.") - continue - - # compare number of islands - num_islands_mcp, island_labels_mcp = number_of_islands( - geometry_mcp, (np.array(event_image_cleaned[:1039]) > 0) - ) - # print(num_islands_mcp) - - # ------------------ - # original calibrated data - # --------------------- - # chooses the path for either M1 or M2 - # specified by the telescope id in the config file - data_path = config["input_files"][f"original_data_M{telescope_id}"] - with uproot.open(data_path) as input_data: - event_ids = input_data["Events"]["MRawEvtHeader.fStereoEvtNumber"].array() - images = input_data["Events"]["MCerPhotEvt.fPixels.fPhot"].array() - - event_index_array = np.where(event_ids == id_event) - event_index = event_index_array[0][0] - # print(event_index) - calibrated_data_images = images[event_index][:1039] - # print(np.array(images[event_index][:1039]).shape) - # print(np.array(images).shape) - # print(np.array(calibrated_data_images).shape) - - # print(len(data_images)) - - # ------------------------------------------- - # get maximum pixel charge for the colorbar - # ------------------------------------------- - - mcp_max = np.amax(event_image_cleaned[clean_mask]) - mars_max = np.amax(event_image[clean_mask]) - if mcp_max >= mars_max: - vmax = mcp_max - else: - vmax = mars_max - - vmin = 0 - - # --------------------------------------------- - # find pixel differences and write h5 output file - # ------------------------------------------------- - - pix_mars = np.array([event_image_mars]) - pix_mcp = np.array([event_image_cleaned]) - pix_diff = abs(pix_mars - pix_mcp) - # if you want negative values instead of the absolutes - # pix_diff = pix_mars - pix_mcp - # pix_diff = abs(pix_mars - pix_mcp[0]) - pix_image = pix_diff[0] - clean_mask_pixels = pix_image != 0 - - # differences between outputs from MARS and mcp and input calibrated data - pix_diff_mars = abs(pix_mars[0] - calibrated_data_images) - pix_diff_mcp = abs(pix_mcp[0] - calibrated_data_images) - print(pix_diff_mars) - # print(calibrated_data_images) - - data_value = [] - - for i in range(1039): - data_value.append(calibrated_data_images[i]) - - df_pixel = pd.DataFrame() - data = np.array([event_image_mars, event_image_cleaned]) - data_real = np.transpose(data) - - df_pixel = pd.DataFrame(data_real, columns=["MARS charge", "mcp charge"]) - df_pixel["calibrated data"] = np.transpose( - data_value - ) # (calibrated_data_images[:1039]) - df_pixel["difference MARS mcp"] = np.transpose(pix_diff) - df_pix_diff = df_pixel.loc[df_pixel["difference MARS mcp"] > 0] - pix_diff_ids = df_pix_diff.index.tolist() - print(df_pix_diff) - - # saving the output - if config["save_only_when_differences"] == True: - # the file only gets saved if there are differences between the images - if any(pix_image) == True: - print("Differences found. Saving files!") - df_pixel.to_hdf( - f"{out_path}{run_num}_{id_event}_M{telescope_id}_pixel_info.h5", - "/pixel_information", - "w", - ) - df_pix_diff.to_hdf( - f"{out_path}{run_num}_{id_event}_M{telescope_id}_pixel_diff.h5", - "/pixel_differences", - "w", - ) - print(pix_diff_ids) - - else: - print("No differences found. No files will be saved!") - continue - - elif config["save_only_when_differences"] == False: - # the file gets saved in any case - df_pixel.to_hdf( - f"{out_path}{run_num}_{id_event}_M{telescope_id}_pixel_info.h5", - "/pixel_information", - "w", - ) - df_pix_diff.to_hdf( - f"{out_path}{run_num}_{id_event}_M{telescope_id}_pixel_diff.h5", - "/pixel_differences", - "w", - ) - print(pix_diff_ids) - else: - print("No criteria for saving data specified. Exiting.") - exit() - - # -------------------------------- - # creating output image - # ---------------------------- - fig = plt.figure(figsize=(20, 10)) - grid_shape = (2, 3) - - # original data - plt.subplot2grid(grid_shape, (0, 0)) - - geom = CameraGeometry.from_name("MAGICCamMars") - disp = CameraDisplay(geom, calibrated_data_images) - # pixels whose original value is negative - negative_mask = calibrated_data_images < 0 - disp.highlight_pixels(negative_mask, color="white", alpha=1, linewidth=1) - disp.add_colorbar(label="pixel charge") - disp.set_limits_minmax(vmin, vmax) - - plt.title("original data") - - # mars_data - plt.subplot2grid(grid_shape, (0, 1)) - - norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax) - disp_mars = CameraDisplay(geometry_mars, event_image_mars[:1039]) - # disp_mars.highlight_pixels(clean_mask, color='white', alpha=0.5, linewidth=1) - disp_mars.set_limits_minmax(vmin, vmax) - disp_mars.add_colorbar(label="pixel charge") - disp_mars.highlight_pixels(clean_mask_pixels, color="red", alpha=1, linewidth=1) - - plt.title("MARS data") - - # mcp_data - plt.subplot2grid(grid_shape, (0, 2)) - - disp_mcp = CameraDisplay(geometry_mcp, image=event_image_cleaned) - # disp_mcp.highlight_pixels(clean_mask, color='white', alpha=0.5, linewidth=1) - disp_mcp.add_colorbar(label="pixel charge") - disp_mcp.set_limits_minmax(vmin, vmax) - disp_mcp.highlight_pixels(clean_mask_pixels, color="red", alpha=1, linewidth=1) - - plt.title("magic_cta_pipe data") - - # differences between MARS and mcp - plt.subplot2grid(grid_shape, (1, 0)) - - disp = CameraDisplay(geom, pix_image[:1039]) - disp.add_colorbar(label="pixel charge") - disp.highlight_pixels(clean_mask_pixels, color="red", alpha=1, linewidth=1) - - # alternative image: used pixels after the cleaning and differences mask - """for id in range(len(image)): - if stereo_id[id] not in ids: - continue - event_image = np.array(image[id][:1039]) - clean_mask = event_image != 0 - #print(event_image[clean_mask]) - norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax) - disp = CameraDisplay(geometry_mars, image=event_image) - disp.highlight_pixels(clean_mask_pixels, color='red', alpha=0.5, linewidth=2) - disp.set_limits_minmax(vmin, 1)""" - - plt.title("differences MARS-mcp") - - # differences between MARS and the calibrated data - plt.subplot2grid(grid_shape, (1, 1)) - - pix_diff_mars_copy = np.array(pix_diff_mars).copy() - pix_diff_mars_copy[np.array(event_image_mars[:1039]) == 0] = 0 - - disp = CameraDisplay(geom, pix_diff_mars_copy > 0) - # disp.highlight_pixels(clean_mask_pixels, color='red', alpha=1, linewidth=1) - disp.highlight_pixels( - np.array(event_image_mars[:1039]) != 0, color="white", alpha=1, linewidth=3 - ) - # disp.set_limits_minmax(vmin, vmax) - # disp.add_colorbar(label='pixel charge') - - plt.title("differences MARS-original data") - - # mcp-orig-data-diff - plt.subplot2grid(grid_shape, (1, 2)) - - pix_diff_mcp_copy = np.array(pix_diff_mcp).copy() - pix_diff_mcp_copy[np.array(event_image_cleaned) == 0] = 0 - - disp = CameraDisplay(geom, pix_diff_mcp_copy > 0) - disp.highlight_pixels( - np.array(event_image_cleaned) != 0, color="white", alpha=1, linewidth=3 - ) - # disp.set_limits_minmax(vmin, vmax) - # disp.add_colorbar(label='pixel charge') - plt.title("differences mcp-original data") - - fig.suptitle( - f"Comparison_MARS_magic-cta-pipe: Event ID {id_event}, {run_num}, M{tel_id}", - fontsize=16, - ) - fig.savefig(f"{out_path}/data-comparison-{run_num}_{id_event}_M{tel_id}.png") - # fig.savefig(f"{out_path}/data-comparison-{run_num}_{id_event}_M{tel_id}.pdf") - - ids.clear() diff --git a/magicctapipe/scripts/comparison/params_labels.txt b/magicctapipe/scripts/comparison/params_labels.txt deleted file mode 100644 index d43ddc13..00000000 --- a/magicctapipe/scripts/comparison/params_labels.txt +++ /dev/null @@ -1,17 +0,0 @@ -MARS Hillas Params -Length_M1" : "MHillas_1.fLength", "length_M2" : "MHillas_2.fLength", "width_M1" : "MHillas_1.fWidth", "width_M2" : "MHillas_2.fWidth", "size_M1" : "MHillas_1.fSize", "size_M2" : "MHillas_2.fSize", "slope_M1" : "MHillasTimeFit_1.fP1Grad", "slope_M2" : "MHillasTimeFit_2.fP1Grad", "delta_M1" : "MHillas_1.fDelta", "delta_M2" : "MHillas_2.fDelta", "cogx_M1" : "MHillas_1.fMeanX", "cogx_M2" : "MHillas_2.fMeanX", "cogy_M1" : "MHillas_1.fMeanY", "cogy_M2" : "MHillas_2.fMeanY", "hmax" : "MStereoPar.fMaxHeight", "corex" : "MStereoPar.fCoreX", "corey" : "MStereoPar.fCoreY", "az" : "MStereoPar.fDirectionAz", "zd" : "MStereoPar.fDirectionZd" - -labels and units - "length_M1" : "Length M1 [mm]", "length_M2" : "Length M2 [mm]", "width_M1" : "Width M1 [mm]", "width_M2" : "Width M2 [mm]", "size_M1" : "Size M1 [phe]", #phe means photoelectrons "size_M2" : "Size M2 [phe]", "delta_M1" : "Delta M1 [rad]", "delta_M2" : "Delta M2 [rad]", "slope_M1" : "Time Gradient M1", "slope_M2" : "Time Gradient M2", "cogx_M1" : "Cog_x M1 [mm]", "cogx_M2" : "Cog_x M2 [mm]", "cogy_M1" : "Cog_y M1 [mm]", "cogy_M2" : "Cog_y M2 [mm]", "hmax" : "Max height [cm]", "corex" : "CoreX [cm]", "corey" : "CoreY [cm]", "az" : "Azimuth [deg]", "zd" : "Zenith [deg] - -MCP Hillas Params -obs_id', 'event_id', 'tel_id', 'mjd', 'tel_alt', 'tel_az', 'n_islands', - 'intensity', 'x', 'y', 'r', 'phi', 'length', 'width', 'psi', 'skewness', - 'kurtosis', 'pixels_width_1', 'pixels_width_2', 'intensity_width_1', - 'intensity_width_2', 'slope', 'slope_err', 'intercept', 'intercept_err', - 'deviation', 'impact' - -scaling factors -cta_params : { "length_M1" : "length", "length_M2" : "length", "width_M1" : "width", "width_M2" : "width", "size_M1" : "intensity", "size_M2" : "intensity", "delta_M1" : "psi", "delta_M2" : "psi", "slope_M1" : "slope", "slope_M2" : "slope", "cogx_M1" : "y", "cogx_M2" : "y", "cogy_M1" : "x", "cogy_M2" : "x", "hmax" : "h_max", "corex" : "core_x", "corey" : "core_y", "az" : "az", "zd" : "alt" - -scale_factors = { "length_M1" : 1000., "length_M2" : 1000., "width_M1" : 1000., "width_M2" : 1000., "cogx_M1" : -1000., "cogx_M2" : -1000., "cogy_M1" : -1000., "cogy_M2" : -1000., "slope_M1" : 0.001, "slope_M2" : 0.001, "corex" : 100., "corey" : 100., "hmax" : 100. diff --git a/magicctapipe/scripts/mars/convert_superstar_to_dl1.py b/magicctapipe/scripts/mars/convert_superstar_to_dl1.py deleted file mode 100644 index 3c5052eb..00000000 --- a/magicctapipe/scripts/mars/convert_superstar_to_dl1.py +++ /dev/null @@ -1,612 +0,0 @@ -#!/usr/bin/env python - -import argparse -import re -import sys -from pathlib import Path - -import astropy.units as u -import numpy as np -import uproot -from astropy.table import QTable, vstack -from ctapipe.containers import ( - CameraHillasParametersContainer, - CameraTimingParametersContainer, - LeakageContainer, - ReconstructedGeometryContainer, -) -from ctapipe.coordinates import CameraFrame -from ctapipe.core.container import Container, Field -from ctapipe.instrument import ( - CameraDescription, - CameraReadout, - OpticsDescription, - SubarrayDescription, - TelescopeDescription, -) -from ctapipe.io import HDF5TableWriter - -magic_optics = OpticsDescription( - "MAGIC", - num_mirrors=1, - equivalent_focal_length=u.Quantity(16.97, u.m), - mirror_area=u.Quantity(239.0, u.m**2), - num_mirror_tiles=964, -) - -magic_tel_positions = {1: [31.80, -28.10, 0.00] * u.m, 2: [-31.80, 28.10, 0.00] * u.m} - -magic_cam = CameraDescription.from_name("MAGICCam") - -pulse_shape_lo_gain = np.array([0.0, 1.0, 2.0, 1.0, 0.0]) -pulse_shape_hi_gain = np.array([1.0, 2.0, 3.0, 2.0, 1.0]) -pulse_shape = np.vstack((pulse_shape_lo_gain, pulse_shape_hi_gain)) -camera_readout = CameraReadout( - camera_name="MAGICCam", - sampling_rate=u.Quantity(1.64, u.GHz), - reference_pulse_shape=pulse_shape, - reference_pulse_sample_width=u.Quantity(0.5, u.ns), -) - -magic_cam.readout = camera_readout - -magic_cam.geometry.frame = CameraFrame( - focal_length=magic_optics.equivalent_focal_length -) - -magic_tel_description = TelescopeDescription( - name="MAGIC", tel_type="MAGIC", optics=magic_optics, camera=magic_cam -) - -magic_tel_descriptions = {1: magic_tel_description, 2: magic_tel_description} - -subarray = SubarrayDescription( - name="MAGIC", - tel_positions=magic_tel_positions, - tel_descriptions=magic_tel_descriptions, -) - -magic_bdec = u.Quantity(-7.0, u.deg).to(u.rad) - -columns_mc = { - "event_id": ("MRawEvtHeader_1.fStereoEvtNumber", dict(dtype=int)), - "true_energy": ("MMcEvt_1.fEnergy", dict(unit=u.GeV)), - "true_pointing_zd": ("MMcEvt_1.fTelescopeTheta", dict(unit=u.rad)), - "true_pointing_az": ("MMcEvt_1.fTelescopePhi", dict(unit=u.rad)), - "true_zd": ("MMcEvt_1.fTheta", dict(unit=u.rad)), - "true_az": ("MMcEvt_1.fPhi", dict(unit=u.rad)), - "true_core_x": ("MMcEvt_1.fCoreX", dict(unit=u.cm)), - "true_core_y": ("MMcEvt_1.fCoreY", dict(unit=u.cm)), - "particle_id": ("MMcEvt_1.fPartId", dict()), - "length1": ("MHillas_1.fLength", dict(unit=u.mm)), - "length2": ("MHillas_2.fLength", dict(unit=u.mm)), - "psi1": ("MHillas_1.fDelta", dict(unit=u.rad)), - "psi2": ("MHillas_2.fDelta", dict(unit=u.rad)), - "width1": ("MHillas_1.fWidth", dict(unit=u.mm)), - "width2": ("MHillas_2.fWidth", dict(unit=u.mm)), - "size1": ("MHillas_1.fSize", dict()), - "size2": ("MHillas_2.fSize", dict()), - "leakage1_1": ("MNewImagePar_1.fLeakage1", dict()), - "leakage2_1": ("MNewImagePar_1.fLeakage2", dict()), - "leakage1_2": ("MNewImagePar_2.fLeakage1", dict()), - "leakage2_2": ("MNewImagePar_2.fLeakage2", dict()), - "x1": ("MHillas_1.fMeanX", dict(unit=u.mm)), - "y1": ("MHillas_1.fMeanY", dict(unit=u.mm)), - "x2": ("MHillas_2.fMeanX", dict(unit=u.mm)), - "y2": ("MHillas_2.fMeanY", dict(unit=u.mm)), - "slope1": ("MHillasTimeFit_1.fP1Grad", dict(unit=1 / u.mm)), - "slope2": ("MHillasTimeFit_2.fP1Grad", dict(unit=1 / u.mm)), - "reco_zd": ("MStereoPar.fDirectionZd", dict(unit=u.deg)), - "reco_az": ("MStereoPar.fDirectionAz", dict(unit=u.deg)), - "reco_source_x": ("MStereoPar.fDirectionX", dict(unit=u.deg)), - "reco_source_y": ("MStereoPar.fDirectionY", dict(unit=u.deg)), - "reco_core_x": ("MStereoPar.fCoreX", dict(unit=u.cm)), - "reco_core_y": ("MStereoPar.fCoreY", dict(unit=u.cm)), - "hmax": ("MStereoPar.fMaxHeight", dict(unit=u.cm)), - "impact1": ("MStereoPar.fM1Impact", dict(unit=u.cm)), - "impact2": ("MStereoPar.fM2Impact", dict(unit=u.cm)), - "theta2": ("MStereoPar.fTheta2", dict(unit=u.deg**2)), - "is_valid": ("MStereoPar.fValid", dict(dtype=int)), -} - -columns_mc_orig = { - "true_energy_1": ("MMcEvtBasic_1.fEnergy", dict(unit=u.GeV)), - "true_pointing_az_1": ("MMcEvtBasic_1.fTelescopePhi", dict(unit=u.rad)), - "true_pointing_zd_1": ("MMcEvtBasic_1.fTelescopeTheta", dict(unit=u.rad)), - "cam_x_1": ("MSrcPosCam_1.fX", dict(unit=u.mm)), - "cam_y_1": ("MSrcPosCam_1.fY", dict(unit=u.mm)), - "true_energy_2": ("MMcEvtBasic_2.fEnergy", dict(unit=u.GeV)), - "true_pointing_az_2": ("MMcEvtBasic_2.fTelescopePhi", dict(unit=u.rad)), - "true_pointing_zd_2": ("MMcEvtBasic_2.fTelescopeTheta", dict(unit=u.rad)), - "cam_x_2": ("MSrcPosCam_2.fX", dict(unit=u.mm)), - "cam_y_2": ("MSrcPosCam_2.fY", dict(unit=u.mm)), -} - -columns_data = { - "event_id": ("MRawEvtHeader_1.fStereoEvtNumber", dict(dtype=int)), - "pointing_zen": ("MPointingPos_1.fZd", dict(unit=u.deg)), - "pointing_az": ("MPointingPos_1.fAz", dict(unit=u.deg)), - "mjd1": ("MTime_1.fMjd", dict(dtype=float)), - "mjd2": ("MTime_2.fMjd", dict(dtype=float)), - "millisec1": ("MTime_1.fTime.fMilliSec", dict(dtype=float)), - "millisec2": ("MTime_2.fTime.fMilliSec", dict(dtype=float)), - "nanosec1": ("MTime_1.fNanoSec", dict(dtype=float)), - "nanosec2": ("MTime_2.fNanoSec", dict(dtype=float)), - "length1": ("MHillas_1.fLength", dict(unit=u.mm)), - "length2": ("MHillas_2.fLength", dict(unit=u.mm)), - "psi1": ("MHillas_1.fDelta", dict(unit=u.rad)), - "psi2": ("MHillas_2.fDelta", dict(unit=u.rad)), - "width1": ("MHillas_1.fWidth", dict(unit=u.mm)), - "width2": ("MHillas_2.fWidth", dict(unit=u.mm)), - "size1": ("MHillas_1.fSize", dict()), - "size2": ("MHillas_2.fSize", dict()), - "leakage1_1": ("MNewImagePar_1.fLeakage1", dict()), - "leakage2_1": ("MNewImagePar_1.fLeakage2", dict()), - "leakage1_2": ("MNewImagePar_2.fLeakage1", dict()), - "leakage2_2": ("MNewImagePar_2.fLeakage2", dict()), - "x1": ("MHillas_1.fMeanX", dict(unit=u.mm)), - "y1": ("MHillas_1.fMeanY", dict(unit=u.mm)), - "x2": ("MHillas_2.fMeanX", dict(unit=u.mm)), - "y2": ("MHillas_2.fMeanY", dict(unit=u.mm)), - "slope1": ("MHillasTimeFit_1.fP1Grad", dict(unit=1 / u.mm)), - "slope2": ("MHillasTimeFit_2.fP1Grad", dict(unit=1 / u.mm)), - "reco_zd": ("MStereoPar.fDirectionZd", dict(unit=u.deg)), - "reco_az": ("MStereoPar.fDirectionAz", dict(unit=u.deg)), - "reco_core_x": ("MStereoPar.fCoreX", dict(unit=u.cm)), - "reco_core_y": ("MStereoPar.fCoreY", dict(unit=u.cm)), - "hmax": ("MStereoPar.fMaxHeight", dict(unit=u.cm)), - "impact1": ("MStereoPar.fM1Impact", dict(unit=u.cm)), - "impact2": ("MStereoPar.fM2Impact", dict(unit=u.cm)), - "theta2": ("MStereoPar.fTheta2", dict(unit=u.deg**2)), - "is_valid": ("MStereoPar.fValid", dict(dtype=int)), -} - - -class ExtraInfo(Container): - obs_id = Field(-1, "Observation ID") - event_id = Field(-1, "Event ID") - - -class InfoOriginalMC(ExtraInfo): - tel_id = Field(-1, "Telescope ID") - true_energy = Field(-1 * u.TeV, "MC event energy", unit=u.TeV) - tel_alt = Field(-1 * u.rad, "MC telescope altitude", unit=u.rad) - tel_az = Field(-1 * u.rad, "MC telescope azimuth", unit=u.rad) - - -class InfoMC(InfoOriginalMC): - true_core_x = Field(-1 * u.m, "MC event x-core position", unit=u.m) - true_core_y = Field(-1 * u.m, "MC event y-core position", unit=u.m) - true_alt = Field(-1 * u.rad, "MC event altitude", unit=u.rad) - true_az = Field(-1 * u.rad, "MC event azimuth", unit=u.rad) - - -class InfoData(ExtraInfo): - tel_id = Field(-1, "Telescope ID") - mjd = Field(-1, "Event MJD", dtype=np.float64) - tel_alt = Field(-1, "Telescope altitude", unit=u.rad) - tel_az = Field(-1, "Telescope azimuth", unit=u.rad) - - -def get_run_info_from_name(file_name): - file_name = Path(file_name) - file_name = file_name.name - mask_data = r".*\d+_(\d+)_S_.*" - mask_mc = r".*_M\d_za\d+to\d+_\d_(\d+)_Y_.*" - mask_mc_alt = r".*_M\d_\d_(\d+)_.*" - if re.findall(mask_data, file_name): - parsed_info = re.findall(mask_data, file_name) - is_mc = False - elif re.findall(mask_mc, file_name): - parsed_info = re.findall(mask_mc, file_name) - is_mc = True - else: - parsed_info = re.findall(mask_mc_alt, file_name) - is_mc = True - - try: - run_number = int(parsed_info[0]) - except IndexError: - raise IndexError( - "Can not identify the run number and type (data/MC) of the file " - "{:s}".format(file_name) - ) - - return run_number, is_mc - - -def parse_args(args): - """ - Parse command line options and arguments. - """ - - parser = argparse.ArgumentParser(description="", prefix_chars="-") - parser.add_argument( - "--use_mc", action="store_true", help="Read MC data if flag is specified." - ) - parser.add_argument( - "-in", - "--input_mask", - nargs="?", - help='Mask for input files e.g. "20*_S_*.root" (NOTE: the double quotes should be there).', - ) - - return parser.parse_args(args) - - -def write_hdf5_mc(filelist): - """ - Writes an HDF5 file for each superstar file in - filelist. Specific for MC files. - - Parameters - ---------- - filelist : list - A list of files to be opened. - """ - - obs_id = 0 - columns = columns_mc - - for path in filelist: - print(f"Opening {path}") - with uproot.open(path) as f: - outfile = str(path).replace(".root", ".h5") - - writer = HDF5TableWriter(filename=outfile, mode="a") - - tr_tel_list_to_mask = subarray.tel_ids_to_mask - - writer.add_column_transform_regexp( - table_regexp="dl1/.*", - col_regexp="tel_ids", - transform=tr_tel_list_to_mask, - ) - - print(f"Writing in {outfile}") - - events_tree = f["Events"] - events = QTable() - - for column, (branch, kwargs) in columns.items(): - events[column] = u.Quantity( - events_tree[branch].array(library="np"), copy=False, **kwargs - ) - - events = vstack(events) - - event_info = dict() - hillas_params = dict() - timing_params = dict() - leakage_params = dict() - id_prev = events[0]["event_id"] - for event in events: - id_current = event["event_id"] - if id_current < id_prev: - obs_id += 1 - # correction needed for true_az since it is in the corsika - # reference frame. Also, wrapped in [-180, 180] interval - # see e.g. MHMcEnergyMigration.cc lines 567-570 - true_az = np.pi - event["true_az"].value + magic_bdec.value - if true_az > np.pi: - true_az -= 2 * np.pi - # correction needed for tel_az since it is in the corsika - # reference frame. Also, wrapped in [0, 360] interval - # see e.g. MPointingPosCalc.cc lines 149-153 - tel_az = np.pi - event["true_pointing_az"].value + magic_bdec.value - if tel_az < 0: - tel_az += 2 * np.pi - if tel_az > 2 * np.pi: - tel_az -= 2 * np.pi - if event["is_valid"] > 0: - is_valid = True - else: - is_valid = False - event_info[1] = InfoMC( - obs_id=obs_id, - event_id=event["event_id"], - tel_id=1, - true_energy=event["true_energy"].to(u.TeV), - true_alt=(90.0 * u.deg).to(u.rad) - event["true_zd"], - true_az=u.Quantity(true_az, u.rad), - true_core_x=event["true_core_x"].to(u.m), - true_core_y=event["true_core_y"].to(u.m), - tel_alt=(90.0 * u.deg).to(u.rad) - event["true_pointing_zd"], - tel_az=u.Quantity(tel_az, u.rad), - ) - event_info[2] = InfoMC( - obs_id=obs_id, - event_id=event["event_id"], - tel_id=2, - true_energy=event["true_energy"].to(u.TeV), - true_alt=(90.0 * u.deg).to(u.rad) - event["true_zd"], - true_core_x=event["true_core_x"].to(u.m), - true_core_y=event["true_core_y"].to(u.m), - true_az=u.Quantity(true_az, u.rad), - tel_alt=(90.0 * u.deg).to(u.rad) - event["true_pointing_zd"], - tel_az=u.Quantity(tel_az, u.rad), - ) - hillas_params[1] = CameraHillasParametersContainer( - x=event["x1"].to(u.m), - y=event["y1"].to(u.m), - intensity=event["size1"], - length=event["length1"].to(u.m), - width=event["width1"].to(u.m), - psi=event["psi1"].to(u.deg), - ) - hillas_params[2] = CameraHillasParametersContainer( - x=event["x2"].to(u.m), - y=event["y2"].to(u.m), - intensity=event["size2"], - length=event["length2"].to(u.m), - width=event["width2"].to(u.m), - psi=event["psi2"].to(u.deg), - ) - timing_params[1] = CameraTimingParametersContainer( - slope=event["slope1"].to(1 / u.m) - ) - timing_params[2] = CameraTimingParametersContainer( - slope=event["slope2"].to(1 / u.m) - ) - leakage_params[1] = LeakageContainer( - intensity_width_1=event["leakage1_1"], - intensity_width_2=event["leakage2_1"], - ) - leakage_params[2] = LeakageContainer( - intensity_width_1=event["leakage1_2"], - intensity_width_2=event["leakage2_2"], - ) - stereo_params = ReconstructedGeometryContainer( - alt=(90.0 * u.deg) - event["reco_zd"], - az=event["reco_az"], - core_x=event["reco_core_x"].to(u.m), - core_y=event["reco_core_y"].to(u.m), - tel_ids=[h for h in hillas_params.keys()], - average_intensity=np.mean( - [h.intensity for h in hillas_params.values()] - ), - is_valid=is_valid, - h_max=event["hmax"].to(u.m), - ) - for tel_id in list(event_info.keys()): - writer.write( - table_name="dl1/hillas_params", - containers=[ - event_info[tel_id], - hillas_params[tel_id], - leakage_params[tel_id], - timing_params[tel_id], - ], - ) - common_info = ExtraInfo( - obs_id=obs_id, - event_id=event["event_id"], - ) - writer.write( - table_name="dl1/stereo_params", - containers=[ - common_info, - stereo_params, - ], - ) - id_prev = event["event_id"] - - originalmc_tree = f["OriginalMC"] - originalmc = QTable() - - for column, (branch, kwargs) in columns_mc_orig.items(): - originalmc[column] = u.Quantity( - originalmc_tree[branch].array(library="np"), copy=False, **kwargs - ) - - originalmc = vstack(originalmc) - - original_info = dict() - for event in originalmc: - # correction needed for tel_az since it is in the corsika - # reference frame. Also, wrapped in [0, 360] interval - # see e.g. MPointingPosCalc.cc lines 149-153 - tel_az_1 = np.pi - event["true_pointing_az_1"].value + magic_bdec.value - if tel_az_1 < 0: - tel_az_1 += 2 * np.pi - if tel_az_1 > 2 * np.pi: - tel_az_1 -= 2 * np.pi - tel_az_2 = np.pi - event["true_pointing_az_2"].value + magic_bdec.value - if tel_az_2 < 0: - tel_az_2 += 2 * np.pi - if tel_az_2 > 2 * np.pi: - tel_az_2 -= 2 * np.pi - original_info[1] = InfoOriginalMC( - tel_id=1, - true_energy=event["true_energy_1"].to(u.TeV), - tel_alt=(90.0 * u.deg).to(u.rad) - event["true_pointing_zd_1"], - tel_az=u.Quantity(tel_az_1, u.rad), - ) - original_info[2] = InfoOriginalMC( - tel_id=2, - true_energy=event["true_energy_2"].to(u.TeV), - tel_alt=(90.0 * u.deg).to(u.rad) - event["true_pointing_zd_2"], - tel_az=u.Quantity(tel_az_2, u.rad), - ) - for tel_id in list(original_info.keys()): - writer.write( - table_name="dl1/original_mc", - containers=[ - original_info[tel_id], - ], - ) - - writer.close() - - -def write_hdf5_data(filelist): - """ - Writes an HDF5 file for each superstar file in - filelist. Specific for real data files. - - Parameters - ---------- - filelist : list - A list of files to be opened. - """ - - columns = columns_data - - for path in filelist: - print(f"Opening {path}") - with uproot.open(path) as f: - outfile = str(path).replace(".root", ".h5") - - writer = HDF5TableWriter(filename=outfile, mode="a") - - tr_tel_list_to_mask = subarray.tel_ids_to_mask - - writer.add_column_transform_regexp( - table_regexp="dl1/.*", - col_regexp="tel_ids", - transform=tr_tel_list_to_mask, - ) - - print(f"Writing in {outfile}") - - events_tree = f["Events"] - events = QTable() - - for column, (branch, kwargs) in columns.items(): - events[column] = u.Quantity( - events_tree[branch].array(library="np"), copy=False, **kwargs - ) - - events = vstack(events) - run_info = get_run_info_from_name(path) - run_number = run_info[0] - - event_info = dict() - hillas_params = dict() - timing_params = dict() - leakage_params = dict() - for event in events: - event_mjd = ( - event["mjd1"] - + (event["millisec1"] / 1.0e3 + event["nanosec1"] / 1.0e9) / 86400.0 - ) - if event["is_valid"] > 0: - is_valid = True - else: - is_valid = False - event_info[1] = InfoData( - obs_id=run_number, - event_id=event["event_id"], - tel_id=1, - mjd=event_mjd.value, - tel_alt=(90.0 * u.deg).to(u.rad) - event["pointing_zen"].to(u.rad), - tel_az=event["pointing_az"].to(u.rad), - ) - event_info[2] = InfoData( - obs_id=run_number, - event_id=event["event_id"], - tel_id=2, - mjd=event_mjd.value, - tel_alt=(90.0 * u.deg).to(u.rad) - event["pointing_zen"].to(u.rad), - tel_az=event["pointing_az"].to(u.rad), - ) - hillas_params[1] = CameraHillasParametersContainer( - x=event["x1"].to(u.m), - y=event["y1"].to(u.m), - intensity=event["size1"], - length=event["length1"].to(u.m), - width=event["width1"].to(u.m), - psi=event["psi1"].to(u.deg), - ) - hillas_params[2] = CameraHillasParametersContainer( - x=event["x2"].to(u.m), - y=event["y2"].to(u.m), - intensity=event["size2"], - length=event["length2"].to(u.m), - width=event["width2"].to(u.m), - psi=event["psi2"].to(u.deg), - ) - timing_params[1] = CameraTimingParametersContainer( - slope=event["slope1"].to(1 / u.m) - ) - timing_params[2] = CameraTimingParametersContainer( - slope=event["slope2"].to(1 / u.m) - ) - leakage_params[1] = LeakageContainer( - intensity_width_1=event["leakage1_1"], - intensity_width_2=event["leakage2_1"], - ) - leakage_params[2] = LeakageContainer( - intensity_width_1=event["leakage1_2"], - intensity_width_2=event["leakage2_2"], - ) - stereo_params = ReconstructedGeometryContainer( - alt=(90.0 * u.deg) - event["reco_zd"], - az=event["reco_az"], - core_x=event["reco_core_x"].to(u.m), - core_y=event["reco_core_y"].to(u.m), - tel_ids=[h for h in hillas_params.keys()], - average_intensity=np.mean( - [h.intensity for h in hillas_params.values()] - ), - is_valid=is_valid, - h_max=event["hmax"].to(u.m), - ) - for tel_id in list(event_info.keys()): - writer.write( - table_name="dl1/hillas_params", - containers=[ - event_info[tel_id], - hillas_params[tel_id], - leakage_params[tel_id], - timing_params[tel_id], - ], - ) - common_info = ExtraInfo( - obs_id=run_number, - event_id=event["event_id"], - ) - writer.write( - table_name="dl1/stereo_params", - containers=[ - common_info, - stereo_params, - ], - ) - - writer.close() - - -def convert_superstar_to_dl1(input_files_mask, is_mc): - """ - Takes superstar files as input and converts them in HDF5 - format. Real and MC data are treated differently. - - Parameters - ---------- - input_files_mask : str - Mask for the superstar input files. - is_mc : bool - Flag to tell if real or MC data. - """ - - input_files = Path(input_files_mask) - filelist = sorted(Path(input_files.parent).expanduser().glob(input_files.name)) - - if is_mc: - write_hdf5_mc(filelist) - else: - write_hdf5_data(filelist) - - -def main(*args): - flags = parse_args(args) - - is_mc = flags.use_mc - input_mask = flags.input_mask - - convert_superstar_to_dl1(input_mask, is_mc) - - -if __name__ == "__main__": - main(*sys.argv[1:]) diff --git a/magicctapipe/scripts/mars/magic-cta-pipe_config_stereo.yaml b/magicctapipe/scripts/mars/magic-cta-pipe_config_stereo.yaml deleted file mode 100644 index adf1a452..00000000 --- a/magicctapipe/scripts/mars/magic-cta-pipe_config_stereo.yaml +++ /dev/null @@ -1,103 +0,0 @@ -# Configuration file to be used for stereo analysis of MAGIC data. -# In the several input_mask keys, one has to include files from -# both M1 and M2, since the MAGICEventSource will match the events -# to get the stereo events. - -data_files: - mc: - train_sample: - magic: - input_mask: "/path/to/MC_train_files/GA*_Y_*.root" - hillas_output: "/path/to/MC_train_files/MC_train_hillas.h5" - test_sample: - magic: - input_mask: "/path/to/MC_test_files/GA*_Y_*.root" - hillas_output: "/path/to/MC_test_files/MC_test_hillas.h5" - reco_output: "/path/to/MC_test_files/MC_test_reco.h5" - data: - train_sample: - magic: - input_mask: "/path/to/data_train_files/20*_Y_*.root" - hillas_output: "/path/to/data_train_files/data_train_hillas.h5" - test_sample: - magic: - input_mask: "/path/to/data_test_files/20*_Y_*.root" - hillas_output: "/path/to/data_test_files/data_test_hillas.h5" - reco_output: "/path/to/data_test_files/data_test_reco.h5" - -image_cleaning: - magic: - picture_thresh: 6 - boundary_thresh: 3.5 - # 1.5 slices x 1.64 GHz - max_time_diff: 2.46 - # 4.5 slices x 1.64 GHz - max_time_off: 7.38 - minimum_number_of_neighbors: 3 - use_time : True - use_sum : True - find_hotpixels : True - -energy_rf: - save_name: "RFs/energy_rf.joblib" - cuts: "(event_id > 0) & (multiplicity > 1) & (intensity > 30) & (length > 0.0) & (intensity_width_1 < 0.15)" - settings: - n_estimators: 100 - min_samples_leaf: 10 - n_jobs: 3 - - features: - ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'x', 'y', 'psi', 'intensity_width_1', 'intensity_width_2', 'h_max', 'alt', 'az'] - -direction_rf: - save_name: "RFs/direction_rf.joblib" - cuts: "(event_id > 0) & (multiplicity > 1) & (intensity > 30) & (length > 0.0) & (intensity_width_1 < 0.15)" - settings: - n_estimators: 100 - min_samples_leaf: 10 - n_jobs: 3 - - features: - 'disp': ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'x', 'y', 'psi', 'h_max', 'alt', 'az'] - 'pos_angle_shift': ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'x', 'y', 'psi', 'h_max', 'alt', 'az'] - -classifier_rf: - save_name: "RFs/classifier_rf.joblib" - cuts: "(event_id > 0) & (multiplicity > 1) & (intensity > 30) & (length > 0.0) & (intensity_width_1 < 0.15)" - settings: - n_estimators: 100 - min_samples_leaf: 10 - n_jobs: 3 - - features: - ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'x', 'y', 'psi', 'n_islands', 'h_max', 'alt', 'az'] - -irf: - output_name: "/path/to/IRF_file/outfile.fits" - -source: - name: "Crab" - coordinates: - ra_dec: [83.633083, 22.0145] - -event_list: - output_directory: "/path/to/output/directory" - max_time_diff: 6.9e-4 - cuts: - quality: - l3rate: "80 < value < 1000" - dc: "0 < value < 7e3" - selection: - "(event_id > 0) & (multiplicity > 1) & (event_class_0 > 0.8)" - -spectrum: - input_files: "/path/to/input/events*.fits" - energy_reco_nbins: 30 - energy_reco_min: 0.005 # TeV - energy_reco_max: 50. # TeV - energy_true_factor: 1.4 # ratio of number of estimated-to-true energy bins - off_positions: 3 - #exclude_region: {0 : [183.604, -8.708, 0.5]} - spectral_model: - power_law: {'index' : 2, 'amplitude' : 2.0e-11, 'reference' : 0.3} - #log_parabola: {'alpha' : 2, 'amplitude' : 2.0e-11, 'beta' : 0.2, 'reference' : 0.3} diff --git a/magicctapipe/scripts/mars/magic_calibrated_to_dl1.py b/magicctapipe/scripts/mars/magic_calibrated_to_dl1.py deleted file mode 100644 index 9425e627..00000000 --- a/magicctapipe/scripts/mars/magic_calibrated_to_dl1.py +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import argparse -import copy -import glob -import re -from pathlib import Path - -import numpy as np -import yaml -from ctapipe.containers import ( - ImageParametersContainer, - IntensityStatisticsContainer, - PeakTimeStatisticsContainer, - TimingParametersContainer, -) -from ctapipe.coordinates import TelescopeFrame -from ctapipe.image import ( - concentration_parameters, - descriptive_statistics, - hillas_parameters, - morphology_parameters, - timing_parameters, -) -from ctapipe.io import DataWriter -from ctapipe_io_magic import MAGICEventSource - -from magicctapipe.image import MAGICClean, get_leakage -from magicctapipe.utils import info_message - -DEFAULT_IMAGE_PARAMETERS = ImageParametersContainer() -DEFAULT_TRUE_IMAGE_PARAMETERS = ImageParametersContainer() -DEFAULT_TRUE_IMAGE_PARAMETERS.intensity_statistics = IntensityStatisticsContainer( - max=np.int32(-1), - min=np.int32(-1), - mean=np.float64(np.nan), - std=np.float64(np.nan), - skewness=np.float64(np.nan), - kurtosis=np.float64(np.nan), -) -DEFAULT_TIMING_PARAMETERS = TimingParametersContainer() -DEFAULT_PEAKTIME_STATISTICS = PeakTimeStatisticsContainer() - - -def magic_calibrated_to_dl1(input_mask, cleaning_config): - """Process MAGIC calibrated files, both real and simulation, to get - DL1 files in the standard CTA format. - Custom parts: - - MAGIC cleaning - - treatment of hot/bad pixels - - Parameters - ---------- - input_mask : str - Mask for MC input files. Reading of files is managed - by the MAGICEventSource class. - cleaning_config: dict - Dictionary for cleaning settings - bad_pixels_config: dict - Dictionary for bad pixels settings - - Returns - ------- - None - """ - - input_files = glob.glob(input_mask) - input_files.sort() - - clean_config = copy.deepcopy(cleaning_config) - - # Now let's loop over the events and perform: - # - image cleaning; - # - hillas parameter calculation; - # - time gradient calculation. - # - # We'll write the result to the HDF5 file that can be used for further processing. - - for input_file in input_files: - file_name = Path(input_file).name - output_name = file_name.replace(".root", ".h5") - print("") - print(f"-- Working on {file_name:s} --") - print("") - # Event source - source = MAGICEventSource(input_url=input_file) - is_simulation = source.is_mc - - geometry_camera_frame = source.subarray.tel[source.telescope].camera.geometry - geometry = geometry_camera_frame.transform_to(TelescopeFrame()) - # camera_old = source.subarray.tel[source.telescope].camera.geometry - # camera_refl = reflected_camera_geometry(camera_old) - # geometry = scale_camera_geometry(camera_refl, aberration_factor) - if is_simulation: - clean_config["findhotpixels"] = False - - magic_clean = MAGICClean(geometry, clean_config) - - info_message("Cleaning configuration", prefix="Hillas") - for item in vars(magic_clean).items(): - print(f"{item[0]}: {item[1]}") - if magic_clean.find_hotpixels: - for item in vars(magic_clean.pixel_treatment).items(): - print(f"{item[0]}: {item[1]}") - - with DataWriter( - event_source=source, output_path=output_name, overwrite=True - ) as write_data: - # Looping over the events - for event in source: - tels_with_data = event.trigger.tels_with_trigger - - # Looping over the triggered telescopes - for tel_id in tels_with_data: - if is_simulation: - event.dl1.tel[tel_id].parameters = DEFAULT_TRUE_IMAGE_PARAMETERS - else: - event.dl1.tel[tel_id].parameters = DEFAULT_IMAGE_PARAMETERS - # Obtained image - event_image = event.dl1.tel[tel_id].image - # Pixel arrival time map - peak_time = event.dl1.tel[tel_id].peak_time - - if is_simulation: - clean_mask, event_image, peak_time = magic_clean.clean_image( - event_image, peak_time - ) - else: - dead_pixels = event.mon.tel[ - tel_id - ].pixel_status.hardware_failing_pixels[0] - badrms_pixels = event.mon.tel[ - tel_id - ].pixel_status.pedestal_failing_pixels[2] - unsuitable_mask = np.logical_or(dead_pixels, badrms_pixels) - clean_mask, event_image, peak_time = magic_clean.clean_image( - event_image, peak_time, unsuitable_mask=unsuitable_mask - ) - - event.dl1.tel[tel_id].image_mask = clean_mask - - event_image_cleaned = event_image.copy() - event_image_cleaned[~clean_mask] = 0 - - event_pulse_time_cleaned = peak_time.copy() - event_pulse_time_cleaned[~clean_mask] = 0 - - geom_selected = geometry[clean_mask] - image_selected = event_image[clean_mask] - - if np.any(event_image_cleaned): - try: - # If event has survived the cleaning, computing the Hillas parameters - hillas = hillas_parameters( - geom=geom_selected, image=image_selected - ) - leakage = get_leakage(geometry, event_image, clean_mask) - concentration = concentration_parameters( - geom=geom_selected, - image=image_selected, - hillas_parameters=hillas, - ) - morphology = morphology_parameters( - geom=geometry, image_mask=clean_mask - ) - intensity_statistics = descriptive_statistics( - image_selected, - container_class=IntensityStatisticsContainer, - ) - if peak_time is not None: - timing = timing_parameters( - geom=geom_selected, - image=image_selected, - peak_time=peak_time[clean_mask], - hillas_parameters=hillas, - ) - peak_time_statistics = descriptive_statistics( - peak_time[clean_mask], - container_class=PeakTimeStatisticsContainer, - ) - else: - timing = DEFAULT_TIMING_PARAMETERS - peak_time_statistics = DEFAULT_PEAKTIME_STATISTICS - - event.dl1.tel[tel_id].parameters = ImageParametersContainer( - hillas=hillas, - timing=timing, - leakage=leakage, - morphology=morphology, - concentration=concentration, - intensity_statistics=intensity_statistics, - peak_time_statistics=peak_time_statistics, - ) - - except ValueError: - print( - f"Event ID {event.index.event_id} (obs ID: {event.index.obs_id}; " - f"telescope ID: {tel_id}): Hillas calculation failed." - ) - else: - print( - f"Event ID {event.index.event_id} (obs ID: {event.index.obs_id}; " - f"telescope ID: {tel_id}) did not pass cleaning." - ) - - write_data(event) - - -# ================= -# === Main code === -# ================= - -# -------------------------- -# Adding the argument parser - - -arg_parser = argparse.ArgumentParser( - description=""" -This tools computes the Hillas parameters for the specified data sets. -""" -) - -arg_parser.add_argument( - "--config", - default="config.yaml", - help="Configuration file to steer the code execution.", -) -arg_parser.add_argument( - "--usereal", help="Process only real data files.", action="store_true" -) -arg_parser.add_argument( - "--usemc", help="Process only simulated data files.", action="store_true" -) -arg_parser.add_argument( - "--usetest", help="Process only test files.", action="store_true" -) -arg_parser.add_argument( - "--usetrain", help="Process only train files.", action="store_true" -) -arg_parser.add_argument("--usem1", help="Process only M1 files.", action="store_true") -arg_parser.add_argument("--usem2", help="Process only M2 files.", action="store_true") - -parsed_args = arg_parser.parse_args() -# -------------------------- - -# ------------------------------ -# Reading the configuration file - -file_not_found_message = """ -Error: can not load the configuration file {:s}. -Please check that the file exists and is of YAML or JSON format. -Exiting. -""" - -try: - config = yaml.safe_load(open(parsed_args.config, "r")) -except IOError: - print(file_not_found_message.format(parsed_args.config)) - exit() - -if "data_files" not in config: - print('Error: the configuration file is missing the "data_files" section. Exiting.') - exit() - -if "image_cleaning" not in config: - print( - 'Error: the configuration file is missing the "image_cleaning" section. Exiting.' - ) - exit() -# ------------------------------ - -if parsed_args.usereal and parsed_args.usemc: - data_type_to_process = config["data_files"] -elif parsed_args.usereal: - data_type_to_process = ["data"] -elif parsed_args.usemc: - data_type_to_process = ["mc"] -else: - data_type_to_process = config["data_files"] - -if parsed_args.usetrain and parsed_args.usetest: - data_sample_to_process = ["train_sample", "test_sample"] -elif parsed_args.usetrain: - data_sample_to_process = ["train_sample"] -elif parsed_args.usetest: - data_sample_to_process = ["test_sample"] -else: - data_sample_to_process = ["train_sample", "test_sample"] - -if parsed_args.usem1 and parsed_args.usem2: - telescope_to_process = ["magic1", "magic2"] -elif parsed_args.usem1: - telescope_to_process = ["magic1"] -elif parsed_args.usem2: - telescope_to_process = ["magic2"] -else: - telescope_to_process = ["magic1", "magic2"] - -for data_type in data_type_to_process: - for sample in data_sample_to_process: - for telescope in telescope_to_process: - info_message( - f'Data "{data_type}", sample "{sample}", telescope "{telescope}"', - prefix="Hillas", - ) - - try: - telescope_type = re.findall("(.*)[_\\d]+", telescope)[0] - except Exception: - ValueError( - f'Can not recognize the telescope type from name "{telescope}"' - ) - - if telescope_type not in config["image_cleaning"]: - raise ValueError( - f'Guessed telescope type "{telescope_type}" does not have image cleaning settings' - ) - - cleaning_config = config["image_cleaning"][telescope_type] - - magic_calibrated_to_dl1( - input_mask=config["data_files"][data_type][sample][telescope][ - "input_mask" - ], - cleaning_config=cleaning_config, - ) diff --git a/magicctapipe/scripts/performance/apply_rfs_MAGIC_LST.py b/magicctapipe/scripts/performance/apply_rfs_MAGIC_LST.py deleted file mode 100644 index 509b7877..00000000 --- a/magicctapipe/scripts/performance/apply_rfs_MAGIC_LST.py +++ /dev/null @@ -1,194 +0,0 @@ -# coding: utf-8 - -import argparse -import glob -import os -import time - -import pandas as pd - -from magicctapipe.reco.event_processing import ( - DirectionEstimatorPandas, - EnergyEstimatorPandas, - EventClassifierPandas, -) -from magicctapipe.utils.filedir import ( - check_folder, - load_cfg_file, - load_dl1_data_stereo, - out_file_h5_reco, -) -from magicctapipe.utils.tels import check_tel_ids, get_array_tel_descriptions -from magicctapipe.utils.utils import info_message, print_elapsed_time, print_title - -PARSER = argparse.ArgumentParser( - description="Apply random forests. For stereo data.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, -) -PARSER.add_argument( - "-cfg", - "--config_file", - type=str, - required=True, - help="Configuration file, yaml format", -) -PARSER.add_argument( - "-mte", - "--only_mc_test", - action="store_true", - required=False, - default=False, - help="Consider only mc test files", -) -PARSER.add_argument( - "-dte", - "--only_data_test", - action="store_true", - required=False, - default=False, - help="Consider only data test files", -) - - -def apply_rfs_stereo(config_file, only_mc_test, only_data_test): - """Apply - - Parameters - ---------- - config_file : str - configuration file - only_mc_test : bool - process only `mc_test` files - only_data_test : bool - process only `data_test` files - """ - print_title("Apply RFs") - - # --- Read the configuration file --- - cfg = load_cfg_file(config_file) - - # Get tel_ids - tel_ids, tel_ids_LST, tel_ids_MAGIC = check_tel_ids(cfg) - - # --- MAGIC - LST description --- - array_tel_descriptions = get_array_tel_descriptions( - tel_ids_LST=tel_ids_LST, tel_ids_MAGIC=tel_ids_MAGIC - ) - - # Using only the "mc" and/or "data" "test_sample" - if only_mc_test: - data_types = ["mc"] - elif only_data_test: - data_types = ["data"] - else: - data_types = ["mc", "data"] - - sample = "test_sample" - - for data_type in data_types: - info_message(f'Loading "{data_type}", sample "{sample}"', prefix="ApplyRF") - - file_list = glob.glob(cfg["data_files"][data_type][sample]["hillas_h5"]) - print(cfg["data_files"][data_type][sample]["hillas_h5"]) - print(file_list) - - for file in file_list: - print(f"Analyzing file:\n{file}") - out_file = os.path.join( - os.path.dirname(cfg["data_files"][data_type][sample]["reco_h5"]), - out_file_h5_reco(in_file=file), - ) - print(f"Output file:\n{out_file}") - check_folder(os.path.dirname(out_file)) - - shower_data = load_dl1_data_stereo(file) - - # Dropping data with the wrong altitude - shower_data = shower_data.query(cfg["global"]["wrong_alt"]) - - # Computing the event "multiplicity" - l_ = ["obs_id", "event_id"] - shower_data["multiplicity"] = ( - shower_data["intensity"].groupby(level=l_).count() - ) - - # Added by Lea Heckmann 2020-05-15 for the moment to delete duplicate - # events - info_message("Removing duplicate events", prefix="ApplyRF") - shower_data = shower_data[~shower_data.index.duplicated()] - - # --- Applying RFs --- - # Random forest kinds - rf_kinds = ["energy_rf", "direction_rf", "classifier_rf"] - # Loop on rf_kinds - for rf_kind in rf_kinds: - info_message(f"Loading RF: {rf_kind}", prefix="ApplyRF") - try: - # Init the estimator - if rf_kind == "direction_rf": - estimator = DirectionEstimatorPandas( - cfg[rf_kind]["features"], - array_tel_descriptions, - **cfg[rf_kind]["settings"], - ) - elif rf_kind == "energy_rf": - estimator = EnergyEstimatorPandas( - cfg[rf_kind]["features"], **cfg[rf_kind]["settings"] - ) - - elif rf_kind == "classifier_rf": - estimator = EventClassifierPandas( - cfg[rf_kind]["features"], **cfg[rf_kind]["settings"] - ) - - # Load the joblib RFs file - estimator.load( - os.path.join( - cfg[rf_kind]["save_dir"], cfg[rf_kind]["joblib_name"] - ) - ) - - # Apply RF - info_message(f"Applying RF: {rf_kind}", prefix="ApplyRF") - reco = estimator.predict(shower_data) - - # Appeding the result to the main data frame - shower_data = shower_data.join(reco) - except Exception as e: - print(e) - - # --- END LOOP on rf_kinds --- - - # --- Store results in DL2 file --- - info_message("Saving the reconstructed data", prefix="ApplyRF") - # Storing the reconstructed values for the given data sample - shower_data.to_hdf(out_file, key="dl2/reco") - - # Take mc_header form DL1 and save in DL2 - try: - # Only if file is a Monte Carlo simulation - try: - mc_ = pd.read_hdf(file, key="dl1/mc_header") - mc_.to_hdf(out_file, key="dl2/mc_header") - except Exception: - mc_ = pd.read_hdf(file, key="dl2/mc_header") - mc_.to_hdf(out_file, key="dl2/mc_header") - except Exception as e: - # No mc_header found in file, file is not a simulation - print(f"No dl1/mc_header found in file {file}, skipping - %s", e) - - # --- END LOOP on file_list --- - - # --- END LOOP on data_types --- - - -if __name__ == "__main__": - args = PARSER.parse_args() - kwargs = args.__dict__ - start_time = time.time() - apply_rfs_stereo( - config_file=kwargs["config_file"], - only_mc_test=kwargs["only_mc_test"], - only_data_test=kwargs["only_data_test"], - ) - print_elapsed_time(start_time, time.time()) diff --git a/magicctapipe/scripts/performance/make_irfs_MAGIC_LST.py b/magicctapipe/scripts/performance/make_irfs_MAGIC_LST.py deleted file mode 100644 index d75396d3..00000000 --- a/magicctapipe/scripts/performance/make_irfs_MAGIC_LST.py +++ /dev/null @@ -1,459 +0,0 @@ -import argparse -import logging -import operator -import os -import shutil -import time - -import astropy.units as u -import numpy as np -from astropy import table -from astropy.io import fits -from pyirf.benchmarks import angular_resolution, energy_bias_resolution -from pyirf.binning import ( - add_overflow_bins, - create_bins_per_decade, - create_histogram_table, -) -from pyirf.cut_optimization import optimize_gh_cut -from pyirf.cuts import calculate_percentile_cut, evaluate_binned_cut -from pyirf.io import ( - create_aeff2d_hdu, - create_background_2d_hdu, - create_energy_dispersion_hdu, - create_psf_table_hdu, - create_rad_max_hdu, -) -from pyirf.irf import ( - background_2d, - effective_area_per_energy, - energy_dispersion, - psf_table, -) -from pyirf.sensitivity import calculate_sensitivity, estimate_background -from pyirf.spectral import ( - CRAB_HEGRA, - IRFDOC_ELECTRON_SPECTRUM, - IRFDOC_PROTON_SPECTRUM, - PowerLaw, - calculate_event_weights, -) -from pyirf.utils import calculate_source_fov_offset, calculate_theta - -from magicctapipe.irfs.utils import ( - plot_irfs_MAGIC_LST, - read_dl2_mcp_to_pyirf_MAGIC_LST_list, -) -from magicctapipe.utils.filedir import check_folder, load_cfg_file -from magicctapipe.utils.utils import print_elapsed_time, print_title - -PARSER = argparse.ArgumentParser( - description="Apply random forests. For stereo data.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, -) -PARSER.add_argument( - "-cfg", - "--config_file", - type=str, - required=True, - help="Configuration file, yaml format", -) - - -def make_irfs_MAGIC_LST(config_file): - """Make IRFs for MAGIC and/or LST array using pyirf - - Parameters - ---------- - config_file : str - configuration file - """ - print_title("Make IRFs") - - cfg = load_cfg_file(config_file) - - consider_electron = cfg["irfs"].get("consider_electron", False) - - # --- Check out folder --- - check_folder(cfg["irfs"]["save_dir"]) - - log = logging.getLogger("pyirf") - - # --- Initial variables --- - # Observation time for sensitivity - T_OBS = cfg["irfs"]["T_OBS"] * u.hour - - # scaling between on and off region. - # Make off region 5 times larger than on region for better background statistics - ALPHA = cfg["irfs"]["ALPHA"] - - # Radius to use for calculating bg rate - MAX_BG_RADIUS = cfg["irfs"]["MAX_BG_RADIUS"] * u.deg - - # Gamma efficiency used for first calculation of the binned theta cuts - # initial theta cuts are calculated using a fixed g/h cut corresponding to this - # efficiency then g/h cuts are optimized after applying these initial theta cuts. - INITIAL_GH_CUT_EFFICENCY = cfg["irfs"]["INITIAL_GH_CUT_EFFICENCY"] - - # gamma efficiency used for gh cuts calculation - MAX_GH_CUT_EFFICIENCY = cfg["irfs"]["MAX_GH_CUT_EFFICIENCY"] - GH_CUT_EFFICIENCY_STEP = cfg["irfs"]["GH_CUT_EFFICIENCY_STEP"] - - MIN_GH_CUT_EFFICIENCY = cfg["irfs"].get( - "MIN_GH_CUT_EFFICIENCY", GH_CUT_EFFICIENCY_STEP - ) - cuts = cfg["irfs"].get("cuts", "") - # if "cuts" not in cfg["irfs"]: - # INTENSITY_CUT = cfg["irfs"]["INTENSITY_CUT"] - # LEAKAGE1_CUT = cfg["irfs"]["LEAKAGE1_CUT"] - - particles = { - "gamma": { - "file": cfg["irfs"]["gamma_dl2"], - "max_files": cfg["irfs"].get("max_files_gamma", 0), - "target_spectrum": CRAB_HEGRA, - }, - "proton": { - "file": cfg["irfs"]["proton_dl2"], - "max_files": cfg["irfs"].get("max_files_proton", 0), - "target_spectrum": IRFDOC_PROTON_SPECTRUM, - }, - } - if consider_electron: - particles["electron"] = { - "file": cfg["irfs"]["electron_dl2"], - "max_files": cfg["irfs"].get("max_files_electron", 0), - "target_spectrum": IRFDOC_ELECTRON_SPECTRUM, - } - - logging.basicConfig(level=logging.INFO) - logging.getLogger("pyirf").setLevel(logging.DEBUG) - - # Read hdf5 files into pyirf format - for particle_type, p in particles.items(): - log.info(f"Simulated {particle_type.title()} Events:") - p["events"], p["simulation_info"] = read_dl2_mcp_to_pyirf_MAGIC_LST_list( - file_mask=p["file"], - useless_cols=cfg["irfs"].get("useless_cols", []), - max_files=p["max_files"], - verbose=cfg["irfs"].get("verbose", False), - eval_mean_events=True, - cuts=cuts, - ) - - # # Applying cuts (if not already done) - # if "cuts" not in cfg["irfs"]: - # cut_mult = cfg["irfs"].get("cut_on_multiplicity", 2) - # p["events"] = p["events"][p["events"]["multiplicity"] >= cut_mult].copy() - # good_ = (p["events"]["intensity"] >= INTENSITY_CUT) & ( - # p["events"]["intensity_width_1"] <= LEAKAGE1_CUT - # ) - # p["events"] = p["events"][good_] - - l_ = len(p["events"]) - log.info(f"Number of events: {l_}") - p["events"]["particle_type"] = particle_type - - p["simulated_spectrum"] = PowerLaw.from_simulation(p["simulation_info"], T_OBS) - p["events"]["weight"] = calculate_event_weights( - p["events"]["true_energy"], p["target_spectrum"], p["simulated_spectrum"] - ) - for prefix in ("true", "reco"): - k = f"{prefix}_source_fov_offset" - p["events"][k] = calculate_source_fov_offset(p["events"], prefix=prefix) - - # calculate theta / distance between reco and assuemd source position - # we handle only ON observations here, so the assumed source position - # is the pointing position - p["events"]["theta"] = calculate_theta( - p["events"], - assumed_source_az=p["events"]["pointing_az"], - assumed_source_alt=p["events"]["pointing_alt"], - ) - log.info(p["simulation_info"]) - log.info("") - - gammas = particles["gamma"]["events"] - # background table composed of both electrons and protons - if consider_electron: - background = table.vstack( - [particles["proton"]["events"], particles["electron"]["events"]] - ) - else: - background = table.vstack([particles["proton"]["events"]]) - - INITIAL_GH_CUT = np.quantile(gammas["gh_score"], (1 - INITIAL_GH_CUT_EFFICENCY)) - log.info(f"Using fixed G/H cut of {INITIAL_GH_CUT} to calculate theta cuts") - - # event display uses much finer bins for the theta cut than - # for the sensitivity - - theta_bins = add_overflow_bins( - create_bins_per_decade( - e_min=10 ** (cfg["irfs"]["theta_bins_exp_low"]) * u.TeV, - e_max=10 ** (cfg["irfs"]["theta_bins_exp_high"]) * u.TeV, - bins_per_decade=cfg["irfs"]["theta_bins_num"], - ) - ) - - # theta cut is 68 percent containmente of the gammas - # for now with a fixed global, unoptimized score cut - mask_theta_cuts = gammas["gh_score"] >= INITIAL_GH_CUT - theta_cuts = calculate_percentile_cut( - gammas["theta"][mask_theta_cuts], - gammas["reco_energy"][mask_theta_cuts], - bins=theta_bins, - min_value=0.05 * u.deg, - fill_value=0.32 * u.deg, - max_value=0.32 * u.deg, - percentile=68, - ) - - # same bins as event display uses - sensitivity_bins = add_overflow_bins( - create_bins_per_decade( - e_min=10 ** (cfg["irfs"]["sensitivity_bins_exp_low"]) * u.TeV, - e_max=10 ** (cfg["irfs"]["sensitivity_bins_exp_high"]) * u.TeV, - bins_per_decade=cfg["irfs"]["sensitivity_bins_num"], - ) - ) - - log.info("Optimizing G/H separation cut for best sensitivity") - gh_cut_efficiencies = np.arange( - MIN_GH_CUT_EFFICIENCY, - MAX_GH_CUT_EFFICIENCY + GH_CUT_EFFICIENCY_STEP / 2, - GH_CUT_EFFICIENCY_STEP, - ) - - # Fixed the desired efficiencies, find the best gh cuts - sensitivity_step_2, gh_cuts = optimize_gh_cut( - gammas, - background, - reco_energy_bins=sensitivity_bins, - gh_cut_efficiencies=gh_cut_efficiencies, - op=operator.ge, - theta_cuts=theta_cuts, - alpha=ALPHA, - background_radius=MAX_BG_RADIUS, - ) - - # now that we have the optimized gh cuts, we recalculate the theta - # cut as 68 percent containment on the events surviving these cuts. - log.info("Recalculating theta cut for optimized GH Cuts") - for tab in (gammas, background): - tab["selected_gh"] = evaluate_binned_cut( - tab["gh_score"], tab["reco_energy"], gh_cuts, operator.ge - ) - - theta_cuts_opt = calculate_percentile_cut( - gammas[gammas["selected_gh"]]["theta"], - gammas[gammas["selected_gh"]]["reco_energy"], - theta_bins, - percentile=68, - fill_value=0.32 * u.deg, - max_value=0.32 * u.deg, - min_value=0.05 * u.deg, - ) - - gammas["selected_theta"] = evaluate_binned_cut( - gammas["theta"], gammas["reco_energy"], theta_cuts_opt, operator.le - ) - gammas["selected"] = gammas["selected_theta"] & gammas["selected_gh"] - - # calculate sensitivity - signal_hist = create_histogram_table( - gammas[gammas["selected"]], bins=sensitivity_bins - ) - background_hist = estimate_background( - background[background["selected_gh"]], - reco_energy_bins=sensitivity_bins, - theta_cuts=theta_cuts_opt, - alpha=ALPHA, - background_radius=MAX_BG_RADIUS, - ) - sensitivity = calculate_sensitivity(signal_hist, background_hist, alpha=ALPHA) - - # scale relative sensitivity by Crab flux to get the flux sensitivity - spectrum = particles["gamma"]["target_spectrum"] - for s in (sensitivity_step_2, sensitivity): - s["flux_sensitivity"] = s["relative_sensitivity"] * spectrum( - s["reco_energy_center"] - ) - - log.info("Calculating IRFs") - hdus = [ - fits.PrimaryHDU(), - fits.BinTableHDU(sensitivity, name="SENSITIVITY"), - fits.BinTableHDU(sensitivity_step_2, name="SENSITIVITY_STEP_2"), - fits.BinTableHDU(theta_cuts, name="THETA_CUTS"), - fits.BinTableHDU(theta_cuts_opt, name="THETA_CUTS_OPT"), - fits.BinTableHDU(gh_cuts, name="GH_CUTS"), - ] - - masks = { - "": gammas["selected"], - "_NO_CUTS": slice(None), - "_ONLY_GH": gammas["selected_gh"], - "_ONLY_THETA": gammas["selected_theta"], - } - - # binnings for the irfs - true_energy_bins = add_overflow_bins( - create_bins_per_decade(10**-1.9 * u.TeV, 10**2.31 * u.TeV, 10) - ) - reco_energy_bins = add_overflow_bins( - create_bins_per_decade(10**-1.9 * u.TeV, 10**2.31 * u.TeV, 5) - ) - fov_offset_bins = [0, 0.5] * u.deg - source_offset_bins = np.arange(0, 1 + 1e-4, 1e-3) * u.deg - energy_migration_bins = np.geomspace(0.2, 5, 200) - - for label, mask in masks.items(): - effective_area = effective_area_per_energy( - gammas[mask], - particles["gamma"]["simulation_info"], - true_energy_bins=true_energy_bins, - ) - hdus.append( - create_aeff2d_hdu( - effective_area[..., np.newaxis], # add one dimension for FOV offset - true_energy_bins, - fov_offset_bins, - extname="EFFECTIVE_AREA" + label, - ) - ) - edisp = energy_dispersion( - gammas[mask], - true_energy_bins=true_energy_bins, - fov_offset_bins=fov_offset_bins, - migration_bins=energy_migration_bins, - ) - hdus.append( - create_energy_dispersion_hdu( - edisp, - true_energy_bins=true_energy_bins, - migration_bins=energy_migration_bins, - fov_offset_bins=fov_offset_bins, - extname="ENERGY_DISPERSION" + label, - ) - ) - - bias_resolution = energy_bias_resolution( - gammas[gammas["selected"]], reco_energy_bins, energy_type="reco" - ) - ang_res = angular_resolution( - gammas[gammas["selected_gh"]], reco_energy_bins, energy_type="reco" - ) - ang_res = table.QTable(ang_res) - psf = psf_table( - gammas[gammas["selected_gh"]], - true_energy_bins, - fov_offset_bins=fov_offset_bins, - source_offset_bins=source_offset_bins, - ) - - background_rate = background_2d( - background[background["selected_gh"]], - reco_energy_bins, - fov_offset_bins=np.arange(0, 11) * u.deg, - t_obs=T_OBS, - ) - - hdus.append( - create_background_2d_hdu( - background_rate, - reco_energy_bins, - fov_offset_bins=np.arange(0, 11) * u.deg, - ) - ) - hdus.append( - create_psf_table_hdu( - psf, - true_energy_bins, - source_offset_bins, - fov_offset_bins, - ) - ) - hdus.append( - create_rad_max_hdu( - theta_cuts_opt["cut"][:, np.newaxis], theta_bins, fov_offset_bins - ) - ) - hdus.append(fits.BinTableHDU(ang_res, name="ANGULAR_RESOLUTION")) - hdus.append(fits.BinTableHDU(bias_resolution, name="ENERGY_BIAS_RESOLUTION")) - - # --- Evaluate gamma efficiency --- - gamma_efficiency = table.QTable() - for l_ in ["low", "center", "high"]: - gamma_efficiency[l_] = gh_cuts[l_] - for l_ in ["eff_gh", "eff"]: - gamma_efficiency[l_] = 0.0 - for i in range(len(gh_cuts["cut"])): - if gh_cuts["cut"][i] < 1: - m = np.logical_and( - gammas["true_energy"] > gh_cuts["low"][i], - gammas["true_energy"] < gh_cuts["high"][i], - ) - gammas_sel_en = gammas[m] - m_gh = gammas_sel_en["gh_score"] > gh_cuts["cut"][i] - gammas_sel_en_gh = gammas_sel_en[m_gh] - try: - gamma_efficiency["eff_gh"][i] = float( - len(gammas_sel_en_gh) / len(gammas_sel_en) - ) - gamma_efficiency["eff"][i] = float( - len(gammas_sel_en[gammas_sel_en["selected"] == True]) - / len(gammas_sel_en) - ) - except Exception: - gamma_efficiency["eff_gh"][i] = -1 - gamma_efficiency["eff"][i] = -1 - hdus.append(fits.BinTableHDU(gamma_efficiency, name="GAMMA_EFFICIENCY")) - - # --- Store results --- - log.info("Writing outputfile") - if "tag" in cfg["irfs"]: - tag = f'_{cfg["irfs"]["tag"]}' - else: - tag = "" - - if "cuts" in cfg["irfs"]: - # Find multiplicity cut - rm_dict = {" ": "", "<": "", ">": "", "=": "", "(": "", ")": ""} - cuts_list = cfg["irfs"]["cuts"].translate(str.maketrans(rm_dict)).split("&") - for s_ in cuts_list: - if "multiplicity" in s_: - cut_mult = int(s_.replace("multiplicity", "")) - - irfs_subdir = "IRFs_MinEff%s_cut%s%s/" % ( - str(MIN_GH_CUT_EFFICIENCY), - str(cut_mult), - tag, - ) - irfs_dir = os.path.join(cfg["irfs"]["save_dir"], irfs_subdir) - check_folder(irfs_dir) - log.info(f"Output directory: {irfs_dir}") - - fits.HDUList(hdus).writeto( - os.path.join(irfs_dir, "pyirf.fits.gz"), - overwrite=True, - ) - - shutil.copyfile( - kwargs["config_file"], - os.path.join(irfs_dir, os.path.basename(kwargs["config_file"])), - ) - - return irfs_dir - - -if __name__ == "__main__": - args = PARSER.parse_args() - kwargs = args.__dict__ - start_time = time.time() - - irfs_dir = make_irfs_MAGIC_LST(kwargs["config_file"]) - - plot_irfs_MAGIC_LST(kwargs["config_file"], irfs_dir) - - print_elapsed_time(start_time, time.time()) diff --git a/magicctapipe/scripts/performance/stereo_reco_MAGIC_LST.py b/magicctapipe/scripts/performance/stereo_reco_MAGIC_LST.py deleted file mode 100644 index b037cdb3..00000000 --- a/magicctapipe/scripts/performance/stereo_reco_MAGIC_LST.py +++ /dev/null @@ -1,475 +0,0 @@ -# coding: utf-8 - -import argparse -import copy -import glob -import os -import select -import sys -import time - -import astropy.units as u -import matplotlib.pyplot as plt -import scipy -from astropy.coordinates import AltAz, EarthLocation, SkyCoord -from astropy.time import Time -from ctapipe.calib import CameraCalibrator -from ctapipe.containers import ImageParametersContainer -from ctapipe.coordinates import TelescopeFrame -from ctapipe.image.cleaning import tailcuts_clean -from ctapipe.image.morphology import number_of_islands -from ctapipe.io import HDF5TableWriter, SimTelEventSource -from ctapipe.reco import HillasReconstructor -from ctapipe.visualization import ArrayDisplay - -from magicctapipe.image import MAGICClean, clean_image_params, get_num_islands_MAGIC -from magicctapipe.reco.stereo import ( - StereoInfoContainer, - check_stereo, - write_hillas, - write_stereo, -) -from magicctapipe.utils import calculate_impact, check_folder -from magicctapipe.utils.filedir import load_cfg_file, out_file_h5 -from magicctapipe.utils.tels import check_tel_ids -from magicctapipe.utils.utils import print_elapsed_time, print_title - -PARSER = argparse.ArgumentParser( - description="Stereo Reconstruction MAGIC + LST", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, -) -PARSER.add_argument( - "-cfg", - "--config_file", - type=str, - required=True, - help="Configuration file, yaml format", -) -PARSER.add_argument( - "-mtr", - "--only_mc_train", - action="store_true", - required=False, - default=False, - help="Consider only mc train files", -) -PARSER.add_argument( - "-mte", - "--only_mc_test", - action="store_true", - required=False, - default=False, - help="Consider only mc test files", -) -PARSER.add_argument( - "-dtr", - "--only_data_train", - action="store_true", - required=False, - default=False, - help="Consider only data train files", -) -PARSER.add_argument( - "-dte", - "--only_data_test", - action="store_true", - required=False, - default=False, - help="Consider only data test files", -) -PARSER.add_argument( - "-d", - "--display", - action="store_true", - required=False, - default=False, - help="Display plots", -) - - -def call_stereo_reco_MAGIC_LST(kwargs): - """Stereo Reconstruction for MAGIC and/or LST array, looping on all given data - - Parameters - ---------- - kwargs : dict - parser options - """ - print_title("Stereo Reconstruction") - - cfg = load_cfg_file(kwargs["config_file"]) - - if kwargs["only_mc_train"]: - k1, k2 = ["mc"], ["train_sample"] - elif kwargs["only_mc_test"]: - k1, k2 = ["mc"], ["test_sample"] - elif kwargs["only_data_train"]: - k1, k2 = ["data"], ["train_sample"] - elif kwargs["only_data_test"]: - k1, k2 = ["data"], ["test_sample"] - else: - k1 = ["mc", "data"] - k2 = ["train_sample", "test_sample"] - - for k1_ in k1: - for k2_ in k2: - stereo_reco_MAGIC_LST(k1=k1_, k2=k2_, cfg=cfg, display=kwargs["display"]) - - -def stereo_reco_MAGIC_LST(k1, k2, cfg, display=False): - """Stereo Reconstruction for MAGIC and/or LST array - - Parameters - ---------- - k1 : str - first key in `cfg["data_files"][k1][k2]` - k2 : str - second key in `cfg["data_files"][k1][k2]` - cfg: dict - configurations loaded from configuration file - display : bool, optional - display plots, by default False - """ - - tel_ids, tel_ids_LST, tel_ids_MAGIC = check_tel_ids(cfg) - if len(tel_ids) < 2: - print("Select at least two telescopes in the MAGIC + LST array") - return - - consider_MAGIC = len(tel_ids_MAGIC) > 0 - - if consider_MAGIC: - use_MARS_cleaning = cfg["MAGIC"].get("use_MARS_cleaning", True) - - file_list = glob.glob(cfg["data_files"][k1][k2]["mask_sim"]) - print(file_list) - - previous_event_id = 0 - - # --- Loop on files --- - for file in file_list: - print(f"Analyzing file:\n{file}") - - # --- Output file DL1 --- - out_file = os.path.join( - os.path.dirname(cfg["data_files"][k1][k2]["hillas_h5"]), - out_file_h5(in_file=file), - ) - print(f"Output file:\n{out_file}") - check_folder(os.path.dirname(out_file)) - # Init the writer for the output file - writer = HDF5TableWriter(filename=out_file, group_name="dl1", overwrite=True) - - # --- Open simtel file --- - if "max_events_run" in cfg["all_tels"]: - max_events_run = cfg["all_tels"]["max_events_run"] - else: - max_events_run = 0 # I read all events in the file - - # Init source - source = SimTelEventSource(file, max_events=max_events_run) - - # Init calibrator, both for MAGIC and LST - calibrator = CameraCalibrator(subarray=source.subarray) - - hillas_reconstructor = HillasReconstructor(source.subarray) - tel_positions = source.subarray.positions - - # Init MAGIC MARS cleaning, if selected - if consider_MAGIC and use_MARS_cleaning: - magic_clean = MAGICClean( - camera=source.subarray.tel[tel_ids_MAGIC[0]].camera.geometry, - configuration=cfg["MAGIC"]["cleaning_config"], - ) - - # --- Write MC HEADER --- - # Problem: impossible to write/read with the following function a - # list, so we assign to run_array_direction an empty list, in order to - # make the sotware NOT write the run_array_direction - source.mc_header.run_array_direction = [] # dummy value - writer.write("mc_header", source.mc_header) - - if display: - fig, ax = plt.subplots() - go, first_time_display, cont = True, True, False - - # --- Loop on events in source --- - for event in source: - if previous_event_id == event.index.event_id: - continue - previous_event_id = copy.copy(event.index.event_id) - - if display and go: - print("Event %d" % event.count) - elif event.count % 100 == 0: - print("Event %d" % event.count) - - # Process only if I have at least two tel_ids of the selected array - # sel_tels: selected telescopes with data in the event - sel_tels = list(set(event.r0.tels_with_data).intersection(tel_ids)) - if len(sel_tels) < 2: - continue - - # Inits - hillas_p, leakage_p, timing_p = {}, {}, {} - telescope_pointings, time_grad, event_info = {}, {}, {} - - # --- Calibrate --- - # Call the calibrator, both for MAGIC and LST - calibrator(event) - - # Loop on triggered telescopes - for tel_id, dl1 in event.dl1.tel.items(): - # Exclude telescopes not selected - if tel_id not in sel_tels: - continue - try: - geom_camera_frame = source.subarray.tels[tel_id].camera.geometry - geom = geom_camera_frame.transform_to(TelescopeFrame()) - image = dl1.image # == event_image - peakpos = dl1.peak_time # == event_pulse_time - - # --- Cleaning --- - if geom.camera_name == cfg["LST"]["camera_name"]: - # Apply tailcuts clean on LST. From ctapipe - clean = tailcuts_clean( - geom=geom, image=image, **cfg["LST"]["cleaning_config"] - ) - # Ignore if less than n pixels after cleaning - if clean.sum() < cfg["LST"]["min_pixel"]: - continue - # Number of islands: LST. From ctapipe - num_islands, island_ids = number_of_islands( - geom=geom, mask=clean - ) - elif ( - geom.camera_name == cfg["MAGIC"]["camera_name"] - and not use_MARS_cleaning - ): - # Apply tailcuts clean on MAGIC. From ctapipe - clean = tailcuts_clean( - geom=geom, - image=image, - **cfg["MAGIC"]["cleaning_config_ctapipe"], - ) - # Ignore if less than n pixels after cleaning - if clean.sum() < cfg["MAGIC"]["min_pixel"]: - continue - # Number of islands: LST. From ctapipe - num_islands, island_ids = number_of_islands( - geom=geom, mask=clean - ) - elif ( - geom.camera_name == cfg["MAGIC"]["camera_name"] - and use_MARS_cleaning - ): - # Apply MAGIC MARS cleaning. From magic-cta-pipe - clean, image, peakpos = magic_clean.clean_image( - event_image=image, event_pulse_time=peakpos - ) - # Ignore if less than n pixels after cleaning - if clean.sum() < cfg["MAGIC"]["min_pixel"]: - continue - # Number of islands: MAGIC. From magic-cta-pipe - num_islands = get_num_islands_MAGIC( - camera=geom, clean_mask=clean, event_image=image - ) - else: - continue - # --- Analize cleaned image --- - # Evaluate Hillas, leakeage, timing - ( - hillas_p[tel_id], - leakage_p[tel_id], - timing_p[tel_id], - ) = clean_image_params( - geom=geom, image=image, clean=clean, peakpos=peakpos - ) - # Get time gradients - time_grad[tel_id] = timing_p[tel_id].slope.value - - if "cuts" in cfg["all_tels"]: - cuts = cfg["all_tels"]["cuts"] - if ( - (hillas_p[tel_id].intensity < cuts["intensity_low"]) - or (hillas_p[tel_id].intensity > cuts["intensity_high"]) - or (hillas_p[tel_id].length < cuts["length_low"]) - or ( - leakage_p[tel_id].intensity_width_1 - > cuts["intensity_width_1_high"] - ) - ): - hillas_p.pop(tel_id, None) - leakage_p.pop(tel_id, None) - timing_p.pop(tel_id, None) - continue - # position of the LST1 - location = EarthLocation.from_geodetic( - -17.89139 * u.deg, 28.76139 * u.deg, 2184 * u.m - ) - obstime = Time("2018-11-01T02:00") - horizon_frame = AltAz(location=location, obstime=obstime) - # Evaluate telescope pointings - telescope_pointings[tel_id] = SkyCoord( - alt=event.pointing.tel[tel_id].altitude, - az=event.pointing.tel[tel_id].azimuth, - frame=horizon_frame, - ) - - # Preparing metadata - event_info[tel_id] = StereoInfoContainer( - obs_id=event.index.obs_id, - event_id=scipy.int32(event.index.event_id), - tel_id=tel_id, - true_energy=event.mc.energy, - true_alt=event.mc.alt.to(u.rad), - true_az=event.mc.az.to(u.rad), - tel_alt=event.pointing.tel[tel_id].altitude.to(u.rad), - tel_az=event.pointing.tel[tel_id].azimuth.to(u.rad), - num_islands=num_islands, - ) - except Exception as e: - print(f"Image not reconstructed (tel_id={tel_id}):", e) - break - # --- END LOOP on tel_ids --- - - # --- Check if event is fine --- - # Ignore events with less than two telescopes - if len(hillas_p.keys()) < 2: - print(f"EVENT with LESS than 2 hillas_p (sel_tels={sel_tels})") - continue - - # Check hillas parameters for stereo reconstruction - if not check_stereo(event=event, tel_id=tel_id, hillas_p=hillas_p): - print("STEREO CHECK NOT PASSED") - continue - - tel_ids_written = list(event_info.keys()) - - for tel_id in tel_ids_written: - event.dl1.tel[tel_id].parameters = ImageParametersContainer( - hillas=hillas_p[tel_id] - ) - - hillas_reconstructor(event) - - stereo_params = event.dl2.stereo.geometry["HillasReconstructor"] - - if stereo_params.az < 0: - stereo_params.az += u.Quantity(360, u.deg) - - impact_p = calculate_impact( - core_x=stereo_params.core_x, - core_y=stereo_params.core_y, - az=stereo_params.az, - alt=stereo_params.alt, - tel_pos_x=tel_positions[tel_id][0], - tel_pos_y=tel_positions[tel_id][1], - tel_pos_z=tel_positions[tel_id][2], - ) - - # --- Store DL1 data --- - # Store hillas params - - # Loop on triggered telescopes - for tel_id in tel_ids_written: - # Write them - write_hillas( - writer=writer, - event_info=event_info[tel_id], - hillas_p=hillas_p[tel_id], - leakage_p=leakage_p[tel_id], - timing_p=timing_p[tel_id], - impact_p=impact_p[tel_id], - ) - - write_stereo( - stereo_params=stereo_params, - stereo_id=cfg["all_tels"]["stereo_id"], - event_info=event_info[tel_ids_written[0]], - writer=writer, - ) - - # --- Display plot --- - if display and go: - go, cont = _display_plots( - source=source, - event=event, - hillas_p=hillas_p, - time_grad=time_grad, - stereo_params=stereo_params, - first=first_time_display, - cont=cont, - fig=fig, - ax=ax, - ) - first_time_display = False - - # --- END LOOP event in source --- - - # --- Close DL1 writer --- - writer.close() - - # --- END LOOP file in file_list --- - - return - - -def _display_plots( - source, event, hillas_p, time_grad, stereo_params, first, cont, fig, ax -): - ax.set_xlabel("Distance (m)") - ax.set_ylabel("Distance (m)") - # Display the top-town view of the MAGIC-LST telescope array - disp = ArrayDisplay( - subarray=source.subarray, axes=ax, tel_scale=1, title="MAGIC-LST Monte Carlo" - ) - # # Set the vector angle and length from Hillas parameters - disp.set_vector_hillas( - hillas_dict=hillas_p, - time_gradient=time_grad, - angle_offset=event.pointing.array_azimuth, - length=500, - ) - # Estimated and true impact - plt.scatter( - event.mc.core_x, event.mc.core_y, s=20, c="k", marker="x", label="True Impact" - ) - plt.scatter( - stereo_params.core_x, - stereo_params.core_y, - s=20, - c="r", - marker="x", - label="Estimated Impact", - ) - plt.legend() - if first: - plt.show(block=False) - fig.canvas.draw() - fig.canvas.flush_events() - go = True - if not cont: - c = input("Press Enter to continue, s to stop, c to go continously: ") - plt.cla() - if not cont: - if c == "s": - go = False - elif c == "c": - print("Press Enter to stop the loop") - cont = True - if cont: - i, o, e = select.select([sys.stdin], [], [], 0.0001) - if i == [sys.stdin]: - go = False - input() - time.sleep(0.1) - return go, cont - - -if __name__ == "__main__": - args = PARSER.parse_args() - kwargs = args.__dict__ - start_time = time.time() - call_stereo_reco_MAGIC_LST(kwargs) - print_elapsed_time(start_time, time.time()) diff --git a/magicctapipe/scripts/performance/train_rfs_MAGIC_LST.py b/magicctapipe/scripts/performance/train_rfs_MAGIC_LST.py deleted file mode 100644 index 270d14ec..00000000 --- a/magicctapipe/scripts/performance/train_rfs_MAGIC_LST.py +++ /dev/null @@ -1,844 +0,0 @@ -# coding: utf-8 - -import argparse -import glob -import os -import time - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from astropy.coordinates.angle_utilities import angular_separation - -from magicctapipe.reco.classifier_utils import ( - check_train_test_intersections_classifier, - evaluate_performance_classifier, - get_weights_classifier, - load_init_data_classifier, - print_par_imp_classifier, -) -from magicctapipe.reco.direction_utils import compute_separation_angle_direction -from magicctapipe.reco.energy_utils import evaluate_performance_energy, plot_migmatrix -from magicctapipe.reco.event_processing import ( - DirectionEstimatorPandas, - EnergyEstimatorPandas, - EventClassifierPandas, -) -from magicctapipe.reco.global_utils import ( - check_train_test_intersections, - compute_event_weights, - get_weights_mc_dir_class, -) -from magicctapipe.utils.filedir import ( - check_folder, - load_cfg_file_check, - load_dl1_data_stereo_list, - load_dl1_data_stereo_list_selected, -) -from magicctapipe.utils.plot import ( - load_default_plot_settings, - load_default_plot_settings_02, - save_plt, -) -from magicctapipe.utils.tels import ( - check_tel_ids, - get_array_tel_descriptions, - get_tel_name, -) -from magicctapipe.utils.utils import info_message, print_elapsed_time, print_title - -PARSER = argparse.ArgumentParser( - description="Trains random forests for stereo data", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, -) -PARSER.add_argument( - "-cfg", - "--config_file", - type=str, - required=True, - help="Configuration file, yaml format", -) -PARSER.add_argument( - "-c", - "--classifier", - action="store_true", - required=False, - default=False, - help="Train classifier random forests", -) -PARSER.add_argument( - "-e", - "--energy", - action="store_true", - required=False, - default=False, - help="Train energy random forests", -) -PARSER.add_argument( - "-d", - "--direction", - action="store_true", - required=False, - default=False, - help="Train direction random forests", -) -PARSER.add_argument( - "-a", - "--all", - action="store_true", - required=False, - default=False, - help="Train all random forests", -) -PARSER.add_argument( - "-p", - "--only_plots", - action="store_true", - required=False, - default=False, - help="Only plots (works only for classifier)", -) - - -def train_classifier_rf_stereo(config_file, only_plots=False): - print_title("TRAIN CLASSIFIER RFs") - - # --- Read the configuration file --- - cfg = load_cfg_file_check(config_file=config_file, label="classifier_rf") - - # --- Check output directory --- - check_folder(cfg["classifier_rf"]["save_dir"]) - - # --- Train sample --- - if not only_plots: - mc_data_train, bkg_data_train = load_init_data_classifier(cfg=cfg, mode="train") - - # --- Test sample --- - mc_data_test, bkg_data_test = load_init_data_classifier(cfg=cfg, mode="test") - - # --- Check intersections --- - if not only_plots: - # useful ONLY if test_file_n == 0 - wn_ = ( - "WARNING: test_file_n != 0, considering only a selection of the test sample" - ) - if "check_train_test" in cfg["classifier_rf"].keys(): - if cfg["classifier_rf"]["check_train_test"]: - info_message("Check train and test", prefix="ClassifierRF") - if cfg["classifier_rf"]["test_file_n"] > 0: - info_message(wn_, prefix="ClassifierRF") - test_passed = check_train_test_intersections_classifier( - mc_data_train=mc_data_train, - bkg_data_train=bkg_data_train, - mc_data_test=mc_data_test, - bkg_data_test=bkg_data_test, - ) - s_ = "Test PASSED" if test_passed else "Test NOT PASSED" - info_message(s_, prefix="ClassifierRF") - - if not only_plots: - # Computing event weights for train sample - alt_edges, intensity_edges = compute_event_weights() - - mc_weights, bkg_weights = get_weights_classifier( - mc_data_train, bkg_data_train, alt_edges, intensity_edges - ) - - mc_data_train = mc_data_train.join(mc_weights) - bkg_data_train = bkg_data_train.join(bkg_weights) - - # Merging the train sample - shower_data_train = mc_data_train.append(bkg_data_train) - - # Merging the test sample - shower_data_test = mc_data_test.append(bkg_data_test) - - info_message("Preprocessing...", prefix="ClassifierRF") - - # --- Data preparation --- - l_ = ["obs_id", "event_id"] - if not only_plots: - shower_data_train["multiplicity"] = ( - shower_data_train["intensity"].groupby(level=l_).count() - ) - shower_data_test["multiplicity"] = ( - shower_data_test["intensity"].groupby(level=l_).count() - ) - - # Applying the cuts - c_ = cfg["classifier_rf"]["cuts"] - if not only_plots: - shower_data_train = shower_data_train.query(c_) - shower_data_test = shower_data_test.query(c_) - - # --- Training the classifier RF --- - info_message("Training RF...", prefix="ClassifierRF") - - class_estimator = EventClassifierPandas( - cfg["classifier_rf"]["features"], **cfg["classifier_rf"]["settings"] - ) - if not only_plots: - class_estimator.fit(shower_data_train) - - # --- Save RF data to joblib file --- - class_estimator.save( - os.path.join( - cfg["classifier_rf"]["save_dir"], cfg["classifier_rf"]["joblib_name"] - ) - ) - else: - # Load the joblib RFs file - class_estimator.load( - os.path.join( - cfg["classifier_rf"]["save_dir"], cfg["classifier_rf"]["joblib_name"] - ) - ) - - # --- Show results --- - # Print Parameter importances Mono - info_message("Parameter importances", prefix="ClassifierRF") - print_par_imp_classifier(class_estimator) - - # Apply RF - info_message("Applying RF...", prefix="ClassifierRF") - # Mono - class_reco = class_estimator.predict(shower_data_test) - shower_data_test = shower_data_test.join(class_reco) - - # Evaluating performance - info_message("Evaluating performance...", prefix="ClassifierRF") - - idx = pd.IndexSlice - - performance = dict() - # tel_ids = shower_data_test.index.levels[2] - - tel_ids, tel_ids_LST, tel_ids_MAGIC = check_tel_ids(cfg) - - drop_na = True if not only_plots else False - - # Mean - performance[0] = evaluate_performance_classifier( - shower_data_test.loc[idx[:, :, tel_ids[0]], shower_data_test.columns], - class0_name="event_class_0_mean", - drop_na=drop_na, - ) - - # For tels - for tel_id in tel_ids: - performance[tel_id] = evaluate_performance_classifier( - shower_data_test.loc[idx[:, :, tel_id], shower_data_test.columns], - drop_na=drop_na, - ) - - # ================ - # === Plotting === - # ================ - load_default_plot_settings(grid_bool=False) - plt.figure(figsize=tuple(cfg["classifier_rf"]["fig_size"])) - # labels = ["Gamma", "Hadrons"] - labels = cfg["classifier_rf"].get("labels", ["Gamma", "Hadrons"]) - - grid_shape = (2, len(tel_ids) + 1) - - for tel_num, tel_id in enumerate(performance): - plt.subplot2grid(grid_shape, (0, tel_num)) - if tel_id == 0: - plt.title("Mean") - else: - n_ = get_tel_name(tel_id=tel_id, cfg=cfg) - plt.title(f"{n_} estimation") - plt.xlabel("Gammaness") - # plt.xlabel('Class 0 probability') - plt.ylabel("Event density") - - gammaness = performance[tel_id]["gammaness"] - print(performance[tel_id]["metrics"]) - - for class_i, event_class in enumerate(gammaness): - plt.step( - gammaness[event_class]["XEdges"][:-1], - gammaness[event_class]["Hist"], - where="post", - color=f"C{class_i}", - label=labels[class_i], - ) - # label=f'Class {event_class}') - - plt.step( - gammaness[event_class]["XEdges"][1:], - gammaness[event_class]["Hist"], - where="pre", - color=f"C{class_i}", - ) - - plt.fill_between( - gammaness[event_class]["XEdges"][:-1], - gammaness[event_class]["Hist"], - step="post", - color=f"C{class_i}", - alpha=0.3, - ) - - # value = performance[tel_id]["metrics"]["acc"] - # plt.text( - # 0.9, - # 0.9, - # f"acc={value:.2f}", - # ha="right", - # va="top", - # transform=plt.gca().transAxes, - # ) - - # value = performance[tel_id]["metrics"]["auc_roc"] - # plt.text( - # 0.9, - # 0.8, - # f"auc_roc={value:.2f}", - # ha="right", - # va="top", - # transform=plt.gca().transAxes, - # ) - - plt.legend() - - for tel_num, tel_id in enumerate(performance): - plt.subplot2grid(grid_shape, (1, tel_num)) - plt.semilogy() - if tel_id == 0: - # plt.title(f'Tel {tel_id} estimation') - plt.title("Mean") - elif tel_id == -1: - plt.title("Stereo") - else: - n_ = get_tel_name(tel_id=tel_id, cfg=cfg) - plt.title(f"{n_} estimation") - plt.xlabel("Gammaness") - # plt.xlabel('Class 0 probability') - plt.ylabel("Cumulative probability") - plt.ylim(1e-3, 1) - - gammaness = performance[tel_id]["gammaness"] - - for class_i, event_class in enumerate(gammaness): - plt.step( - gammaness[event_class]["XEdges"][:-1], - gammaness[event_class]["Cumsum"], - where="post", - color=f"C{class_i}", - label=labels[class_i], - ) - # label=f'Class {event_class}') - - plt.step( - gammaness[event_class]["XEdges"][1:], - gammaness[event_class]["Cumsum"], - where="pre", - color=f"C{class_i}", - ) - - plt.fill_between( - gammaness[event_class]["XEdges"][:-1], - gammaness[event_class]["Cumsum"], - step="post", - color=f"C{class_i}", - alpha=0.3, - ) - - plt.legend() - - plt.tight_layout() - save_plt( - n=cfg["classifier_rf"]["fig_name"], - rdir=cfg["classifier_rf"]["save_dir"], - vect="pdf", - ) - - plt.close() - - -def train_direction_rf_stereo(config_file): - print_title("TRAIN DIRECTION RFs") - - # --- Read the configuration file --- - cfg = load_cfg_file_check(config_file=config_file, label="direction_rf") - - mono_mode = len(cfg["all_tels"]["tel_ids"]) == 1 - - # --- Check output directory --- - check_folder(cfg["direction_rf"]["save_dir"]) - - # --- Train sample --- - info_message("Loading train data...", prefix="DirRF") - f_ = cfg["data_files"]["mc"]["train_sample"]["hillas_h5"] - f_ = f"{os.path.dirname(f_)}/*{os.path.splitext(f_)[1]}" - info_message(f"Loading train files with the following mask:\n{f_}", prefix="DirRF") - shower_data_train = load_dl1_data_stereo_list(glob.glob(f_), mono_mode=mono_mode) - - # --- Test sample --- - f_ = cfg["data_files"]["mc"]["test_sample"]["hillas_h5"] - f_ = f"{os.path.dirname(f_)}/*{os.path.splitext(f_)[1]}" - info_message(f"Loading test files with the following mask:\n{f_}", prefix="DirRF") - shower_data_test = load_dl1_data_stereo_list_selected( - file_list=glob.glob(f_), - sub_dict=cfg["direction_rf"], - file_n_key="test_file_n", - mono_mode=mono_mode, - ) - - # --- Check intersections --- - wt_ = "WARNING: check only on gammas; use it in classifier to check also protons" - # useful ONLY if test_file_n == 0 - wn_ = "WARNING: test_file_n != 0, considering only a selection of the test sample" - if "check_train_test" in cfg["direction_rf"].keys(): - if cfg["direction_rf"]["check_train_test"]: - info_message("Check train and test", prefix="DirRF") - info_message(wt_, prefix="DirRF") - if cfg["direction_rf"]["test_file_n"] > 0: - info_message(wn_, prefix="DirRF") - test_passed = check_train_test_intersections( - shower_data_train, shower_data_test - ) - s_ = "Test PASSED" if test_passed else "Test NOT PASSED" - info_message(s_, prefix="DirRF") - - # Computing event weights - info_message("Computing the train sample event weights...", prefix="DirRF") - alt_edges, intensity_edges = compute_event_weights() - - mc_weights = get_weights_mc_dir_class(shower_data_train, alt_edges, intensity_edges) - - shower_data_train = shower_data_train.join(mc_weights) - - tel_ids, tel_ids_LST, tel_ids_MAGIC = check_tel_ids(cfg) - - # --- Data preparation --- - l_ = ["obs_id", "event_id"] - shower_data_train["multiplicity"] = ( - shower_data_train["intensity"].groupby(level=l_).count() - ) - shower_data_test["multiplicity"] = ( - shower_data_test["intensity"].groupby(level=l_).count() - ) - - # Applying the cuts - shower_data_train = shower_data_train.query(cfg["direction_rf"]["cuts"]) - shower_data_test = shower_data_test.query(cfg["direction_rf"]["cuts"]) - - # --- MAGIC - LST description --- - array_tel_descriptions = get_array_tel_descriptions( - tel_ids_LST=tel_ids_LST, tel_ids_MAGIC=tel_ids_MAGIC - ) - - # --- Training the direction RF --- - info_message("Training the RF\n", prefix="DirRF") - - direction_estimator = DirectionEstimatorPandas( - cfg["direction_rf"]["features"], - array_tel_descriptions, - **cfg["direction_rf"]["settings"], - ) - direction_estimator.fit(shower_data_train) - - # --- Save RF data to joblib file --- - direction_estimator.save( - os.path.join( - cfg["direction_rf"]["save_dir"], cfg["direction_rf"]["joblib_name"] - ) - ) - - # Printing the parameter "importances" - for kind in direction_estimator.telescope_rfs: - rfs_ = direction_estimator.telescope_rfs[kind] - for tel_id in rfs_: - feature_importances = rfs_[tel_id].feature_importances_ - print(f" Kind: {kind}, tel_id: {tel_id}") - z = zip(cfg["direction_rf"]["features"][kind], feature_importances) - for feature, importance in z: - print(f" {feature:.<15s}: {importance:.4f}") - print("") - - # --- Applying RF to the "test" sample --- - info_message('Applying RF to the "test" sample', prefix="DirRF") - coords_reco = direction_estimator.predict(shower_data_test) - shower_data_test = shower_data_test.join(coords_reco) - - # --- Evaluating the performance --- - info_message("Evaluating the performance", prefix="DirRF") - separation_df = compute_separation_angle_direction(shower_data_test) - - # Energy-dependent resolution - info_message("Estimating the energy-dependent resolution", prefix="DirRF") - energy_edges = np.logspace(-1, 1.3, num=20) - energy = (energy_edges[1:] * energy_edges[:-1]) ** 0.5 - - energy_psf = dict() - # for i in range(3): - for i in [0] + tel_ids: - energy_psf[i] = np.zeros_like(energy) - - for ei in range(len(energy_edges) - 1): - cuts = f"(true_energy>= {energy_edges[ei]:.2e})" - cuts += f" & (true_energy < {energy_edges[ei+1]: .2e})" - # cuts += ' & (intensity > 100)' - # cuts += ' & (length > 0.05)' - if not mono_mode: - cuts += " & (multiplicity > 1)" - query = separation_df.query(cuts) - - # for pi in range(3): # OLD - for pi in [0] + tel_ids: - if pi > 0: - tel_id = pi - else: - tel_id = tel_ids[0] - try: - selection = query.loc[ - (slice(None), slice(None), tel_id), f"sep_{pi}" - ].dropna() - energy_psf[pi][ei] = np.percentile(selection, 68) - except Exception as e: - print(f"ERROR: {e}. Setting energy_psf to 0") - energy_psf[pi][ei] = 0 - - # Offset-dependent resolution - info_message("Estimating the offset-dependent resolution", prefix="DirRF") - offset = angular_separation( - separation_df["tel_az"], - separation_df["tel_alt"], - separation_df["true_az"], - separation_df["true_alt"], - ) - - separation_df["offset"] = np.degrees(offset) - - offset_edges = np.linspace(0, 1.3, num=10) - offset = (offset_edges[1:] * offset_edges[:-1]) ** 0.5 - - offset_psf = dict() - # for i in range(3): # OLD - for i in [0] + tel_ids: - offset_psf[i] = np.zeros_like(offset) - - for oi in range(len(offset_edges) - 1): - cuts = f"(offset >= {offset_edges[oi]:.2f})" - cuts += f" & (offset < {offset_edges[oi+1]:.2f})" - # cuts += ' & (intensity > 100)' - # cuts += ' & (length > 0.05)' - if not mono_mode: - cuts += " & (multiplicity > 1)" - query = separation_df.query(cuts) - - # for pi in range(3): - for pi in [0] + tel_ids: - if pi > 0: - tel_id = pi - else: - tel_id = tel_ids[0] - try: - selection = query.loc[ - (slice(None), slice(None), tel_id), [f"sep_{pi}"] - ].dropna() - offset_psf[pi][oi] = np.percentile(selection[f"sep_{pi}"], 68) - except Exception as e: - print(f"ERROR: {e}. Setting offset_psf to 0") - offset_psf[pi][oi] = 0 - - # ================ - # === Plotting === - # ================ - load_default_plot_settings(grid_bool=False) - plt.figure(figsize=tuple(cfg["direction_rf"]["fig_size"])) - # plt.style.use('presentation') - - plt.xlabel(r"$\theta^2$, deg$^2$") - - # for tel_id in [0, 1, 2]: - grid_shape = (len(tel_ids) + 1, 2) - for index, tel_id in enumerate([0] + tel_ids): - plt.subplot2grid(grid_shape, (index, 0)) - if tel_id == 0: - plt.title("Total") - else: - plt.title(get_tel_name(tel_id=tel_id, cfg=cfg)) - plt.xlabel(r"$\theta^2$, deg$^2$") - # plt.semilogy() - plt.hist( - separation_df[f"sep_{tel_id}"] ** 2, - bins=100, - range=(0, 0.5), - density=True, - alpha=0.1, - color="C0", - ) - plt.hist( - separation_df[f"sep_{tel_id}"] ** 2, - bins=100, - range=(0, 0.5), - density=True, - histtype="step", - color="C0", - ) - plt.grid(linestyle=":") - - plt.subplot2grid(grid_shape, (index, 1)) - plt.xlabel(r"$\theta$, deg") - plt.xlim(0, 2.0) - plt.hist( - separation_df[f"sep_{tel_id}"], - bins=400, - range=(0, 5), - cumulative=True, - density=True, - alpha=0.1, - color="C0", - ) - plt.hist( - separation_df[f"sep_{tel_id}"], - bins=400, - range=(0, 5), - cumulative=True, - density=True, - histtype="step", - color="C0", - ) - plt.grid(linestyle=":") - - plt.tight_layout() - save_plt( - n=cfg["direction_rf"]["fig_name_theta2"], - rdir=cfg["direction_rf"]["save_dir"], - vect="pdf", - ) - plt.close() - - plt.clf() - - plt.semilogx() - plt.xlabel("Energy [TeV]") - plt.ylabel(r"$\sigma_{68}$ [deg]") - plt.ylim(0, 1.0) - plt.plot(energy, energy_psf[0], linewidth=4, label="Total") - for tel_id in tel_ids: - for i, tel_label in enumerate(cfg["all_tels"]["tel_n"]): - if tel_id in cfg[tel_label]["tel_ids"]: - l_ = get_tel_name(tel_id=tel_id, cfg=cfg) - plt.plot(energy, energy_psf[tel_id], label=l_) - plt.grid(linestyle=":") - plt.legend() - save_plt( - n=cfg["direction_rf"]["fig_name_PSF_energy"], - rdir=cfg["direction_rf"]["save_dir"], - vect="pdf", - ) - plt.close() - - plt.clf() - plt.xlabel("Offset [deg]") - plt.ylabel(r"$\sigma_{68}$ [deg]") - plt.ylim(0, 0.5) - plt.plot(offset, offset_psf[0], linewidth=4, label="Total") - for tel_id in tel_ids: - for i, tel_label in enumerate(cfg["all_tels"]["tel_n"]): - if tel_id in cfg[tel_label]["tel_ids"]: - l_ = get_tel_name(tel_id=tel_id, cfg=cfg) - plt.plot(offset, offset_psf[tel_id], label=l_) - plt.grid(linestyle=":") - plt.legend() - save_plt( - n=cfg["direction_rf"]["fig_name_PSF_offset"], - rdir=cfg["direction_rf"]["save_dir"], - vect="pdf", - ) - plt.close() - - -def train_energy_rf_stereo(config_file, only_plots=False): - print_title("TRAIN ENERGY RFs") - - # --- Read the configuration file --- - cfg = load_cfg_file_check(config_file=config_file, label="energy_rf") - - mono_mode = len(cfg["all_tels"]["tel_ids"]) == 1 - - # --- Check output directory --- - check_folder(cfg["energy_rf"]["save_dir"]) - - # --- Train sample --- - if not only_plots: - f_ = cfg["data_files"]["mc"]["train_sample"]["hillas_h5"] - f_ = f"{os.path.dirname(f_)}/*{os.path.splitext(f_)[1]}" - info_message("Loading train data...", prefix="EnergyRF") - info_message( - f"Loading train data with the following mask: \n{f_}", prefix="EnergyRF" - ) - info_message(f"Loading files with the following mask:\n{f_}", prefix="EnergyRF") - shower_data_train = load_dl1_data_stereo_list( - glob.glob(f_), mono_mode=mono_mode - ) - - # --- Test sample --- - f_ = cfg["data_files"]["mc"]["test_sample"]["hillas_h5"] - f_ = f"{os.path.dirname(f_)}/*{os.path.splitext(f_)[1]}" - info_message(f"Loading test data with the following mask:\n{f_}", prefix="EnergyRF") - # shower_data_test = load_dl1_data_stereo_list(glob.glob(f_)) - shower_data_test = load_dl1_data_stereo_list_selected( - file_list=glob.glob(f_), - sub_dict=cfg["energy_rf"], - file_n_key="test_file_n", - mono_mode=mono_mode, - ) - - # --- Check intersections --- - if not only_plots: - wt_ = ( - "WARNING: check only on gammas; use it in classifier to check also protons" - ) - # useful ONLY if test_file_n == 0 - wn_ = ( - "WARNING: test_file_n != 0, considering only a selection of the test sample" - ) - if "check_train_test" in cfg["energy_rf"].keys(): - if cfg["energy_rf"]["check_train_test"]: - info_message("Check train and test", prefix="EnergyRF") - info_message(wt_, prefix="EnergyRF") - if cfg["energy_rf"]["test_file_n"] > 0: - info_message(wn_, prefix="EnergyRF") - test_passed = check_train_test_intersections( - shower_data_train, shower_data_test - ) - s_ = "Test PASSED" if test_passed else "Test NOT PASSED" - info_message(s_, prefix="EnergyRF") - - # Computing event weights - if not only_plots: - info_message("Computing the train sample event weights...", prefix="EnergyRF") - alt_edges, intensity_edges = compute_event_weights() - - mc_weights = get_weights_mc_dir_class( - shower_data_train, alt_edges, intensity_edges - ) - - shower_data_train = shower_data_train.join(mc_weights) - - tel_ids, tel_ids_LST, tel_ids_MAGIC = check_tel_ids(cfg) - - # --- Data preparation --- - l_ = ["obs_id", "event_id"] - if not only_plots: - shower_data_train["multiplicity"] = ( - shower_data_train["intensity"].groupby(level=l_).count() - ) - shower_data_test["multiplicity"] = ( - shower_data_test["intensity"].groupby(level=l_).count() - ) - - # Applying the cuts - if not only_plots: - shower_data_train = shower_data_train.query(cfg["energy_rf"]["cuts"]) - shower_data_test = shower_data_test.query(cfg["energy_rf"]["cuts"]) - - # --- Training the direction RF --- - info_message("Training the RF\n", prefix="EnergyRF") - - energy_estimator = EnergyEstimatorPandas( - cfg["energy_rf"]["features"], **cfg["energy_rf"]["settings"] - ) - if not only_plots: - energy_estimator.fit(shower_data_train) - - # --- Save RF data to joblib file --- - energy_estimator.save( - os.path.join(cfg["energy_rf"]["save_dir"], cfg["energy_rf"]["joblib_name"]) - ) - # energy_estimator.load(cfg['energy_rf']['save_name']) - else: - # Load the joblib RFs file - energy_estimator.load( - os.path.join(cfg["energy_rf"]["save_dir"], cfg["energy_rf"]["joblib_name"]) - ) - - info_message("Parameter importances", prefix="EnergyRF") - print("") - r_ = energy_estimator.telescope_regressors - for tel_id in r_: - feature_importances = r_[tel_id].feature_importances_ - print(f" tel_id: {tel_id}") - z_ = zip(energy_estimator.feature_names, feature_importances) - for feature, importance in z_: - print(f" {feature:.<15s}: {importance:.4f}") - print("") - - info_message("Applying RF...", prefix="EnergyRF") - energy_reco = energy_estimator.predict(shower_data_test) - shower_data_test = shower_data_test.join(energy_reco) - - # Evaluating performance - info_message("Evaluating performance...", prefix="EnergyRF") - - idx = pd.IndexSlice - - tel_migmatrix = {} - for tel_id in tel_ids: - tel_migmatrix[tel_id] = evaluate_performance_energy( - shower_data_test.loc[idx[:, :, tel_id], ["true_energy", "energy_reco"]], - "energy_reco", - ) - - migmatrix = evaluate_performance_energy(shower_data_test, "energy_reco_mean") - - # ================ - # === Plotting === - # ================ - load_default_plot_settings_02(grid_bool=False) - plt.figure(figsize=tuple(cfg["energy_rf"]["fig_size"])) - - grid_shape = (2, len(tel_ids) + 1) - # --- PLOT --- - for index, tel_id in enumerate(tel_ids): - for i, tel_label in enumerate(cfg["all_tels"]["tel_n"]): - if tel_id in cfg[tel_label]["tel_ids"]: - n = cfg["all_tels"]["tel_n_short"][i] - j = tel_id - cfg[tel_label]["tel_ids"][0] + 1 - name = f"{n}{j}" - plot_migmatrix( - index=index, name=name, matrix=tel_migmatrix[tel_id], grid_shape=grid_shape - ) - index += 1 - # --- GLOBAL --- - plot_migmatrix(index=index, name="All", matrix=migmatrix, grid_shape=grid_shape) - - plt.tight_layout() - save_plt( - n=cfg["energy_rf"]["fig_name"], - rdir=cfg["energy_rf"]["save_dir"], - vect="pdf", - ) - plt.close() - - -if __name__ == "__main__": - args = PARSER.parse_args() - kwargs = args.__dict__ - start_time = time.time() - - do_classifier = kwargs["all"] or kwargs["classifier"] - do_energy = kwargs["all"] or kwargs["energy"] - do_direction = kwargs["all"] or kwargs["direction"] - - if not (do_classifier or do_energy or do_direction): - print("No options selected") - print("Type -h or --help for information on how to run the script") - - if do_classifier: - train_classifier_rf_stereo( - config_file=kwargs["config_file"], only_plots=kwargs["only_plots"] - ) - if do_energy: - train_energy_rf_stereo( - config_file=kwargs["config_file"], only_plots=kwargs["only_plots"] - ) - if do_direction: - train_direction_rf_stereo(config_file=kwargs["config_file"]) - - print_elapsed_time(start_time, time.time()) diff --git a/magicctapipe/utils/__init__.py b/magicctapipe/utils/__init__.py index dbdf54fc..90c63b6c 100644 --- a/magicctapipe/utils/__init__.py +++ b/magicctapipe/utils/__init__.py @@ -1,22 +1,5 @@ from .badpixels import MAGICBadPixelsCalc from .camera_geometry import reflected_camera_geometry, scale_camera_geometry -from .filedir import ( - check_common_keys, - check_folder, - convert_np_list_dict, - drop_keys, - load_cfg_file, - load_cfg_file_check, - load_dl1_data_mono, - load_dl1_data_stereo, - load_dl1_data_stereo_list, - load_dl1_data_stereo_list_selected, - out_file_h5, - out_file_h5_no_run, - out_file_h5_reco, - read_mc_header, - save_yaml_np, -) from .functions import ( HEIGHT_ORM, LAT_ORM, @@ -27,73 +10,26 @@ calculate_off_coordinates, transform_altaz_to_radec, ) -from .gti import GTIGenerator, identify_time_edges, intersect_time_intervals -from .plot import load_default_plot_settings, load_default_plot_settings_02, save_plt -from .tels import ( - check_tel_ids, - convert_positions_dict, - get_array_tel_descriptions, - get_tel_descriptions, - get_tel_ids_dl1, - get_tel_name, - intersec_tel_ids, - num_2_tel_ids, - tel_ids_2_num, -) -from .utils import ( +from .gti import ( + GTIGenerator, + identify_time_edges, info_message, - make_elapsed_time_str, - make_title_str, - print_elapsed_time, - print_title, - resource_file, + intersect_time_intervals, ) __all__ = [ "MAGICBadPixelsCalc", "scale_camera_geometry", "reflected_camera_geometry", - "load_cfg_file", - "load_cfg_file_check", - "check_folder", - "load_dl1_data_stereo_list_selected", - "load_dl1_data_stereo_list", - "load_dl1_data_stereo", - "load_dl1_data_mono", - "drop_keys", - "check_common_keys", - "out_file_h5_no_run", - "out_file_h5", - "out_file_h5_reco", - "read_mc_header", - "save_yaml_np", - "convert_np_list_dict", "identify_time_edges", "intersect_time_intervals", "GTIGenerator", + "info_message", "calculate_disp", "calculate_impact", "calculate_mean_direction", "calculate_off_coordinates", "transform_altaz_to_radec", - "save_plt", - "load_default_plot_settings", - "load_default_plot_settings_02", - "tel_ids_2_num", - "num_2_tel_ids", - "get_tel_descriptions", - "get_array_tel_descriptions", - "get_tel_ids_dl1", - "convert_positions_dict", - "check_tel_ids", - "intersec_tel_ids", - "get_tel_name", - "info_message", - "print_elapsed_time", - "make_elapsed_time_str", - "print_title", - "make_title_str", - "resource_file", "LON_ORM", "LAT_ORM", "HEIGHT_ORM", diff --git a/magicctapipe/utils/filedir.py b/magicctapipe/utils/filedir.py deleted file mode 100644 index dbe9f519..00000000 --- a/magicctapipe/utils/filedir.py +++ /dev/null @@ -1,416 +0,0 @@ -import copy -import os -import sys - -import numpy as np -import pandas as pd -import yaml - -__all__ = [ - "load_cfg_file", - "load_cfg_file_check", - "check_folder", - "load_dl1_data_stereo_list_selected", - "load_dl1_data_stereo_list", - "load_dl1_data_stereo", - "load_dl1_data_mono", - "drop_keys", - "check_common_keys", - "out_file_h5_no_run", - "out_file_h5", - "out_file_h5_reco", - "read_mc_header", - "save_yaml_np", - "convert_np_list_dict", -] - - -def load_cfg_file(config_file): - """Loads the configuration file (yaml format) - - Parameters - ---------- - config_file : str - configuration file, yaml format - - Returns - ------- - dict - loaded configurations - """ - print(f"Loading configuration file\n{config_file}") - e_ = ( - "ERROR: can not load the configuration file %s\n" - "Please check that the file exists and is of YAML format\n" - "Exiting" - ) - try: - cfg = yaml.safe_load(open(config_file, "r")) - except IOError: - print(e_ % config_file) - sys.exit() - return cfg - - -def load_cfg_file_check(config_file, label): - """Loads the configuration file (yaml format) and checks that the label - section is present in the given file, otherwise it exits - - Parameters - ---------- - config_file : str - configuration file, yaml format - label : str - label for the desired section - - Returns - ------- - dict - loaded configurations - """ - - l_ = "ERROR: the configuration file is missing the %s section.\n" "Exiting" - cfg = load_cfg_file(config_file) - if label not in cfg: - print(l_ % label) - sys.exit() - return cfg - - -def check_folder(folder): - """Checks if folder exists; if not, it will be created - - Parameters - ---------- - folder : str - folder name (with path) - """ - if not os.path.exists(folder): - print("Directory %s does not exist, creating it..." % folder) - try: - os.makedirs(folder) - except Exception as e: - print(f"ERROR, folder not created. {e}") - - -def load_dl1_data_stereo_list_selected( - file_list, sub_dict, file_n_key="file_n", drop=False, mono_mode=False -): - """Loads dl1 data hillas and stereo and merge them togheter, from `file_list`. - If in `sub_dict` it finds the `file_n_key` key, and if the given number is > 0, it - limits the `file_list` lenght to the given number. Useful to make random forests - plot on a smaller test sample - - Parameters - ---------- - file_list : string - file_list - sub_dict : dict - sub-dictionary loaded from config file (e.g. cfg["direction_rf"]) - file_n_key : str, optional - file number key, by default "file_n" - drop : bool, optional - drop extra keys, by default False - mono_mode : bool, optional - mono mode, by default False - - Returns - ------- - pandas.DataFrame - data - """ - if file_n_key in sub_dict.keys(): - n = sub_dict[file_n_key] - if n > 0: - file_list = file_list[:n] - data = load_dl1_data_stereo_list(file_list, drop, mono_mode=mono_mode, verbose=True) - return data - - -def load_dl1_data_stereo_list(file_list, drop=False, verbose=False, mono_mode=False): - """Loads dl1 data hillas and stereo and merge them togheter, from `file_list` - - Parameters - ---------- - file_list : string - file_list - drop : bool, optional - drop extra keys, by default False - verbose : bool, optional - print file list, by default False - mono_mode : bool, optional - mono mode, by default False - - - Returns - ------- - pandas.DataFrame - data - """ - if verbose: - fl = "\n".join(file_list) - print(f"File list:\n{fl}") - first_time = True - for i, file in enumerate(file_list): - try: - if not mono_mode: - data_ = load_dl1_data_stereo(file, drop) - else: - data_ = load_dl1_data_mono(file) - except Exception as e: - print(f"LOAD FAILED with file {file}", e) - continue - if first_time: - data = data_ - first_time = False - else: - data = data.append(data_) - return data - - -def load_dl1_data_stereo(file, drop=False, slope_abs=False): - """Loads dl1 data (hillas and stereo) from `file` and merge them togheter - - Parameters - ---------- - file : string - file - drop : bool, optional - drop extra keys, by default False - slope_abs : bool, optional - get only the absolute value of slope, by default False - - - Returns - ------- - pandas.DataFrame - data - """ - extra_keys = [ - "true_energy", - "true_alt", - "true_az", - "mjd", - "goodness_of_fit", - "h_max_uncert", - "az_uncert", - "core_uncert", - "true_core_x", - "true_core_y", - ] - extra_stereo_keys = ["tel_alt", "tel_az", "num_islands", "n_islands", "tel_id"] - common_keys = ["obs_id", "event_id", "true_energy", "true_alt", "true_az"] - try: - # Hillas - data_hillas = pd.read_hdf(file, key="dl1/hillas_params") - # Stereo - data_stereo = pd.read_hdf(file, key="dl1/stereo_params") - # Drop extra stereo keys - data_stereo = drop_keys(data_stereo, extra_stereo_keys) - # Drop extra keys - if drop: - data_hillas = drop_keys(data_hillas, extra_keys) - data_stereo = drop_keys(data_stereo, extra_keys) - # Check common keys - common_keys = check_common_keys(data_hillas, data_stereo, common_keys) - # Merge - data = data_hillas.merge(data_stereo, on=common_keys) - # Index - data.set_index(["obs_id", "event_id", "tel_id"], inplace=True) - except Exception: - data = pd.read_hdf(file, key="dl2/reco") - data.sort_index(inplace=True) - # Get absolute value of slope, if needed - if slope_abs: - data["slope"] = abs(data["slope"]) - return data - - -def load_dl1_data_mono(file, label="hillas_params", slope_abs=False): - """Loads `dl1/{label}` from dl1 file, h5 format for mono data - - Parameters - ---------- - file : str - file name - label : str, optional - dl1 label, by default 'hillas_params' - slope_abs : bool, optional - get only the absolute value of slope, by default False - - Returns - ------- - pandas.DataFrame - data - """ - data = pd.read_hdf(file, key=f"dl1/{label}") - data.set_index(["obs_id", "event_id", "tel_id"], inplace=True) - data.sort_index(inplace=True) - # Get absolute value of slope, if needed - if slope_abs: - data["slope"] = abs(data["slope"]) - return data - - -def drop_keys(df, extra_keys): - """Drops extrakeys from pandas dataframe, without crashing if they are not - present in the dataframe - - Parameters - ---------- - df : pandas.DataFrame - dataframe - extra_keys : list - list of keys to be dropped - - Returns - ------- - pandas.DataFrame - dataframe without extra keys - """ - for extra_key in extra_keys: - if extra_key in df.columns: - df.drop(extra_key, axis=1, inplace=True) - return df - - -def check_common_keys(df1, df2, common_keys): - """Check if `common_keys` exist both in `df1` and in `df2`, and return sub-list - of `common_keys` which really exist in both dataframe - - Parameters - ---------- - df1 : pandas.DataFrame - dataframe - df2 : pandas.DataFrame - dataframe - common_keys : list - list of common keys to be checked - - Returns - ------- - list - list of common keys - """ - common_keys_checked = [] - for k in common_keys: - if k in df1.columns and k in df2.columns: - common_keys_checked.append(k) - return common_keys_checked - - -def out_file_h5_no_run(in_file, li, hi): - """Returns the h5 output file name, from a simtel.gz input file, without run number - - Parameters - ---------- - in_file : str - Input file - li : int - low index - hi : int - high index - - Returns - ------- - str - h5 output file, absolute path - """ - f = os.path.basename(in_file) - out = "_".join(f.split("_")[:li] + f.split("_")[hi:]) - out = "%s.h5" % out.rstrip(".simtel.gz") - out = os.path.join(os.path.dirname(in_file), out) - return out - - -def out_file_h5(in_file): - """Returns the h5 output file name, from a simtel.gz input file. Only file name, - without path - - Parameters - ---------- - in_file : str - Input file - - Returns - ------- - str - h5 output file, without path - """ - f = os.path.basename(in_file) - out = "%s.h5" % f.rstrip(".simtel.gz") - return out - - -def out_file_h5_reco(in_file): - """Returns the h5 reco output file name, from a h5 dl1 input file. Only file name, - without path - - Parameters - ---------- - in_file : str - Input file - - Returns - ------- - str - h5 output file, without path - """ - f = os.path.basename(in_file) - # out = "%s_reco.h5" % f.rstrip(".h5") - # Remove ".h5" - out = "%s_reco.h5" % f[:-3] - return out - - -def read_mc_header(file, mc_header_key="dl2/mc_header"): - """Function to read `mc_header` from DL2 file - - Parameters - ---------- - file : str - file - mc_header_key : str, optional - mc_header key, by default "dl2/mc_header" - - Returns - ------- - pandas.DataFrame - mc_header - """ - return pd.read_hdf(file, key=mc_header_key) - - -def save_yaml_np(data, file): - """Save dictionary to yaml file, converting `np.array` objects to `list` - - Parameters - ---------- - data : dict - data to be saved - file : str - yaml file name - """ - with open(file, "w") as f_: - yaml.dump(convert_np_list_dict(copy.deepcopy(data)), f_) - - -def convert_np_list_dict(d): - """Loop on dictionary and convert `np.array` objects to list - - Parameters - ---------- - d : dict - input dictionary - - Returns - ------- - dict - output dictionary - """ - for k in d.keys(): - if type(d[k]) == dict: - convert_np_list_dict(d[k]) - elif type(d[k]) == np.ndarray: - d[k] = d[k].tolist() - return d diff --git a/magicctapipe/utils/gti.py b/magicctapipe/utils/gti.py index 7a6fad5a..7d6e8cfc 100644 --- a/magicctapipe/utils/gti.py +++ b/magicctapipe/utils/gti.py @@ -1,18 +1,33 @@ # coding: utf-8 +import datetime + import pandas import scipy import uproot -from .utils import info_message - __all__ = [ + "info_message", "identify_time_edges", "intersect_time_intervals", "GTIGenerator", ] +def info_message(text, prefix="info"): + """Prints the specified text with the prefix of the current date + + Parameters + ---------- + text : str + text + prefix : str, optional + prefix, by default 'info' + """ + date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") + print(f"({prefix:s}) {date_str:s}: {text:s}") + + def identify_time_edges(times, criterion, max_time_diff=6.9e-4): """ Identifies the time interval edges, corresponding to the True diff --git a/magicctapipe/utils/plot.py b/magicctapipe/utils/plot.py deleted file mode 100644 index 762acae8..00000000 --- a/magicctapipe/utils/plot.py +++ /dev/null @@ -1,63 +0,0 @@ -import os - -import matplotlib.pylab as plt - -__all__ = [ - "save_plt", - "load_default_plot_settings", - "load_default_plot_settings_02", -] - - -def save_plt(n, rdir="", vect="pdf,eps"): - """Save plot in the required formats - - Parameters - ---------- - n : str - plot name - rdir : str, optional - directory, by default '' - vect : str, optional - vectory formats, leave empty to save only png, by default "pdf,eps" - """ - if os.path.exists(os.path.dirname(rdir)): - for vect_ in vect.split(","): - if vect_ == "pdf" or vect_ == "eps": - plt.savefig(os.path.join(rdir, "%s.%s" % (n, vect_))) - plt.savefig(os.path.join(rdir, "%s.png" % (n)), dpi=300) - else: - print("Figure NOT saved, directory doesn't exist") - return - - -def load_default_plot_settings(grid_bool=True): - """Load default plot settings""" - params = { - "figure.figsize": (10, 6), - "savefig.bbox": "tight", - "axes.grid": grid_bool, - "errorbar.capsize": 3, - "axes.titlesize": 16, - "axes.labelsize": 16, - "xtick.labelsize": 14, - "ytick.labelsize": 14, - "legend.fontsize": 14, - } - plt.rcParams.update(params) - - -def load_default_plot_settings_02(grid_bool=True): - """Load default plot settings""" - params = { - "figure.figsize": (10, 6), - "savefig.bbox": "tight", - "axes.grid": grid_bool, - "errorbar.capsize": 3, - "axes.titlesize": 24, - "axes.labelsize": 24, - "xtick.labelsize": 26, - "ytick.labelsize": 26, - "legend.fontsize": 26, - } - plt.rcParams.update(params) diff --git a/magicctapipe/utils/tels.py b/magicctapipe/utils/tels.py deleted file mode 100644 index 7d83ed31..00000000 --- a/magicctapipe/utils/tels.py +++ /dev/null @@ -1,252 +0,0 @@ -import numpy as np -from astropy import units as u -from ctapipe.instrument import CameraGeometry, OpticsDescription, TelescopeDescription - -__all__ = [ - "tel_ids_2_num", - "num_2_tel_ids", - "get_tel_descriptions", - "get_array_tel_descriptions", - "get_tel_ids_dl1", - "convert_positions_dict", - "check_tel_ids", - "intersec_tel_ids", - "get_tel_name", -] - - -def tel_ids_2_num(tel_ids): - """Function to convert a list of tel_ids into a decimal number wich can be - stored in a h5 file. - The number is computed on a binary basis, assigning 1 to a triggered - telescope, 0 to a non-triggered one. - For example, if in the array the triggered telescopes are 2, 3 and 5: - - [6 5 4 3 2 1] all telescope ids of the array - - [ 5 3 2 ] triggered telescopes (tel_ids) - - [0 1 0 1 1 0] triggered telescopes (binary) - - the number is saved as decimal, therefore: 2**5 + 2**3 + 2**2 = 44 - Please note that the number does not depend on the presence or absence - of non-triggered telescopes in the array - - Parameters - ---------- - tel_ids : list - list of triggered telescope ids - - Returns - ------- - int - decimal number computed as follow - """ - return sum([2**a_ for a_ in tel_ids]) - - -def num_2_tel_ids(num): - """Inverse function of tel_ids_2_num() - - Parameters - ---------- - num : int - decimal number computed by tel_ids_2_num() - - Returns - ------- - list - list of triggered telescope ids - """ - return np.where(np.array(list(bin(num)[2:][::-1])) == "1")[0] - - -def get_tel_descriptions(name, cam, tel_ids): - """Get telescope descriptions from ctapipe, for the same telescope type. - Returns a dictionary with repeated description of the selected telescope, - depending on the number of telescopes in the telescope array under study - - Parameters - ---------- - name : str - telescope name - cam : str - camera name - tel_ids : list - telescope ids - - Returns - ------- - dict - tel_descriptions - """ - optics = OpticsDescription.from_name(name) - cam = CameraGeometry.from_name(cam) - tel_description = TelescopeDescription( - name=name, tel_type=name, optics=optics, camera=cam - ) - tel_descriptions = {} - for tel_id in tel_ids: - tel_descriptions = {**tel_descriptions, **{tel_id: tel_description}} - return tel_descriptions - - -def get_array_tel_descriptions(tel_ids_LST, tel_ids_MAGIC): - """Get telescope descriptions for the array - - Parameters - ---------- - tel_ids_LST : list - list of selected LST tel_ids - tel_ids_MAGIC : list - list of selected MAGIC tel_ids - - Returns - ------- - dict - array_tel_descriptions - """ - array_tel_descriptions = {} - if len(tel_ids_LST) > 0: - array_tel_descriptions = { - **array_tel_descriptions, - **get_tel_descriptions(name="LST", cam="LSTCam", tel_ids=tel_ids_LST), - } - if len(tel_ids_MAGIC) > 0: - array_tel_descriptions = { - **array_tel_descriptions, - **get_tel_descriptions(name="MAGIC", cam="MAGICCam", tel_ids=tel_ids_MAGIC), - } - return array_tel_descriptions - - -def get_tel_ids_dl1(df): - """Return telescope ids from loaded dl1 pandas dataframe - - Parameters - ---------- - df : pandas.DataFrame - pandas dataframe - - Returns - ------- - list - telescope ids - """ - return list(df.index.levels[2]) - - -def convert_positions_dict(positions_dict): - """Convert telescope positions loaded from config.yaml file from - adimensional numbers to u.m (astropy units) - - Parameters - ---------- - positions_dict : dict - telescopes positions - - Returns - ------- - dict - telescopes positions - """ - for k_ in positions_dict.keys(): - positions_dict[k_] *= u.m - return positions_dict - - -def check_tel_ids(cfg): - """Function to check the given tel_ids in the configuration file (all_tels:tel_ids) - - Parameters - ---------- - cfg : dict - configuration dictionary loaded from config file. It must contain the - following elements: - - * cfg['all_tels']['tel_n']: list with all telescope names - * cfg[]['tel_ids']: list with telescope ids, where is MAGIC and LST, but not necessarily both - - Returns - ------- - tuple - - * tel_ids: intersection with tel_ids_sel and the sum between - all_tel_ids_LST and all_tel_ids_MAGIC - * tel_ids_LST: LST telescope ids in tel_ids - * tel_ids_MAGIC: MAGIC telescope ids in tel_ids - - """ - all_tel_ids = {"LST": [], "MAGIC": []} - for k in all_tel_ids.keys(): - if k in cfg.keys(): - if "tel_ids" in cfg[k].keys(): - all_tel_ids[k] = cfg[k]["tel_ids"] - - tel_ids, tel_ids_LST, tel_ids_MAGIC = intersec_tel_ids( - all_tel_ids_LST=all_tel_ids["LST"], - all_tel_ids_MAGIC=all_tel_ids["MAGIC"], - tel_ids_sel=cfg["all_tels"]["tel_ids"], - ) - return tel_ids, tel_ids_LST, tel_ids_MAGIC - - -def intersec_tel_ids( - all_tel_ids_LST=[1, 2, 3, 4], - all_tel_ids_MAGIC=[5, 6], - tel_ids_sel=[1, 2, 3, 4, 5, 6], -): - """Get telescope ids from the intersection between the selected ids and the - telescope ids of the telescope array - - Parameters - ---------- - all_tel_ids_LST : list, optional - LST ids, by default [1, 2, 3, 4] - all_tel_ids_MAGIC : list, optional - MAGIC ids, by default [5, 6] - tel_ids_sel : list, optional - telescope ids selected for the analysis, by default [1, 2, 3, 4, 5, 6] - - Returns - ------- - tuple - - * tel_ids: intersection with tel_ids_sel and the sum between - all_tel_ids_LST and all_tel_ids_MAGIC - * tel_ids_LST: LST telescope ids in tel_ids - * tel_ids_MAGIC: MAGIC telescope ids in tel_ids - """ - sum_ids = all_tel_ids_LST + all_tel_ids_MAGIC - tel_ids = list(set(tel_ids_sel).intersection(sum_ids)) - tel_ids_LST = list(set(tel_ids).intersection(all_tel_ids_LST)) - tel_ids_MAGIC = list(set(tel_ids).intersection(all_tel_ids_MAGIC)) - print("Selected tels:", tel_ids) - print("LST tels:", tel_ids_LST) - print("MAGIC tels:", tel_ids_MAGIC) - return tel_ids, tel_ids_LST, tel_ids_MAGIC - - -def get_tel_name(tel_id, cfg): - """Get telescope name from telescope id - - Parameters - ---------- - tel_id : int - telescope id - cfg : dict - configuration dictionary loaded from config file. It must contain the - following elements: - - * cfg['all_tels']['tel_n']: list with all telescope names - * cfg[]['tel_ids']: list with telescope ids - * cfg['all_tels']['tel_n_short']: with with telescope short name, to be plotted - - Returns - ------- - str - telescope name - """ - for i, tel_label in enumerate(cfg["all_tels"]["tel_n"]): - if tel_id in cfg[tel_label]["tel_ids"]: - n = cfg["all_tels"]["tel_n_short"][i] - j = tel_id - cfg[tel_label]["tel_ids"][0] + 1 - name = f"{n}{j}" - break - return name diff --git a/magicctapipe/utils/utils.py b/magicctapipe/utils/utils.py deleted file mode 100644 index 8eea8a0a..00000000 --- a/magicctapipe/utils/utils.py +++ /dev/null @@ -1,165 +0,0 @@ -import datetime - -import numpy as np - -try: - from importlib.resources import files -except ImportError: - from importlib_resources import files - -__all__ = [ - "info_message", - "print_elapsed_time", - "make_elapsed_time_str", - "print_title", - "make_title_str", - "resource_file", -] - - -def info_message(text, prefix="info"): - """Prints the specified text with the prefix of the current date - - Parameters - ---------- - text : str - text - prefix : str, optional - prefix, by default 'info' - """ - date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") - print(f"({prefix:s}) {date_str:s}: {text:s}") - - -def print_elapsed_time(start, end): - """Prints elapsed time as start - end. Output format is `hh:mm:ss` - - Parameters - ---------- - start : float - start time, from time.time() - end : float - stop time, from time.time() - """ - print(make_elapsed_time_str(start=start, end=end)) - - -def make_elapsed_time_str(start, end): - """Make elapsed time - - Parameters - ---------- - start : str - start time of the observation - end : str - end time of the observation - - """ - """Makes elapsed time string as start - end. Output format is `hh:mm:ss` - - Parameters - ---------- - start : float - start time, from time.time() - end : float - stop time, from time.time() - - Returns - ------- - str - output string - """ - h, r = divmod(end - start, 3600) - m, s = divmod(r, 60) - out_s = f"Elapsed time: {int(h):0>2}:{int(m):0>2}:{int(s):0>2}" - return out_s - - -def print_title(title, style_char="=", in_space=3, width_char=80): - """Prints a title string in the following format. If `style_char="="` and - `in_space=3`, the string will be: - - :: - - ===...=================...=== - ===...=== title ===...=== - ===...=================...=== - - - The total width in characters is given by the `width_char` option - - Parameters - ---------- - title : str - title string - style_char : str, optional - style characted, by default "=" - in_space : int, optional - number of spaces between the title and the style characters, by default 3 - width_char : int, optional - total width in characters, by default 80 - """ - print( - make_title_str( - title, style_char=style_char, in_space=in_space, width_char=width_char - ) - ) - - -def make_title_str(title, style_char="=", in_space=3, width_char=80): - """Makes a title string in the following format. If `style_char="="` and - `in_space=3`, the string will be: - - :: - - ===...=================...=== - ===...=== title ===...=== - ===...=================...=== - - The total width in characters is given by the `width_char` option - - Parameters - ---------- - title : str - title string - style_char : str, optional - style characted, by default "=" - in_space : int, optional - number of spaces between the title and the style characters, by default 3 - width_char : int, optional - total width in characters, by default 80 - - Returns - ------- - str - formatted title string - """ - s1 = f"{style_char*width_char}\n" - s2ab_len_float = (width_char - len(title) - in_space * 2) / 2 - if s2ab_len_float > 0: - s2a_len = int(np.floor(s2ab_len_float)) - s2b_len = int(np.ceil(s2ab_len_float)) - s2a = f"{style_char*s2a_len}{' '*in_space}" - s2b = f"{' '*in_space}{style_char*s2b_len}\n" - s2 = f"{s2a}{title}{s2b}" - else: - s2 = f"{title}\n" - s3 = f"{style_char*width_char}" - s = f"{s1}{s2}{s3}" - return s - - -def resource_file(filename): - """Get the absoulte path of magicctapipe resource files. - - Parameters - ---------- - filename : str - Input filename - - Returns - ------- - str - Absolute path of the input filename - """ - return files("magicctapipe").joinpath("resources", filename)