From 2ee0e1630c297d57d2830c245bd2e5f17534c0ca Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 25 Mar 2024 11:07:35 +0100 Subject: [PATCH 001/236] Semi-automatic scripts + MCP master --- .../semi_automatic_scripts/LSTnsb.py | 173 ++++ .../semi_automatic_scripts/__init__.py | 38 + .../coincident_events.py | 374 ++++++++ .../config_general.yaml | 32 + .../semi_automatic_scripts/config_h5.yaml | 10 + .../semi_automatic_scripts/list_from_h5.py | 171 ++++ ...ing_runs_and_splitting_training_samples.py | 537 ++++++++++++ .../semi_automatic_scripts/nsb_level.py | 107 +++ .../setting_up_config_and_dir.py | 808 ++++++++++++++++++ .../semi_automatic_scripts/stereo_events.py | 364 ++++++++ 10 files changed, 2614 insertions(+) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs_and_splitting_training_samples.py create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py new file mode 100644 index 00000000..51807714 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py @@ -0,0 +1,173 @@ +""" +Evaluates NSB level for a LST run +""" +import argparse +import glob +import logging +import os + +import numpy as np +import yaml +from lstchain.image.modifier import calculate_noise_parameters + +__all__ = ["nsb"] + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + + +def nsb(run_list, simtel, lst_config, run_number): + + """ + Here we compute the NSB value for a run based on a subset of subruns. + + Parameters + ---------- + run_list : list + List of subruns in the run + simtel : str + Simtel (MC) file to be used to evaluate the extra noise in dim pixels + lst_config : str + LST configuration file (cf. lstchain) + run_number : int + LST run number + + Returns + ------- + list + List of the sub-run wise NSB values + """ + + noise = [] + denominator = 25 + if len(run_list) == 0: + logger.warning( + "There is no subrun matching the provided run number. Check the list of the LST runs (LST_runs.txt)" + ) + return + if len(run_list) < denominator: + mod = 1 + else: + mod = int(len(run_list) / denominator) + failed = 0 + for ii in range(0, len(run_list)): + print(mod) + subrun = run_list[ii].split(".")[-2] + if mod == 0: + break + if ii % mod == 0: + try: + a, _, _ = calculate_noise_parameters(simtel, run_list[ii], lst_config) + noise.append(a) + logger.info(a) + except IndexError: + failed = failed + 1 + if len(run_list) > denominator: + mod = int(len(run_list) / (denominator + failed)) + logger.warning( + f"Subrun {subrun} caused an error in the NSB level evaluation for run {run_number}. 
Check reports before using it" + ) + return noise + + +def main(): + + """ + Main function + """ + + parser = argparse.ArgumentParser() + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file", + ) + parser.add_argument( + "--input-run", + "-i", + dest="run", + type=str, + help="Run to be processed", + ) + parser.add_argument( + "--day", + "-d", + dest="day", + type=str, + help="Day of the run to be processed", + ) + + args = parser.parse_args() + with open( + args.config_file, "rb" + ) as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + NSB_match = config["general"]["NSB_matching"] + run_number = args.run + date = args.day + simtel = "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" + + nsb_list = config["general"]["nsb"] + lst_version = config["general"]["LST_version"] + lst_tailcut = config["general"]["LST_tailcut"] + width = [a / 2 - b / 2 for a, b in zip(nsb_list[1:], nsb_list[:-1])] + source = config["directories"]["target_name"] + width.append(0.25) + nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] + nsb_limit.insert(0, 0) + + lst_config = "lstchain_standard_config.json" + if NSB_match: + LST_files = np.sort(glob.glob(f"{source}_LST_[0-9]*_{run_number}.txt")) + + if len(LST_files) > 1: + logger.info( + f"Run {run_number} classified in more than one NSB bin. Removing all these files and evaluating it again" + ) + for kk in LST_files: + os.remove(kk) + LST_files = [] + if len(LST_files) == 1: + logger.info(f"Run {run_number} already processed") + return + else: + LST_files = np.sort(glob.glob(f"{source}_LST_nsb_*{run_number}*.txt")) + + if len(LST_files) > 1: + logger.warning( + f"More than one files exists for run {run_number}. Removing all these files and evaluating it again." + ) + for repeated_files in LST_files: + os.remove(repeated_files) + LST_files = [] + elif len(LST_files) == 1: + logger.info(f"Run {run_number} already processed.") + return + + date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] + inputdir = f"/fefs/aswg/data/real/DL1/{date_lst}/{lst_version}/{lst_tailcut}" + run_list = np.sort(glob.glob(f"{inputdir}/dl1*Run*{run_number}.*.h5")) + noise = nsb(run_list, simtel, lst_config, run_number) + if len(noise) == 0: + logger.warning( + "No NSB value could be evaluated: check the observation logs (observation problems, car flashes...)" + ) + return + a = np.median(noise) + logger.info(f"Run n. 
{run_number}, nsb median {a}") + if NSB_match: + for j in range(0, len(nsb_list)): + if (a < nsb_limit[j + 1]) & (a > nsb_limit[j]): + with open(f"{source}_LST_{nsb_list[j]}_{run_number}.txt", "a+") as f: + f.write(f"{date},{run_number}\n") + else: + with open(f"{source}_LST_nsb_{run_number}.txt", "a+") as f: + f.write(f"{a}\n") + + +if __name__ == "__main__": + main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py new file mode 100644 index 00000000..42428cb2 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -0,0 +1,38 @@ +from .coincident_events import configfile_coincidence, linking_bash_lst +from .LSTnsb import nsb +from .merging_runs_and_splitting_training_samples import ( + cleaning, + merge, + mergeMC, + split_train_test, +) +from .nsb_level import bash_scripts +from .setting_up_config_and_dir import ( + collect_nsb, + config_file_gen, + directories_generator, + lists_and_bash_gen_MAGIC, + lists_and_bash_generator, + nsb_avg, +) +from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo + +__all__ = [ + "nsb", + "cleaning", + "split_train_test", + "merge", + "mergeMC", + "bash_scripts", + "nsb_avg", + "collect_nsb", + "config_file_gen", + "lists_and_bash_generator", + "lists_and_bash_gen_MAGIC", + "directories_generator", + "configfile_coincidence", + "linking_bash_lst", + "configfile_stereo", + "bash_stereo", + "bash_stereoMC", +] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py new file mode 100644 index 00000000..01d50c0f --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -0,0 +1,374 @@ +""" +This scripts facilitates the usage of the script +"lst1_magic_event_coincidence.py". This script is +more like a "manager" that organizes the analysis +process by: +1) Creating the bash scripts for looking for +coincidence events between MAGIC and LST in each +night. +2) Creating the subdirectories for the coincident +event files. 
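+3) Submitting the generated bash scripts to the SLURM cluster as chained job arrays.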
+ +Usage: +$ python coincident_events.py (-c config_file.yaml) +""" +import argparse +import glob +import logging +import os +from datetime import date as dtdt +from datetime import timedelta +from pathlib import Path + +import numpy as np +import yaml + +from magicctapipe import __version__ + +__all__ = ["configfile_coincidence", "linking_bash_lst"] + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + + +def configfile_coincidence(ids, target_dir): + + """ + This function creates the configuration file needed for the event coincidence step + + Parameters + ---------- + ids : list + List of telescope IDs + target_dir : str + Path to the working directory + """ + + lines = [ + f"mc_tel_ids:\n LST-1: {ids[0]}\n LST-2: {ids[1]}\n LST-3: {ids[2]}\n LST-4: {ids[3]}\n MAGIC-I: {ids[4]}\n MAGIC-II: {ids[5]}\n\n", + 'event_coincidence:\n timestamp_type_lst: "dragon_time" # select "dragon_time", "tib_time" or "ucts_time"\n pre_offset_search: true\n n_pre_offset_search_events: 100\n window_half_width: "300 ns"\n', + ' time_offset:\n start: "-10 us"\n stop: "0 us"\n', + ] + with open(f"{target_dir}/config_coincidence.yaml", "w") as f: + f.writelines(lines) + + +def linking_bash_lst( + target_dir, LST_runs, nsb, date, source, LST_version, env_name, NSB_match +): + + """ + This function links the LST data paths to the working directory and creates bash scripts. + + Parameters + ---------- + target_dir : str + Path to the working directory + LST_runs : matrix of strings + This matrix is imported from config_general.yaml and tells the function where to find the LST data and link them to our working directory + nsb : int + NSB level + date : numpy array + Array of lists [date run] for all the LST runs (before the NSB splitting) + source : str + Target name + LST_version : str + The lstchain version used to process the LST data + env_name : str + Name of the conda environment + NSB_match : bool + If real data are matched to pre-processed MCs or not + """ + + ST_list = [ + os.path.basename(x) for x in glob.glob(f"{target_dir}/v{__version__}/DL1/*") + ] + + if (len(LST_runs) == 2) and (len(LST_runs[0]) == 10): + LST = LST_runs + + LST_runs = [] + LST_runs.append(LST) + + if (len(date) == 2) and (len(date[0]) == 10): + dt = date + date = [] + date.append(dt) + if NSB_match: + coincidence_DL1_dir = f"{target_dir}/v{__version__}" + if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident/"): + os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident") + for p in ST_list: + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/DL1/{p}" + if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident/{p}"): + os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident/{p}") + dates = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") + ] + for d in dates: + Y_M = int(d.split("_")[1]) + M_M = int(d.split("_")[2]) + D_M = int(d.split("_")[3]) + + day_MAGIC = dtdt(Y_M, M_M, D_M) + + delta = timedelta(days=1) + for i in LST_runs: + Y_L = i[0].split("_")[0] + M_L = i[0].split("_")[1] + D_L = i[0].split("_")[2] + day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) + if day_MAGIC == day_LST + delta: + if not os.path.exists( + f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}" + ): + os.mkdir( + f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}" + ) + + lstObsDir = ( + i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2] + ) + + inputdir = f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" + if not os.path.exists( + 
f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}" + ): + os.mkdir( + f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}" + ) + if not os.path.exists( + f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}/logs" + ): + os.mkdir( + f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}/logs" + ) + + outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}" + list_of_subruns = np.sort( + glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5") + ) + if os.path.exists(f"{outputdir}/logs/list_LST.txt"): + with open( + f"{outputdir}/logs/list_LST.txt", "a" + ) as LSTdataPathFile: + for subrun in list_of_subruns: + LSTdataPathFile.write( + f"{subrun}\n" + ) # If this files already exists, simply append the new information + else: + with open( + f"{outputdir}/logs/list_LST.txt", "w" + ) as f: # If the file list_LST.txt does not exist, it will be created here + for subrun in list_of_subruns: + f.write(f"{subrun}\n") + + if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): + continue + process_size = ( + len( + np.genfromtxt( + f"{outputdir}/logs/list_LST.txt", dtype="str" + ) + ) + - 1 + ) + + if process_size < 0: + continue + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f'#SBATCH -J {target_dir.split("/")[-2:][1]}_coincidence_{nsb}\n', + f"#SBATCH --array=0-{process_size}\n", + "#SBATCH --mem=30g\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export INM={MAGIC_DL1_dir}/Merged/Merged_{str(Y_M).zfill(4)}_{str(M_M).zfill(2)}_{str(D_M).zfill(2)}\n", + f"export OUTPUTDIR={outputdir}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_TASK_ID}.log\n", + f"time conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/config_coincidence.yaml >$LOG 2>&1", + ] + with open( + f"{source}_LST_coincident_{nsb}_{outputdir.split('/')[-1]}.sh", + "w", + ) as f: + f.writelines(lines) + else: + coincidence_DL1_dir = f"{target_dir}/DL1/Observations" + if not os.path.exists(f"{coincidence_DL1_dir}/Coincident"): + os.mkdir(f"{coincidence_DL1_dir}/Coincident") + + for i in LST_runs: + lstObsDir = i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2] + inputdir = f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" + outputdir = f"{coincidence_DL1_dir}/Coincident/{lstObsDir}" + list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) + if os.path.exists(f"{outputdir}/list_LST.txt"): + with open(f"{outputdir}/list_LST.txt", "a") as LSTdataPathFile: + for subrun in list_of_subruns: + LSTdataPathFile.write( + f"{subrun}\n" + ) # If this files already exists, simply append the new information + else: + os.mkdir(outputdir) + with open( + f"{outputdir}/list_LST.txt", "w" + ) as f: # If the file list_LST.txt does not exist, it will be created here + for subrun in list_of_subruns: + f.write(f"{subrun}\n") + process_name = target_dir.split("/")[-2:][1] + + listOfNightsLST = np.sort( + glob.glob(f"{target_dir}/DL1/Observations/Coincident/*") + ) + listOfNightsMAGIC = np.sort( + glob.glob(f"{target_dir}/DL1/Observations/Merged/Merged*") + ) + + for nightMAGIC, nightLST in zip(listOfNightsMAGIC, listOfNightsLST): + process_size = ( + len(np.genfromtxt(f"{nightLST}/list_LST.txt", dtype="str")) - 1 + ) + + with open(f"LST_coincident_{nightLST.split('/')[-1]}.sh", "w") as f: + 
lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}_coincidence\n", + f"#SBATCH --array=0-{process_size}%50\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export INM={nightMAGIC}\n", + f"export OUTPUTDIR={nightLST}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/list_LST.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/coincidence_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/config_coincidence.yaml >$LOG 2>&1", + ] + f.writelines(lines) + + +def main(): + + """ + Here we read the config file and call the functions defined above. + """ + + parser = argparse.ArgumentParser() + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file", + ) + + args = parser.parse_args() + with open( + args.config_file, "rb" + ) as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + + telescope_ids = list(config["mc_tel_ids"].values()) + target_dir = str( + Path(config["directories"]["workspace_dir"]) + / config["directories"]["target_name"] + ) + NSB_match = config["general"]["NSB_matching"] + env_name = config["general"]["env_name"] + source = config["directories"]["target_name"] + LST_version = config["general"]["LST_version"] + print("***** Generating file config_coincidence.yaml...") + print("***** This file can be found in ", target_dir) + configfile_coincidence(telescope_ids, target_dir) + nsb = config["general"]["nsb"] + runs_all = config["general"]["LST_runs"] + date = np.genfromtxt(runs_all, dtype=str, delimiter=",") + if not NSB_match: + nsblvl = 0 + LST_runs_and_dates = config["general"]["LST_runs"] + LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",") + print("***** Linking the paths to LST data files...") + + print("***** Generating the bashscript...") + linking_bash_lst( + target_dir, LST_runs, nsblvl, date, source, LST_version, env_name, NSB_match + ) # linking the data paths to current working directory + + print("***** Submitting processess to the cluster...") + print(f"Process name: {target_dir.split('/')[-2:][1]}_coincidence") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {target_dir.split('/')[-2:][1]}_coincidence" + ) + + # Below we run the bash scripts to find the coincident events + list_of_coincidence_scripts = np.sort(glob.glob("LST_coincident*.sh")) + + for n, run in enumerate(list_of_coincidence_scripts): + if n == 0: + launch_jobs = f"coincidence{n}=$(sbatch --parsable {run})" + else: + launch_jobs = ( + f"{launch_jobs} && coincidence{n}=$(sbatch --parsable {run})" + ) + + os.system(launch_jobs) + else: + for nsblvl in nsb: + try: + LST_runs = np.genfromtxt( + f"{source}_LST_{nsblvl}_.txt", dtype=str, delimiter="," + ) + + print("***** Linking the paths to LST data files...") + + print("***** Generating the bashscript...") + linking_bash_lst( + target_dir, + LST_runs, + nsblvl, + date, + source, + LST_version, + env_name, + NSB_match, + ) # linking the data paths to current working directory + + print("***** Submitting processess to the cluster...") + print( + f'Process name: {target_dir.split("/")[-2:][1]}_coincidence_{nsblvl}' + ) + print( + f'To check the jobs submitted to the cluster, type: squeue -n 
{target_dir.split("/")[-2:][1]}_coincidence_{nsblvl}' + ) + + # Below we run the bash scripts to find the coincident events + list_of_coincidence_scripts = np.sort( + glob.glob(f"{source}_LST_coincident_{nsblvl}*.sh") + ) + if len(list_of_coincidence_scripts) < 1: + continue + for n, run in enumerate(list_of_coincidence_scripts): + if n == 0: + launch_jobs = f"coincidence{n}=$(sbatch --parsable {run})" + else: + launch_jobs = f"{launch_jobs} && coincidence{n}=$(sbatch --parsable {run})" + + os.system(launch_jobs) + + except OSError as exc: + print(exc) + + +if __name__ == "__main__": + main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml new file mode 100644 index 00000000..4f3a2016 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -0,0 +1,32 @@ +mc_tel_ids: + LST-1: 1 + LST-2: 0 + LST-3: 0 + LST-4: 0 + MAGIC-I: 2 + MAGIC-II: 3 + +directories: + workspace_dir : "/fefs/aswg/workspace/elisa.visentin/MAGIC_LST_analysis/" + target_name : "Crab" + MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" + MC_electrons : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Electrons/sim_telarray/" + MC_helium : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Helium/sim_telarray/" + MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" + MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + +general: + target_RA_deg : 83.629 #RA in degrees + target_Dec_deg: 22.015 #Dec in degrees + SimTel_version : "v1.4" + LST_version : "v0.9" + LST_tailcut : "tailcut84" + focal_length : "effective" + MAGIC_runs : "MAGIC_runs.txt" #If there is no MAGIC data, please fill this file with "0, 0" + LST_runs : "LST_runs.txt" + proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest + nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] + env_name : magic-lst + cluster : "SLURM" + NSB_matching : true + diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml new file mode 100644 index 00000000..89b278d5 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml @@ -0,0 +1,10 @@ +data_selection_and_lists: + time_range : True #Search for all runs in a LST range (e.g., 2020_01_01 -> 2022_01_01) + min : "2019_11_17" + max : "2020_03_19" + date_list : ['2020_12_15','2021_03_11'] + #LST list of days to be processed (time_range=False), format: YYYY_MM_DD + skipped_LST_runs: [2919,2923] + skipped_MAGIC_runs: [5088421,5088503] #MAGIC and LST runs not to be processed (why?) 
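+  # e.g., with the values above, LST runs 2919 and 2923 and MAGIC runs 5088421 and 5088503 are excluded from the generated run lists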
+  source_name_database: 'CrabNebula' #MUST BE THE SAME AS IN THE DATABASE
+  source_name_output: 'Crab' #Name to be used in the analysis (it appears in the names of the .txt output files); ideally the same as in config_general.yaml. WARNING: ONLY ALPHANUMERIC, NO SPECIAL CHARACTERS (special characters can interfere with the analysis scripts)
\ No newline at end of file
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
new file mode 100644
index 00000000..6b7073b7
--- /dev/null
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
@@ -0,0 +1,171 @@
+"""
+By using this script, the list of MAGIC and LST runs (date and run number) can be automatically created from a dataframe in the .h5 format
+"""
+
+import os
+from datetime import datetime
+
+import pandas as pd
+import yaml
+
+
+def split_lst_date(df):
+
+    """
+    This function appends to the provided dataframe, which contains the LST date as YYYYMMDD in one of the columns, four new columns: the LST year, month and day, and the date in the YYYY-MM-DD format
+
+    Parameters
+    ----------
+    df : :class:`pandas.DataFrame`
+        Dataframe of the joint MAGIC+LST-1 observations
+
+    Returns
+    -------
+    :class:`pandas.DataFrame`
+        The input dataframe with some added columns
+    """
+
+    date = df["DATE"]
+    df["YY_LST"] = date.str[:4]
+    df["MM_LST"] = date.str[4:6]
+    df["DD_LST"] = date.str[6:8]
+    df["date_LST"] = df["YY_LST"] + "-" + df["MM_LST"] + "-" + df["DD_LST"]
+    return df
+
+
+def magic_date(df):
+
+    """
+    This function appends to the provided dataframe, which contains the LST date, year, month and day, a column with the MAGIC date (in the YYYY-MM-DD format)
+
+    Parameters
+    ----------
+    df : :class:`pandas.DataFrame`
+        Dataframe of the joint MAGIC+LST-1 observations
+
+    Returns
+    -------
+    :class:`pandas.DataFrame`
+        The input dataframe with an added column
+    """
+
+    date_lst = pd.to_datetime(df["date_LST"])
+    delta = pd.Timedelta("1 day")
+    date_magic = date_lst + delta
+    date_magic = date_magic.dt.strftime("%Y-%m-%d")
+    df["date_MAGIC"] = date_magic
+    return df
+
+
+def list_run(source_out, df, skip_LST, skip_MAGIC):
+
+    """
+    This function creates the MAGIC_runs.txt and LST_runs.txt files, which contain the lists of runs (with dates) to be processed
+
+    Parameters
+    ----------
+    source_out : str
+        Name of the source to be used in the output file names
+    df : :class:`pandas.DataFrame`
+        Dataframe of the joint MAGIC+LST-1 observations
+    skip_LST : list
+        List of the LST runs not to be added to the files
+    skip_MAGIC : list
+        List of the MAGIC runs not to be added to the files
+    """
+
+    file_list = [
+        f"{source_out}_LST_runs.txt",
+        f"{source_out}_MAGIC_runs.txt",
+    ]  # the order (LST first, MAGIC second) is relied upon below
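+    # Each output line is a comma-separated "date,run" pair, e.g. "2020_12_15,3265" (illustrative values)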
+ for j in file_list: + if os.path.isfile(j): + os.remove(j) + print(f"{j} deleted.") + MAGIC_listed = [] + LST_listed = [] + for k in range(len(df)): + skip = False + LST = df["LST1_run"] + + if (int(LST[k]) in skip_LST) or (int(LST[k]) in LST_listed): + skip = True + + if not skip: + with open(file_list[0], "a+") as f: + f.write( + f"{df['date_LST'][k].replace('-','_')},{str(LST[k]).lstrip('0')}\n" + ) + LST_listed.append(int(LST[k])) + MAGIC_min = int(df["MAGIC_first_run"][k]) + MAGIC_max = int(df["MAGIC_last_run"][k]) + for z in range(MAGIC_min, MAGIC_max + 1): + skip = False + + if (int(z) in skip_MAGIC) or (int(z) in MAGIC_listed): + skip = True + if not skip: + with open(file_list[1], "a+") as f: + f.write(f"{df['date_MAGIC'][k].replace('-','_')},{z}\n") + MAGIC_listed.append(int(z)) + + +def main(): + + """ + Main function + """ + + with open("config_h5.yaml", "rb") as f: + config = yaml.safe_load(f) + df = pd.read_hdf( + "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5", key="/str" + ) # TODO: put this file in a shared folder + + df = split_lst_date(df) + + df = magic_date(df) + + df.to_hdf("observations.h5", key="joint_obs", mode="w") + + source_in = config["data_selection_and_lists"]["source_name_database"] + + source_out = config["data_selection_and_lists"]["source_name_output"] + range = config["data_selection_and_lists"]["time_range"] + skip_LST = config["data_selection_and_lists"]["skipped_LST_runs"] + skip_MAGIC = config["data_selection_and_lists"]["skipped_MAGIC_runs"] + + df = pd.read_hdf("observations.h5", key="joint_obs") + df = df.astype({"YY_LST": int, "MM_LST": int, "DD_LST": int}) + + stereo = True + + df.query( + f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', + inplace=True, + ) # + + if range: + min = str(config["data_selection_and_lists"]["min"]) + max = str(config["data_selection_and_lists"]["max"]) + min = datetime.strptime(min, "%Y_%m_%d") + max = datetime.strptime(max, "%Y_%m_%d") + lst = pd.to_datetime(df["date_LST"].str.replace("_", "-")) + df["date"] = lst + df = df[df["date"] > min] + df = df[df["date"] < max] + + else: + dates = config["data_selection_and_lists"]["date_list"] + + df = df[df["date_LST"].isin(dates)] + + df = df.reset_index() + df = df.drop("index", axis=1) + + df.to_hdf("observations_query.h5", key="joint_obs", mode="w") + list_run(source_out, df, skip_LST, skip_MAGIC) + + +if __name__ == "__main__": + main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs_and_splitting_training_samples.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs_and_splitting_training_samples.py new file mode 100644 index 00000000..2b0da5af --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs_and_splitting_training_samples.py @@ -0,0 +1,537 @@ +""" +This script splits the proton MC data sample into "train" +and "test", deletes possible failed runs (only those files +that end up with a size < 1 kB), and generates the bash +scripts to merge the data files calling the script "merge_hdf_files.py" +in the following order: + +MAGIC: +1) Merge the subruns into runs for M1 and M2 individually. +2) Merge the runs of M1 and M2 into M1-M2 runs. +3) Merge all the M1-M2 runs for a given night. 
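+The merged files are saved at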
+Workingdir/DL1/Observations/Merged
+
+MC:
+1) Merges all MC runs in a node and saves them at
+Workingdir/DL1/MC/PARTICLE/Merged
+
+Usage:
+$ python merging_runs_and_splitting_training_samples.py (-c config.yaml)
+
+If you want to merge only the MAGIC or only the MC data,
+you can do as follows:
+
+Only MAGIC:
+$ python merging_runs_and_splitting_training_samples.py --analysis-type onlyMAGIC (-c config.yaml)
+
+Only MC:
+$ python merging_runs_and_splitting_training_samples.py --analysis-type onlyMC (-c config.yaml)
+"""
+
+import argparse
+import glob
+import logging
+import os
+from pathlib import Path
+
+import numpy as np
+import yaml
+from tqdm import tqdm
+
+from magicctapipe import __version__
+
+__all__ = ["cleaning", "split_train_test", "merge", "mergeMC"]
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.StreamHandler())
+logger.setLevel(logging.INFO)
+
+
+def cleaning(list_of_nodes, cwd):
+
+    """
+    This function looks for failed runs in each node and removes them.
+
+    Parameters
+    ----------
+    list_of_nodes : array of str
+        List of nodes where the function will look for failed runs.
+    cwd : Path
+        Current working directory
+    """
+
+    for i in tqdm(range(len(list_of_nodes)), desc="Cleaning failed runs"):
+        os.chdir(list_of_nodes[i])
+        os.system('find . -type f -name "*.h5" -size -1k -delete')
+
+    os.chdir(cwd)
+    print("Cleaning done.")
+
+
+def split_train_test(target_dir, train_fraction):
+
+    """
+    This function splits the MC proton sample into two subsamples, "train" and "test".
+    It generates the subdirectories .../DL1/MC/protons/train and .../DL1/MC/protons_test and creates sub-sub-directories with the names of all nodes.
+    For each node sub-sub-directory, the fraction of .h5 files given by train_fraction (80% by default) is moved to the "train" subdirectory, while the remaining files are moved to the "protons_test" subdirectory.
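+    The run-wise .txt list files of each node are copied to the "train" directories and then moved, together with the remaining .h5 files, to "protons_test".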
+ + Parameters + ---------- + target_dir : str + Path to the working directory + train_fraction : float + Fraction of proton MC files to be used in the training RF dataset + """ + + proton_dir = f"{target_dir}/DL1/MC/protons" + + if not os.path.exists(f"{proton_dir}/train"): + os.mkdir(f"{proton_dir}/train") + if not os.path.exists(f"{proton_dir}/../protons_test"): + os.mkdir(f"{proton_dir}/../protons_test") + + list_of_dir = np.sort(glob.glob(f"{proton_dir}/node*{os.path.sep}")) + + for directory in tqdm( + range(len(list_of_dir)) + ): # tqdm allows us to print a progessbar in the terminal + if not os.path.exists( + f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}" + ): + os.mkdir(f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}") + if not os.path.exists( + f"{proton_dir}/../protons_test/{list_of_dir[directory].split('/')[-2]}" + ): + os.mkdir( + f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}' + ) + list_of_runs = np.sort( + glob.glob(f'{proton_dir}/{list_of_dir[directory].split("/")[-2]}/*.h5') + ) + split_percent = int(len(list_of_runs) * train_fraction) + for j in list_of_runs[0:split_percent]: + os.system( + f"mv {j} {proton_dir}/train/{list_of_dir[directory].split('/')[-2]}" + ) + + os.system( + f"cp {list_of_dir[directory]}*.txt {proton_dir}/train/{list_of_dir[directory].split('/')[-2]}" + ) + os.system( + f"mv {list_of_dir[directory]}*.txt {proton_dir}/../protons_test/{list_of_dir[directory].split('/')[-2]}" + ) + os.system( + f"mv {list_of_dir[directory]}*.h5 {proton_dir}/../protons_test/{list_of_dir[directory].split('/')[-2]}" + ) + os.system(f"rm -r {list_of_dir[directory]}") + + +def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): + + """ + This function creates the bash scripts to run merge_hdf_files.py in all MAGIC subruns. + + Parameters + ---------- + target_dir : str + Path to the working directory + identification : str + Tells which batch to create. 
Options: subruns, M1M2, nights + MAGIC_runs : matrix of strings + This matrix is imported from config_general.yaml and tells the function where to find the data and where to put the merged files + env_name : str + Name of the environment + source : str + Target name + NSB_match : bool + If real data are matched to pre-processed MCs or not + """ + + process_name = f"merging_{target_dir.split('/')[-2:][1]}" + if not NSB_match: + MAGIC_DL1_dir = f"{target_dir}/DL1/Observations" + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( + f"{MAGIC_DL1_dir}/M2" + ): + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged") + + with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + ] + f.writelines(lines) + + if identification == "0_subruns": + if os.path.exists(f"{MAGIC_DL1_dir}/M1"): + for i in MAGIC_runs: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i[0]}" + ) # Creating a merged directory for the respective night + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + ) # Creating a merged directory for the respective run + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} \n" + ) + + if os.path.exists(f"{MAGIC_DL1_dir}/M2"): + for i in MAGIC_runs: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i[0]}" + ) # Creating a merged directory for the respective night + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + ) # Creating a merged directory for the respective run + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} \n" + ) + + elif identification == "1_M1M2": + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( + f"{MAGIC_DL1_dir}/M2" + ): + for i in MAGIC_runs: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged") + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise \n" + ) + else: + for i in MAGIC_runs: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}" + ) # Creating a merged directory for each night + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i[0]} \n" + ) + + else: + ST_list = [ + os.path.basename(x) for x in glob.glob(f"{target_dir}/v{__version__}/DL1/*") + ] + + for p in ST_list: + process_name = f'merging_{target_dir.split("/")[-2:][1]}' + + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/DL1/{p}" + + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( + f"{MAGIC_DL1_dir}/M2" + ): + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged") + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + ] + with 
open(f"{source}_Merge_MAGIC_{identification}_{p}.sh", "w") as f: + f.writelines(lines) + if identification == "0_subruns": + + if os.path.exists(f"{MAGIC_DL1_dir}/M1"): + dates = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") + ] + for i in dates: + runs = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/*") + ] + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}" + ) # Creating a merged directory for the respective night + for r in runs: + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}" + ): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}" + ) # Creating a merged directory for the respective run + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + ): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + ) # Creating a merged directory for the respective run + + f.write( + f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M1_{i}_{r}.log \n" + ) + + if os.path.exists(f"{MAGIC_DL1_dir}/M2"): + dates = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M2/*") + ] + + for i in dates: + runs = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") + ] + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}" + ) # Creating a merged directory for the respective night + for r in runs: + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}" + ): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}" + ) # Creating a merged directory for the respective run + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + ): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + ) # Creating a merged directory for the respective run + + f.write( + f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M2_{i}_{r}.log \n" + ) + elif identification == "1_M1M2": + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( + f"{MAGIC_DL1_dir}/M2" + ): + dates = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") + ] + for i in dates: + runs = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") + ] + for r in runs: + if ( + len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0 + ) and ( + len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) + ) > 0: + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/Merged" + ): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i}/Merged") + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" + ): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" + ) + f.write( + f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}.log \n" + ) + else: + dates = [ + os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") + ] + for i in dates: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): + continue + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/Merged_{i}" + ) # Creating a merged directory for each night + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs" + ): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") + f.write( + f"time conda run -n {env_name} merge_hdf_files --input-dir 
{MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}.log \n" + ) + + +def mergeMC(target_dir, identification, env_name, cwd): + + """ + This function creates the bash scripts to run merge_hdf_files.py in all MC runs. + + Parameters + ---------- + target_dir : str + Path to the working directory + identification : str + Tells which batch to create. Options: protons, gammadiffuse + env_name : str + Name of the environment + cwd : Path + Current working directory + """ + + process_name = f"merging_{target_dir.split('/')[-2:][1]}" + + MC_DL1_dir = f"{target_dir}/DL1/MC" + if not os.path.exists(f"{MC_DL1_dir}/{identification}/Merged"): + os.mkdir(f"{MC_DL1_dir}/{identification}/Merged") + + if identification == "protons": + list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/train/node*")) + else: + list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/node*")) + + np.savetxt( + f"{MC_DL1_dir}/{identification}/list_of_nodes.txt", list_of_nodes, fmt="%s" + ) + + process_size = len(list_of_nodes) - 1 + + cleaning(list_of_nodes, cwd) # This will delete the (possibly) failed runs. + + with open(f"Merge_MC_{identification}.sh", "w") as f: + lines_bash_file = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}\n", + f"#SBATCH --array=0-{process_size}%50\n", + "#SBATCH --mem=7g\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + f"export LOG={MC_DL1_dir}/{identification}/Merged" + + "/merged_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", + ] + f.writelines(lines_bash_file) + + +def main(): + + """ + Here we read the config_general.yaml file, split the pronton sample into "test" and "train", and merge the MAGIC files. 
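+    The MAGIC merging scripts are then submitted to SLURM with job dependencies, so that subruns are merged first, then M1+M2 runs, then whole nights.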
+ """ + + parser = argparse.ArgumentParser() + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file", + ) + + parser.add_argument( + "--analysis-type", + "-t", + choices=["onlyMAGIC", "onlyMC"], + dest="analysis_type", + type=str, + default="doEverything", + help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on MAGIC or MC data, respectively.", + ) + + args = parser.parse_args() + with open( + args.config_file, "rb" + ) as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + cwd = os.getcwd() + + target_dir = f'{Path(config["directories"]["workspace_dir"])}/{config["directories"]["target_name"]}' + + MAGIC_runs_and_dates = config["general"]["MAGIC_runs"] + MAGIC_runs = np.genfromtxt(MAGIC_runs_and_dates, dtype=str, delimiter=",") + NSB_match = config["general"]["NSB_matching"] + train_fraction = float(config["general"]["proton_train_fraction"]) + source = config["directories"]["target_name"] + + env_name = config["general"]["env_name"] + + # Below we run the analysis on the MC data + if not NSB_match: + if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): + # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): + if not os.path.exists(f"{target_dir}/DL1/MC/protons_test"): + print("***** Splitting protons into 'train' and 'test' datasets...") + split_train_test(target_dir, train_fraction) + + print("***** Generating merge_MC bashscripts...") + mergeMC( + target_dir, "protons", env_name, cwd + ) # generating the bash script to merge the files + mergeMC( + target_dir, "gammadiffuse", env_name, cwd + ) # generating the bash script to merge the files + mergeMC( + target_dir, "gammas", env_name, cwd + ) # generating the bash script to merge the files + mergeMC(target_dir, "protons_test", env_name, cwd) + + print("***** Running merge_hdf_files.py on the MC data files...") + + # Below we run the bash scripts to merge the MC files + list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh")) + + for n, run in enumerate(list_of_merging_scripts): + if n == 0: + launch_jobs = f"merging{n}=$(sbatch --parsable {run})" + else: + launch_jobs = ( + f"{launch_jobs} && merging{n}=$(sbatch --parsable {run})" + ) + + os.system(launch_jobs) + + # Below we run the analysis on the MAGIC data + if ( + (args.analysis_type == "onlyMAGIC") + or (args.analysis_type == "doEverything") + or (NSB_match) + ): + print("***** Generating merge_MAGIC bashscripts...") + merge( + target_dir, "0_subruns", MAGIC_runs, env_name, source, NSB_match + ) # generating the bash script to merge the subruns + merge( + target_dir, "1_M1M2", MAGIC_runs, env_name, source, NSB_match + ) # generating the bash script to merge the M1 and M2 runs + merge( + target_dir, "2_nights", MAGIC_runs, env_name, source, NSB_match + ) # generating the bash script to merge all runs per night + + print("***** Running merge_hdf_files.py on the MAGIC data files...") + + # Below we run the bash scripts to merge the MAGIC files + list_of_merging_scripts = np.sort(glob.glob(f"{source}_Merge_MAGIC_*.sh")) + if len(list_of_merging_scripts) < 1: + logger.warning("no bash scripts") + return + for n, run in enumerate(list_of_merging_scripts): + if n == 0: + launch_jobs = f"merging{n}=$(sbatch --parsable {run})" + else: + launch_jobs = f"{launch_jobs} && merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})" + + 
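+        # Each sbatch call is chained with "--dependency=afterany" on the previous
+        # job ID, so the three merging stages (subruns, M1M2, nights) run in order.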
os.system(launch_jobs) + + print(f"Process name: merging_{target_dir.split('/')[-2:][1]}") + print( + f"To check the jobs submitted to the cluster, type: squeue -n merging_{target_dir.split('/')[-2:][1]}" + ) + + +if __name__ == "__main__": + main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py new file mode 100644 index 00000000..04514efb --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py @@ -0,0 +1,107 @@ +""" +Bash scripts to run LSTnsb.py on all the LST runs by using parallel jobs + +Usage: python nsb_level.py (-c config.yaml) +""" + +import argparse +import glob +import logging +import os + +import numpy as np +import yaml + +__all__ = ["bash_scripts"] + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + + +def bash_scripts(run, date, config, source, env_name): + + """Here we create the bash scripts (one per LST run) + + Parameters + ---------- + run : str + LST run number + date : str + LST date + config : str + Name of the configuration file + source : str + Target name + env_name : str + Name of the environment + """ + + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p long\n", + "#SBATCH -J nsb\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"time conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > {source}_nsblog_{date}_{run}.log 2>&1 \n\n", + ] + with open(f"{source}_{date}_run_{run}.sh", "w") as f: + f.writelines(lines) + + +def main(): + + """ + Main function + """ + + parser = argparse.ArgumentParser() + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file", + ) + + args = parser.parse_args() + with open( + args.config_file, "rb" + ) as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + source = config["directories"]["target_name"] + lst_runs_filename = config["general"]["LST_runs"] + env_name = config["general"]["env_name"] + + with open(str(lst_runs_filename), "r") as LSTfile: + run_list = LSTfile.readlines() + print("***** Generating bashscripts...") + for run in run_list: + run = run.rstrip() + run_number = run.split(",")[1] + date = run.split(",")[0] + bash_scripts(run_number, date, args.config_file, source, env_name) + print("Process name: nsb") + print("To check the jobs submitted to the cluster, type: squeue -n nsb") + list_of_bash_scripts = np.sort(glob.glob(f"{source}_*_run_*.sh")) + + if len(list_of_bash_scripts) < 1: + print( + "Warning: no bash script has been produced to evaluate the NSB level for the provided LST runs. Please check the input list" + ) + return + for n, run in enumerate(list_of_bash_scripts): + if n == 0: + launch_jobs = f"nsb{n}=$(sbatch --parsable {run})" + else: + launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})" + + # print(launch_jobs) + os.system(launch_jobs) + + +if __name__ == "__main__": + main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py new file mode 100644 index 00000000..56096618 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -0,0 +1,808 @@ +""" +This script facilitates the usage of +"magic_calib_to_dl1.py". 
This script is more like a +"manager" that organizes the analysis process by: +1) Creating the necessary directories and subdirectories. +2) Generating all the bash script files that convert the +MAGIC files from DL0 to DL1. +3) Launching these jobs in the IT container. + +Notice that in this stage we only use MAGIC data. +No LST data is used here. + +Standard usage: +$ python setting_up_config_and_dir.py (-c config_file.yaml) +""" +import argparse +import glob +import logging +import os +import time +from pathlib import Path + +import numpy as np +import yaml + +from magicctapipe import __version__ + +__all__ = [ + "nsb_avg", + "collect_nsb", + "config_file_gen", + "lists_and_bash_generator", + "lists_and_bash_gen_MAGIC", + "directories_generator", +] + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + +ST_list = ["ST0320A", "ST0319A", "ST0318A", "ST0317A", "ST0316A"] +ST_begin = ["2023_03_10", "2022_12_15", "2022_06_10", "2021_12_30", "2020_10_24"] +ST_end = [ + "2024_01_01", + "2023_03_09", + "2022_08_31", + "2022_06_09", + "2021_09_29", +] # ST0320 ongoing -> 'service' end date + + +def nsb_avg(source, config, LST_list): + + """ + This function evaluates the average of the NSB levels that have been evaluated by LSTnsb_MC.py (one value per run). + + Parameters + ---------- + source : str + Source name + config : str + Config file + LST_list : str + Name of the file where the adopted LST runs are listed + + Returns + ------- + continue_process : string + If 'y', data processing will continue, otherwise it will be stopped + nsb : double + NSB value (average over the runs) + """ + allfile = np.sort( + glob.glob(f"{source}_LST_nsb_*.txt") + ) # List with the names of all files containing the NSB values for each run + if len(allfile) == 0: + print( + "Warning: no file (containing the NSB value) exists for any of the LST runs to be processed. Check the input list" + ) + return + noise = [] + for j in allfile: + with open(j) as ff: + line_str = ff.readline().rstrip("\n") + line = float(line_str) + noise.append(line) + nsb = np.average(noise) + std = np.std(noise) + continue_process = "y" + if std > 0.2: + continue_process = input( + f'The standard deviation of the NSB levels is {std}. We recommend using NSB-matching scripts always that the standard deviation of NSB is > 0.2. Would you like to continue the current analysis anyway? [only "y" or "n"]: ' + ) + delete_index = [] + for n, j in enumerate(allfile): + run = j.split("_")[3].rstrip(".txt") + if abs(noise[n] - nsb) > 3 * std: + sigma_range = input( + f'Run {run} has an NSB value of {noise[n]}, which is more than 3*sigma (i.e. {3*std}) away from the average (i.e. {nsb}). Would you like to continue the current analysis anyway? [only "y" or "n"]: ' + ) + if sigma_range != "y": + return (sigma_range, 0) + + sigma_range = input( + f'Would you like to keep this run (i.e. {run}) in the analysis? [only "y" or "n"]:' + ) + if sigma_range != "y": + delete_index.append(n) + with open(LST_list, "r") as f: + lines = f.readlines() + with open(LST_list, "w") as f: + for i in lines: + if not i.endswith(f"{run}\n"): + f.write(i) + + if len(delete_index) > 0: + index = ( + delete_index.reverse() + ) # Here we reverse the list of indexes associated with out-of-the-average NSB values, such that after deleting one element (below), the indexes of the array do not change. 
+        for k in delete_index:
+            noise = np.delete(noise, k)  # delete the discarded values, starting from the highest index
+
+    nsb = np.average(noise)
+    with open(config, "r") as f:
+        lines = f.readlines()
+    with open(config, "w") as f:
+        for i in lines:
+            if not i.startswith("nsb_value"):
+                f.write(i)
+        f.write(f"nsb_value: {nsb}\n")
+    return (continue_process, nsb)
+
+
+def collect_nsb(config):
+    """
+    Here we split the LST runs into NSB-wise .txt files
+
+    Parameters
+    ----------
+    config : dict
+        Dictionary read from the configuration file
+    """
+    source = config["directories"]["target_name"]
+
+    nsb = config["general"]["nsb"]
+    for nsblvl in nsb:
+        allfile = np.sort(glob.glob(f"{source}_LST_{nsblvl}_*.txt"))
+        if len(allfile) == 0:
+            continue
+        for j in allfile:
+            with open(j) as ff:
+                line = ff.readline()
+            with open(f"{source}_LST_{nsblvl}_.txt", "a+") as f:
+                f.write(f"{line.rstrip()}\n")
+
+
+def config_file_gen(ids, target_dir, noise_value, NSB_match):
+
+    """
+    Here we create the configuration file needed for transforming DL0 into DL1
+
+    Parameters
+    ----------
+    ids : list
+        Telescope IDs
+    target_dir : path
+        Directory to store the results
+    noise_value : list
+        List of the noise correction values for LST: extra noise in dim pixels, extra noise in bright pixels and extra bias in dim pixels
+    NSB_match : bool
+        If real data are matched to pre-processed MCs or not
+    """
+    config_file = "../config.yaml"
+    with open(
+        config_file, "rb"
+    ) as fc:  # "rb" mode opens the file in binary format for reading
+        config_dict = yaml.safe_load(fc)
+    LST_config = config_dict["LST"]
+    MAGIC_config = config_dict["MAGIC"]
+
+    if not NSB_match:
+        LST_config["increase_nsb"]["extra_noise_in_dim_pixels"] = noise_value[0]
+        LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2]
+        LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1]
+    conf = {}
+    conf["LST"] = LST_config
+
+    conf["MAGIC"] = MAGIC_config
+
+    with open(f"{target_dir}/config_DL0_to_DL1.yaml", "w") as f:
+        lines = [
+            "mc_tel_ids:",
+            f"\n    LST-1: {ids[0]}",
+            f"\n    LST-2: {ids[1]}",
+            f"\n    LST-3: {ids[2]}",
+            f"\n    LST-4: {ids[3]}",
+            f"\n    MAGIC-I: {ids[4]}",
+            f"\n    MAGIC-II: {ids[5]}",
+            "\n",
+        ]
+        f.writelines(lines)
+        yaml.dump(conf, f, default_flow_style=False)
+
+
+def lists_and_bash_generator(
+    particle_type, target_dir, MC_path, SimTel_version, focal_length, env_name
+):
+
+    """
+    This function creates the lists list_nodes_gamma_complete.txt and list_folder_gamma.txt with the MC file paths.
+    After that, it generates a few bash scripts to link the MC paths to each subdirectory.
+    These bash scripts will be called later in the main() function below.
This step will be skipped in case the MC path has not been provided (MC_path='') + + Parameters + ---------- + particle_type : str + Particle type (e.g., protons) + target_dir : str + Directory to store the results + MC_path : str + Path to the MCs DL0s + SimTel_version : str + Version of SimTel (used to produce MCs) + focal_length : str + Focal length to be used to process MCs (e.g., 'nominal') + env_name : str + Name of the environment + """ + + if MC_path == "": + return + + process_name = target_dir.split("/")[-2:][1] + + list_of_nodes = glob.glob(f"{MC_path}/node*") + with open( + f"{target_dir}/list_nodes_{particle_type}_complete.txt", "w" + ) as f: # creating list_nodes_gammas_complete.txt + for i in list_of_nodes: + f.write(f"{i}/output_{SimTel_version}\n") + + with open( + f"{target_dir}/list_folder_{particle_type}.txt", "w" + ) as f: # creating list_folder_gammas.txt + for i in list_of_nodes: + f.write(f'{i.split("/")[-1]}\n') + + #################################################################################### + # bash scripts that link the MC paths to each subdirectory. + #################################################################################### + + with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: + lines_of_config_file = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}\n\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + "while read -r -u 3 lineA && read -r -u 4 lineB\n", + "do\n", + f" cd {target_dir}/DL1/MC/{particle_type}\n", + " mkdir $lineB\n", + " cd $lineA\n", + " ls -lR *.gz |wc -l\n", + f" ls *.gz > {target_dir}/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n", + ' string=$lineA"/"\n', + f" export file={target_dir}/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n\n", + " cat $file | while read line; do echo $string${line}" + + f" >>{target_dir}/DL1/MC/{particle_type}/$lineB/list_dl0_ok.txt; done\n\n", + ' echo "folder $lineB and node $lineA"\n', + f'done 3<"{target_dir}/list_nodes_{particle_type}_complete.txt" 4<"{target_dir}/list_folder_{particle_type}.txt"\n', + "", + ] + f.writelines(lines_of_config_file) + + ################################################################################################################ + # bash script that applies lst1_magic_mc_dl0_to_dl1.py to all MC data files. 
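+    # (one SLURM array task per MC node; each task converts all the DL0 files
+    # listed in its list_dl0_ok.txt to DL1)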
+ ################################################################################################################ + + number_of_nodes = glob.glob(f"{MC_path}/node*") + number_of_nodes = len(number_of_nodes) - 1 + + with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: + lines_of_config_file = [ + "#!/bin/sh\n\n", + "#SBATCH -p xxl\n", + f"#SBATCH -J {process_name}\n", + f"#SBATCH --array=0-{number_of_nodes}%50\n", + "#SBATCH --mem=10g\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n", + f"cd {target_dir}/DL1/MC/{particle_type}\n\n", + f"export INF={target_dir}\n", + f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "cd $SAMPLE\n\n", + f"export LOG={target_dir}/DL1/MC/{particle_type}" + + "/simtel_{$SAMPLE}_all.log\n", + "cat list_dl0_ok.txt | while read line\n", + "do\n", + f" cd {target_dir}/../\n", + f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {target_dir}/DL1/MC/{particle_type}/$SAMPLE --config-file {target_dir}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", + "done\n", + "", + ] + f.writelines(lines_of_config_file) + + +def lists_and_bash_gen_MAGIC( + target_dir, telescope_ids, MAGIC_runs, source, env_name, NSB_match +): + + """ + Below we create a bash script that links the the MAGIC data paths to each subdirectory. + + Parameters + ---------- + target_dir : str + Directory to store the results + telescope_ids : list + List of the telescope IDs (set by the user) + MAGIC_runs : str + MAGIC dates and runs to be processed + source : str + Name of the target + env_name : str + Name of the environment + NSB_match : bool + If real data are matched to pre-processed MCs or not + """ + process_name = f'{target_dir.split("/")[-2:][1]}' + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n", + ] + with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: + f.writelines(lines) + if NSB_match: + if (len(MAGIC_runs) == 2) and (len(MAGIC_runs[0]) == 10): + MAGIC = MAGIC_runs + + MAGIC_runs = [] + MAGIC_runs.append(MAGIC) + + for i in MAGIC_runs: + for p in range(len(ST_begin)): + if ( + time.strptime(i[0], "%Y_%m_%d") + >= time.strptime(ST_begin[p], "%Y_%m_%d") + ) and ( + time.strptime(i[0], "%Y_%m_%d") + <= time.strptime(ST_end[p], "%Y_%m_%d") + ): + if telescope_ids[-1] > 0: + lines = [ + f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1={target_dir}/v{__version__}/DL1/{ST_list[p]}/M2/{i[0]}/{i[1]}/logs \n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", + ] + f.writelines(lines) + + f.write("\n") + if telescope_ids[-2] > 0: + lines = [ + f'export IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1={target_dir}/v{__version__}/DL1/{ST_list[p]}/M1/{i[0]}/{i[1]}/logs \n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", + ] + f.writelines(lines) + else: + if telescope_ids[-1] > 0: + for i in MAGIC_runs: + lines = [ + f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1={target_dir}/DL1/Observations/M2/{i[0]}/{i[1]}\n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", + ] + 
f.writelines(lines) + f.write("\n") + if telescope_ids[-2] > 0: + for i in MAGIC_runs: + lines = [ + f'export IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1={target_dir}/DL1/Observations/M1/{i[0]}/{i[1]}\n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", + ] + f.writelines(lines) + if NSB_match: + + if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + for i in MAGIC_runs: + + for p in range(len(ST_begin)): + if ( + time.strptime(i[0], "%Y_%m_%d") + >= time.strptime(ST_begin[p], "%Y_%m_%d") + ) and ( + time.strptime(i[0], "%Y_%m_%d") + <= time.strptime(ST_end[p], "%Y_%m_%d") + ): + + if telescope_ids[-1] > 0: + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + if number_of_nodes < 0: + continue + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p long\n", + f"#SBATCH -J {process_name}\n", + f"#SBATCH --array=0-{number_of_nodes}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export OUTPUTDIR={target_dir}/v{__version__}/DL1/{ST_list[p]}/M2/{i[0]}/{i[1]}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + ] + with open( + f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w" + ) as f: + f.writelines(lines) + + if telescope_ids[-2] > 0: + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + if number_of_nodes < 0: + continue + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p long\n", + f"#SBATCH -J {process_name}\n", + f"#SBATCH --array=0-{number_of_nodes}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export OUTPUTDIR={target_dir}/v{__version__}/DL1/{ST_list[p]}/M1/{i[0]}/{i[1]}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + ] + with open( + f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w" + ) as f: + f.writelines(lines) + else: + if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + for i in MAGIC_runs: + if telescope_ids[-1] > 0: + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + + with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p long\n", + f"#SBATCH -J {process_name}\n", + f"#SBATCH --array=0-{number_of_nodes}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export OUTPUTDIR={target_dir}/DL1/Observations/M2/{i[0]}/{i[1]}\n", + f"cd {target_dir}/../\n", + 
"SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + "", + ] + f.writelines(lines) + + if telescope_ids[-2] > 0: + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + + with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p long\n", + f"#SBATCH -J {process_name}\n", + f"#SBATCH --array=0-{number_of_nodes}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export OUTPUTDIR={target_dir}/DL1/Observations/M1/{i[0]}/{i[1]}\n", + f"cd {target_dir}/../\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + "", + ] + f.writelines(lines) + + +def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): + + """ + Here we create all subdirectories for a given workspace and target name. + + Parameters + ---------- + target_dir : str + Directory to store the results + telescope_ids : list + List of the telescope IDs (set by the user) + MAGIC_runs : str + MAGIC dates and runs to be processed + NSB_match : bool + If real data are matched to pre-processed MCs or not + """ + + if NSB_match: + if not os.path.exists(target_dir): + os.mkdir(target_dir) + if not os.path.exists(f"{target_dir}/v{__version__}"): + os.mkdir(f"{target_dir}/v{__version__}") + if not os.path.exists(f"{target_dir}/v{__version__}/DL1"): + os.mkdir(f"{target_dir}/v{__version__}/DL1") + dl1_dir = str(f"{target_dir}/v{__version__}/DL1") + else: + if not os.path.exists(target_dir): + os.mkdir(target_dir) + os.mkdir(f"{target_dir}/DL1") + os.mkdir(f"{target_dir}/DL1/Observations") + os.mkdir(f"{target_dir}/DL1/MC") + os.mkdir(f"{target_dir}/DL1/MC/gammas") + os.mkdir(f"{target_dir}/DL1/MC/gammadiffuse") + os.mkdir(f"{target_dir}/DL1/MC/electrons") + os.mkdir(f"{target_dir}/DL1/MC/protons") + os.mkdir(f"{target_dir}/DL1/MC/helium") + else: + overwrite = input( + f'MC directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? 
[only "y" or "n"]: ' + ) + if overwrite == "y": + os.system(f"rm -r {target_dir}") + os.mkdir(target_dir) + os.mkdir(f"{target_dir}/DL1") + os.mkdir(f"{target_dir}/DL1/Observations") + os.mkdir(f"{target_dir}/DL1/MC") + os.mkdir(f"{target_dir}/DL1/MC/gammas") + os.mkdir(f"{target_dir}/DL1/MC/gammadiffuse") + os.mkdir(f"{target_dir}/DL1/MC/electrons") + os.mkdir(f"{target_dir}/DL1/MC/protons") + os.mkdir(f"{target_dir}/DL1/MC/helium") + else: + print("Directory not modified.") + + ########################################### + # MAGIC + ########################################### + if (len(MAGIC_runs) == 2) and (len(MAGIC_runs[0]) == 10): + MAGIC = MAGIC_runs + + MAGIC_runs = [] + MAGIC_runs.append(MAGIC) + if NSB_match: + for i in MAGIC_runs: + for p in range(len(ST_begin)): + if ( + time.strptime(i[0], "%Y_%m_%d") + >= time.strptime(ST_begin[p], "%Y_%m_%d") + ) and ( + time.strptime(i[0], "%Y_%m_%d") + <= time.strptime(ST_end[p], "%Y_%m_%d") + ): + if telescope_ids[-1] > 0: + if not os.path.exists(f"{dl1_dir}/{ST_list[p]}"): + os.mkdir(f"{dl1_dir}/{ST_list[p]}") + if not os.path.exists(f"{dl1_dir}/{ST_list[p]}/M2"): + os.mkdir(f"{dl1_dir}/{ST_list[p]}/M2") + if not os.path.exists(f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}"): + os.mkdir(f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}") + + if not os.path.exists( + f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}/{i[1]}" + ): + os.mkdir(f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}/{i[1]}") + if not os.path.exists( + f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}/{i[1]}/logs" + ): + os.mkdir(f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}/{i[1]}/logs") + if telescope_ids[-2] > 0: + if not os.path.exists(f"{dl1_dir}/{ST_list[p]}"): + os.mkdir(f"{dl1_dir}/{ST_list[p]}") + if not os.path.exists(f"{dl1_dir}/{ST_list[p]}/M1"): + os.mkdir(f"{dl1_dir}/{ST_list[p]}/M1") + if not os.path.exists(f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}"): + os.mkdir(f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}") + + if not os.path.exists( + f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}/{i[1]}" + ): + os.mkdir(f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}/{i[1]}") + if not os.path.exists( + f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}/{i[1]}/logs" + ): + os.mkdir(f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}/{i[1]}/logs") + else: + if telescope_ids[-1] > 0: + if not os.path.exists(f"{target_dir}/DL1/Observations/M2"): + os.mkdir(f"{target_dir}/DL1/Observations/M2") + for i in MAGIC_runs: + if not os.path.exists(f"{target_dir}/DL1/Observations/M2/{i[0]}"): + os.mkdir(f"{target_dir}/DL1/Observations/M2/{i[0]}") + os.mkdir(f"{target_dir}/DL1/Observations/M2/{i[0]}/{i[1]}") + else: + os.mkdir(f"{target_dir}/DL1/Observations/M2/{i[0]}/{i[1]}") + + if telescope_ids[-2] > 0: + if not os.path.exists(f"{target_dir}/DL1/Observations/M1"): + os.mkdir(f"{target_dir}/DL1/Observations/M1") + for i in MAGIC_runs: + if not os.path.exists(f"{target_dir}/DL1/Observations/M1/{i[0]}"): + os.mkdir(f"{target_dir}/DL1/Observations/M1/{i[0]}") + os.mkdir(f"{target_dir}/DL1/Observations/M1/{i[0]}/{i[1]}") + else: + os.mkdir(f"{target_dir}/DL1/Observations/M1/{i[0]}/{i[1]}") + + +def main(): + + """Here we read the config file and call the functions to generate the necessary directories, bash scripts and launching the jobs.""" + + # Here we are simply collecting the parameters from the command line, as input file, output directory, and configuration file + + parser = argparse.ArgumentParser() + parser.add_argument( + "--analysis-type", + "-t", + choices=["onlyMAGIC", "onlyMC"], + dest="analysis_type", + type=str, + default="doEverything", + help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on MAGIC or MC 
data, respectively.", + ) + + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file", + ) + + args = parser.parse_args() + with open( + args.config_file, "rb" + ) as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + + telescope_ids = list(config["mc_tel_ids"].values()) + SimTel_version = config["general"]["SimTel_version"] + env_name = config["general"]["env_name"] + NSB_match = config["general"]["NSB_matching"] + MAGIC_runs_and_dates = config["general"]["MAGIC_runs"] + MAGIC_runs = np.genfromtxt( + MAGIC_runs_and_dates, dtype=str, delimiter="," + ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" + target_dir = str( + Path(config["directories"]["workspace_dir"]) + / config["directories"]["target_name"] + ) + LST_runs_and_dates = config["general"]["LST_runs"] + MC_gammas = str(Path(config["directories"]["MC_gammas"])) + MC_electrons = str(Path(config["directories"]["MC_electrons"])) + MC_helium = str(Path(config["directories"]["MC_helium"])) + MC_protons = str(Path(config["directories"]["MC_protons"])) + MC_gammadiff = str(Path(config["directories"]["MC_gammadiff"])) + focal_length = config["general"]["focal_length"] + source = config["directories"]["target_name"] + noise_value = [0, 0, 0] + if not NSB_match: + running, nsb = nsb_avg(source, args.config_file, LST_runs_and_dates) + if running != "y": + print("OK... The script was terminated by the user choice.") + return + noisebright = 1.15 * pow(nsb, 1.115) + biasdim = 0.358 * pow(nsb, 0.805) + noise_value = [nsb, noisebright, biasdim] + else: + collect_nsb(config) + + print("*** Reducing DL0 to DL1 data***") + print(f'Process name: {target_dir.split("/")[-2:][1]}') + print( + f'To check the jobs submitted to the cluster, type: squeue -n {target_dir.split("/")[-2:][1]}' + ) + + directories_generator( + target_dir, telescope_ids, MAGIC_runs, NSB_match + ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target + config_file_gen(telescope_ids, target_dir, noise_value, NSB_match) + + if not NSB_match: + # Below we run the analysis on the MC data + if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): + lists_and_bash_generator( + "gammas", target_dir, MC_gammas, SimTel_version, focal_length, env_name + ) # gammas + lists_and_bash_generator( + "electrons", + target_dir, + MC_electrons, + SimTel_version, + focal_length, + env_name, + ) # electrons + lists_and_bash_generator( + "helium", target_dir, MC_helium, SimTel_version, focal_length, env_name + ) # helium + lists_and_bash_generator( + "protons", + target_dir, + MC_protons, + SimTel_version, + focal_length, + env_name, + ) # protons + lists_and_bash_generator( + "gammadiffuse", + target_dir, + MC_gammadiff, + SimTel_version, + focal_length, + env_name, + ) # gammadiffuse + + # Here we do the MC DL0 to DL1 conversion: + list_of_MC = glob.glob("linking_MC_*s.sh") + + # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") + + for n, run in enumerate(list_of_MC): + if n == 0: + launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + else: + launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} 
{run[0:-3]}_r.sh)" + + os.system(launch_jobs_MC) + + # Below we run the analysis on the MAGIC data + if ( + (args.analysis_type == "onlyMAGIC") + or (args.analysis_type == "doEverything") + or (NSB_match) + ): + lists_and_bash_gen_MAGIC( + target_dir, telescope_ids, MAGIC_runs, source, env_name, NSB_match + ) # MAGIC real data + if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + list_of_MAGIC_runs = glob.glob(f"{source}_MAGIC-*.sh") + if len(list_of_MAGIC_runs) < 1: + print( + "Warning: no bash script has been produced. Please check the provided MAGIC_runs.txt and the MAGIC calibrated data" + ) + return + + for n, run in enumerate(list_of_MAGIC_runs): + if n == 0: + launch_jobs = f"linking=$(sbatch --parsable {source}_linking_MAGIC_data_paths.sh) && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" + else: + launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" + + os.system(launch_jobs) + + +if __name__ == "__main__": + main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py new file mode 100644 index 00000000..5557351d --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -0,0 +1,364 @@ +""" +This scripts generates and runs the bashscripts +to compute the stereo parameters of DL1 MC and +Coincident MAGIC+LST data files. + +Usage: +$ python stereo_events.py (-c config.yaml) + +If you want to compute the stereo parameters only the real data or only the MC data, +you can do as follows: + +Only real data: +$ python stereo_events.py --analysis-type onlyReal (-c config.yaml) + +Only MC: +$ python stereo_events.py --analysis-type onlyMC (-c config.yaml) +""" + +import argparse +import glob +import logging +import os +from pathlib import Path + +import numpy as np +import yaml + +from magicctapipe import __version__ + +__all__ = ["configfile_stereo", "bash_stereo", "bash_stereoMC"] + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + + +def configfile_stereo(ids, target_dir): + + """ + This function creates the configuration file needed for the event stereo step + + Parameters + ---------- + ids : list + List of telescope IDs + target_dir : str + Path to the working directory + """ + + with open(f"{target_dir}/config_stereo.yaml", "w") as f: + lines = [ + f"mc_tel_ids:\n LST-1: {ids[0]}\n LST-2: {ids[1]}\n LST-3: {ids[2]}\n LST-4: {ids[3]}\n MAGIC-I: {ids[4]}\n MAGIC-II: {ids[5]}\n\n", + 'stereo_reco:\n quality_cuts: "(intensity > 50) & (width > 0)"\n theta_uplim: "6 arcmin"\n', + ] + + f.writelines(lines) + + +def bash_stereo(target_dir, nsb, source, env_name, NSB_match): + + """ + This function generates the bashscript for running the stereo analysis. 
+ + Parameters + ---------- + target_dir : str + Path to the working directory + nsb : float + NSB level of the LST run(s) + source : str + Target name + env_name : str + Name of the environment + NSB_match : bool + If real data are matched to pre-processed MCs or not + """ + + process_name = target_dir.split("/")[-2:][1] + if not NSB_match: + if not os.path.exists(f"{target_dir}/DL1/Observations/Coincident_stereo"): + os.mkdir(f"{target_dir}/DL1/Observations/Coincident_stereo") + + listOfNightsLST = np.sort( + glob.glob(f"{target_dir}/DL1/Observations/Coincident/*") + ) + + for nightLST in listOfNightsLST: + stereoDir = f"{target_dir}/DL1/Observations/Coincident_stereo/{nightLST.split('/')[-1]}" + if not os.path.exists(stereoDir): + os.mkdir(stereoDir) + + os.system( + f"ls {nightLST}/*LST*.h5 > {nightLST}/list_coin.txt" + ) # generating a list with the DL1 coincident data files. + process_size = ( + len(np.genfromtxt(f"{nightLST}/list_coin.txt", dtype="str")) - 1 + ) + + with open(f"StereoEvents_real_{nightLST.split('/')[-1]}.sh", "w") as f: + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}_stereo\n", + f"#SBATCH --array=0-{process_size}%100\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export INPUTDIR={nightLST}\n", + f"export OUTPUTDIR={stereoDir}\n", + "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_stereo.yaml >$LOG 2>&1", + ] + f.writelines(lines) + else: + if not os.path.exists(f"{target_dir}/v{__version__}/DL1CoincidentStereo"): + os.mkdir(f"{target_dir}/v{__version__}/DL1CoincidentStereo") + + ST_list = [ + os.path.basename(x) + for x in glob.glob(f"{target_dir}/v{__version__}/DL1Coincident/*") + ] + + for p in ST_list: + if not os.path.exists( + f"{target_dir}/v{__version__}/DL1CoincidentStereo/{p}" + ): + os.mkdir(f"{target_dir}/v{__version__}/DL1CoincidentStereo/{p}") + + if ( + not os.path.exists( + f"{target_dir}/v{__version__}/DL1CoincidentStereo/{p}/NSB{nsb}" + ) + ) and ( + os.path.exists( + f"{target_dir}/v{__version__}/DL1Coincident/{p}/NSB{nsb}" + ) + ): + os.mkdir( + f"{target_dir}/v{__version__}/DL1CoincidentStereo/{p}/NSB{nsb}" + ) + listOfNightsLST = np.sort( + glob.glob(f"{target_dir}/v{__version__}/DL1Coincident/{p}/NSB{nsb}/*") + ) + for nightLST in listOfNightsLST: + stereoDir = f'{target_dir}/v{__version__}/DL1CoincidentStereo/{p}/NSB{nsb}/{nightLST.split("/")[-1]}' + if not os.path.exists(stereoDir): + os.mkdir(stereoDir) + if not os.path.exists(f"{stereoDir}/logs"): + os.mkdir(f"{stereoDir}/logs") + if not os.listdir(f"{nightLST}"): + continue + if len(os.listdir(nightLST)) < 2: + continue + os.system( + f"ls {nightLST}/*LST*.h5 > {stereoDir}/logs/list_coin_{nsb}.txt" + ) # generating a list with the DL1 coincident data files. 
+ process_size = ( + len( + np.genfromtxt( + f"{stereoDir}/logs/list_coin_{nsb}.txt", dtype="str" + ) + ) + - 1 + ) + if process_size < 0: + continue + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}_stereo_{nsb}\n", + f"#SBATCH --array=0-{process_size}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export INPUTDIR={nightLST}\n", + f"export OUTPUTDIR={stereoDir}\n", + f"SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin_{nsb}.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_TASK_ID}.log\n", + f"time conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_stereo.yaml >$LOG 2>&1", + ] + with open( + f"{source}_StereoEvents_{nsb}_{nightLST.split('/')[-1]}.sh", "w" + ) as f: + f.writelines(lines) + + +def bash_stereoMC(target_dir, identification, env_name): + + """ + This function generates the bashscript for running the stereo analysis. + + Parameters + ---------- + target_dir : str + Path to the working directory + identification : str + Particle name. Options: protons, gammadiffuse + env_name : str + Name of the environment + """ + + process_name = target_dir.split("/")[-2:][1] + + if not os.path.exists(f"{target_dir}/DL1/MC/{identification}/Merged/StereoMerged"): + os.mkdir(f"{target_dir}/DL1/MC/{identification}/Merged/StereoMerged") + + inputdir = f"{target_dir}/DL1/MC/{identification}/Merged" + + os.system( + f"ls {inputdir}/dl1*.h5 > {inputdir}/list_coin.txt" + ) # generating a list with the DL1 coincident data files. + process_size = len(np.genfromtxt(f"{inputdir}/list_coin.txt", dtype="str")) - 1 + + with open(f"StereoEvents_MC_{identification}.sh", "w") as f: + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p xxl\n", + f"#SBATCH -J {process_name}_stereo\n", + f"#SBATCH --array=0-{process_size}%100\n", + "#SBATCH --mem=30g\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export INPUTDIR={inputdir}\n", + f"export OUTPUTDIR={inputdir}/StereoMerged\n", + "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_stereo.yaml >$LOG 2>&1", + ] + f.writelines(lines) + + +def main(): + + """ + Here we read the config_general.yaml file and call the functions defined above. 
+ """ + + parser = argparse.ArgumentParser() + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file", + ) + + parser.add_argument( + "--analysis-type", + "-t", + choices=["onlyReal", "onlyMC"], + dest="analysis_type", + type=str, + default="doEverything", + help="You can type 'onlyReal' or 'onlyMC' to run this script only on real or MC data, respectively.", + ) + + args = parser.parse_args() + with open( + args.config_file, "rb" + ) as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + + target_dir = f'{Path(config["directories"]["workspace_dir"])}/{config["directories"]["target_name"]}' + + env_name = config["general"]["env_name"] + source = config["directories"]["target_name"] + NSB_match = config["general"]["NSB_matching"] + telescope_ids = list(config["mc_tel_ids"].values()) + + print("***** Generating file config_stereo.yaml...") + print("***** This file can be found in ", target_dir) + configfile_stereo(telescope_ids, target_dir) + + # Below we run the analysis on the MC data + if ( + (args.analysis_type == "onlyMC") + or (args.analysis_type == "doEverything") + and not NSB_match + ): + print("***** Generating the bashscript for MCs...") + bash_stereoMC(target_dir, "gammadiffuse", env_name) + bash_stereoMC(target_dir, "gammas", env_name) + bash_stereoMC(target_dir, "protons", env_name) + bash_stereoMC(target_dir, "protons_test", env_name) + + list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) + + for n, run in enumerate(list_of_stereo_scripts): + if n == 0: + launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" + else: + launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable --dependency=afterany:$stereo{n-1} {run})" + + os.system(launch_jobs) + + # Below we run the analysis on the real data + if not NSB_match: + nsb = 0 + if ( + (args.analysis_type == "onlyReal") + or (args.analysis_type == "doEverything") + or NSB_match + ): + print("***** Generating the bashscript for real data...") + bash_stereo(target_dir, nsb, source, env_name, NSB_match) + + list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_real_*.sh")) + print("***** Submitting processes to the cluster...") + print(f"Process name: {target_dir.split('/')[-2:][1]}_stereo") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {target_dir.split('/')[-2:][1]}_stereo" + ) + for n, run in enumerate(list_of_stereo_scripts): + if n == 0: + launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" + else: + launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable --dependency=afterany:$stereo{n-1} {run})" + + os.system(launch_jobs) + + else: + listnsb = np.sort(glob.glob(f"{source}_LST_*_.txt")) + nsb = [] + for f in listnsb: + nsb.append(f.split("_")[-2]) + + for nsblvl in nsb: + print("***** Generating the bashscript...") + bash_stereo(target_dir, nsblvl, source, env_name, NSB_match) + + print("***** Submitting processess to the cluster...") + print(f'Process name: {target_dir.split("/")[-2:][1]}_stereo_{nsblvl}') + print( + f'To check the jobs submitted to the cluster, type: squeue -n {target_dir.split("/")[-2:][1]}_stereo_{nsblvl}' + ) + + # Below we run the bash scripts to find the stereo events + list_of_stereo_scripts = np.sort( + glob.glob(f"{source}_StereoEvents_{nsblvl}*.sh") + ) + if len(list_of_stereo_scripts) < 1: + continue + for n, run in enumerate(list_of_stereo_scripts): + if n == 0: + launch_jobs = f"stereo{n}=$(sbatch 
--parsable {run})" + else: + launch_jobs = ( + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" + ) + + os.system(launch_jobs) + + +if __name__ == "__main__": + main() From 9914806be7fb0019524e96717be979ea2b4a64af Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 25 Mar 2024 13:06:29 +0100 Subject: [PATCH 002/236] new scripts as console scripts --- setup.cfg | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/setup.cfg b/setup.cfg index 35260502..5f715816 100644 --- a/setup.cfg +++ b/setup.cfg @@ -92,6 +92,13 @@ console_scripts = lst1_magic_train_rfs = magicctapipe.scripts.lst1_magic.lst1_magic_train_rfs:main magic_calib_to_dl1 = magicctapipe.scripts.lst1_magic.magic_calib_to_dl1:main merge_hdf_files = magicctapipe.scripts.lst1_magic.merge_hdf_files:main + coincident_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.coincident_events:main + list_from_h5 = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.list_from_h5:main + LSTnsb = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.LSTnsb:main + merging_runs_and_splitting_training_samples = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.merging_runs_and_splitting_training_samples:main + nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.nsb_level:main + setting_up_config_and_dir = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.setting_up_config_and_dir:main + stereo_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.stereo_events:main [tool:pytest] minversion=3.0 From f0d63c70f968e0c56f9483b80db04c6e67f6a091 Mon Sep 17 00:00:00 2001 From: Raniere Date: Mon, 25 Mar 2024 14:58:48 +0100 Subject: [PATCH 003/236] Tutorial up to DL1 stereo --- magicctapipe/scripts/lst1_magic/README.md | 243 ++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 magicctapipe/scripts/lst1_magic/README.md diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md new file mode 100644 index 00000000..5c93bf0f --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -0,0 +1,243 @@ +# Scripts for MAGIC+LST analysis + +This folder contains the scripts to perform MAGIC+LST analysis in a semi-automatic way. + +Each script can be called from the command line from anywhere in your system. Please run them with `-h` option for the first time to check what are the options available. + + +## Overview + + +MAGIC+LST analysis starts from MAGIC calibrated data (\_Y\_ files), LST data level 1 (DL1) data and SimTelArray DL0 data, and our goal is to achieve data level 3 (DL3). + +Behind the scenes, the semi-automatic scripts will run: +- `magic_calib_to_dl1` on real MAGIC data, to convert them into DL1 format. +- `lst1_magic_mc_dl0_to_dl1` over SimTelArray MCs to convert them into DL1 format. +- `merge_hdf_files.py` on MAGIC data to merge subruns and/or runs together. +- `lst1_magic_event_coincidence.py` to find coincident events between MAGIC and LST-1, starting from DL1 data. +- `lst1_magic_stereo_reco.py` to add stereo parameters to the DL1 data. +- `lst1_magic_train_rfs.py` to train the RFs (energy, direction, classification) on train gamma MCs and protons. +- `lst1_magic_dl1_stereo_to_dl2.py` to apply the RFs to stereo DL1 data (real and test MCs) and produce DL2 data. +- `lst1_magic_create_irf.py` to create the IRF. +- `lst1_magic_dl2_to_dl3.py` to create DL3 files, and `create_dl3_index_files.py` to create DL3 HDU and index files. + +From DL3 on, the analysis is done with gammapy. 

## Installation

1) The very first step to reduce MAGIC-LST data is to have remote access/credentials to the IT Container, so ask for them if you do not have them yet. Once you have them, the connection steps are the following:

Authorized institute server (Client) → ssh connection to CTALaPalma → ssh connection to cp01/02.

2) Once connected to the IT Container, install magic-cta-pipe (e.g. in your home directory in the IT Container) with the following commands:

```
git clone https://github.com/cta-observatory/magic-cta-pipe/tree/Torino_auto_MCP
cd magic-cta-pipe
conda env create -n magic-lst -f environment.yml
conda activate magic-lst
pip install .
```

## Analysis

### DL0 to DL1

In this step, we will convert the MAGIC and Monte Carlo (MC) data from Data Level (DL) 0 to DL1 (our final goal is to reach DL3).

In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open the magic-lst environment with the command `conda activate magic-lst` and create the files `config_general.yaml`, `MAGIC_runs.txt` and `LST_runs.txt`.

The file `config_general.yaml` must contain the telescope IDs and the directories with the MC data, as shown below:
```
mc_tel_ids:
    LST-1: 1
    LST-2: 0 # If the telescope ID is set to 0, this means that this telescope is not used in the analysis.
    LST-3: 0
    LST-4: 0
    MAGIC-I: 2
    MAGIC-II: 3

directories:
    workspace_dir : "/fefs/aswg/workspace/yourname/yourprojectname/"
    target_name : "Crab"
    MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray"
    MC_electrons : ""
    MC_helium : ""
    MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray"
    MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/"

general:
    target_RA_deg : 83.629 # RA in degrees, the coordinates are useful only if the target name is not found in the catalogs.
    target_Dec_deg: 22.015 # Dec in degrees
    SimTel_version: "v1.4"
    LST_version : "v0.9"
    LST_tailcut : "tailcut84"
    focal_length : "effective"
    MAGIC_runs : "MAGIC_runs.txt" # If there is no MAGIC data, please fill this file with "0, 0"
    LST_runs : "LST_runs.txt"
    proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest
    nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] # NSB = night sky background. This will be useful if NSB_matching = True
    env_name : magic-lst
    cluster : "SLURM"
    NSB_matching : true

```

The file `MAGIC_runs.txt` looks like this:
```
2020_11_19,5093174
2020_11_19,5093175
2020_12_08,5093491
2020_12_08,5093492
2020_12_16,5093711
2020_12_16,5093712
2020_12_16,5093713
2020_12_16,5093714
2021_02_14,5094483
2021_02_14,5094484
2021_02_14,5094485
2021_02_14,5094486
2021_02_14,5094487
2021_02_14,5094488
2021_03_16,5095265
2021_03_16,5095266
2021_03_16,5095267
2021_03_16,5095268
2021_03_16,5095271
2021_03_16,5095272
2021_03_16,5095273
2021_03_16,5095277
2021_03_16,5095278
2021_03_16,5095281
2021_03_18,5095376
2021_03_18,5095377
2021_03_18,5095380
2021_03_18,5095381
2021_03_18,5095382
2021_03_18,5095383
```


The columns here represent the night and run in which you want to select data. Please **do not add blank spaces** in the rows, as these names will be used to i) find the MAGIC data in the IT Container and ii) create the subdirectories in your working directory. If there is no MAGIC data, please fill this file with "0,0".
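For reference, the scripts parse this file with NumPy, turning each row into a (night, run) pair of strings, which is why stray blank spaces would break the lookup. A minimal sketch of the equivalent read:

```python
import numpy as np

# Read the list of nights and runs; the expected format is one
# "2020_11_19,5093174"-style entry per row, as in the listing above.
# np.atleast_2d keeps the shape consistent if the file has a single row.
magic_runs = np.atleast_2d(np.genfromtxt("MAGIC_runs.txt", dtype=str, delimiter=","))
for night, run in magic_runs:
    print(f"night {night} -> run {run}")
```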
Similarly, the `LST_runs.txt` file looks like this:

```
2020_11_18,2923
2020_11_18,2924
2020_12_07,3093
2020_12_15,3265
2020_12_15,3266
2020_12_15,3267
2020_12_15,3268
2021_02_13,3631
2021_02_13,3633
2021_02_13,3634
2021_02_13,3635
2021_02_13,3636
2021_03_15,4069
2021_03_15,4070
2021_03_15,4071
2021_03_17,4125
```
Note that the LST nights appear as being one day before MAGIC's!!! This is because LST saves the date at the beginning of the night, while MAGIC saves it at the end. If there is no LST data, please fill this file with "0,0". These files are the only ones we need to modify in order to convert DL0 into DL1 data.

In this analysis, we use a wobble of 0.4°.

To convert the MAGIC and SimTelArray MC data into DL1 format, you simply do:
> $ setting_up_config_and_dir -c config_general.yaml

The output in the terminal will be something like this:
```
***** Linking MC paths - this may take a few minutes ******
*** Reducing DL0 to DL1 data - this can take many hours ***
Process name: yourprojectname_Crab
To check the jobs submitted to the cluster, type: squeue -n yourprojectname_Crab
```

The command `setting_up_config_and_dir` does a series of things:
- Creates a directory with your target name within the directory `yourprojectname` and several subdirectories inside it that are necessary for the rest of the data reduction. The main directories are:
```
/fefs/aswg/workspace/yourname/yourprojectname/Crab/
/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1
/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1/[subdirectories]
```
where [subdirectories] stands for several subdirectories containing the MC and MAGIC subruns in the DL1 format.
- Generates a configuration file called `config_step1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/Crab/` created in the previous step.
- Links the MAGIC and MC data addresses to their respective subdirectories defined in the previous steps.
- Runs the scripts `lst1_magic_mc_dl0_to_dl1.py` and `magic_calib_to_dl1.py` for each one of the linked data files.


You can check if this process is done with the following commands:
> $ squeue -n yourprojectname_Crab
or
> $ squeue -u your_user_name

Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1/` will be filled with files of the type `dl1_[...]_LST1_MAGIC1_MAGIC2_runXXXXXX.h5` for the MCs and `dl1_MX.RunXXXXXX.0XX.h5` for the MAGIC runs. The next step of the conversion from DL0 to DL1 is to split the DL1 MC proton sample into "train" and "test" datasets (these will be used later in the Random Forest event classification and to do some diagnostic plots) and to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`):

> $ merging_runs_and_splitting_training_samples

The output in the terminal will be something like this:
```
***** Splitting protons into 'train' and 'test' datasets...
***** Generating merge bashscripts...
***** Running merge_hdf_files.py in the MAGIC data files...
Process name: merging_Crab
To check the jobs submitted to the cluster, type: squeue -n merging_Crab
```

This script will slice the proton MC sample according to the entry "proton_train_fraction" in the "config_general.yaml" file, and then it will merge the MAGIC data files in the following order:
- MAGIC subruns are merged into single runs.
- MAGIC I and II runs are merged (only if both telescopes are used, of course).
- All runs in specific nights are merged, such that in the end we have only one datafile per night.
- Proton MC training data is merged.
- Proton MC testing data is merged.
- Diffuse MC gammas are merged.
- MC gammas are merged.

### Coincident events and stereo parameters on DL1

To find coincident events between MAGIC and LST, starting from DL1 data, we run the following command in the working directory:

> $ coincident_events

This script creates the file config_coincidence.yaml containing the telescope IDs and the following parameters:
```
event_coincidence:
    timestamp_type_lst: "dragon_time" # select "dragon_time", "tib_time" or "ucts_time"
    pre_offset_search: true
    n_pre_offset_search_events: 100
    window_half_width: "300 ns"
    time_offset:
        start: "-10 us"
        stop: "0 us"
```

It then links the real LST data files to the output directory [...]DL1/Observations/Coincident, and runs the script lst1_magic_event_coincidence.py in all of them.

Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 data by running:

> $ stereo_events

This script creates the file config_stereo.yaml with the following parameters:
```
stereo_reco:
    quality_cuts: "(intensity > 50) & (width > 0)"
    theta_uplim: "6 arcmin"
```

It then creates the output directories for the DL1 with stereo parameters [...]DL1/Observations/Coincident__stereo/SEVERALNIGHTS and [...]/DL1/MC/GAMMAorPROTON/Merged/StereoMerged, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files for MC and real data are then saved in these directories.

### Random forest and DL1 to DL2

TBD.

### Instrument response function and DL3

TBD.

## High-level analysis

Since the DL3 data may amount to only a few MB, it is typically convenient to download them to your own computer at this point. It will be necessary to have astropy and gammapy (version > 0.20) installed before proceeding.

We prepared a [Jupyter Notebook](https://github.com/ranieremenezes/magic-cta-pipe/blob/master/magicctapipe/scripts/lst1_magic/SED_and_LC_from_DL3.ipynb) that quickly creates a counts map, a significance curve, an SED, and a light curve. You can give it a try.

The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs. Note that the notebooks run with gammapy v0.20 or higher, while the gammapy version adopted in the MAGIC+LST-1 pipeline is v0.19.
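Once the DL3 files are on your machine, a quick way to check them is to open the folder with gammapy. Below is a minimal sketch, assuming gammapy >= 0.20 and that the DL3 files, together with the HDU and observation index tables produced by `create_dl3_index_files.py`, sit in a local `dl3/` directory (a placeholder path):

```python
from gammapy.data import DataStore

# Open the DL3 folder through its HDU and observation index tables.
data_store = DataStore.from_dir("dl3/")
print(data_store.obs_table)

# Load all observations; counts maps, SEDs and light curves can then be
# built as shown in the notebooks linked above.
observations = data_store.get_observations()
print(f"Loaded {len(observations)} observations")
```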
From 634566e535a6235a1f76325c92ad082e11aa7548 Mon Sep 17 00:00:00 2001 From: Raniere Date: Mon, 25 Mar 2024 15:29:57 +0100 Subject: [PATCH 004/236] Update README.md --- magicctapipe/scripts/lst1_magic/README.md | 32 +++++++++-------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 5c93bf0f..8d94e8f9 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -12,7 +12,6 @@ MAGIC+LST analysis starts from MAGIC calibrated data (\_Y\_ files), LST data le Behind the scenes, the semi-automatic scripts will run: - `magic_calib_to_dl1` on real MAGIC data, to convert them into DL1 format. -- `lst1_magic_mc_dl0_to_dl1` over SimTelArray MCs to convert them into DL1 format. - `merge_hdf_files.py` on MAGIC data to merge subruns and/or runs together. - `lst1_magic_event_coincidence.py` to find coincident events between MAGIC and LST-1, starting from DL1 data. - `lst1_magic_stereo_reco.py` to add stereo parameters to the DL1 data. @@ -140,14 +139,11 @@ The columns here represent the night and run in which you want to select data. P ``` Note that the LST nights appear as being one day before MAGIC's!!! This is because LST saves the date at the beginning of the night, while MAGIC saves it at the end. If there is no LST data, please fill this file with "0,0". These files are the only ones we need to modify in order to convert DL0 into DL1 data. -In this analysis, we use a wobble of 0.4°. - -To convert the MAGIC and SimTelArray MCs data into DL1 format, you simply do: +To convert the MAGIC data into DL1 format, you simply do: > $ setting_up_config_and_dir -c config_general.yaml The output in the terminal will be something like this: ``` -***** Linking MC paths - this may take a few minutes ****** *** Reducing DL0 to DL1 data - this can take many hours *** Process name: yourprojectname_Crab To check the jobs submitted to the cluster, type: squeue -n yourprojectname_Crab @@ -160,38 +156,36 @@ The command `setting_up_config_and_dir` does a series of things: /fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1 /fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1/[subdirectories] ``` -where [subdirectories] stands for several subdirectories containing the MC and MAGIC subruns in the DL1 format. +where [subdirectories] stands for several subdirectories containing the MAGIC subruns in the DL1 format. - Generates a configuration file called `config_step1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/Crab/` created in the previous step. -- Links the MAGIC and MC data addresses to their respective subdirectories defined in the previous steps. -- Runs the scripts `lst1_magic_mc_dl0_to_dl1.py` and `magic_calib_to_dl1.py` for each one of the linked data files. +- Links the MAGIC data addresses to their respective subdirectories defined in the previous steps. +- Runs the script `magic_calib_to_dl1.py` for each one of the linked data files. You can check if this process is done with the following commands: + > $ squeue -n yourprojectname_Crab + or + > $ squeue -u your_user_name -Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1/` will be filled with files of the type `dl1_[...]_LST1_MAGIC1_MAGIC2_runXXXXXX.h5` for the MCs and `dl1_MX.RunXXXXXX.0XX.h5` for the MAGIC runs. 
The next step of the conversion from DL0 to DL1 is to split the DL1 MC proton sample into "train" and "test" datasets (these will be used later in the Random Forest event classification and to do some diagnostic plots) and to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the dorectory `yourprojectname`): +Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1/` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. The next step of the conversion from DL0 to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`): > $ merging_runs_and_splitting_training_samples The output in the terminal will be something like this: ``` -***** Splitting protons into 'train' and 'test' datasets... ***** Generating merge bashscripts... ***** Running merge_hdf_files.py in the MAGIC data files... Process name: merging_Crab To check the jobs submitted to the cluster, type: squeue -n merging_Crab ``` -This script will slice the proton MC sample according to the entry "proton_train" in the "config_general.yaml" file, and then it will merge the MAGIC data files in the following order: +This script will merge the MAGIC data files in the following order: - MAGIC subruns are merged into single runs. - MAGIC I and II runs are merged (only if both telescopes are used, of course). - All runs in specific nights are merged, such that in the end we have only one datafile per night. -- Proton MC training data is merged. -- Proton MC testing data is merged. -- Diffuse MC gammas are merged. -- MC gammas are merged. ### Coincident events and stereo parameters on DL1 @@ -211,20 +205,20 @@ event_coincidence: stop: "0 us ``` -It then links the real LST data files to the output directory [...]DL1/Observations/Coincident, and runs the script lst1_magic_event_coincidence.py in all of them. +It then links the LST data files to the output directory [...]DL1/Observations/Coincident, and runs the script lst1_magic_event_coincidence.py in all of them. -Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 data by running: +Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 files by running: > $ stereo_events -This script creates the file config_stereo.yaml with the follwoing parameters: +This script creates the file config_stereo.yaml with the following parameters: ``` stereo_reco: quality_cuts: "(intensity > 50) & (width > 0)" theta_uplim: "6 arcmin" ``` -It then creates the output directories for the DL1 with stereo parameters [...]DL1/Observations/Coincident__stereo/SEVERALNIGHTS and [...]/DL1/MC/GAMMAorPROTON/Merged/StereoMerged, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files for MC and real data are then saved in these directories. +It then creates the output directories for the DL1 with stereo parameters [...]DL1/Observations/Coincident__stereo/SEVERALNIGHTS, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files are then saved in these directories. 
### Random forest and DL1 to DL2 From dfef975990e7b6ebc44dfc06349ba48e63b90292 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Mon, 25 Mar 2024 15:48:47 +0100 Subject: [PATCH 005/236] change name --- ...ing_runs_and_splitting_training_samples.py => merging_runs.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename magicctapipe/scripts/lst1_magic/semi_automatic_scripts/{merging_runs_and_splitting_training_samples.py => merging_runs.py} (100%) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs_and_splitting_training_samples.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py similarity index 100% rename from magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs_and_splitting_training_samples.py rename to magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py From 32b9b3dd560afd282b0702b7f283afa206581094 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Mon, 25 Mar 2024 15:49:28 +0100 Subject: [PATCH 006/236] Update setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 5f715816..baa342d8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -95,7 +95,7 @@ console_scripts = coincident_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.coincident_events:main list_from_h5 = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.list_from_h5:main LSTnsb = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.LSTnsb:main - merging_runs_and_splitting_training_samples = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.merging_runs_and_splitting_training_samples:main + merging_runs = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.merging_runs:main nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.nsb_level:main setting_up_config_and_dir = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.setting_up_config_and_dir:main stereo_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.stereo_events:main From 1db960ac852381ef94bdf90e4f843e23bea8f6be Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Mon, 25 Mar 2024 15:59:18 +0100 Subject: [PATCH 007/236] Update README.md --- magicctapipe/scripts/lst1_magic/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 8d94e8f9..249490b7 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -140,7 +140,7 @@ The columns here represent the night and run in which you want to select data. P Note that the LST nights appear as being one day before MAGIC's!!! This is because LST saves the date at the beginning of the night, while MAGIC saves it at the end. If there is no LST data, please fill this file with "0,0". These files are the only ones we need to modify in order to convert DL0 into DL1 data. To convert the MAGIC data into DL1 format, you simply do: -> $ setting_up_config_and_dir -c config_general.yaml +> $ setting_up_config_and_dir (-c config_general.yaml) The output in the terminal will be something like this: ``` @@ -172,7 +172,7 @@ or Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1/` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. 
The next step of the conversion from DL0 to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`): -> $ merging_runs_and_splitting_training_samples +> $ merging_runs (-c config_general.yaml) The output in the terminal will be something like this: ``` @@ -191,7 +191,7 @@ This script will merge the MAGIC data files in the following order: To find coincident events between MAGIC and LST, starting from DL1 data, we run the following command in the working directory: -> $ coincident_events +> $ coincident_events (-c config_general.yaml) This script creates the file config_coincidence.yaml containing the telescope IDs and the following parameters: ``` @@ -209,7 +209,7 @@ It then links the LST data files to the output directory [...]DL1/Observations/C Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 files by running: -> $ stereo_events +> $ stereo_events (-c config_general.yaml) This script creates the file config_stereo.yaml with the following parameters: ``` From 55b4cb81deb0617a17e37333b6a63d278f6ebf7b Mon Sep 17 00:00:00 2001 From: Raniere Date: Tue, 26 Mar 2024 11:17:16 +0100 Subject: [PATCH 008/236] Update README.md --- magicctapipe/scripts/lst1_magic/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 249490b7..ac5a4cfc 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -140,7 +140,7 @@ The columns here represent the night and run in which you want to select data. P Note that the LST nights appear as being one day before MAGIC's!!! This is because LST saves the date at the beginning of the night, while MAGIC saves it at the end. If there is no LST data, please fill this file with "0,0". These files are the only ones we need to modify in order to convert DL0 into DL1 data. To convert the MAGIC data into DL1 format, you simply do: -> $ setting_up_config_and_dir (-c config_general.yaml) +> $ setting_up_config_and_dir -c config_general.yaml The output in the terminal will be something like this: ``` @@ -174,6 +174,7 @@ Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/you > $ merging_runs (-c config_general.yaml) +**The command inside parenthesis is not mandatory**. By the way, it is better if you don't use it unless you know what you are doing. The output in the terminal will be something like this: ``` ***** Generating merge bashscripts... From f014e520616039116cec72c9ac61eb7c49be972d Mon Sep 17 00:00:00 2001 From: Raniere Date: Thu, 4 Apr 2024 15:58:33 +0100 Subject: [PATCH 009/236] Update README.md --- magicctapipe/scripts/lst1_magic/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index ac5a4cfc..d138c8a8 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -28,10 +28,10 @@ From DL3 on, the analysis is done with gammapy. Authorized institute server (Client) → ssh connection to CTALaPalma → ssh connection to cp01/02. -2) Once connected to the IT Container, install magic-cta-pipe (e.g. in your home directory in the IT Container) with the following commands: +2) Once connected to the IT Container, install magic-cta-pipe (e.g. 
in your home directory in the IT Container) with the following commands (if you have mamba installed, we recommend yo uuse it instead of conda. The installation process will be much faster.): ``` -git clone https://github.com/cta-observatory/magic-cta-pipe/tree/Torino_auto_MCP +git clone -b Torino_auto_MCP https://github.com/cta-observatory/magic-cta-pipe.git cd magic-cta-pipe conda env create -n magic-lst -f environment.yml conda activate magic-lst From f671519ab0d4c69bd04b62e63b6551aa9a90d6a2 Mon Sep 17 00:00:00 2001 From: Raniere Date: Thu, 4 Apr 2024 16:17:22 +0100 Subject: [PATCH 010/236] README - fixing installation From 92b2973837c44933c0e0c14b0eb07c840591e22a Mon Sep 17 00:00:00 2001 From: Raniere Date: Thu, 4 Apr 2024 16:27:14 +0100 Subject: [PATCH 011/236] Fixing problem when importing merging_runs --- .../scripts/lst1_magic/semi_automatic_scripts/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 42428cb2..d2286daf 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -1,6 +1,6 @@ from .coincident_events import configfile_coincidence, linking_bash_lst from .LSTnsb import nsb -from .merging_runs_and_splitting_training_samples import ( +from .merging_runs import ( cleaning, merge, mergeMC, From 86cf6420c336abe977e2eccb013772019d5176db Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 8 Apr 2024 08:32:05 +0000 Subject: [PATCH 012/236] minor fixes + get config files (MCP + lstchain) --- .../{scripts/lst1_magic => resources}/config.yaml | 0 .../lst1_magic/semi_automatic_scripts/LSTnsb.py | 11 +++++++++-- .../lst1_magic/semi_automatic_scripts/nsb_level.py | 6 +++--- .../setting_up_config_and_dir.py | 12 ++++++++---- setup.cfg | 3 +++ 5 files changed, 23 insertions(+), 9 deletions(-) rename magicctapipe/{scripts/lst1_magic => resources}/config.yaml (100%) diff --git a/magicctapipe/scripts/lst1_magic/config.yaml b/magicctapipe/resources/config.yaml similarity index 100% rename from magicctapipe/scripts/lst1_magic/config.yaml rename to magicctapipe/resources/config.yaml diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py index 51807714..4676e3fc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py @@ -10,6 +10,11 @@ import yaml from lstchain.image.modifier import calculate_noise_parameters +try: + from importlib.resources import files +except ImportError: + from importlib_resources import files + __all__ = ["nsb"] logger = logging.getLogger(__name__) @@ -119,8 +124,10 @@ def main(): width.append(0.25) nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] nsb_limit.insert(0, 0) - - lst_config = "lstchain_standard_config.json" + conda_path= os.environ['CONDA_PREFIX'] + + lst_config = str(conda_path)+"/lib/python3.11/site-packages/lstchain/data/lstchain_standard_config.json" + if NSB_match: LST_files = np.sort(glob.glob(f"{source}_LST_[0-9]*_{run_number}.txt")) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py index 04514efb..12583bd4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py +++ 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py @@ -39,7 +39,7 @@ def bash_scripts(run, date, config, source, env_name): lines = [ "#!/bin/sh\n\n", - "#SBATCH -p long\n", + "#SBATCH -p short,long\n", "#SBATCH -J nsb\n", "#SBATCH -N 1\n\n", "ulimit -l unlimited\n", @@ -75,7 +75,7 @@ def main(): source = config["directories"]["target_name"] lst_runs_filename = config["general"]["LST_runs"] env_name = config["general"]["env_name"] - + with open(str(lst_runs_filename), "r") as LSTfile: run_list = LSTfile.readlines() print("***** Generating bashscripts...") @@ -101,7 +101,7 @@ def main(): # print(launch_jobs) os.system(launch_jobs) - + if __name__ == "__main__": main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 56096618..b6f4fac7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -25,6 +25,9 @@ from magicctapipe import __version__ +from magicctapipe.io import resource_file + + __all__ = [ "nsb_avg", "collect_nsb", @@ -183,7 +186,8 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match): noise_value : list Extra noise in dim and bright pixels, Extra bias in dim pixels """ - config_file = "../config.yaml" + + config_file = resource_file("config.yaml") with open( config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading @@ -192,9 +196,9 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match): MAGIC_config = config_dict["MAGIC"] if not NSB_match: - LST_config["increase_nsb"]["extra_noise_in_dim_pixels"] = {noise_value[0]} - LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = {noise_value[2]} - LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = {noise_value[1]} + LST_config["increase_nsb"]["extra_noise_in_dim_pixels"] = noise_value[0] + LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2] + LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1] conf = {} conf["LST"] = LST_config diff --git a/setup.cfg b/setup.cfg index baa342d8..e0fb4723 100644 --- a/setup.cfg +++ b/setup.cfg @@ -80,6 +80,9 @@ all = %(docs)s %(dev)s +[options.package_data] +* = resources/* + [options.entry_points] console_scripts = create_dl3_index_files = magicctapipe.scripts.lst1_magic.create_dl3_index_files:main From 8a6f5b2fbdf70f3791bfe8ecfe3cc19b825f4412 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 8 Apr 2024 08:37:02 +0000 Subject: [PATCH 013/236] pre-commit --- .../lst1_magic/semi_automatic_scripts/LSTnsb.py | 16 +++++++--------- .../semi_automatic_scripts/__init__.py | 7 +------ .../semi_automatic_scripts/nsb_level.py | 4 ++-- .../setting_up_config_and_dir.py | 2 -- 4 files changed, 10 insertions(+), 19 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py index 4676e3fc..80e505a4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py @@ -10,11 +10,6 @@ import yaml from lstchain.image.modifier import calculate_noise_parameters -try: - from importlib.resources import files -except ImportError: - from importlib_resources import files - __all__ = ["nsb"] logger = logging.getLogger(__name__) @@ -124,10 +119,13 @@ def main(): 
width.append(0.25) nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] nsb_limit.insert(0, 0) - conda_path= os.environ['CONDA_PREFIX'] - - lst_config = str(conda_path)+"/lib/python3.11/site-packages/lstchain/data/lstchain_standard_config.json" - + conda_path = os.environ["CONDA_PREFIX"] + + lst_config = ( + str(conda_path) + + "/lib/python3.11/site-packages/lstchain/data/lstchain_standard_config.json" + ) + if NSB_match: LST_files = np.sort(glob.glob(f"{source}_LST_[0-9]*_{run_number}.txt")) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index d2286daf..2c0cfbb6 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -1,11 +1,6 @@ from .coincident_events import configfile_coincidence, linking_bash_lst from .LSTnsb import nsb -from .merging_runs import ( - cleaning, - merge, - mergeMC, - split_train_test, -) +from .merging_runs import cleaning, merge, mergeMC, split_train_test from .nsb_level import bash_scripts from .setting_up_config_and_dir import ( collect_nsb, diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py index 12583bd4..a9211370 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py @@ -75,7 +75,7 @@ def main(): source = config["directories"]["target_name"] lst_runs_filename = config["general"]["LST_runs"] env_name = config["general"]["env_name"] - + with open(str(lst_runs_filename), "r") as LSTfile: run_list = LSTfile.readlines() print("***** Generating bashscripts...") @@ -101,7 +101,7 @@ def main(): # print(launch_jobs) os.system(launch_jobs) - + if __name__ == "__main__": main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index b6f4fac7..2472904b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -24,10 +24,8 @@ import yaml from magicctapipe import __version__ - from magicctapipe.io import resource_file - __all__ = [ "nsb_avg", "collect_nsb", From 23616ee24df1f1daad298f504ac6fae678cb9984 Mon Sep 17 00:00:00 2001 From: Raniere Date: Mon, 8 Apr 2024 23:10:26 +0100 Subject: [PATCH 014/236] Update setting_up_config_and_dir.py --- .../semi_automatic_scripts/setting_up_config_and_dir.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 2472904b..d4196796 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -432,7 +432,7 @@ def lists_and_bash_gen_MAGIC( continue lines = [ "#!/bin/sh\n\n", - "#SBATCH -p long\n", + "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -N 1\n\n", @@ -459,7 +459,7 @@ def lists_and_bash_gen_MAGIC( continue lines = [ "#!/bin/sh\n\n", - "#SBATCH -p long\n", + "#SBATCH -p short\n", f"#SBATCH -J 
{process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -N 1\n\n", @@ -722,11 +722,12 @@ def main(): else: collect_nsb(config) - print("*** Reducing DL0 to DL1 data***") + print("*** Converting DL0 into DL1 data ***") print(f'Process name: {target_dir.split("/")[-2:][1]}') print( f'To check the jobs submitted to the cluster, type: squeue -n {target_dir.split("/")[-2:][1]}' ) + print("This process will take about 10 min to run if the IT cluster is free.") directories_generator( target_dir, telescope_ids, MAGIC_runs, NSB_match From 83706b1a5e844ad21f175a2f7d9af1c410015c47 Mon Sep 17 00:00:00 2001 From: Raniere Date: Mon, 8 Apr 2024 23:12:18 +0100 Subject: [PATCH 015/236] Update merging_runs.py --- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 2b0da5af..388a8f69 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -531,6 +531,7 @@ def main(): print( f"To check the jobs submitted to the cluster, type: squeue -n merging_{target_dir.split('/')[-2:][1]}" ) + print("This process will take about 10 to 30 min to run.") if __name__ == "__main__": From 146749f42d07a7fb18629d02a5423bff8dfe7c31 Mon Sep 17 00:00:00 2001 From: Raniere Date: Mon, 8 Apr 2024 23:50:03 +0100 Subject: [PATCH 016/236] Update coincident_events.py --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 01d50c0f..94d4fb17 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -150,9 +150,9 @@ def linking_bash_lst( list_of_subruns = np.sort( glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5") ) - if os.path.exists(f"{outputdir}/logs/list_LST.txt"): + if os.path.exists(f"{outputdir}/logs/list_LST"): with open( - f"{outputdir}/logs/list_LST.txt", "a" + f"{outputdir}/logs/list_LST", "a" ) as LSTdataPathFile: for subrun in list_of_subruns: LSTdataPathFile.write( @@ -303,7 +303,7 @@ def main(): print("***** Generating the bashscript...") linking_bash_lst( target_dir, LST_runs, nsblvl, date, source, LST_version, env_name, NSB_match - ) # linking the data paths to current working directory + ) # linking the data paths to the current working directory print("***** Submitting processess to the cluster...") print(f"Process name: {target_dir.split('/')[-2:][1]}_coincidence") From b2483297421c25841a60bb4f6ffbac29ae9e4087 Mon Sep 17 00:00:00 2001 From: Raniere Date: Mon, 8 Apr 2024 23:50:15 +0100 Subject: [PATCH 017/236] Update README.md --- magicctapipe/scripts/lst1_magic/README.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index d138c8a8..2e9841d8 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -144,20 +144,21 @@ To convert the MAGIC data into DL1 format, you simply do: The output in the terminal will be something like this: ``` -*** Reducing DL0 to DL1 data - this can take many 
hours *** -Process name: yourprojectname_Crab -To check the jobs submitted to the cluster, type: squeue -n yourprojectname_Crab +*** Converting DL0 into DL1 data *** +Process name: yourprojectname +To check the jobs submitted to the cluster, type: squeue -n yourprojectname +This process will take about 10 min to run if the IT cluster is free. ``` The command `setting_up_config_and_dir` does a series of things: - Creates a directory with your target name within the directory `yourprojectname` and several subdirectories inside it that are necessary for the rest of the data reduction. The main directories are: ``` /fefs/aswg/workspace/yourname/yourprojectname/Crab/ -/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1 -/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1/[subdirectories] +/fefs/aswg/workspace/yourname/yourprojectname/Crab/VERSION/DL1 +/fefs/aswg/workspace/yourname/yourprojectname/Crab/VERSION/DL1/[subdirectories] ``` where [subdirectories] stands for several subdirectories containing the MAGIC subruns in the DL1 format. -- Generates a configuration file called `config_step1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/Crab/` created in the previous step. +- Generates a configuration file called `config_DL0_to_DL1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/Crab/` created in the previous step. - Links the MAGIC data addresses to their respective subdirectories defined in the previous steps. - Runs the script `magic_calib_to_dl1.py` for each one of the linked data files. @@ -170,17 +171,18 @@ or > $ squeue -u your_user_name -Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/Crab/DL1/` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. The next step of the conversion from DL0 to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`): +Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/Crab/VERSION/DL1/` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. The next step of the conversion from DL0 to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`): > $ merging_runs (-c config_general.yaml) **The command inside parentheses is not mandatory**. By the way, it is better if you don't use it unless you know what you are doing. The output in the terminal will be something like this: ``` -***** Generating merge bashscripts... +***** Generating merge_MAGIC bashscripts... ***** Running merge_hdf_files.py in the MAGIC data files... Process name: merging_Crab To check the jobs submitted to the cluster, type: squeue -n merging_Crab +This process will take about 10 to 30 min to run.
``` This script will merge the MAGIC data files in the following order: From 8270c867c428d764184808aef1a50bffcf865c1 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 10 Apr 2024 08:59:24 +0000 Subject: [PATCH 018/236] Updates doc --- magicctapipe/scripts/lst1_magic/README.md | 34 +++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 2e9841d8..fcdedb5b 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -139,6 +139,22 @@ The columns here represent the night and run in which you want to select data. P ``` Note that the LST nights appear as being one day before MAGIC's!!! This is because LST saves the date at the beginning of the night, while MAGIC saves it at the end. If there is no LST data, please fill this file with "0,0". These files are the only ones we need to modify in order to convert DL0 into DL1 data. +First, you have to evaluate the NSB level for each LST run (in the future this information will be provided in a database): + +> $ nsb_level -c config_general.yaml + +This will launch a set of parallel jobs, one per LST run, that use an lstchain function to evaluate the NSB over (approx.) 25 subruns per run and then calculate the median of these 25 values. According to this median, the run is associated with a bin in the NSB range (the standard bins (center and range) are 0.5=(0, 0.75), 1.0=(0.75,1.25), 1.5=(1.25,1.75), 2.0=(1.75,2.25), 2.5=(2.25,2.75), 3.0=(2.75,3.25)). + +The output is a set of txt files (e.g., `LST_1.5_1234.txt` for run 1234), one per LST run, whose name contains the NSB bin assigned to the run and whose content is the string `date,run`. + +You can check if this process is done with the following commands: + +> $ squeue -n nsb + +or + +> $ squeue -u your_user_name + To convert the MAGIC data into DL1 format, you simply do: > $ setting_up_config_and_dir -c config_general.yaml
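As a minimal sketch of the binning rule quoted above (the bin centers and edges are taken from the README text; the function name below is illustrative, the authoritative implementation being the one in `LSTnsb.py`), the median-to-bin assignment could look like:

```python
import numpy as np

# Standard NSB bins quoted above: centers 0.5 ... 3.0, each bin ending
# 0.25 above its center (0.5=(0, 0.75), 1.0=(0.75,1.25), ..., 3.0=(2.75,3.25)).
NSB_CENTERS = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
UPPER_EDGES = [c + 0.25 for c in NSB_CENTERS]


def assign_nsb_bin(subrun_nsb_values):
    """Map the median of the ~25 subrun-wise NSB estimates to a bin center."""
    median_nsb = np.median(subrun_nsb_values)
    for center, edge in zip(NSB_CENTERS, UPPER_EDGES):
        if median_nsb <= edge:
            return center
    return None  # run is brighter than the highest standard bin


# A run whose subruns hover around 1.4 lands in the 1.5 bin, so a file
# named like LST_1.5_1234.txt would be produced for run 1234.
print(assign_nsb_bin([1.38, 1.45, 1.41]))  # -> 1.5
```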
@@ -208,7 +238,7 @@ event_coincidence: stop: "0 us ``` -It then links the LST data files to the output directory [...]DL1/Observations/Coincident, and runs the script lst1_magic_event_coincidence.py in all of them. +Then, for each NSB level, it reads the corresponding txt file generated by the second script and, taking into account the day (and, as a consequence the MAGIC period of the joint MAGIC runs) links the LST data files for these runs to the output directory [...]DL1Coincident/ST{}/NSB{}, and runs the script lst1_magic_event_coincidence.py in all of them. Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 files by running: @@ -221,7 +251,7 @@ stereo_reco: theta_uplim: "6 arcmin" ``` -It then creates the output directories for the DL1 with stereo parameters [...]DL1/Observations/Coincident__stereo/SEVERALNIGHTS, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files are then saved in these directories. +It then creates the output directories for the DL1 with stereo parameters [...]DL1CoincidentStereo/ST{}/NSB{}, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files are then saved in these directories. ### Random forest and DL1 to DL2 From 780372b1a3f1b8616323e7ffc5ad63b8e174752d Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 10 Apr 2024 09:57:04 +0000 Subject: [PATCH 019/236] First trial on selecting more sources --- .../semi_automatic_scripts/config_h5.yaml | 12 +- .../semi_automatic_scripts/list_from_h5.py | 104 +++++++++++------- 2 files changed, 72 insertions(+), 44 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml index 89b278d5..8ddb6fe5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml @@ -1,10 +1,10 @@ data_selection_and_lists: time_range : True #Search for all runs in a LST range (e.g., 2020_01_01 -> 2022_01_01) - min : "2019_11_17" - max : "2020_03_19" + min : "2020_11_17" + max : "2021_03_19" date_list : ['2020_12_15','2021_03_11'] #LST list of days to be processed (time_range=False), format: YYYY_MM_DD - skipped_LST_runs: [2919,2923] - skipped_MAGIC_runs: [5088421,5088503] #MAGIC and LST runs not to be processed (why?) - source_name_database: 'CrabNebula' #MUST BE THE SAME AS IN THE DATABASE - source_name_output: 'Crab' #Name to be used in the analysis (will be used in the name of the txt output files), best idea: the same as in the analysis config_general.yaml, WARNING: ONLY ALPHANUMERIC, NO SPECIAL CHARACTERS (special characters can interfere with the analysis scripts) \ No newline at end of file + skipped_LST_runs: [3216,3217] + skipped_MAGIC_runs: [5094658] #MAGIC and LST runs not to be processed (why?) 
+ source_name_database: null #MUST BE THE SAME AS IN THE DATABASE; null to process all sources in time range + source_name_output: 'Crabtest' #Name to be used in the analysis (will be used in the name of the txt output files), best idea: the same as in the analysis config_general.yaml, WARNING: ONLY ALPHANUMERIC, NO SPECIAL CHARACTERS (special characters can interfere with the analysis scripts); Used only if a source name (not null) set above \ No newline at end of file diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 6b7073b7..c15c0bc8 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -7,6 +7,8 @@ import pandas as pd import yaml +import numpy as np +import joblib def split_lst_date(df): @@ -57,7 +59,7 @@ def magic_date(df): return df -def list_run(source_out, df, skip_LST, skip_MAGIC): +def list_run(source_in, source_out, df, skip_LST, skip_MAGIC): """ This function create the MAGIC_runs.txt and LST_runs.txt files, which contain the list of runs (with date) to be processed @@ -73,42 +75,63 @@ def list_run(source_out, df, skip_LST, skip_MAGIC): skip_MAGIC : list List of the MAGIC runs not to be added to the files """ - - file_list = [ - f"{source_out}_LST_runs.txt", - f"{source_out}_MAGIC_runs.txt", - ] # LST, MAGIC!!!! - for j in file_list: - if os.path.isfile(j): - os.remove(j) - print(f"{j} deleted.") - MAGIC_listed = [] - LST_listed = [] - for k in range(len(df)): - skip = False - LST = df["LST1_run"] - - if (int(LST[k]) in skip_LST) or (int(LST[k]) in LST_listed): - skip = True - - if not skip: - with open(file_list[0], "a+") as f: - f.write( - f"{df['date_LST'][k].replace('-','_')},{str(LST[k]).lstrip('0')}\n" - ) - LST_listed.append(int(LST[k])) - MAGIC_min = int(df["MAGIC_first_run"][k]) - MAGIC_max = int(df["MAGIC_last_run"][k]) - for z in range(MAGIC_min, MAGIC_max + 1): + source_list=[] + if source_in is None: + source_list=np.unique(df['source']) + else: + source_list.append(source_out) + joblib.dump(source_list, 'list_sources.dat') + print(source_list) + for source_name in source_list: + print(source_name) + file_list = [ + f"{source_name}_LST_runs.txt", + f"{source_name}_MAGIC_runs.txt", + ] # LST, MAGIC!!!! 
+ print(file_list) + for j in file_list: + if os.path.isfile(j): + os.remove(j) + print(f"{j} deleted.") + MAGIC_listed = [] + LST_listed = [] + if source_in is None: + df_source=df[df['source']==source_name] + else: + df_source=df[df['source']==source_in] + + print(df_source) + LST_run = df_source["LST1_run"].tolist() + MAGIC_run_first=df_source["MAGIC_first_run"].tolist() + MAGIC_run_last=df_source["MAGIC_last_run"].tolist() + LST_date = df_source["date_LST"].tolist() + MAGIC_date=df_source["date_MAGIC"].tolist() + + for k in range(len(df_source)): skip = False - - if (int(z) in skip_MAGIC) or (int(z) in MAGIC_listed): + + + if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in LST_listed): skip = True - if not skip: - with open(file_list[1], "a+") as f: - f.write(f"{df['date_MAGIC'][k].replace('-','_')},{z}\n") - MAGIC_listed.append(int(z)) + if not skip: + with open(file_list[0], "a+") as f: + f.write( + f"{LST_date[k].replace('-','_')},{str(LST_run[k]).lstrip('0')}\n" + ) + LST_listed.append(int(LST_run[k])) + MAGIC_min = int(MAGIC_run_first[k]) + MAGIC_max = int(MAGIC_run_last[k]) + for z in range(MAGIC_min, MAGIC_max + 1): + skip = False + + if (int(z) in skip_MAGIC) or (int(z) in MAGIC_listed): + skip = True + if not skip: + with open(file_list[1], "a+") as f: + f.write(f"{MAGIC_date[k].replace('-','_')},{z}\n") + MAGIC_listed.append(int(z)) + def main(): @@ -139,11 +162,16 @@ def main(): df = df.astype({"YY_LST": int, "MM_LST": int, "DD_LST": int}) stereo = True - - df.query( - f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', + if source_in is None: + df.query( + f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', inplace=True, ) # + else: + df.query( + f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', + inplace=True, + ) # if range: min = str(config["data_selection_and_lists"]["min"]) @@ -164,7 +192,7 @@ def main(): df = df.drop("index", axis=1) df.to_hdf("observations_query.h5", key="joint_obs", mode="w") - list_run(source_out, df, skip_LST, skip_MAGIC) + list_run(source_in, source_out, df, skip_LST, skip_MAGIC) if __name__ == "__main__": From 412350c513f2c9a25d0275f50eacb993d4d4d4c2 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 10 Apr 2024 10:30:18 +0000 Subject: [PATCH 020/236] Doc --- magicctapipe/scripts/lst1_magic/README.md | 4 +++ .../semi_automatic_scripts/list_from_h5.py | 35 ++++++++++--------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index fcdedb5b..4e99f4a8 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -139,6 +139,10 @@ The columns here represent the night and run in which you want to select data. P ``` Note that the LST nights appear as being one day before MAGIC's!!! This is because LST saves the date at the beginning of the night, while MAGIC saves it at the end. If there is no LST data, please fill this file with "0,0". These files are the only ones we need to modify in order to convert DL0 into DL1 data. 
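To make the format of these run lists concrete, each line is a `date,run` pair; the sketch below (file name and run numbers are only illustrative) writes such a list and reads it back with `np.genfromtxt`, as `setting_up_config_and_dir.py` does:

```python
import numpy as np

# Illustrative content only: two LST runs with their (LST-convention) dates.
with open("Crab_LST_runs.txt", "w") as f:
    f.write("2020_11_18,2923\n")
    f.write("2020_11_19,2924\n")

# Read the list back the way the semi-automatic scripts do
# (dtype=str keeps the underscore-separated dates intact).
runs = np.genfromtxt("Crab_LST_runs.txt", dtype=str, delimiter=",")
for date, run in np.atleast_2d(runs):
    print(date, run)  # -> 2020_11_18 2923, then 2020_11_19 2924
```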
+These files can be automatically created (from a table stored on the IT) by the list_from_h5.py script: to do so, you have to fill in the config_h5.yaml file with the list of dates (or range of dates) to be processed, the MAGIC and LST runs to be skipped (if there are some runs to be skipped in the dates to be processed) and the name of the target source in the database: in case you want to analyze only one source, put into this configuration file its name in the database and the name you want to assign to it (e.g., you may want to call the CrabNebula simply Crab in the rest of the analysis); if you want to analyze all the sources in the given time range, put source_name_database = null and source_name_output won't be considered by the script. + +> $ list_from_h5 + First, you have to evaluate the NSB level for each LST run (in the future this information will be provided in a database): > $ nsb_level -c config_general.yaml diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index c15c0bc8..bce833a4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -5,10 +5,10 @@ import os from datetime import datetime +import joblib +import numpy as np import pandas as pd import yaml -import numpy as np -import joblib def split_lst_date(df): @@ -66,6 +66,8 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC): Parameters ---------- + source_in : str or null + Name of the source in the database of joint observations. null to process all sources in a given time range source_out : str Name of the source to be used in the output file name df : :class:`pandas.DataFrame` @@ -80,59 +82,60 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC): skip_MAGIC : list List of the MAGIC runs not to be added to the files """ source_list = [] if source_in is None: source_list = np.unique(df["source"]) else: source_list.append(source_out) joblib.dump(source_list, "list_sources.dat") print(source_list) for source_name in source_list: file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", ] # LST, MAGIC!!!! print(file_list) for j in file_list: if os.path.isfile(j): os.remove(j) print(f"{j} deleted.") MAGIC_listed = [] LST_listed = [] if source_in is None: df_source = df[df["source"] == source_name] else: df_source = df[df["source"] == source_in] print(df_source) LST_run = df_source["LST1_run"].tolist() MAGIC_run_first = df_source["MAGIC_first_run"].tolist() MAGIC_run_last = df_source["MAGIC_last_run"].tolist() LST_date = df_source["date_LST"].tolist() MAGIC_date = df_source["date_MAGIC"].tolist() for k in range(len(df_source)): skip = False if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in LST_listed): skip = True if not skip: with open(file_list[0], "a+") as f: f.write( f"{LST_date[k].replace('-','_')},{str(LST_run[k]).lstrip('0')}\n" ) LST_listed.append(int(LST_run[k])) MAGIC_min = int(MAGIC_run_first[k]) MAGIC_max = int(MAGIC_run_last[k]) for z in range(MAGIC_min, MAGIC_max + 1): skip = False if (int(z) in skip_MAGIC) or (int(z) in MAGIC_listed): skip = True if not skip: with open(file_list[1], "a+") as f: f.write(f"{MAGIC_date[k].replace('-','_')},{z}\n") MAGIC_listed.append(int(z)) def main(): @@ ... stereo = True if source_in is None: df.query(
f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', + inplace=True, + ) # else: df.query( f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', inplace=True, ) # From 3fd709776a4e0b64dc4494d4e7b3b16a850f7cba Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 10 Apr 2024 13:34:27 +0000 Subject: [PATCH 021/236] First NSB + ST fixes --- magicctapipe/scripts/lst1_magic/README.md | 3 + .../config_general.yaml | 33 +- .../semi_automatic_scripts/config_h5.yaml | 13 +- .../semi_automatic_scripts/nsb_level.py | 2 + .../setting_up_config_and_dir.py | 462 +++++++++--------- 5 files changed, 253 insertions(+), 260 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 4e99f4a8..234ffd66 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -39,6 +39,9 @@ pip install . ``` ## Analysis +### WARNING + +During the analysis some files (txt, dat) are automatically produced by the scripts and are needed by the following steps. All the scripts can be called as console scripts, so you don't need to launch them from their directory in the cloned repository or from a directory where you copied them, but it is mandatory that you always launch them from the same directory (e.g., you can create a working directory somewhere in your workspace and use it as your 'base' directory), so that the scripts save their output files there and read them back from there as input files. As for the `config_general.yaml` file, you can store it in the same directory or not, but provide its full path to the script's `-c` option if it is not in the same folder! ### DL0 to DL1 diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 4f3a2016..81c8ccca 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -7,26 +7,29 @@ mc_tel_ids: MAGIC-II: 3 directories: - workspace_dir : "/fefs/aswg/workspace/elisa.visentin/MAGIC_LST_analysis/" - target_name : "Crab" - MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" - MC_electrons : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Electrons/sim_telarray/" - MC_helium : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Helium/sim_telarray/" - MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" - MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" #where to create the output folders `MCP_version/TARGET/DLx/...` + target_name : "Crab" # here put null to analyse all the sources in a given time range (see list_from_h5) + MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" #put "" if you don't want to process these simulated particles + MC_electrons : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Electrons/sim_telarray/" #put "" if you don't want to process these simulated particles + MC_helium : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Helium/sim_telarray/" #put "" if you don't want to process these simulated particles + MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" #put "" if you don't want to process these simulated particles + MC_gammadiff : 
"/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" #put "" if you don't want to process these simulated particles + # MC paths are ignored if you put NSB_matching = true general: - target_RA_deg : 83.629 #RA in degrees - target_Dec_deg: 22.015 #Dec in degrees + target_RA_deg : 83.629 #RA in degrees; Put null if target_name=null (i.e., fill it with the source coordinatets only if you are analyzing only one source) + target_Dec_deg: 22.015 #Dec in degrees; Put null if target_name=null (i.e., fill it with the source coordinatets only if you are analyzing only one source) SimTel_version : "v1.4" - LST_version : "v0.9" + LST_version : "v0.10" LST_tailcut : "tailcut84" focal_length : "effective" - MAGIC_runs : "MAGIC_runs.txt" #If there is no MAGIC data, please fill this file with "0, 0" - LST_runs : "LST_runs.txt" + MAGIC_runs : "Crab_MAGIC_runs.txt" #If there is no MAGIC data, please fill this file with "0, 0" + LST_runs : "Crab_LST_runs.txt" proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] - env_name : magic-lst - cluster : "SLURM" - NSB_matching : true + env_name : auto_MCP_PR # name of the conda environment to be used to process data + cluster : "SLURM" # cluster management system on which data are processed: now only SLURM available (almost all of the data on the IT cluster), in the future maybe also condor (PIC, CNAF) + NSB_matching : true # put false to process also the MCs (see below): not recommended if adequate MC productions (DLx) already available on the IT because it requires a lot of time and memory! + # if true, only real data are processed and are later matched to the available MC (see /fefs/aswg/LST1MAGIC/mc) + NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml index 8ddb6fe5..7c355b81 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml @@ -1,10 +1,13 @@ data_selection_and_lists: time_range : True #Search for all runs in a LST range (e.g., 2020_01_01 -> 2022_01_01) - min : "2020_11_17" - max : "2021_03_19" + min : "2023_11_17" + max : "2024_03_03" date_list : ['2020_12_15','2021_03_11'] - #LST list of days to be processed (time_range=False), format: YYYY_MM_DD + #LST list of days to be processed (time_range=False), format: YYYY_MM_DD skipped_LST_runs: [3216,3217] skipped_MAGIC_runs: [5094658] #MAGIC and LST runs not to be processed (why?) 
- source_name_database: null #MUST BE THE SAME AS IN THE DATABASE; null to process all sources in time range - source_name_output: 'Crabtest' #Name to be used in the analysis (will be used in the name of the txt output files), best idea: the same as in the analysis config_general.yaml, WARNING: ONLY ALPHANUMERIC, NO SPECIAL CHARACTERS (special characters can interfere with the analysis scripts); Used only if a source name (not null) set above \ No newline at end of file + source_name_database: CrabNebula #MUST BE THE SAME AS IN THE DATABASE; null to process all sources in time range + source_name_output: 'Crabtest' #Name to be used in the analysis (will be used in the name of the txt output files), + #best idea: the same as in the analysis config_general.yaml, + #WARNING: ONLY ALPHANUMERIC, NO SPECIAL CHARACTERS (special characters can interfere with the analysis scripts); + #Used only if a source name (not null) set above \ No newline at end of file diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py index a9211370..a36b9133 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py @@ -1,4 +1,6 @@ """ +TO BE FIXED AND ADAPTED TO NEW TABLE/DATABASE + Bash scripts to run LSTnsb.py on all the LST runs by using parallel jobs Usage: python nsb_level.py (-c config.yaml) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index d4196796..3cd28fae 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -22,6 +22,7 @@ import numpy as np import yaml +import json from magicctapipe import __version__ from magicctapipe.io import resource_file @@ -39,17 +40,10 @@ logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) -ST_list = ["ST0320A", "ST0319A", "ST0318A", "ST0317A", "ST0316A"] -ST_begin = ["2023_03_10", "2022_12_15", "2022_06_10", "2021_12_30", "2020_10_24"] -ST_end = [ - "2024_01_01", - "2023_03_09", - "2022_08_31", - "2022_06_09", - "2021_09_29", -] # ST0320 ongoing -> 'service' end date +# TODO +''' def nsb_avg(source, config, LST_list): """ @@ -130,8 +124,9 @@ def nsb_avg(source, config, LST_list): f.write(i) f.write(f"nsb_value: {nsb}\n") return (continue_process, nsb) - - +''' +# TODO +''' def collect_nsb(config): """ Here we split the LST runs in NSB-wise .txt files @@ -154,7 +149,7 @@ def collect_nsb(config): with open(f"{source}_LST_{nsblvl}_.txt", "a+") as f: f.write(f"{line.rstrip()}\n") - +''' def config_file_gen(ids, target_dir, noise_value, NSB_match): """ @@ -367,30 +362,23 @@ def lists_and_bash_gen_MAGIC( MAGIC_runs.append(MAGIC) for i in MAGIC_runs: - for p in range(len(ST_begin)): - if ( - time.strptime(i[0], "%Y_%m_%d") - >= time.strptime(ST_begin[p], "%Y_%m_%d") - ) and ( - time.strptime(i[0], "%Y_%m_%d") - <= time.strptime(ST_end[p], "%Y_%m_%d") - ): - if telescope_ids[-1] > 0: - lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/v{__version__}/DL1/{ST_list[p]}/M2/{i[0]}/{i[1]}/logs \n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", - ] - f.writelines(lines) - - 
f.write("\n") - if telescope_ids[-2] > 0: - lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/v{__version__}/DL1/{ST_list[p]}/M1/{i[0]}/{i[1]}/logs \n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", - ] - f.writelines(lines) + + if telescope_ids[-1] > 0: + lines = [ + f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1=v{__version__}/{target_dir}/DL1/M2/{i[0]}/{i[1]}/logs \n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", + ] + f.writelines(lines) + + f.write("\n") + if telescope_ids[-2] > 0: + lines = [ + f'export IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1=v{__version__}/{target_dir}/DL1/M1/{i[0]}/{i[1]}/logs \n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", + ] + f.writelines(lines) else: if telescope_ids[-1] > 0: for i in MAGIC_runs: @@ -414,68 +402,60 @@ def lists_and_bash_gen_MAGIC( if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): for i in MAGIC_runs: - for p in range(len(ST_begin)): - if ( - time.strptime(i[0], "%Y_%m_%d") - >= time.strptime(ST_begin[p], "%Y_%m_%d") - ) and ( - time.strptime(i[0], "%Y_%m_%d") - <= time.strptime(ST_end[p], "%Y_%m_%d") - ): - - if telescope_ids[-1] > 0: - number_of_nodes = glob.glob( - f'/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' - ) - number_of_nodes = len(number_of_nodes) - 1 - if number_of_nodes < 0: - continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -N 1\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"export OUTPUTDIR={target_dir}/v{__version__}/DL1/{ST_list[p]}/M2/{i[0]}/{i[1]}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", - ] - with open( - f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w" - ) as f: - f.writelines(lines) - - if telescope_ids[-2] > 0: - number_of_nodes = glob.glob( - f'/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' - ) - number_of_nodes = len(number_of_nodes) - 1 - if number_of_nodes < 0: - continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -N 1\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"export OUTPUTDIR={target_dir}/v{__version__}/DL1/{ST_list[p]}/M1/{i[0]}/{i[1]}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", - ] - with open( - f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w" - ) as f: - f.writelines(lines) + + if 
telescope_ids[-1] > 0: + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + if number_of_nodes < 0: + continue + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}\n", + f"#SBATCH --array=0-{number_of_nodes}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export OUTPUTDIR=v{__version__}/{target_dir}/DL1/M2/{i[0]}/{i[1]}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + ] + with open( + f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w" + ) as f: + f.writelines(lines) + + if telescope_ids[-2] > 0: + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + if number_of_nodes < 0: + continue + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}\n", + f"#SBATCH --array=0-{number_of_nodes}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export OUTPUTDIR=v{__version__}/{target_dir}/DL1/M1/{i[0]}/{i[1]}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + ] + with open( + f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w" + ) as f: + f.writelines(lines) else: if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): for i in MAGIC_runs: @@ -550,13 +530,13 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): """ if NSB_match: - if not os.path.exists(target_dir): - os.mkdir(target_dir) - if not os.path.exists(f"{target_dir}/v{__version__}"): - os.mkdir(f"{target_dir}/v{__version__}") - if not os.path.exists(f"{target_dir}/v{__version__}/DL1"): - os.mkdir(f"{target_dir}/v{__version__}/DL1") - dl1_dir = str(f"{target_dir}/v{__version__}/DL1") + if not os.path.exists(f"v{__version__}"): + os.mkdir(f"v{__version__}") + if not os.path.exists(f"v{__version__}/{target_dir}"): + os.mkdir(f"v{__version__}/{target_dir}") + if not os.path.exists(f"v{__version__}/{target_dir}/DL1"): + os.mkdir(f"v{__version__}/{target_dir}/DL1") + dl1_dir = str(f"v{__version__}/{target_dir}/DL1") else: if not os.path.exists(target_dir): os.mkdir(target_dir) @@ -596,46 +576,39 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): MAGIC_runs.append(MAGIC) if NSB_match: for i in MAGIC_runs: - for p in range(len(ST_begin)): - if ( - time.strptime(i[0], "%Y_%m_%d") - >= time.strptime(ST_begin[p], "%Y_%m_%d") - ) and ( - time.strptime(i[0], "%Y_%m_%d") - <= time.strptime(ST_end[p], "%Y_%m_%d") + + if telescope_ids[-1] > 0: + if not os.path.exists(f"{dl1_dir}"): + os.mkdir(f"{dl1_dir}") + if not os.path.exists(f"{dl1_dir}/M2"): + os.mkdir(f"{dl1_dir}/M2") + if not os.path.exists(f"{dl1_dir}/M2/{i[0]}"): + 
os.mkdir(f"{dl1_dir}/M2/{i[0]}") + + if not os.path.exists( + f"{dl1_dir}/M2/{i[0]}/{i[1]}" + ): + os.mkdir(f"{dl1_dir}/M2/{i[0]}/{i[1]}") + if not os.path.exists( + f"{dl1_dir}/M2/{i[0]}/{i[1]}/logs" + ): + os.mkdir(f"{dl1_dir}/M2/{i[0]}/{i[1]}/logs") + if telescope_ids[-2] > 0: + if not os.path.exists(f"{dl1_dir}"): + os.mkdir(f"{dl1_dir}") + if not os.path.exists(f"{dl1_dir}/M1"): + os.mkdir(f"{dl1_dir}/M1") + if not os.path.exists(f"{dl1_dir}/M1/{i[0]}"): + os.mkdir(f"{dl1_dir}/M1/{i[0]}") + + if not os.path.exists( + f"{dl1_dir}/M1/{i[0]}/{i[1]}" + ): + os.mkdir(f"{dl1_dir}/M1/{i[0]}/{i[1]}") + if not os.path.exists( + f"{dl1_dir}/M1/{i[0]}/{i[1]}/logs" ): - if telescope_ids[-1] > 0: - if not os.path.exists(f"{dl1_dir}/{ST_list[p]}"): - os.mkdir(f"{dl1_dir}/{ST_list[p]}") - if not os.path.exists(f"{dl1_dir}/{ST_list[p]}/M2"): - os.mkdir(f"{dl1_dir}/{ST_list[p]}/M2") - if not os.path.exists(f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}"): - os.mkdir(f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}") - - if not os.path.exists( - f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}/{i[1]}" - ): - os.mkdir(f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}/{i[1]}") - if not os.path.exists( - f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}/{i[1]}/logs" - ): - os.mkdir(f"{dl1_dir}/{ST_list[p]}/M2/{i[0]}/{i[1]}/logs") - if telescope_ids[-2] > 0: - if not os.path.exists(f"{dl1_dir}/{ST_list[p]}"): - os.mkdir(f"{dl1_dir}/{ST_list[p]}") - if not os.path.exists(f"{dl1_dir}/{ST_list[p]}/M1"): - os.mkdir(f"{dl1_dir}/{ST_list[p]}/M1") - if not os.path.exists(f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}"): - os.mkdir(f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}") - - if not os.path.exists( - f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}/{i[1]}" - ): - os.mkdir(f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}/{i[1]}") - if not os.path.exists( - f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}/{i[1]}/logs" - ): - os.mkdir(f"{dl1_dir}/{ST_list[p]}/M1/{i[0]}/{i[1]}/logs") + os.mkdir(f"{dl1_dir}/M1/{i[0]}/{i[1]}/logs") else: if telescope_ids[-1] > 0: if not os.path.exists(f"{target_dir}/DL1/Observations/M2"): @@ -698,10 +671,8 @@ def main(): MAGIC_runs = np.genfromtxt( MAGIC_runs_and_dates, dtype=str, delimiter="," ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - target_dir = str( - Path(config["directories"]["workspace_dir"]) - / config["directories"]["target_name"] - ) + + LST_runs_and_dates = config["general"]["LST_runs"] MC_gammas = str(Path(config["directories"]["MC_gammas"])) MC_electrons = str(Path(config["directories"]["MC_electrons"])) @@ -710,101 +681,112 @@ def main(): MC_gammadiff = str(Path(config["directories"]["MC_gammadiff"])) focal_length = config["general"]["focal_length"] source = config["directories"]["target_name"] - noise_value = [0, 0, 0] - if not NSB_match: - running, nsb = nsb_avg(source, args.config_file, LST_runs_and_dates) - if running != "y": - print("OK... 
The script was terminated by the user choice.") - return - noisebright = 1.15 * pow(nsb, 1.115) - biasdim = 0.358 * pow(nsb, 0.805) - noise_value = [nsb, noisebright, biasdim] - else: - collect_nsb(config) - print("*** Converting DL0 into DL1 data ***") - print(f'Process name: {target_dir.split("/")[-2:][1]}') - print( - f'To check the jobs submitted to the cluster, type: squeue -n {target_dir.split("/")[-2:][1]}' - ) - print("This process will take about 10 min to run if the IT cluster is free.") + source_list=[] + if source is not None: + source_list=json.load('list_sources.dat') + + else: + source_list.append(source) + for source_name in source_list: + target_dir = str( + Path(config["directories"]["workspace_dir"]) + / source_name + ) + + noise_value = [0, 0, 0] + if not NSB_match: + nsb = config["general"]["NSB_MC"] + + noisebright = 1.15 * pow(nsb, 1.115) + biasdim = 0.358 * pow(nsb, 0.805) + noise_value = [nsb, noisebright, biasdim] + + + #TODO: fix here above + print("*** Converting DL0 into DL1 data ***") + print(f'Process name: {target_dir.split("/")[-2:][1]}') + print( + f'To check the jobs submitted to the cluster, type: squeue -n {target_dir.split("/")[-2:][1]}' + ) + print("This process will take about 10 min to run if the IT cluster is free.") + + directories_generator( + target_dir, telescope_ids, MAGIC_runs, NSB_match + ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target + config_file_gen(telescope_ids, target_dir, noise_value, NSB_match)#TODO: fix here + + if not NSB_match: + # Below we run the analysis on the MC data + if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): + lists_and_bash_generator( + "gammas", target_dir, MC_gammas, SimTel_version, focal_length, env_name + ) # gammas + lists_and_bash_generator( + "electrons", + target_dir, + MC_electrons, + SimTel_version, + focal_length, + env_name, + ) # electrons + lists_and_bash_generator( + "helium", target_dir, MC_helium, SimTel_version, focal_length, env_name + ) # helium + lists_and_bash_generator( + "protons", + target_dir, + MC_protons, + SimTel_version, + focal_length, + env_name, + ) # protons + lists_and_bash_generator( + "gammadiffuse", + target_dir, + MC_gammadiff, + SimTel_version, + focal_length, + env_name, + ) # gammadiffuse + + # Here we do the MC DL0 to DL1 conversion: + list_of_MC = glob.glob("linking_MC_*s.sh") + + # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") + + for n, run in enumerate(list_of_MC): + if n == 0: + launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + else: + launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + + os.system(launch_jobs_MC) + + # Below we run the analysis on the MAGIC data + if ( + (args.analysis_type == "onlyMAGIC") + or (args.analysis_type == "doEverything") + or (NSB_match) + ): + lists_and_bash_gen_MAGIC( + target_dir, telescope_ids, MAGIC_runs, source_name, env_name, NSB_match + ) # MAGIC real data + if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh") + if len(list_of_MAGIC_runs) < 1: + print( + "Warning: no bash script has been produced. 
Please check the provided MAGIC_runs.txt and the MAGIC calibrated data" + ) + return - directories_generator( - target_dir, telescope_ids, MAGIC_runs, NSB_match - ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen(telescope_ids, target_dir, noise_value, NSB_match) + for n, run in enumerate(list_of_MAGIC_runs): + if n == 0: + launch_jobs = f"linking=$(sbatch --parsable {source_name}_linking_MAGIC_data_paths.sh) && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" + else: + launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" - if not NSB_match: - # Below we run the analysis on the MC data - if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): - lists_and_bash_generator( - "gammas", target_dir, MC_gammas, SimTel_version, focal_length, env_name - ) # gammas - lists_and_bash_generator( - "electrons", - target_dir, - MC_electrons, - SimTel_version, - focal_length, - env_name, - ) # electrons - lists_and_bash_generator( - "helium", target_dir, MC_helium, SimTel_version, focal_length, env_name - ) # helium - lists_and_bash_generator( - "protons", - target_dir, - MC_protons, - SimTel_version, - focal_length, - env_name, - ) # protons - lists_and_bash_generator( - "gammadiffuse", - target_dir, - MC_gammadiff, - SimTel_version, - focal_length, - env_name, - ) # gammadiffuse - - # Here we do the MC DL0 to DL1 conversion: - list_of_MC = glob.glob("linking_MC_*s.sh") - - # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") - - for n, run in enumerate(list_of_MC): - if n == 0: - launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - else: - launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - - os.system(launch_jobs_MC) - - # Below we run the analysis on the MAGIC data - if ( - (args.analysis_type == "onlyMAGIC") - or (args.analysis_type == "doEverything") - or (NSB_match) - ): - lists_and_bash_gen_MAGIC( - target_dir, telescope_ids, MAGIC_runs, source, env_name, NSB_match - ) # MAGIC real data - if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): - list_of_MAGIC_runs = glob.glob(f"{source}_MAGIC-*.sh") - if len(list_of_MAGIC_runs) < 1: - print( - "Warning: no bash script has been produced. 
Please check the provided MAGIC_runs.txt and the MAGIC calibrated data" - ) - return - - for n, run in enumerate(list_of_MAGIC_runs): - if n == 0: - launch_jobs = f"linking=$(sbatch --parsable {source}_linking_MAGIC_data_paths.sh) && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" - else: - launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" - - os.system(launch_jobs) + os.system(launch_jobs) if __name__ == "__main__": From 7151ddf1159989f8d854387ed35a86e7b4a0b70e Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 16 Apr 2024 16:10:46 +0000 Subject: [PATCH 022/236] query on dfs (magic lst) --- .../create_LST_table.py | 24 +++ .../semi_automatic_scripts/list_from_h5.py | 161 +++++++++++------- 2 files changed, 125 insertions(+), 60 deletions(-) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py new file mode 100644 index 00000000..78cbcb28 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py @@ -0,0 +1,24 @@ +import pandas as pd +import numpy as np + +def main(): + + """ + Main function + """ + + + df = pd.read_hdf( + "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5", key="/str" + ) # TODO: put this file in a shared folder + + needed_cols=['source', 'DATE', 'LST1_run', 'MAGIC_stereo', 'MAGIC_trigger', 'MAGIC_HV'] + df_cut=df[needed_cols] + print(df_cut.columns) + df_cut['nsb']=np.repeat(np.nan,len(df_cut)) + df_cut['error_code']=np.repeat(np.nan,len(df_cut)) + print(df_cut) + df_cut.to_hdf("/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w") + +if __name__ == "__main__": + main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index bce833a4..325b8514 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -51,15 +51,34 @@ def magic_date(df): The input dataframe with an added column """ - date_lst = pd.to_datetime(df["date_LST"]) + date_lst = pd.to_datetime(df["Date (LST convention)"], format="%Y_%m_%d") delta = pd.Timedelta("1 day") date_magic = date_lst + delta date_magic = date_magic.dt.strftime("%Y-%m-%d") df["date_MAGIC"] = date_magic return df +def clear_files(source_in, source_out, df): + source_list = [] + if source_in is None: + source_list = np.unique(df["source"]) + else: + source_list.append(source_out) + + print(source_list) + for source_name in source_list: + print(source_name) + file_list = [ + f"{source_name}_LST_runs.txt", + f"{source_name}_MAGIC_runs.txt", + ] # LST, MAGIC!!!! 
+ print(file_list) + for j in file_list: + if os.path.isfile(j): + os.remove(j) + print(f"{j} deleted.") -def list_run(source_in, source_out, df, skip_LST, skip_MAGIC): +def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_list=None): """ This function create the MAGIC_runs.txt and LST_runs.txt files, which contain the list of runs (with date) to be processed @@ -80,59 +99,59 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC): source_list = [] if source_in is None: source_list = np.unique(df["source"]) + else: source_list.append(source_out) joblib.dump(source_list, "list_sources.dat") print(source_list) for source_name in source_list: - print(source_name) file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", ] # LST, MAGIC!!!! - print(file_list) - for j in file_list: - if os.path.isfile(j): - os.remove(j) - print(f"{j} deleted.") - MAGIC_listed = [] - LST_listed = [] + + run_listed = [] if source_in is None: df_source = df[df["source"] == source_name] else: df_source = df[df["source"] == source_in] + if is_LST: + print('lst') + LST_run = df_source["LST1_run"].tolist() + LST_date = df_source["date_LST"].tolist() + for k in range(len(df_source)): + skip = False + + if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in run_listed): + skip = True - print(df_source) - LST_run = df_source["LST1_run"].tolist() - MAGIC_run_first = df_source["MAGIC_first_run"].tolist() - MAGIC_run_last = df_source["MAGIC_last_run"].tolist() - LST_date = df_source["date_LST"].tolist() - MAGIC_date = df_source["date_MAGIC"].tolist() - - for k in range(len(df_source)): - skip = False - - if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in LST_listed): - skip = True - - if not skip: - with open(file_list[0], "a+") as f: - f.write( - f"{LST_date[k].replace('-','_')},{str(LST_run[k]).lstrip('0')}\n" - ) - LST_listed.append(int(LST_run[k])) - MAGIC_min = int(MAGIC_run_first[k]) - MAGIC_max = int(MAGIC_run_last[k]) - for z in range(MAGIC_min, MAGIC_max + 1): + if not skip: + with open(file_list[0], "a+") as f: + f.write( + f"{LST_date[k].replace('-','_')},{str(LST_run[k]).lstrip('0')}\n" + ) + run_listed.append(int(LST_run[k])) + if not is_LST: + print('magic') + MAGIC_date = df_source["date_MAGIC"].tolist() + M2_run=df_source['Run ID'].tolist() + print(M2_run) + for k in range(len(df_source)): skip = False - if (int(z) in skip_MAGIC) or (int(z) in MAGIC_listed): + if (int(M2_run[k]) in skip_MAGIC) or (int(M2_run[k]) in run_listed): + skip = True + if float(M2_run[k]) not in M1_run_list: skip = True if not skip: with open(file_list[1], "a+") as f: - f.write(f"{MAGIC_date[k].replace('-','_')},{z}\n") - MAGIC_listed.append(int(z)) + print(file_list[1]) + f.write( + f"{MAGIC_date[k].replace('-','_')},{str(M2_run[k]).rstrip('.0')}\n" + ) + run_listed.append(int(M2_run[k])) + def main(): @@ -142,15 +161,11 @@ def main(): with open("config_h5.yaml", "rb") as f: config = yaml.safe_load(f) - df = pd.read_hdf( - "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5", key="/str" - ) # TODO: put this file in a shared folder + - df = split_lst_date(df) + - df = magic_date(df) - - df.to_hdf("observations.h5", key="joint_obs", mode="w") + source_in = config["data_selection_and_lists"]["source_name_database"] @@ -159,42 +174,68 @@ def main(): skip_LST = config["data_selection_and_lists"]["skipped_LST_runs"] skip_MAGIC = config["data_selection_and_lists"]["skipped_MAGIC_runs"] - df = pd.read_hdf("observations.h5", key="joint_obs") - df = df.astype({"YY_LST": 
int, "MM_LST": int, "DD_LST": int}) + + + + + df_LST = pd.read_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs" + ) # TODO: put this file in a shared folder + + df_LST = split_lst_date(df_LST) + df_LST = df_LST.astype({"YY_LST": int, "MM_LST": int, "DD_LST": int}) stereo = True if source_in is None: - df.query( + df_LST.query( f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', inplace=True, ) # else: - df.query( + df_LST.query( f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', inplace=True, - ) # - + ) #(int(M2_run[k]) if range: min = str(config["data_selection_and_lists"]["min"]) max = str(config["data_selection_and_lists"]["max"]) min = datetime.strptime(min, "%Y_%m_%d") max = datetime.strptime(max, "%Y_%m_%d") - lst = pd.to_datetime(df["date_LST"].str.replace("_", "-")) - df["date"] = lst - df = df[df["date"] > min] - df = df[df["date"] < max] + lst = pd.to_datetime(df_LST["date_LST"].str.replace("_", "-")) + df_LST["date"] = lst + df_LST = df_LST[df_LST["date"] >= min] + df_LST = df_LST[df_LST["date"] <= max] else: dates = config["data_selection_and_lists"]["date_list"] - df = df[df["date_LST"].isin(dates)] - - df = df.reset_index() - df = df.drop("index", axis=1) - - df.to_hdf("observations_query.h5", key="joint_obs", mode="w") - list_run(source_in, source_out, df, skip_LST, skip_MAGIC) - + df_LST = df_LST[df_LST["date_LST"].isin(dates)] + + df_LST = df_LST.reset_index() + df_LST = df_LST.drop("index", axis=1) + + clear_files(source_in, source_out, df_LST) + list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) + list_date_LST=np.unique(df_LST['date_LST']) + list_date_LST_low=[sub.replace('-', '_') for sub in list_date_LST] + + print(list_date_LST_low) + df_MAGIC1=pd.read_hdf('/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5', key='MAGIC1/runs_M1') + df_MAGIC2=pd.read_hdf('/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5', key='MAGIC2/runs_M2') + + print(list_date_LST) + df_MAGIC1=df_MAGIC1[df_MAGIC1['Date (LST convention)'].isin(list_date_LST_low)] + df_MAGIC2=df_MAGIC2[df_MAGIC2['Date (LST convention)'].isin(list_date_LST_low)] + print(df_MAGIC2) + + df_MAGIC2=magic_date(df_MAGIC2) + df_MAGIC1=magic_date(df_MAGIC1) + df_MAGIC2 = df_MAGIC2.rename(columns={'Source': 'source'}) + print(df_MAGIC2) + + M1_runs=df_MAGIC1['Run ID'].tolist() + list_run(source_in, source_out, df_MAGIC2, skip_LST, skip_MAGIC, False, M1_runs) + if __name__ == "__main__": main() From f0155ddf2350425467f5025309e0487f1a864c1a Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 17 Apr 2024 07:53:28 +0000 Subject: [PATCH 023/236] Bug --- .../scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 325b8514..2e55c2a1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -136,9 +136,11 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis MAGIC_date = df_source["date_MAGIC"].tolist() M2_run=df_source['Run ID'].tolist() print(M2_run) + print(M2_run) for k in range(len(df_source)): skip = False + if (int(M2_run[k]) in skip_MAGIC) or (int(M2_run[k]) in run_listed): skip = 
True if float(M2_run[k]) not in M1_run_list: @@ -147,7 +149,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis with open(file_list[1], "a+") as f: print(file_list[1]) f.write( - f"{MAGIC_date[k].replace('-','_')},{str(M2_run[k]).rstrip('.0')}\n" + f"{MAGIC_date[k].replace('-','_')},{int(M2_run[k])}\n" ) run_listed.append(int(M2_run[k])) From 7c206c38308ff416662fc2d4520b6fbde7af9921 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 18 Apr 2024 15:50:32 +0000 Subject: [PATCH 024/236] some fixes --- .../semi_automatic_scripts/create_LST_table.py | 7 +++++++ .../lst1_magic/semi_automatic_scripts/list_from_h5.py | 11 +++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py index 78cbcb28..9e120517 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py @@ -1,5 +1,6 @@ import pandas as pd import numpy as np +import os def main(): @@ -18,6 +19,12 @@ def main(): df_cut['nsb']=np.repeat(np.nan,len(df_cut)) df_cut['error_code']=np.repeat(np.nan,len(df_cut)) print(df_cut) + if os.path.isfile('/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5'): + df_old=pd.read_hdf('/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5', key="joint_obs") + df_cut=pd.concat([df_old, df_cut]).drop_duplicates(keep='first') + df_cut= df_cut.sort_values(by=["DATE","source"]) + print(df_cut) # TODO check if fine with update and nsb + df_cut.to_hdf("/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w") if __name__ == "__main__": diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 2e55c2a1..c0c49eae 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -66,6 +66,7 @@ def clear_files(source_in, source_out, df): source_list.append(source_out) print(source_list) + joblib.dump(source_list, "list_sources.dat") for source_name in source_list: print(source_name) file_list = [ @@ -77,6 +78,7 @@ def clear_files(source_in, source_out, df): if os.path.isfile(j): os.remove(j) print(f"{j} deleted.") + def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_list=None): @@ -102,7 +104,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis else: source_list.append(source_out) - joblib.dump(source_list, "list_sources.dat") + print(source_list) for source_name in source_list: file_list = [ @@ -121,9 +123,12 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis LST_date = df_source["date_LST"].tolist() for k in range(len(df_source)): skip = False + if LST_run[k]!=LST_run[k]: + skip = True if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in run_listed): skip = True + if not skip: with open(file_list[0], "a+") as f: @@ -139,12 +144,14 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis print(M2_run) for k in range(len(df_source)): skip = False - + if M2_run[k]!=M2_run[k]: #TODO check if ok for nan + skip = True if (int(M2_run[k]) in skip_MAGIC) or (int(M2_run[k]) in run_listed): skip = True if float(M2_run[k]) not in 
M1_run_list:
                    skip = True
 
+
                if not skip:
                    with open(file_list[1], "a+") as f:
                        print(file_list[1])

From 0559aba1fb5526c2afc7e16220c812047e2c6093 Mon Sep 17 00:00:00 2001
From: Raniere de Menezes
Date: Mon, 22 Apr 2024 17:35:13 +0200
Subject: [PATCH 025/236] Documentation + debugging

---
 magicctapipe/scripts/lst1_magic/README.md     | 143 ++++++------------
 .../config_general.yaml                       |  47 +++---
 .../semi_automatic_scripts/list_from_h5.py    | 133 +++++++++-------
 .../setting_up_config_and_dir.py              |   4 +-
 4 files changed, 150 insertions(+), 177 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md
index 234ffd66..c870823e 100644
--- a/magicctapipe/scripts/lst1_magic/README.md
+++ b/magicctapipe/scripts/lst1_magic/README.md
@@ -28,7 +28,7 @@ From DL3 on, the analysis is done with gammapy.
 
 Authorized institute server (Client) → ssh connection to CTALaPalma → ssh connection to cp01/02.
 
-2) Once connected to the IT Container, install magic-cta-pipe (e.g. in your home directory in the IT Container) with the following commands (if you have mamba installed, we recommend yo uuse it instead of conda. The installation process will be much faster.):
+2) Once connected to the IT Container, install magic-cta-pipe (e.g. in your home directory in the IT Container) with the following commands (if you have mamba installed, we recommend you use it instead of conda. The installation process will be much faster.):
 
 ```
 git clone -b Torino_auto_MCP https://github.com/cta-observatory/magic-cta-pipe.git
 cd magic-cta-pipe
 conda env create -n magic-lst -f environment.yml
 conda activate magic-lst
 pip install .
 ```
 
 ## Analysis
-### WARNING
-During the analysis some files (txt, dat) are automatically produced by the scripts and are needed by the folllowing steps. All the script can be called as console scripts, so that you don't need to launch them from their directory in the cloned repository or from a directory where you copied them, but it is mandatory that you always launch them from the same directory (e.g., you can create a working directory somewhere in your workspace and use it as your 'base' directory) so that the scripts save there their output files and read them from there as input files. As fpr the `config_general.yaml` file, you can store it in the same directory or not, but provide its full path to the script `-c` option if it is not in the same folder!
+During the analysis, some files are automatically produced by the scripts and are saved in your working directory. These files are necessary for the subsequent steps in the analysis chain. It is therefore mandatory to always launch the scripts from the same working directory, so that the output files stored there can be correctly read back as input at the subsequent analysis steps.
 
 ### DL0 to DL1
 
 In this step, we will convert the MAGIC and Monte Carlo (MC) Data Level (DL) 0 to DL1 (our goal is to reach DL3).
 
-In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open the magic-lst environment with the command `conda activate magic-lst` and create the files `config_general.yaml`, `MAGIC_runs.txt` and `LST_runs.txt`.
+In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open the magic-lst environment with the command `conda activate magic-lst` and update the file `config_general.yaml` according to your analysis.
+ +The file `config_general.yaml` must contain the telescope IDs and the directories with the MC data, as shown below, as well as the data selection and some information on the night sky background (NSB) level and software versions: -The file `config_general.yaml` must contain the telescope IDs and the directories with the MC data, as shown below: ``` mc_tel_ids: LST-1: 1 - LST-2: 0 # If the telescope ID is set to 0, this means that this telescope is not used in the analysis. + LST-2: 0 LST-3: 0 LST-4: 0 MAGIC-I: 2 MAGIC-II: 3 directories: - workspace_dir : "/fefs/aswg/workspace/yourname/yourprojectname/" - target_name : "Crab" - MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" + workspace_dir : "/fefs/aswg/workspace/yourname/yourprojectname/" # Output directory where all the data products will be saved. + # MC paths below are ignored if you set NSB_matching = true. + MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" # set to "" if you don't want to process these Monte Carlo simulations. MC_electrons : "" MC_helium : "" - MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" + MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" - + +data_selection: + source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. + source_name_output: 'Crabtest' # Name tag of your target. Used only if source_name_database != null. + target_RA_deg : 83.629 # RA in degrees; Please set it to null if source_name_database=null. + target_Dec_deg: 22.015 # Dec in degrees; Please set it to null if source_name_database=null. + time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). + min : "2023_11_17" + max : "2024_03_03" + date_list : ['2020_12_15','2021_03_11'] # LST list of days to be processed (only if time_range=False), format: YYYY_MM_DD. + skip_LST_runs: [3216,3217] # LST runs to ignore. + skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. + general: - target_RA_deg : 83.629 # RA in degrees, the coordinates are useful only if the target name is not found in the catalogs. - target_Dec_deg: 22.015 # Dec in degrees SimTel_version: "v1.4" - LST_version : "v0.9" + LST_version : "v0.10" LST_tailcut : "tailcut84" focal_length : "effective" - MAGIC_runs : "MAGIC_runs.txt" #If there is no MAGIC data, please fill this file with "0, 0" - LST_runs : "LST_runs.txt" - proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest - nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] # NSB = night sky background. This will be useful if NSB_matching = True - env_name : magic-lst - cluster : "SLURM" - NSB_matching : true + proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. + nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] + env_name : auto_MCP_PR # name of the conda environment to be used to process data. + cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). + NSB_matching : true # Set to false to process also the MCs. Set to true if adequate MC productions (DLx) are already available on the IT Container. 
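+  # if true, only real data are processed; they are later matched to the available MC productions (see /fefs/aswg/LST1MAGIC/mc)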
+  NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true.
+
 ```
 
-The file `MAGIC_runs.txt` looks like that:
-```
-2020_11_19,5093174
-2020_11_19,5093175
-2020_12_08,5093491
-2020_12_08,5093492
-2020_12_16,5093711
-2020_12_16,5093712
-2020_12_16,5093713
-2020_12_16,5093714
-2021_02_14,5094483
-2021_02_14,5094484
-2021_02_14,5094485
-2021_02_14,5094486
-2021_02_14,5094487
-2021_02_14,5094488
-2021_03_16,5095265
-2021_03_16,5095266
-2021_03_16,5095267
-2021_03_16,5095268
-2021_03_16,5095271
-2021_03_16,5095272
-2021_03_16,5095273
-2021_03_16,5095277
-2021_03_16,5095278
-2021_03_16,5095281
-2021_03_18,5095376
-2021_03_18,5095377
-2021_03_18,5095380
-2021_03_18,5095381
-2021_03_18,5095382
-2021_03_18,5095383
-```
-
-
-The columns here represent the night and run in which you want to select data. Please **do not add blank spaces** in the rows, as these names will be used to i) find the MAGIC data in the IT Container and ii) create the subdirectories in your working directory. If there is no MAGIC data, please fill this file with "0,0". Similarly, the `LST_runs.txt` file looks like this:
-
-```
-2020_11_18,2923
-2020_11_18,2924
-2020_12_07,3093
-2020_12_15,3265
-2020_12_15,3266
-2020_12_15,3267
-2020_12_15,3268
-2021_02_13,3631
-2021_02_13,3633
-2021_02_13,3634
-2021_02_13,3635
-2021_02_13,3636
-2021_03_15,4069
-2021_03_15,4070
-2021_03_15,4071
-2021_03_17,4125
-```
-Note that the LST nights appear as being one day before MAGIC's!!! This is because LST saves the date at the beginning of the night, while MAGIC saves it at the end. If there is no LST data, please fill this file with "0,0". These files are the only ones we need to modify in order to convert DL0 into DL1 data.
-
-These files can by automatically created (from a table stored on the IT) by the list_from_h5.py scripts: to do so, you have to fill-in the config_h5.yaml file with the list of dates (or range of dates) to be processed, MAGIC and LST runs to be skipped (if there are some runs to be skipped in the dates to be processed) and the name of the target source in the database: in case you want to analyze only one source, put its name in the database and the name you want to assign it (e.g., you would like to call the CrabNebula as Crab in the rest of the analysis) into this configuration file; if you want to analyze all the sources in the given time range, put source_name_database = null and source_name_output won't be considered by the script
+Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the chosen time window (or list of nights):
 
-> $ list_from_h5
+> $ list_from_h5 -c config_general.yaml
 
-First, you have to evaluate the NSB level for each LST run (in the future this information will be provided in a database):
+Now we evaluate the NSB level for each LST run (in the future this information will be provided in a database):
 
 > $ nsb_level -c config_general.yaml
 
-This will launch a set of parallel jobs, one per LST run, that use an lstchain function to evaluate the NSB over (approx.) 25 subruns per run and then calculate the median of these 25 values. According to this median, the run is associated to a bin in the NSB range (standard bins (center and range) are 0.5=(0, 0.75), 1.0=(0.75,1.25), 1.5=(1.25,1.75), 2.0=(1.75,2.25), 2.5=(2.25,2.75), 3.0=(2.75,3.25))
+This will launch a number of parallel jobs, one per LST run, that use an lstchain function to evaluate the NSB over (approx.) 25 subruns per run and then calculate the median of these 25 values. According to this median, the run is associated with a bin in the NSB range. The standard bins (center and range) are 0.5=(0, 0.75), 1.0=(0.75,1.25), 1.5=(1.25,1.75), 2.0=(1.75,2.25), 2.5=(2.25,2.75), 3.0=(2.75,3.25).
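+
+For reference, here is a minimal sketch of the bin assignment (the bin edges are the midpoints between the configured bin centers, mirroring the logic of `LSTnsb.py`; the value of `median_NSB` below is just an example):
+
+```python
+nsb_list = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]  # bin centers, from config_general.yaml
+width = [a / 2 - b / 2 for a, b in zip(nsb_list[1:], nsb_list[:-1])]
+width.append(0.25)  # assumed half-width of the last bin
+nsb_limit = [a + b for a, b in zip(nsb_list, width)]  # upper bin edges
+nsb_limit.insert(0, 0)  # lower edge of the first bin
+
+median_NSB = 1.1  # example median NSB of a run, evaluated over ~25 subruns
+for j in range(len(nsb_list)):
+    if nsb_limit[j] < median_NSB < nsb_limit[j + 1]:
+        print(f"Run assigned to the NSB = {nsb_list[j]} bin")
+```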
 
-The output is a set of txt files (e.g., `LST_1.5_1234.txt` for run 1234), one per LST run, whose title contains the NSB bin assignet to the run and whose content is the string `date,run`
+The output is a set of txt files (e.g., `LST_1.5_1234.txt` for run 1234), one per LST run, whose title contains the NSB bin assigned to the run and whose content is the string `date,run`.
 
-You can check if this process is done with the following commands:
+You can check whether this process is done with the following commands:
 
 > $ squeue -n nsb
 
@@ -162,7 +115,8 @@ or
 
 > $ squeue -u your_user_name
 
-To convert the MAGIC data into DL1 format, you simply do:
+
+At this point we can convert the MAGIC data into DL1 format with the following command:
 > $ setting_up_config_and_dir -c config_general.yaml
 
 The output in the terminal will be something like this:
@@ -174,20 +128,9 @@ This process will take about 10 min to run if the IT cluster is free.
 ```
 The command `setting_up_config_and_dir` does a series of things:
-- Collects the txt files produced above into one txt file per NSB bin (e.g., `LST_1.5_.txt`), whose content is a list of all the `date,runs` couples associated to this background value
-- According to the date of the MAGIC runs to be analyzed, it associates each run (actually, each day, but here it is done run-wise) to the corresponding MAGIC observation period ST...
-```
-ST_list = ["ST0320A", "ST0319A", "ST0318A", "ST0317A", "ST0316A"]
-ST_begin = ["2023_03_10", "2022_12_15", "2022_06_10", "2021_12_30", "2020_10_24"]
-ST_end = [
-    "2024_01_01",
-    "2023_03_09",
-    "2022_08_31",
-    "2022_06_09",
-    "2021_09_29",
-] # ST0320 ongoing -> 'service' end date
-```
-- Creates a directory with your target name within the directory `yourprojectname` and several subdirectories inside it that are necessary for the rest of the data reduction. The main directories are:
+- Collects the txt files produced above into a single txt file per NSB bin (e.g., `LST_1.5_.txt`), whose content is a list of all the `date,run` couples associated with this background value.
+- According to the date of the selected MAGIC runs, each run is associated (in practice each day, but it is handled run-wise here) with the corresponding MAGIC observation period ST_XXXX.
+- Creates a directory with the target name within the directory `yourprojectname` and several subdirectories inside it that are necessary for the rest of the data reduction. The main directories are:
 ```
 /fefs/aswg/workspace/yourname/yourprojectname/Crab/
 /fefs/aswg/workspace/yourname/yourprojectname/Crab/VERSION/DL1
@@ -212,7 +155,7 @@ Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/you
 
 > $ merging_runs (-c config_general.yaml)
 
-**The command inside parenthesis is not mandatory**. By the way, it is better if you don't use it unless you know what you are doing. 
+**The command inside parenthesis is not mandatory if you are running the command in the working directory**. By the way, it is better if you don't use it unless you know what you are doing. The output in the terminal will be something like this: ``` ***** Generating merge_MAGIC bashscripts... diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 81c8ccca..99f2764c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -7,29 +7,34 @@ mc_tel_ids: MAGIC-II: 3 directories: - workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" #where to create the output folders `MCP_version/TARGET/DLx/...` - target_name : "Crab" # here put null to analyse all the sources in a given time range (see list_from_h5) - MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" #put "" if you don't want to process these simulated particles - MC_electrons : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Electrons/sim_telarray/" #put "" if you don't want to process these simulated particles - MC_helium : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Helium/sim_telarray/" #put "" if you don't want to process these simulated particles - MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" #put "" if you don't want to process these simulated particles - MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" #put "" if you don't want to process these simulated particles - # MC paths are ignored if you put NSB_matching = true - + workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. + # MC paths below are ignored if you set NSB_matching = true. + MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" # set to "" if you don't want to process these Monte Carlo simulations. + MC_electrons : "" + MC_helium : "" + MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" + MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + +data_selection: + source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. + source_name_output: 'Crabtest' # Name tag of your target. Used only if source_name_database != null. + target_RA_deg : 83.629 # RA in degrees; Set to null if source_name_database=null. + target_Dec_deg: 22.015 # Dec in degrees; set to null if source_name_database=null. + time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). + min : "2023_11_17" + max : "2024_03_03" + date_list : ['2020_12_15','2021_03_11'] # LST list of days to be processed (only if time_range=False), format: YYYY_MM_DD. + skip_LST_runs: [3216,3217] # LST runs to ignore. + skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. 
+ general: - target_RA_deg : 83.629 #RA in degrees; Put null if target_name=null (i.e., fill it with the source coordinatets only if you are analyzing only one source) - target_Dec_deg: 22.015 #Dec in degrees; Put null if target_name=null (i.e., fill it with the source coordinatets only if you are analyzing only one source) - SimTel_version : "v1.4" + SimTel_version: "v1.4" LST_version : "v0.10" LST_tailcut : "tailcut84" focal_length : "effective" - MAGIC_runs : "Crab_MAGIC_runs.txt" #If there is no MAGIC data, please fill this file with "0, 0" - LST_runs : "Crab_LST_runs.txt" - proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest + proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] - env_name : auto_MCP_PR # name of the conda environment to be used to process data - cluster : "SLURM" # cluster management system on which data are processed: now only SLURM available (almost all of the data on the IT cluster), in the future maybe also condor (PIC, CNAF) - NSB_matching : true # put false to process also the MCs (see below): not recommended if adequate MC productions (DLx) already available on the IT because it requires a lot of time and memory! - # if true, only real data are processed and are later matched to the available MC (see /fefs/aswg/LST1MAGIC/mc) - NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true - + env_name : auto_MCP_PR # name of the conda environment to be used to process data. + cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). + NSB_matching : true # Set to false to process also the MCs. Set to true if adequate MC productions (DLx) are already available on the IT Container. + NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true. diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index c0c49eae..db71b65d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -1,14 +1,14 @@ """ -By using this scrip, the list of MAGIC and LST runs (date and run number) can be automatically created from a dataframe in the .h5 format +This script creates the lists of MAGIC and LST runs (date and run number) from a dataframe in the .h5 format for a specific time range. """ import os from datetime import datetime - import joblib import numpy as np import pandas as pd import yaml +import argparse def split_lst_date(df): @@ -19,12 +19,12 @@ def split_lst_date(df): Parameters ---------- df : :class:`pandas.DataFrame` - Dataframe of the joint MAGIC+LST-1 observations + Dataframe of the joint MAGIC+LST-1 observations based on the .h5 table. Returns ------- :class:`pandas.DataFrame` - The input dataframe with some added columns + The input dataframe with four added columns. 
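+        The added columns are YY_LST, MM_LST, DD_LST and the date_LST string (YYYY_MM_DD format).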
""" date = df["DATE"] @@ -38,17 +38,17 @@ def split_lst_date(df): def magic_date(df): """ - This function appends to the provided dataframe, which contains the LST date, year, month and day, a column with the MAGIC date (in the YYYY_MM_DD format) + This function appends to the provided dataframe (which contains the LST date, year, month and day) a column with the MAGIC dates (in the YYYY_MM_DD format). Parameters ---------- df : :class:`pandas.DataFrame` - Dataframe of the joint MAGIC+LST-1 observations + Dataframe of the joint MAGIC+LST-1 observations based on the .h5 table. Returns ------- :class:`pandas.DataFrame` - The input dataframe with an added column + The input dataframe with an added column. """ date_lst = pd.to_datetime(df["Date (LST convention)"], format="%Y_%m_%d") @@ -59,20 +59,38 @@ def magic_date(df): return df def clear_files(source_in, source_out, df): + + """ + This function deletes any file named XXXX_LST_runs.txt and XXXX_MAGIC_runs.txt from the working directory. + + Parameters + ---------- + source_in : string + Target name in the database. If None, it stands for all the sources observed in a pre-set time interval. + source_out: string + Name tag for the target. Used only if source_in is not None. + df: :class:`pandas.DataFrame` + Dataframe of the joint MAGIC+LST-1 observations based on the .h5 table. + + Returns + ------- + + """ + source_list = [] if source_in is None: source_list = np.unique(df["source"]) else: source_list.append(source_out) - print(source_list) + print("Source list: ",source_list) joblib.dump(source_list, "list_sources.dat") for source_name in source_list: - print(source_name) + print("Target name: ",source_name) file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", - ] # LST, MAGIC!!!! + ] # The order here must be LST before MAGIC! print(file_list) for j in file_list: if os.path.isfile(j): @@ -83,21 +101,25 @@ def clear_files(source_in, source_out, df): def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_list=None): """ - This function create the MAGIC_runs.txt and LST_runs.txt files, which contain the list of runs (with date) to be processed + This function creates the MAGIC_runs.txt and LST_runs.txt files, which contain the list of runs (with corresponding dates) to be processed. Parameters ---------- - source_in : str or null - Name of the source in the database of joint observations. null to process all sources in a given time range + source_in : str or None + Name of the source in the database of joint observations. If None, it will process all sources for the given time range. source_out : str - Name of the source to be used in the output file name + Name of the source to be used in the output file name. Useful only if source_in != None. df : :class:`pandas.DataFrame` - Dataframe of the joint MAGIC+LST-1 observations + Dataframe of the joint MAGIC+LST-1 observations. skip_LST : list - List of the LST runs not to be added to the files + List of the LST runs to be ignored. skip_MAGIC : list - List of the MAGIC runs not to be added to the files + List of the MAGIC runs to be ignored. + is_LST : bool + If you are looking for LST runs, set to True. For MAGIC set False. 
+
     """
+
     source_list = []
     if source_in is None:
         source_list = np.unique(df["source"])
@@ -105,87 +127,90 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis
 
     else:
         source_list.append(source_out)
 
-    print(source_list)
+    print("List of sources: ",source_list)
     for source_name in source_list:
 
         file_list = [
            f"{source_name}_LST_runs.txt",
            f"{source_name}_MAGIC_runs.txt",
-        ]  # LST, MAGIC!!!!
+        ]  # The order here must be LST before MAGIC!
 
         run_listed = []
         if source_in is None:
             df_source = df[df["source"] == source_name]
         else:
             df_source = df[df["source"] == source_in]
+
         if is_LST:
-            print('lst')
-            LST_run = df_source["LST1_run"].tolist()
+            print('LST')
+            LST_run = df_source["LST1_run"].tolist()  # List with runs as strings
             LST_date = df_source["date_LST"].tolist()
             for k in range(len(df_source)):
                 skip = False
-                if LST_run[k]!=LST_run[k]:
+                if np.isnan(LST_run[k]):
                     skip = True
 
                 if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in run_listed):
                     skip = True
 
-                if not skip:
+                if skip is False:
                     with open(file_list[0], "a+") as f:
                         f.write(
                             f"{LST_date[k].replace('-','_')},{str(LST_run[k]).lstrip('0')}\n"
                         )
                         run_listed.append(int(LST_run[k]))
+
         if not is_LST:
-            print('magic')
+            print('MAGIC')
             MAGIC_date = df_source["date_MAGIC"].tolist()
             M2_run=df_source['Run ID'].tolist()
-            print(M2_run)
-            print(M2_run)
             for k in range(len(df_source)):
                 skip = False
-                if M2_run[k]!=M2_run[k]: #TODO check if ok for nan
+                if np.isnan(M2_run[k]):
                     skip = True
                 if (int(M2_run[k]) in skip_MAGIC) or (int(M2_run[k]) in run_listed):
                     skip = True
-                if float(M2_run[k]) not in M1_run_list:
+                if int(M2_run[k]) not in M1_run_list:
                     skip = True
 
-                if not skip:
+                if skip is False:
                     with open(file_list[1], "a+") as f:
-                        print(file_list[1])
                         f.write(
                             f"{MAGIC_date[k].replace('-','_')},{int(M2_run[k])}\n"
                         )
                         run_listed.append(int(M2_run[k]))
 
 def main():
 
     """
-    Main function
+    This function is automatically called when the script is launched.
+    It calls the functions above to create the files XXXXXX_LST_runs.txt and XXXXXX_MAGIC_runs.txt for the desired targets.
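+    Any pre-existing run-list files of the selected sources are first removed by clear_files, so the lists are rebuilt from scratch at every call.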
""" - with open("config_h5.yaml", "rb") as f: + parser = argparse.ArgumentParser() + + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file config_general.yaml", + ) + + args = parser.parse_args() + with open( + args.config_file, "rb" + ) as f: # "rb" mode opens the file in binary format for reading config = yaml.safe_load(f) - - - - - - source_in = config["data_selection_and_lists"]["source_name_database"] - - source_out = config["data_selection_and_lists"]["source_name_output"] - range = config["data_selection_and_lists"]["time_range"] - skip_LST = config["data_selection_and_lists"]["skipped_LST_runs"] - skip_MAGIC = config["data_selection_and_lists"]["skipped_MAGIC_runs"] - - - - + source_in = config["data_selection"]["source_name_database"] + source_out = config["data_selection"]["source_name_output"] + range = config["data_selection"]["time_range"] + skip_LST = config["data_selection"]["skip_LST_runs"] + skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"] df_LST = pd.read_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs" @@ -199,15 +224,16 @@ def main(): df_LST.query( f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', inplace=True, - ) # + ) else: df_LST.query( f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', inplace=True, - ) #(int(M2_run[k]) + ) + if range: - min = str(config["data_selection_and_lists"]["min"]) - max = str(config["data_selection_and_lists"]["max"]) + min = str(config["data_selection"]["min"]) + max = str(config["data_selection"]["max"]) min = datetime.strptime(min, "%Y_%m_%d") max = datetime.strptime(max, "%Y_%m_%d") lst = pd.to_datetime(df_LST["date_LST"].str.replace("_", "-")) @@ -216,8 +242,7 @@ def main(): df_LST = df_LST[df_LST["date"] <= max] else: - dates = config["data_selection_and_lists"]["date_list"] - + dates = config["data_selection"]["date_list"] df_LST = df_LST[df_LST["date_LST"].isin(dates)] df_LST = df_LST.reset_index() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 3cd28fae..9d74131f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -136,7 +136,7 @@ def collect_nsb(config): config : dict Configuration file """ - source = config["directories"]["target_name"] + source = config["data_selection"]["source_name_database"] nsb = config["general"]["nsb"] for nsblvl in nsb: @@ -680,7 +680,7 @@ def main(): MC_protons = str(Path(config["directories"]["MC_protons"])) MC_gammadiff = str(Path(config["directories"]["MC_gammadiff"])) focal_length = config["general"]["focal_length"] - source = config["directories"]["target_name"] + source = config["data_selection"]["source_name_output"] source_list=[] if source is not None: From 9d93d18b8217318e792134b1e2add6ea0bdcbf56 Mon Sep 17 00:00:00 2001 From: Raniere de Menezes Date: Tue, 23 Apr 2024 16:58:45 +0200 Subject: [PATCH 026/236] several updates --- .../semi_automatic_scripts/LSTnsb.py | 19 ++- .../semi_automatic_scripts/__init__.py | 2 - .../config_general.yaml | 2 +- .../semi_automatic_scripts/nsb_level.py | 61 +++++++--- .../semi_automatic_scripts/nsb_to_h5.py | 107 +++++++++++++++++ .../setting_up_config_and_dir.py | 
111 ------------------ 6 files changed, 160 insertions(+), 142 deletions(-) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py index 80e505a4..237007be 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py @@ -115,7 +115,6 @@ def main(): lst_version = config["general"]["LST_version"] lst_tailcut = config["general"]["LST_tailcut"] width = [a / 2 - b / 2 for a, b in zip(nsb_list[1:], nsb_list[:-1])] - source = config["directories"]["target_name"] width.append(0.25) nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] nsb_limit.insert(0, 0) @@ -127,7 +126,7 @@ def main(): ) if NSB_match: - LST_files = np.sort(glob.glob(f"{source}_LST_[0-9]*_{run_number}.txt")) + LST_files = np.sort(glob.glob(f"nsb_LST_[0-9]*_{run_number}.txt")) if len(LST_files) > 1: logger.info( @@ -140,7 +139,7 @@ def main(): logger.info(f"Run {run_number} already processed") return else: - LST_files = np.sort(glob.glob(f"{source}_LST_nsb_*{run_number}*.txt")) + LST_files = np.sort(glob.glob(f"nsb_LST_nsb_*{run_number}*.txt")) if len(LST_files) > 1: logger.warning( @@ -162,16 +161,16 @@ def main(): "No NSB value could be evaluated: check the observation logs (observation problems, car flashes...)" ) return - a = np.median(noise) - logger.info(f"Run n. {run_number}, nsb median {a}") + median_NSB = np.median(noise) + logger.info(f"Run n. {run_number}, nsb median {median_NSB}") if NSB_match: for j in range(0, len(nsb_list)): - if (a < nsb_limit[j + 1]) & (a > nsb_limit[j]): - with open(f"{source}_LST_{nsb_list[j]}_{run_number}.txt", "a+") as f: - f.write(f"{date},{run_number}\n") + if (median_NSB < nsb_limit[j + 1]) & (median_NSB > nsb_limit[j]): + with open(f"nsb_LST_{nsb_list[j]}_{run_number}.txt", "a+") as f: + f.write(f"{date},{run_number},{median_NSB}\n") else: - with open(f"{source}_LST_nsb_{run_number}.txt", "a+") as f: - f.write(f"{a}\n") + with open(f"nsb_LST_nsb_{run_number}.txt", "a+") as f: + f.write(f"{median_NSB}\n") if __name__ == "__main__": diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 2c0cfbb6..5d949be4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -8,7 +8,6 @@ directories_generator, lists_and_bash_gen_MAGIC, lists_and_bash_generator, - nsb_avg, ) from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo @@ -19,7 +18,6 @@ "merge", "mergeMC", "bash_scripts", - "nsb_avg", "collect_nsb", "config_file_gen", "lists_and_bash_generator", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 99f2764c..986da015 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -34,7 +34,7 @@ general: focal_length : "effective" proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] - env_name : auto_MCP_PR # name of the conda environment to be used to process data. 
+  env_name : magic-lst # name of the conda environment to be used to process data.
   cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF).
   NSB_matching : true # Set to false to process also the MCs. Set to true if adequate MC productions (DLx) are already available on the IT Container.
   NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true.
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py
index a36b9133..a9fc7329 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py
@@ -1,6 +1,4 @@
 """
-TO BE FIXED AND ADAPTED TO NEW TABLE/DATABASE
-
 Bash scripts to run LSTnsb.py on all the LST runs by using parallel jobs
 
 Usage: python nsb_level.py (-c config.yaml)
@@ -10,6 +8,8 @@
 import glob
 import logging
 import os
+import pandas as pd
+from datetime import datetime
 
 import numpy as np
 import yaml
@@ -21,7 +21,12 @@
 logger.setLevel(logging.INFO)
 
 
-def bash_scripts(run, date, config, source, env_name):
+
+
+
+
+def bash_scripts(run, date, config, env_name):
 
     """Here we create the bash scripts (one per LST run)
 
@@ -33,8 +38,7 @@
         LST date
     config : str
         Name of the configuration file
-    source : str
-        Target name
+
     env_name : str
         Name of the environment
     """
@@ -47,9 +51,9 @@
         "ulimit -l unlimited\n",
         "ulimit -s unlimited\n",
         "ulimit -a\n\n",
-        f"time conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > {source}_nsblog_{date}_{run}.log 2>&1 \n\n",
+        f"time conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}.log 2>&1 \n\n",
     ]
-    with open(f"{source}_{date}_run_{run}.sh", "w") as f:
+    with open(f"nsb_{date}_run_{run}.sh", "w") as f:
         f.writelines(lines)
 
 
@@ -68,27 +72,47 @@
         default="./config_general.yaml",
         help="Path to a configuration file",
    )
-
+    parser.add_argument(
+        "--begin-date",
+        "-b",
+        dest="begin_date",
+        type=str,
+        help="Begin date to start NSB evaluation from the database.",
+    )
+    parser.add_argument(
+        "--end-date",
+        "-e",
+        dest="end_date",
+        type=str,
+        help="End date to start NSB evaluation from the database.",
+    )
     args = parser.parse_args()
     with open(
         args.config_file, "rb"
     ) as f:  # "rb" mode opens the file in binary format for reading
         config = yaml.safe_load(f)
 
-    source = config["directories"]["target_name"]
-    lst_runs_filename = config["general"]["LST_runs"]
+
     env_name = config["general"]["env_name"]
 
-    with open(str(lst_runs_filename), "r") as LSTfile:
-        run_list = LSTfile.readlines()
+    df_LST = pd.read_hdf(
+        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs"
+    )
+    min = datetime.strptime(args.begin_date, "%Y_%m_%d")
+    max = datetime.strptime(args.end_date, "%Y_%m_%d")
+    lst = pd.to_datetime(df_LST["date_LST"].str.replace("_", "-"))
+    df_LST["date"] = lst
+    df_LST = df_LST[df_LST["date"] >= min]
+    df_LST = df_LST[df_LST["date"] <= max]
+
+    df_LST = df_LST.drop(columns="date")  # drop the helper column used only for the time filter
+    run_LST = df_LST["LST1_run"]
+    date_LST = df_LST["date_LST"]
     print("***** Generating bashscripts...")
-    for run in run_list:
-        run = run.rstrip()
-        run_number = run.split(",")[1]
-        date = run.split(",")[0]
-
-        bash_scripts(run_number, date, args.config_file, source, env_name)
+    for run_number, date in zip(run_LST, date_LST):
+        bash_scripts(run_number, date, args.config_file, env_name)
 
     print("Process name: nsb")
     print("To check the jobs submitted to the cluster, type: squeue -n nsb")
-    list_of_bash_scripts = np.sort(glob.glob(f"{source}_*_run_*.sh"))
+    list_of_bash_scripts = np.sort(glob.glob("nsb_*_run_*.sh"))
 
     if len(list_of_bash_scripts) < 1:
         print(
@@ -105,5 +129,6 @@
     os.system(launch_jobs)
 
 
+
 if __name__ == "__main__":
     main()
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py
new file mode 100644
index 00000000..8547cae2
--- /dev/null
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py
@@ -0,0 +1,107 @@
+"""
+Script that collects the run-wise NSB values evaluated by LSTnsb.py (one nsb_LST_*.txt file per run) and stores them in the LST observations database (.h5)
+
+Usage: python nsb_to_h5.py (-c config.yaml)
+"""
+
+import argparse
+import glob
+import logging
+import os
+import pandas as pd
+from datetime import datetime
+
+import numpy as np
+import yaml
+
+__all__ = ["collect_nsb"]
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.StreamHandler())
+logger.setLevel(logging.INFO)
+
+
+def collect_nsb(df_LST):
+    """
+    Here we collect the run-wise NSB values from the .txt files and fill them into the dataframe
+
+    Parameters
+    ----------
+    df_LST : :class:`pandas.DataFrame`
+        Dataframe of the LST-1 observations
+
+    Returns
+    -------
+    :class:`pandas.DataFrame`
+        The input dataframe with the NSB values filled in
+    """
+    nsb_files=glob.glob('nsb_LST_*.txt')
+    for file_nsb in nsb_files:
+        run=file_nsb.split('_')[3].split('.')[0]  # file name is nsb_LST_{bin}_{run}.txt, so the .txt extension must be stripped
+        nsb=np.nan
+        with open(file_nsb) as ff:
+            line_str = ff.readline().rstrip("\n")
+            nsb=line_str.split(',')[2]
+        df_LST=df_LST.set_index("LST1_run")
+        df_LST.loc[run, 'nsb']=nsb
+        df_LST=df_LST.reset_index()
+    return df_LST
+
+
+def main():
+
+    """
+    Main function
+    """
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--config-file",
+        "-c",
+        dest="config_file",
+        type=str,
+        default="./config_general.yaml",
+        help="Path to a configuration file",
+    )
+    parser.add_argument(
+        "--begin-date",
+        "-b",
+        dest="begin_date",
+        type=str,
+        help="Begin date to start NSB evaluation from the database.",
+    )
+    parser.add_argument(
+        "--end-date",
+        "-e",
+        dest="end_date",
+        type=str,
+        help="End date to start NSB evaluation from the database.",
+    )
+    args = parser.parse_args()
+    with open(
+        args.config_file, "rb"
+    ) as f:  # "rb" mode opens the file in binary format for reading
+        config = yaml.safe_load(f)
+
+    env_name = config["general"]["env_name"]
+
+    df_LST = pd.read_hdf(
+        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs"
+    )
+    min = datetime.strptime(args.begin_date, "%Y_%m_%d")
+    max = datetime.strptime(args.end_date, "%Y_%m_%d")
+    lst = pd.to_datetime(df_LST["date_LST"].str.replace("_", "-"))
+    df_LST["date"] = lst
+    df_LST = df_LST[df_LST["date"] >= min]
+    df_LST = df_LST[df_LST["date"] <= max]
+
+    df_new=collect_nsb(df_LST)
+
+    df_old=pd.read_hdf('/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5', key="joint_obs")
+    df_new=pd.concat([df_old, df_new]).drop_duplicates(keep='first')
+    df_new= df_new.sort_values(by=["DATE","source"])
+
+    df_new.to_hdf("/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py
index 9d74131f..964bed9f 100644
--- 
a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -28,8 +28,6 @@ from magicctapipe.io import resource_file __all__ = [ - "nsb_avg", - "collect_nsb", "config_file_gen", "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", @@ -41,115 +39,6 @@ logger.setLevel(logging.INFO) - -# TODO -''' -def nsb_avg(source, config, LST_list): - - """ - This function evaluates the average of the NSB levels that have been evaluated by LSTnsb_MC.py (one value per run). - - Parameters - ---------- - source : str - Source name - config : str - Config file - LST_list : str - Name of the file where the adopted LST runs are listed - - Returns - ------- - continue_process : string - If 'y', data processing will continue, otherwise it will be stopped - nsb : double - NSB value (average over the runs) - """ - allfile = np.sort( - glob.glob(f"{source}_LST_nsb_*.txt") - ) # List with the names of all files containing the NSB values for each run - if len(allfile) == 0: - print( - "Warning: no file (containing the NSB value) exists for any of the LST runs to be processed. Check the input list" - ) - return - noise = [] - for j in allfile: - with open(j) as ff: - line_str = ff.readline().rstrip("\n") - line = float(line_str) - noise.append(line) - nsb = np.average(noise) - std = np.std(noise) - continue_process = "y" - if std > 0.2: - continue_process = input( - f'The standard deviation of the NSB levels is {std}. We recommend using NSB-matching scripts always that the standard deviation of NSB is > 0.2. Would you like to continue the current analysis anyway? [only "y" or "n"]: ' - ) - delete_index = [] - for n, j in enumerate(allfile): - run = j.split("_")[3].rstrip(".txt") - if abs(noise[n] - nsb) > 3 * std: - sigma_range = input( - f'Run {run} has an NSB value of {noise[n]}, which is more than 3*sigma (i.e. {3*std}) away from the average (i.e. {nsb}). Would you like to continue the current analysis anyway? [only "y" or "n"]: ' - ) - if sigma_range != "y": - return (sigma_range, 0) - - sigma_range = input( - f'Would you like to keep this run (i.e. {run}) in the analysis? [only "y" or "n"]:' - ) - if sigma_range != "y": - delete_index.append(n) - with open(LST_list, "r") as f: - lines = f.readlines() - with open(LST_list, "w") as f: - for i in lines: - if not i.endswith(f"{run}\n"): - f.write(i) - - if len(delete_index) > 0: - index = ( - delete_index.reverse() - ) # Here we reverse the list of indexes associated with out-of-the-average NSB values, such that after deleting one element (below), the indexes of the array do not change. 
- for k in index: - np.delete(noise, k) - - nsb = np.average(noise) - with open(config, "r") as f: - lines = f.readlines() - with open(config, "w") as f: - for i in lines: - if not i.startswith("nsb_value"): - f.write(i) - f.write(f"nsb_value: {nsb}\n") - return (continue_process, nsb) -''' -# TODO -''' -def collect_nsb(config): - """ - Here we split the LST runs in NSB-wise .txt files - - Parameters - ---------- - config : dict - Configuration file - """ - source = config["data_selection"]["source_name_database"] - - nsb = config["general"]["nsb"] - for nsblvl in nsb: - allfile = np.sort(glob.glob(f"{source}_LST_{nsblvl}_*.txt")) - if len(allfile) == 0: - continue - for j in allfile: - with open(j) as ff: - line = ff.readline() - with open(f"{source}_LST_{nsblvl}_.txt", "a+") as f: - f.write(f"{line.rstrip()}\n") - -''' def config_file_gen(ids, target_dir, noise_value, NSB_match): """ From 3e05e32879081ee97dafa9339256c127b1cabadb Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Wed, 24 Apr 2024 12:05:03 +0200 Subject: [PATCH 027/236] Add script for checking availability MAGIC _Y_ runs. --- .../check_MAGIC_runs.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py new file mode 100644 index 00000000..71de53ac --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -0,0 +1,88 @@ +#This script allows us to get information about every MAGIC run ID (and subruns) in files used for common data analysis (MAGIC1, MAGIC2, #LST1). You can also run the script using Jupyter Notebook. + +#The MAGIC files that can be used for analysis are located here: +#/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{year}/{month}/{day} + +#In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2. 'Year,' 'month,' and 'day' specify the date. + +#In the first step, we have to load a dataframe that contains information about the date, the name of the source, and the range of MAGIC #runs. The file in file_path was generated using the spreadsheet (Common MAGIC LST1 data) from the following link: + +#https://docs.google.com/spreadsheets/d/1Tya0tlK-3fuN6_vXOU5FwJruis5vBA9FALAPLFHOfBQ/edit#gid=1066216668 + +import pandas as pd +from datetime import datetime, timedelta +import os +import re + +file_path = '/fefs/aswg/workspace/joanna.wojtowicz/data/magic_first_and_last_runs.csv' +df = pd.read_csv(file_path,sep='\t', dtype={'Date (LST conv.)': str, 'Source': str, 'First run': int, 'Last run': int}) + +#df + +def check_run_ID(path, filename, first_run, last_run, date, source): + # Extract numbers from filename and check range + run_ids = [int(s) for s in re.findall(r'\d+', filename)] + matched = False + magic_runs = [] + + for id in run_ids: + if first_run <= id <= last_run: + matched = True + magic_runs.append(f"{date} \t {source} \t {id}") + #print(f"{date} \t {source} \t {id}") + return magic_runs + +def check_directory(date, source, first_run, last_run, tel_id): + # In the table date are written as follows: YYYYMMDD, for example '20191123' We need a datetime object. 
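+    # e.g. the LST-convention date '20191123' corresponds to the MAGIC-convention date '20191124', hence the one-day shift applied below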
+ date_obj = datetime.strptime(date, '%Y%m%d') + + # Date in MAGIC convention ( 'LST +1 day') + date_obj += timedelta(days=1) + new_date = datetime.strftime(date_obj, '%Y%m%d') + + #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files. + M1_start_failure = datetime.strptime('20220904', '%Y%m%d') + M1_end_failure = datetime.strptime('20221214', '%Y%m%d') + + year = new_date[:4] + month = new_date[4:6] + day = new_date[6:8] + + results_count = {} + + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{year}/{month}/{day}" + + + if os.path.exists(path): + files = os.listdir(path) + for filename in files: + if source in filename: + results = check_run_ID(path, filename, first_run, last_run, date, source) + #We will see many results becuse a file with a run ID has subruns. + #We must count the same results to get information how many subruns we have. + for result in results: + if result in results_count: + results_count[result] += 1 + else: + results_count[result] = 1 + #else: + #print(f"No such file or directory: {date}") + + for result, count in results_count.items(): + print(f"{result} \t {count}") + +print(f'For the MAGIC 1 telescope:') +print(f"Date (LST convention) \t Source \t Run ID \t Subruns") + +for index, row in df.iterrows(): + check_directory(row['Date (LST conv.)'], row['Source'], row['First run'], row['Last run'], tel_id=1) + +print() +print() +print(f'For the MAGIC 2 telescope:') +print(f"Date (LST convention) \t Source \t Run ID \t Subruns") + +for index, row in df.iterrows(): + check_directory(row['Date (LST conv.)'], row['Source'], row['First run'], row['Last run'], tel_id=2) + + From 83a8577d39d6916d3b559b6e986b2198fec55023 Mon Sep 17 00:00:00 2001 From: joannaWojS <167758973+joannaWojS@users.noreply.github.com> Date: Wed, 24 Apr 2024 18:09:48 +0200 Subject: [PATCH 028/236] Update check_MAGIC_runs.py --- .../check_MAGIC_runs.py | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index 71de53ac..2ad5488e 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -1,13 +1,11 @@ -#This script allows us to get information about every MAGIC run ID (and subruns) in files used for common data analysis (MAGIC1, MAGIC2, #LST1). You can also run the script using Jupyter Notebook. +#This script allows us to get information about every MAGIC run ID (and subruns) in files used for common data analysis (MAGIC1, MAGIC2, LST1). -#The MAGIC files that can be used for analysis are located here: +#The MAGIC files that can be used for analysis are located in the IT cluster in the following directory: #/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{year}/{month}/{day} #In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2. 'Year,' 'month,' and 'day' specify the date. -#In the first step, we have to load a dataframe that contains information about the date, the name of the source, and the range of MAGIC #runs. 
The file in file_path was generated using the spreadsheet (Common MAGIC LST1 data) from the following link: - -#https://docs.google.com/spreadsheets/d/1Tya0tlK-3fuN6_vXOU5FwJruis5vBA9FALAPLFHOfBQ/edit#gid=1066216668 +#In the first step, we have to load a dataframe that contains information about the date, the name of the source, and the range of MAGIC #runs. The file in file_path was generated using the spreadsheet "Common MAGIC LST1 data". import pandas as pd from datetime import datetime, timedelta @@ -17,8 +15,6 @@ file_path = '/fefs/aswg/workspace/joanna.wojtowicz/data/magic_first_and_last_runs.csv' df = pd.read_csv(file_path,sep='\t', dtype={'Date (LST conv.)': str, 'Source': str, 'First run': int, 'Last run': int}) -#df - def check_run_ID(path, filename, first_run, last_run, date, source): # Extract numbers from filename and check range run_ids = [int(s) for s in re.findall(r'\d+', filename)] @@ -40,9 +36,7 @@ def check_directory(date, source, first_run, last_run, tel_id): date_obj += timedelta(days=1) new_date = datetime.strftime(date_obj, '%Y%m%d') - #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files. - M1_start_failure = datetime.strptime('20220904', '%Y%m%d') - M1_end_failure = datetime.strptime('20221214', '%Y%m%d') + #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. year = new_date[:4] month = new_date[4:6] @@ -58,15 +52,15 @@ def check_directory(date, source, first_run, last_run, tel_id): for filename in files: if source in filename: results = check_run_ID(path, filename, first_run, last_run, date, source) - #We will see many results becuse a file with a run ID has subruns. + #We will see many results because a file with a run ID has subruns. #We must count the same results to get information how many subruns we have. 
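+                    #e.g. a run split into 12 subruns appears in 12 calibrated files, so its results_count entry ends up at 12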
for result in results: if result in results_count: results_count[result] += 1 else: results_count[result] = 1 - #else: - #print(f"No such file or directory: {date}") + else: + print(f"No such file or directory: {date}") for result, count in results_count.items(): print(f"{result} \t {count}") From 9e5bdea52fceaf568a7a104105344f35fd7e52df Mon Sep 17 00:00:00 2001 From: joannaWojS <167758973+joannaWojS@users.noreply.github.com> Date: Wed, 24 Apr 2024 18:49:54 +0200 Subject: [PATCH 029/236] Update check_MAGIC_runs.py --- .../check_MAGIC_runs.py | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index 2ad5488e..0f0afa28 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -63,20 +63,13 @@ def check_directory(date, source, first_run, last_run, tel_id): print(f"No such file or directory: {date}") for result, count in results_count.items(): - print(f"{result} \t {count}") - -print(f'For the MAGIC 1 telescope:') -print(f"Date (LST convention) \t Source \t Run ID \t Subruns") - -for index, row in df.iterrows(): - check_directory(row['Date (LST conv.)'], row['Source'], row['First run'], row['Last run'], tel_id=1) - -print() -print() -print(f'For the MAGIC 2 telescope:') -print(f"Date (LST convention) \t Source \t Run ID \t Subruns") - -for index, row in df.iterrows(): - check_directory(row['Date (LST conv.)'], row['Source'], row['First run'], row['Last run'], tel_id=2) + print(f"M{tel_id} \t {result} \t {count}") +tel_id = [1, 2] +for tel in tel_id: + print() + print(f"Telescope ID \t Date (LST convention) \t Source \t Run ID \t Subruns") + for index, row in df.iterrows(): + check_directory(row['Date (LST conv.)'], row['Source'], row['First run'], row['Last run'], tel) + From b773c377fa0970deaa9cc6f8f85c4f20162112ec Mon Sep 17 00:00:00 2001 From: joannaWojS <167758973+joannaWojS@users.noreply.github.com> Date: Wed, 24 Apr 2024 19:51:30 +0200 Subject: [PATCH 030/236] Update check_MAGIC_runs.py --- .../semi_automatic_scripts/check_MAGIC_runs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index 0f0afa28..d8fb26cd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -1,9 +1,9 @@ #This script allows us to get information about every MAGIC run ID (and subruns) in files used for common data analysis (MAGIC1, MAGIC2, LST1). #The MAGIC files that can be used for analysis are located in the IT cluster in the following directory: -#/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{year}/{month}/{day} +#/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD} -#In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2. 'Year,' 'month,' and 'day' specify the date. +#In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2. 'YYYY', 'MM', and 'DD' specify the date. #In the first step, we have to load a dataframe that contains information about the date, the name of the source, and the range of MAGIC #runs. 
The file in file_path was generated using the spreadsheet "Common MAGIC LST1 data". @@ -38,13 +38,13 @@ def check_directory(date, source, first_run, last_run, tel_id): #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. - year = new_date[:4] - month = new_date[4:6] - day = new_date[6:8] + YYYY = new_date[:4] + MM = new_date[4:6] + DD = new_date[6:8] results_count = {} - path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{year}/{month}/{day}" + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" if os.path.exists(path): From 1b18664313cb26e5e39ef84bee02a7a6c21a7248 Mon Sep 17 00:00:00 2001 From: joannaWojS <167758973+joannaWojS@users.noreply.github.com> Date: Sat, 27 Apr 2024 14:53:16 +0200 Subject: [PATCH 031/236] Update check_MAGIC_runs.py --- .../check_MAGIC_runs.py | 37 ++++++++++++------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index d8fb26cd..e30877e7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -16,17 +16,21 @@ df = pd.read_csv(file_path,sep='\t', dtype={'Date (LST conv.)': str, 'Source': str, 'First run': int, 'Last run': int}) def check_run_ID(path, filename, first_run, last_run, date, source): - # Extract numbers from filename and check range - run_ids = [int(s) for s in re.findall(r'\d+', filename)] - matched = False - magic_runs = [] + Y = f'_Y_{source}' + #'Y' because we have to be sure that the function counts right filename. + + if Y in filename: + # Extract numbers from filename and check range + run_ids = [int(s) for s in re.findall(r'\d+', filename)] + matched = False + magic_runs = [] - for id in run_ids: - if first_run <= id <= last_run: - matched = True - magic_runs.append(f"{date} \t {source} \t {id}") - #print(f"{date} \t {source} \t {id}") - return magic_runs + for id in run_ids: + if first_run <= id <= last_run: + matched = True + magic_runs.append(f"{date} \t {source} \t {id}") + #print(f"{date} \t {source} \t {id}") + return magic_runs def check_directory(date, source, first_run, last_run, tel_id): # In the table date are written as follows: YYYYMMDD, for example '20191123' We need a datetime object. @@ -36,8 +40,6 @@ def check_directory(date, source, first_run, last_run, tel_id): date_obj += timedelta(days=1) new_date = datetime.strftime(date_obj, '%Y%m%d') - #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. - YYYY = new_date[:4] MM = new_date[4:6] DD = new_date[6:8] @@ -46,11 +48,12 @@ def check_directory(date, source, first_run, last_run, tel_id): path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" - if os.path.exists(path): files = os.listdir(path) + count_with_source = 0 for filename in files: if source in filename: + count_with_source += 1 results = check_run_ID(path, filename, first_run, last_run, date, source) #We will see many results because a file with a run ID has subruns. #We must count the same results to get information how many subruns we have. @@ -59,6 +62,14 @@ def check_directory(date, source, first_run, last_run, tel_id): results_count[result] += 1 else: results_count[result] = 1 + if count_with_source == 0: + if(tel_id == 1): + #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. 
Therefore we have to skip the range when we want to get information about missing files. + if(date<'20220904' or date>'20221214'): + print(f"No files found containing the source '{source}' on {date}, (M{tel_id})") + if(tel_id == 2): + print(f"No files found containing the source '{source}' on {date}, (M{tel_id})") + else: print(f"No such file or directory: {date}") From 207f7b6e05b9235a5e81a065ee382477e377fe57 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Sat, 27 Apr 2024 14:02:20 +0000 Subject: [PATCH 032/236] Bug fixes and NSB database --- .../semi_automatic_scripts/LSTnsb.py | 10 +- .../semi_automatic_scripts/__init__.py | 2 - .../create_LST_table.py | 54 +++++--- .../semi_automatic_scripts/list_from_h5.py | 86 ++++++------- .../semi_automatic_scripts/nsb_level.py | 27 ++-- .../semi_automatic_scripts/nsb_to_h5.py | 115 ++++++++---------- .../setting_up_config_and_dir.py | 81 ++++++------ 7 files changed, 193 insertions(+), 182 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py index 237007be..b752805b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py @@ -152,9 +152,11 @@ def main(): logger.info(f"Run {run_number} already processed.") return - date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] - inputdir = f"/fefs/aswg/data/real/DL1/{date_lst}/{lst_version}/{lst_tailcut}" + # date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] + inputdir = f"/fefs/aswg/data/real/DL1/{date}/{lst_version}/{lst_tailcut}" run_list = np.sort(glob.glob(f"{inputdir}/dl1*Run*{run_number}.*.h5")) + print(inputdir) + print(run_list) noise = nsb(run_list, simtel, lst_config, run_number) if len(noise) == 0: logger.warning( @@ -168,6 +170,10 @@ def main(): if (median_NSB < nsb_limit[j + 1]) & (median_NSB > nsb_limit[j]): with open(f"nsb_LST_{nsb_list[j]}_{run_number}.txt", "a+") as f: f.write(f"{date},{run_number},{median_NSB}\n") + if median_NSB > nsb_limit[-1]: + with open(f"nsb_LST_high_{run_number}.txt", "a+") as f: + f.write(f"{date},{run_number},{median_NSB}\n") + else: with open(f"nsb_LST_nsb_{run_number}.txt", "a+") as f: f.write(f"{median_NSB}\n") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 5d949be4..1ffcd0f3 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -3,7 +3,6 @@ from .merging_runs import cleaning, merge, mergeMC, split_train_test from .nsb_level import bash_scripts from .setting_up_config_and_dir import ( - collect_nsb, config_file_gen, directories_generator, lists_and_bash_gen_MAGIC, @@ -18,7 +17,6 @@ "merge", "mergeMC", "bash_scripts", - "collect_nsb", "config_file_gen", "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py index 9e120517..33c2eec5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py @@ -1,31 +1,57 @@ -import pandas as pd -import numpy as np +""" +Create a new h5 table from the one of joint observations. 
+
+Only the columns needed to produce the lists of LST runs to be processed are preserved, and two columns are added to store NSB level and error codes
+"""
+
+import os
+
+import numpy as np
+import pandas as pd
+
+
 def main():
 
     """
     Main function
     """
-
     df = pd.read_hdf(
         "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5", key="/str"
     )  # TODO: put this file in a shared folder
-    needed_cols=['source', 'DATE', 'LST1_run', 'MAGIC_stereo', 'MAGIC_trigger', 'MAGIC_HV']
-    df_cut=df[needed_cols]
+    needed_cols = [
+        "source",
+        "DATE",
+        "LST1_run",
+        "MAGIC_stereo",
+        "MAGIC_trigger",
+        "MAGIC_HV",
+    ]
+    df_cut = df[needed_cols]
     print(df_cut.columns)
-    df_cut['nsb']=np.repeat(np.nan,len(df_cut))
-    df_cut['error_code']=np.repeat(np.nan,len(df_cut))
+    df_cut["nsb"] = np.repeat(np.nan, len(df_cut))
+    df_cut["error_code"] = np.repeat(np.nan, len(df_cut))
     print(df_cut)
-    if os.path.isfile('/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5'):
-        df_old=pd.read_hdf('/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5', key="joint_obs")
-        df_cut=pd.concat([df_old, df_cut]).drop_duplicates(keep='first')
-        df_cut= df_cut.sort_values(by=["DATE","source"])
-        print(df_cut)  # TODO check if fine with update and nsb
-
-    df_cut.to_hdf("/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w")
+    if os.path.isfile(
+        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5"
+    ):
+        df_old = pd.read_hdf(
+            "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
+            key="joint_obs",
+        )
+        df_cut = pd.concat([df_old, df_cut]).drop_duplicates(
+            subset="LST1_run", keep="first"
+        )
+        df_cut = df_cut.sort_values(by=["DATE", "source"])
+        print(df_cut)  # TODO check if fine with update and nsb
+
+    df_cut.to_hdf(
+        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
+        key="joint_obs",
+        mode="w",
+    )
+
 
 if __name__ == "__main__":
     main()
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
index db71b65d..1c4bc308 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
@@ -2,13 +2,14 @@
 This script creates the lists of MAGIC and LST runs (date and run number) from a dataframe in the .h5 format for a specific time range.
 """
+import argparse
 import os
 from datetime import datetime
+
+import joblib
 import numpy as np
 import pandas as pd
 import yaml
-import argparse
 
 
 def split_lst_date(df):
@@ -58,6 +59,7 @@ def magic_date(df):
     df["date_MAGIC"] = date_magic
     return df
 
+
 def clear_files(source_in, source_out, df):
 
     """
@@ -65,16 +67,12 @@ def clear_files(source_in, source_out, df):
 
     Parameters
     ----------
-    source_in : string
+    source_in : str
         Target name in the database. If None, it stands for all the sources observed in a pre-set time interval.
-    source_out: string
+    source_out : str
         Name tag for the target. Used only if source_in is not None.
-    df: :class:`pandas.DataFrame`
+    df : :class:`pandas.DataFrame`
         Dataframe of the joint MAGIC+LST-1 observations based on the .h5 table. 
- - Returns - ------- - """ source_list = [] @@ -82,11 +80,11 @@ def clear_files(source_in, source_out, df): source_list = np.unique(df["source"]) else: source_list.append(source_out) - - print("Source list: ",source_list) + + print("Source list: ", source_list) joblib.dump(source_list, "list_sources.dat") for source_name in source_list: - print("Target name: ",source_name) + print("Target name: ", source_name) file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", @@ -96,7 +94,7 @@ def clear_files(source_in, source_out, df): if os.path.isfile(j): os.remove(j) print(f"{j} deleted.") - + def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_list=None): @@ -117,23 +115,25 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis List of the MAGIC runs to be ignored. is_LST : bool If you are looking for LST runs, set to True. For MAGIC set False. - + M1_run_list : list + If you are looking for MAGIC runs, pass the list of MAGIC-1 runs here, and the MAGIC-2 database as df. + Only the runs both in the list and in the dataframe (i.e., stereo MAGIC observations) will be saved in the output txt files """ source_list = [] if source_in is None: source_list = np.unique(df["source"]) - + else: source_list.append(source_out) - - print("List of sources: ",source_list) + + print("List of sources: ", source_list) for source_name in source_list: file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", ] # The order here must be LST before MAGIC! - + run_listed = [] if source_in is None: df_source = df[df["source"] == source_name] @@ -141,7 +141,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis df_source = df[df["source"] == source_in] if is_LST: - print('LST') + print("LST") LST_run = df_source["LST1_run"].tolist() # List with runs as strings LST_date = df_source["date_LST"].tolist() for k in range(len(df_source)): @@ -151,7 +151,6 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in run_listed): skip = True - if skip is False: with open(file_list[0], "a+") as f: @@ -161,9 +160,9 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis run_listed.append(int(LST_run[k])) if not is_LST: - print('MAGIC') + print("MAGIC") MAGIC_date = df_source["date_MAGIC"].tolist() - M2_run=df_source['Run ID'].tolist() + M2_run = df_source["Run ID"].tolist() for k in range(len(df_source)): skip = False if np.isnan(M2_run[k]): @@ -176,16 +175,14 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis if skip is False: with open(file_list[1], "a+") as f: - f.write( - f"{MAGIC_date[k].replace('-','_')},{int(M2_run[k])}\n" - ) + f.write(f"{MAGIC_date[k].replace('-','_')},{int(M2_run[k])}\n") run_listed.append(int(M2_run[k])) def main(): """ - This function is automatically called whe script is launched. + This function is automatically called whe script is launched. It calls the functions above to create the files XXXXXX_LST_runs.txt and XXXXX_MAGIC_runs.txt for the desired targets. 
""" @@ -205,7 +202,7 @@ def main(): args.config_file, "rb" ) as f: # "rb" mode opens the file in binary format for reading config = yaml.safe_load(f) - + source_in = config["data_selection"]["source_name_database"] source_out = config["data_selection"]["source_name_output"] range = config["data_selection"]["time_range"] @@ -213,7 +210,8 @@ def main(): skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"] df_LST = pd.read_hdf( - "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs" + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", ) # TODO: put this file in a shared folder df_LST = split_lst_date(df_LST) @@ -230,7 +228,7 @@ def main(): f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', inplace=True, ) - + if range: min = str(config["data_selection"]["min"]) max = str(config["data_selection"]["max"]) @@ -250,26 +248,32 @@ def main(): clear_files(source_in, source_out, df_LST) list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) - list_date_LST=np.unique(df_LST['date_LST']) - list_date_LST_low=[sub.replace('-', '_') for sub in list_date_LST] + list_date_LST = np.unique(df_LST["date_LST"]) + list_date_LST_low = [sub.replace("-", "_") for sub in list_date_LST] print(list_date_LST_low) - df_MAGIC1=pd.read_hdf('/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5', key='MAGIC1/runs_M1') - df_MAGIC2=pd.read_hdf('/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5', key='MAGIC2/runs_M2') - + df_MAGIC1 = pd.read_hdf( + "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", + key="MAGIC1/runs_M1", + ) + df_MAGIC2 = pd.read_hdf( + "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", + key="MAGIC2/runs_M2", + ) + print(list_date_LST) - df_MAGIC1=df_MAGIC1[df_MAGIC1['Date (LST convention)'].isin(list_date_LST_low)] - df_MAGIC2=df_MAGIC2[df_MAGIC2['Date (LST convention)'].isin(list_date_LST_low)] + df_MAGIC1 = df_MAGIC1[df_MAGIC1["Date (LST convention)"].isin(list_date_LST_low)] + df_MAGIC2 = df_MAGIC2[df_MAGIC2["Date (LST convention)"].isin(list_date_LST_low)] print(df_MAGIC2) - - df_MAGIC2=magic_date(df_MAGIC2) - df_MAGIC1=magic_date(df_MAGIC1) - df_MAGIC2 = df_MAGIC2.rename(columns={'Source': 'source'}) + + df_MAGIC2 = magic_date(df_MAGIC2) + df_MAGIC1 = magic_date(df_MAGIC1) + df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"}) print(df_MAGIC2) - M1_runs=df_MAGIC1['Run ID'].tolist() + M1_runs = df_MAGIC1["Run ID"].tolist() list_run(source_in, source_out, df_MAGIC2, skip_LST, skip_MAGIC, False, M1_runs) - + if __name__ == "__main__": main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py index a9fc7329..9c515585 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py @@ -8,10 +8,10 @@ import glob import logging import os -import pandas as pd from datetime import datetime import numpy as np +import pandas as pd import yaml __all__ = ["bash_scripts"] @@ -21,11 +21,6 @@ logger.setLevel(logging.INFO) - - - - - def bash_scripts(run, date, config, env_name): """Here we create the bash scripts (one per LST run) @@ -91,28 +86,31 @@ def main(): args.config_file, "rb" ) as f: # "rb" mode opens the file in binary format for reading config = yaml.safe_load(f) - 
+ env_name = config["general"]["env_name"] df_LST = pd.read_hdf( - "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs" + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", ) + min = datetime.strptime(args.begin_date, "%Y_%m_%d") max = datetime.strptime(args.end_date, "%Y_%m_%d") - lst = pd.to_datetime(df_LST["date_LST"].str.replace("_", "-")) + lst = pd.to_datetime(df_LST["DATE"].str.replace("_", "-")) df_LST["date"] = lst + print(df_LST.columns) df_LST = df_LST[df_LST["date"] >= min] df_LST = df_LST[df_LST["date"] <= max] - - df_LST=df_LST.drop('date') - run_LST=df_LST["LST1_run"] - date_LST=df_LST["date_LST"] + + df_LST = df_LST.drop(columns="date") + run_LST = df_LST["LST1_run"] + date_LST = df_LST["DATE"] print("***** Generating bashscripts...") for run_number, date in zip(run_LST, date_LST): bash_scripts(run_number, date, args.config_file, env_name) print("Process name: nsb") print("To check the jobs submitted to the cluster, type: squeue -n nsb") - list_of_bash_scripts = np.sort(glob.glob(f"nsb_*_run_*.sh")) + list_of_bash_scripts = np.sort(glob.glob("nsb_*_run_*.sh")) if len(list_of_bash_scripts) < 1: print( @@ -129,6 +127,5 @@ def main(): os.system(launch_jobs) - if __name__ == "__main__": main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py index 8547cae2..7aca980f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py @@ -4,17 +4,13 @@ Usage: python nsb_level.py (-c config.yaml) """ -import argparse import glob import logging -import os -import pandas as pd -from datetime import datetime import numpy as np -import yaml +import pandas as pd -__all__ = ["bash_scripts"] +__all__ = ["collect_nsb"] logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) @@ -27,24 +23,26 @@ def collect_nsb(df_LST): Parameters ---------- - config : dict - Configuration file + df_LST : :class:`pandas.DataFrame` + Dataframe collecting the LST1 runs (produced by the create_LST_table script) + + Returns + ------- + :class:`pandas.DataFrame` + Same dataframe as the input one, but with NSB values added in the 'nsb' column (for the runs processed by nsb_level.py) """ - nsb_files=glob.glob('nsb_LST_*.txt') + nsb_files = glob.glob("nsb_LST_*.txt") for file_nsb in nsb_files: - run=file_nsb.split('_')[3] - nsb=np.nan + run = file_nsb.split("_")[3] + run = run.split(".")[0] + nsb = np.nan with open(file_nsb) as ff: line_str = ff.readline().rstrip("\n") - nsb=line_str.split(',')[2] - df_LST=df_LST.set_index("LST1_run") - df_LST.loc[df_LST.index[run], 'nsb']=nsb - df_LST=df_LST.reset_index() - - - - - + nsb = line_str.split(",")[2] + df_LST = df_LST.set_index("LST1_run") + df_LST.loc[run, "nsb"] = float(nsb) + df_LST = df_LST.reset_index() + return df_LST def main(): @@ -53,55 +51,40 @@ def main(): Main function """ - parser = argparse.ArgumentParser() - parser.add_argument( - "--config-file", - "-c", - dest="config_file", - type=str, - default="./config_general.yaml", - help="Path to a configuration file", + df_LST = pd.read_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", ) - parser.add_argument( - "--begin-date", - "-b", - dest="begin_date", - type=str, - help="Begin date to start NSB evaluation from the database.", + + df_new = collect_nsb(df_LST) + + df_old = 
pd.read_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", ) - parser.add_argument( - "--end-date", - "-e", - dest="end_date", - type=str, - help="End date to start NSB evaluation from the database.", + df_new = pd.concat([df_new, df_old]).drop_duplicates( + subset="LST1_run", keep="first" ) - args = parser.parse_args() - with open( - args.config_file, "rb" - ) as f: # "rb" mode opens the file in binary format for reading - config = yaml.safe_load(f) - - env_name = config["general"]["env_name"] - - df_LST = pd.read_hdf( - "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs" + df_new = df_new.sort_values(by=["DATE", "source", "LST1_run"]) + df_new["error_code"] = np.where(df_new["nsb"].notna(), df_new["error_code"], "000") + df_new = df_new[ + [ + "source", + "DATE", + "LST1_run", + "MAGIC_stereo", + "MAGIC_trigger", + "MAGIC_HV", + "nsb", + "error_code", + ] + ] + df_new.to_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", + mode="w", ) - min = datetime.strptime(args.begin_date, "%Y_%m_%d") - max = datetime.strptime(args.end_date, "%Y_%m_%d") - lst = pd.to_datetime(df_LST["date_LST"].str.replace("_", "-")) - df_LST["date"] = lst - df_LST = df_LST[df_LST["date"] >= min] - df_LST = df_LST[df_LST["date"] <= max] - - - df_new=collect_nsb(df_LST) - - df_old=pd.read_hdf('/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5', key="joint_obs") - df_new=pd.concat([df_old, df_new]).drop_duplicates(keep='first') - df_new= df_new.sort_values(by=["DATE","source"]) - - - df_new.to_hdf("/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w") + + if __name__ == "__main__": main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 964bed9f..34b07555 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -15,14 +15,15 @@ """ import argparse import glob +import json import logging import os -import time + +# import time from pathlib import Path import numpy as np import yaml -import json from magicctapipe import __version__ from magicctapipe.io import resource_file @@ -251,7 +252,7 @@ def lists_and_bash_gen_MAGIC( MAGIC_runs.append(MAGIC) for i in MAGIC_runs: - + if telescope_ids[-1] > 0: lines = [ f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', @@ -291,7 +292,6 @@ def lists_and_bash_gen_MAGIC( if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): for i in MAGIC_runs: - if telescope_ids[-1] > 0: number_of_nodes = glob.glob( f'/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' @@ -314,9 +314,7 @@ def lists_and_bash_gen_MAGIC( "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] - with open( - f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w" - ) as f: + with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: f.writelines(lines) if telescope_ids[-2] > 0: @@ -341,9 +339,7 @@ def lists_and_bash_gen_MAGIC( 
"export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] - with open( - f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w" - ) as f: + with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: f.writelines(lines) else: if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): @@ -465,7 +461,7 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): MAGIC_runs.append(MAGIC) if NSB_match: for i in MAGIC_runs: - + if telescope_ids[-1] > 0: if not os.path.exists(f"{dl1_dir}"): os.mkdir(f"{dl1_dir}") @@ -474,13 +470,9 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): if not os.path.exists(f"{dl1_dir}/M2/{i[0]}"): os.mkdir(f"{dl1_dir}/M2/{i[0]}") - if not os.path.exists( - f"{dl1_dir}/M2/{i[0]}/{i[1]}" - ): + if not os.path.exists(f"{dl1_dir}/M2/{i[0]}/{i[1]}"): os.mkdir(f"{dl1_dir}/M2/{i[0]}/{i[1]}") - if not os.path.exists( - f"{dl1_dir}/M2/{i[0]}/{i[1]}/logs" - ): + if not os.path.exists(f"{dl1_dir}/M2/{i[0]}/{i[1]}/logs"): os.mkdir(f"{dl1_dir}/M2/{i[0]}/{i[1]}/logs") if telescope_ids[-2] > 0: if not os.path.exists(f"{dl1_dir}"): @@ -490,13 +482,9 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): if not os.path.exists(f"{dl1_dir}/M1/{i[0]}"): os.mkdir(f"{dl1_dir}/M1/{i[0]}") - if not os.path.exists( - f"{dl1_dir}/M1/{i[0]}/{i[1]}" - ): + if not os.path.exists(f"{dl1_dir}/M1/{i[0]}/{i[1]}"): os.mkdir(f"{dl1_dir}/M1/{i[0]}/{i[1]}") - if not os.path.exists( - f"{dl1_dir}/M1/{i[0]}/{i[1]}/logs" - ): + if not os.path.exists(f"{dl1_dir}/M1/{i[0]}/{i[1]}/logs"): os.mkdir(f"{dl1_dir}/M1/{i[0]}/{i[1]}/logs") else: if telescope_ids[-1] > 0: @@ -560,9 +548,8 @@ def main(): MAGIC_runs = np.genfromtxt( MAGIC_runs_and_dates, dtype=str, delimiter="," ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - - - LST_runs_and_dates = config["general"]["LST_runs"] + + # LST_runs_and_dates = config["general"]["LST_runs"] MC_gammas = str(Path(config["directories"]["MC_gammas"])) MC_electrons = str(Path(config["directories"]["MC_electrons"])) MC_helium = str(Path(config["directories"]["MC_helium"])) @@ -571,28 +558,24 @@ def main(): focal_length = config["general"]["focal_length"] source = config["data_selection"]["source_name_output"] - source_list=[] + source_list = [] if source is not None: - source_list=json.load('list_sources.dat') - + source_list = json.load("list_sources.dat") + else: source_list.append(source) for source_name in source_list: - target_dir = str( - Path(config["directories"]["workspace_dir"]) - / source_name - ) - + target_dir = str(Path(config["directories"]["workspace_dir"]) / source_name) + noise_value = [0, 0, 0] if not NSB_match: nsb = config["general"]["NSB_MC"] - + noisebright = 1.15 * pow(nsb, 1.115) biasdim = 0.358 * pow(nsb, 0.805) noise_value = [nsb, noisebright, biasdim] - - - #TODO: fix here above + + # TODO: fix here above print("*** Converting DL0 into DL1 data ***") print(f'Process name: {target_dir.split("/")[-2:][1]}') print( @@ -603,13 +586,22 @@ def main(): directories_generator( target_dir, telescope_ids, MAGIC_runs, NSB_match ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen(telescope_ids, target_dir, noise_value, NSB_match)#TODO: fix here + config_file_gen( + telescope_ids, target_dir, 
noise_value, NSB_match + ) # TODO: fix here if not NSB_match: # Below we run the analysis on the MC data - if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): + if (args.analysis_type == "onlyMC") or ( + args.analysis_type == "doEverything" + ): lists_and_bash_generator( - "gammas", target_dir, MC_gammas, SimTel_version, focal_length, env_name + "gammas", + target_dir, + MC_gammas, + SimTel_version, + focal_length, + env_name, ) # gammas lists_and_bash_generator( "electrons", @@ -620,7 +612,12 @@ def main(): env_name, ) # electrons lists_and_bash_generator( - "helium", target_dir, MC_helium, SimTel_version, focal_length, env_name + "helium", + target_dir, + MC_helium, + SimTel_version, + focal_length, + env_name, ) # helium lists_and_bash_generator( "protons", From 5ab3e06be54bec4cdf8f0c1c7ea23a0e405c02d1 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Sun, 28 Apr 2024 13:40:52 +0000 Subject: [PATCH 033/236] LST database --- .../semi_automatic_scripts/create_LST_table.py | 5 +++++ .../semi_automatic_scripts/list_from_h5.py | 14 +++++++++----- .../lst1_magic/semi_automatic_scripts/nsb_to_h5.py | 6 ++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py index 33c2eec5..4c7a5578 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py @@ -31,6 +31,11 @@ def main(): df_cut = df[needed_cols] print(df_cut.columns) df_cut["nsb"] = np.repeat(np.nan, len(df_cut)) + + df_cut["lstchain_0.9"] = np.zeros(len(df_cut), dtype=bool) + + df_cut["lstchain_0.10"] = np.zeros(len(df_cut), dtype=bool) + df_cut["error_code"] = np.repeat(np.nan, len(df_cut)) print(df_cut) if os.path.isfile( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 1c4bc308..e326ecb1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -147,7 +147,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis for k in range(len(df_source)): skip = False if np.isnan(LST_run[k]): - skip = True + continue if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in run_listed): skip = True @@ -166,7 +166,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis for k in range(len(df_source)): skip = False if np.isnan(M2_run[k]): - skip = True + continue if (int(M2_run[k]) in skip_MAGIC) or (int(M2_run[k]) in run_listed): skip = True @@ -215,17 +215,19 @@ def main(): ) # TODO: put this file in a shared folder df_LST = split_lst_date(df_LST) - df_LST = df_LST.astype({"YY_LST": int, "MM_LST": int, "DD_LST": int}) + df_LST = df_LST.astype( + {"YY_LST": int, "MM_LST": int, "DD_LST": int, "nsb": float, "LST1_run": int} + ) stereo = True if source_in is None: df_LST.query( - f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', + f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & nsb <=3.0 & error_code.isnull()', inplace=True, ) else: df_LST.query( - f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo}', + f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == 
{stereo} & nsb <=3.0 & error_code.isnull()', inplace=True, ) @@ -236,6 +238,8 @@ def main(): max = datetime.strptime(max, "%Y_%m_%d") lst = pd.to_datetime(df_LST["date_LST"].str.replace("_", "-")) df_LST["date"] = lst + print("lst", lst) + print(min, max) df_LST = df_LST[df_LST["date"] >= min] df_LST = df_LST[df_LST["date"] <= max] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py index 7aca980f..1245aadf 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py @@ -66,6 +66,10 @@ def main(): subset="LST1_run", keep="first" ) df_new = df_new.sort_values(by=["DATE", "source", "LST1_run"]) + + df_new["error_code"] = df_new["error_code"].replace("000", np.nan) + + df_new["error_code"] = np.where(df_new["nsb"] <= 3.0, df_new["error_code"], "001") df_new["error_code"] = np.where(df_new["nsb"].notna(), df_new["error_code"], "000") df_new = df_new[ [ @@ -76,6 +80,8 @@ def main(): "MAGIC_trigger", "MAGIC_HV", "nsb", + "lstchain_0.9", + "lstchain_0.10", "error_code", ] ] From 63dc843d3601c4881c208907f947a84e921c6611 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 29 Apr 2024 08:25:47 +0000 Subject: [PATCH 034/236] lstchain + minor fixes --- .../semi_automatic_scripts/LSTnsb.py | 3 - .../create_LST_table.py | 5 +- .../semi_automatic_scripts/list_from_h5.py | 8 +-- .../lstchain_version.py | 71 +++++++++++++++++++ .../semi_automatic_scripts/nsb_level.py | 2 - .../semi_automatic_scripts/nsb_to_h5.py | 7 -- 6 files changed, 74 insertions(+), 22 deletions(-) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py index b752805b..861dac64 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py @@ -52,7 +52,6 @@ def nsb(run_list, simtel, lst_config, run_number): mod = int(len(run_list) / denominator) failed = 0 for ii in range(0, len(run_list)): - print(mod) subrun = run_list[ii].split(".")[-2] if mod == 0: break @@ -155,8 +154,6 @@ def main(): # date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] inputdir = f"/fefs/aswg/data/real/DL1/{date}/{lst_version}/{lst_tailcut}" run_list = np.sort(glob.glob(f"{inputdir}/dl1*Run*{run_number}.*.h5")) - print(inputdir) - print(run_list) noise = nsb(run_list, simtel, lst_config, run_number) if len(noise) == 0: logger.warning( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py index 4c7a5578..39842aba 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py @@ -29,7 +29,7 @@ def main(): "MAGIC_HV", ] df_cut = df[needed_cols] - print(df_cut.columns) + df_cut["nsb"] = np.repeat(np.nan, len(df_cut)) df_cut["lstchain_0.9"] = np.zeros(len(df_cut), dtype=bool) @@ -37,7 +37,6 @@ def main(): df_cut["lstchain_0.10"] = np.zeros(len(df_cut), dtype=bool) df_cut["error_code"] = np.repeat(np.nan, len(df_cut)) - print(df_cut) if os.path.isfile( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5" ): @@ -49,7 +48,7 @@ def main(): subset="LST1_run", 
keep="first" ) df_cut = df_cut.sort_values(by=["DATE", "source"]) - print(df_cut) # TODO check if fine with update and nsb + # TODO check if fine with update and nsb df_cut.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index e326ecb1..c8cdfd4a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -89,7 +89,6 @@ def clear_files(source_in, source_out, df): f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", ] # The order here must be LST before MAGIC! - print(file_list) for j in file_list: if os.path.isfile(j): os.remove(j) @@ -238,8 +237,7 @@ def main(): max = datetime.strptime(max, "%Y_%m_%d") lst = pd.to_datetime(df_LST["date_LST"].str.replace("_", "-")) df_LST["date"] = lst - print("lst", lst) - print(min, max) + df_LST = df_LST[df_LST["date"] >= min] df_LST = df_LST[df_LST["date"] <= max] @@ -255,7 +253,6 @@ def main(): list_date_LST = np.unique(df_LST["date_LST"]) list_date_LST_low = [sub.replace("-", "_") for sub in list_date_LST] - print(list_date_LST_low) df_MAGIC1 = pd.read_hdf( "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", key="MAGIC1/runs_M1", @@ -265,15 +262,12 @@ def main(): key="MAGIC2/runs_M2", ) - print(list_date_LST) df_MAGIC1 = df_MAGIC1[df_MAGIC1["Date (LST convention)"].isin(list_date_LST_low)] df_MAGIC2 = df_MAGIC2[df_MAGIC2["Date (LST convention)"].isin(list_date_LST_low)] - print(df_MAGIC2) df_MAGIC2 = magic_date(df_MAGIC2) df_MAGIC1 = magic_date(df_MAGIC1) df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"}) - print(df_MAGIC2) M1_runs = df_MAGIC1["Run ID"].tolist() list_run(source_in, source_out, df_MAGIC2, skip_LST, skip_MAGIC, False, M1_runs) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py new file mode 100644 index 00000000..f33e8f12 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py @@ -0,0 +1,71 @@ +""" +Fill the lstchain_0.9 and lstchain_0.10 columns of the LST database (i.e., which version of data is on the IT cluster) +""" + + +import pandas as pd +import os + + +def main(): + + """ + Main function + """ + + df_LST = pd.read_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", + ) + for i, row in df_LST.iterrows(): + + lst_9 = False + lst_10=False + run=row['LST1_run'] + run=format(int(run), '05d') + date=row['DATE'] + + if os.path.isfile(f'/fefs/aswg/data/real/DL1/{date}/v0.9/tailcut84/dl1_LST-1.Run{run}.h5'): + lst_9=True + if os.path.isfile(f'/fefs/aswg/data/real/DL1/{date}/v0.10/tailcut84/dl1_LST-1.Run{run}.h5'): + lst_10=True + if (lst_9==False) and (lst_10==False): + df_LST.at[i,'error_code'] = '002' + df_LST.at[i,'lstchain_0.9'] = lst_9 + df_LST.at[i,'lstchain_0.10'] = lst_10 + + + + df_LST.to_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", + mode="w", + ) + +if __name__ == "__main__": + main() + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py index 9c515585..88c1ebcc 100644 --- 
a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py @@ -98,7 +98,6 @@ def main(): max = datetime.strptime(args.end_date, "%Y_%m_%d") lst = pd.to_datetime(df_LST["DATE"].str.replace("_", "-")) df_LST["date"] = lst - print(df_LST.columns) df_LST = df_LST[df_LST["date"] >= min] df_LST = df_LST[df_LST["date"] <= max] @@ -123,7 +122,6 @@ def main(): else: launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})" - # print(launch_jobs) os.system(launch_jobs) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py index 1245aadf..2aec74cd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py @@ -58,13 +58,6 @@ def main(): df_new = collect_nsb(df_LST) - df_old = pd.read_hdf( - "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", - key="joint_obs", - ) - df_new = pd.concat([df_new, df_old]).drop_duplicates( - subset="LST1_run", keep="first" - ) df_new = df_new.sort_values(by=["DATE", "source", "LST1_run"]) df_new["error_code"] = df_new["error_code"].replace("000", np.nan) From 0d74f0c9fccb775027f37e5d02f69dc4f13ca08a Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 29 Apr 2024 08:36:22 +0000 Subject: [PATCH 035/236] fix linter --- .../lst1_magic/semi_automatic_scripts/lstchain_version.py | 1 + 1 file changed, 1 insertion(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py index f33e8f12..398b6843 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py @@ -8,6 +8,7 @@ def main(): + """ Main function From b6937b5f078746e03970dda4d0e69f8e811a0f68 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 29 Apr 2024 08:44:37 +0000 Subject: [PATCH 036/236] linter --- .../lstchain_version.py | 64 ++++++------------- 1 file changed, 21 insertions(+), 43 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py index 398b6843..7386d66a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py @@ -3,12 +3,12 @@ """ -import pandas as pd import os +import pandas as pd + def main(): - """ Main function @@ -19,54 +19,32 @@ def main(): key="joint_obs", ) for i, row in df_LST.iterrows(): - + lst_9 = False - lst_10=False - run=row['LST1_run'] - run=format(int(run), '05d') - date=row['DATE'] - - if os.path.isfile(f'/fefs/aswg/data/real/DL1/{date}/v0.9/tailcut84/dl1_LST-1.Run{run}.h5'): - lst_9=True - if os.path.isfile(f'/fefs/aswg/data/real/DL1/{date}/v0.10/tailcut84/dl1_LST-1.Run{run}.h5'): - lst_10=True - if (lst_9==False) and (lst_10==False): - df_LST.at[i,'error_code'] = '002' - df_LST.at[i,'lstchain_0.9'] = lst_9 - df_LST.at[i,'lstchain_0.10'] = lst_10 - + lst_10 = False + run = row["LST1_run"] + run = format(int(run), "05d") + date = row["DATE"] + + if os.path.isfile( + f"/fefs/aswg/data/real/DL1/{date}/v0.9/tailcut84/dl1_LST-1.Run{run}.h5" + ): + lst_9 = True + if os.path.isfile( + 
f"/fefs/aswg/data/real/DL1/{date}/v0.10/tailcut84/dl1_LST-1.Run{run}.h5" + ): + lst_10 = True + if (lst_9 == False) and (lst_10 == False): + df_LST.at[i, "error_code"] = "002" + df_LST.at[i, "lstchain_0.9"] = lst_9 + df_LST.at[i, "lstchain_0.10"] = lst_10 - df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w", ) + if __name__ == "__main__": main() - - - - - - - - - - - - - - - - - - - - - - - - - From 54a7f68accd38a2996c8a80e296d23fb21dc1f12 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 29 Apr 2024 10:29:20 +0000 Subject: [PATCH 037/236] doc & new folder --- magicctapipe/scripts/lst1_magic/README.md | 40 +++++++++++-------- .../semi_automatic_scripts/__init__.py | 4 +- .../{ => database_production}/LSTnsb.py | 0 .../create_LST_table.py | 0 .../lstchain_version.py | 0 .../{ => database_production}/nsb_level.py | 0 .../{ => database_production}/nsb_to_h5.py | 0 7 files changed, 25 insertions(+), 19 deletions(-) rename magicctapipe/scripts/lst1_magic/semi_automatic_scripts/{ => database_production}/LSTnsb.py (100%) rename magicctapipe/scripts/lst1_magic/semi_automatic_scripts/{ => database_production}/create_LST_table.py (100%) rename magicctapipe/scripts/lst1_magic/semi_automatic_scripts/{ => database_production}/lstchain_version.py (100%) rename magicctapipe/scripts/lst1_magic/semi_automatic_scripts/{ => database_production}/nsb_level.py (100%) rename magicctapipe/scripts/lst1_magic/semi_automatic_scripts/{ => database_production}/nsb_to_h5.py (100%) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index c870823e..9b49ad6a 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -99,23 +99,6 @@ Now that the configuration file is ready, lets create a list with all the MAGIC+ > $ list_from_h5 -c config_general.yaml -Now we evaluate the NSB level for each LST run (in the future this information will be provided in a database): - -> $ nsb_level -c config_general.yaml - -This will launch a number of parallel jobs, one per LST run, that use an lstchain function to evaluate the NSB over (approx.) 25 subruns per run and then calculate the median of these 25 values. According to this median, the run is associated to a bin in the NSB range. The standard bins (center and range) are 0.5=(0, 0.75), 1.0=(0.75,1.25), 1.5=(1.25,1.75), 2.0=(1.75,2.25), 2.5=(2.25,2.75), 3.0=(2.75,3.25). - -The output is a set of txt files (e.g., `LST_1.5_1234.txt` for run 1234), one per LST run, whose title contains the NSB bin assigned to the run and whose content is the string `date,run` - -You can follow up this process is done with the following commands: - -> $ squeue -n nsb - -or - -> $ squeue -u your_user_name - - At this point we can convert the MAGIC data into DL1 format with the following command: > $ setting_up_config_and_dir -c config_general.yaml @@ -218,3 +201,26 @@ Since the DL3 may have only a few MBs, it is typically convenient to download it We prepared a [Jupyter Notebook](https://github.com/ranieremenezes/magic-cta-pipe/blob/master/magicctapipe/scripts/lst1_magic/SED_and_LC_from_DL3.ipynb) that quickly creates a counts map, a significance curve, an SED, and a light curve. You can give it a try. The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs. 
Note that the notebooks run with gammapy v0.20 or higher, while the gammapy version adopted in the MAGIC+LST-1 pipeline is v0.19.
+
+
+## For maintainers (creation of the MAGIC and LST databases)
+
+To create and update the MAGIC and LST databases (from the one produced by AB and FDP) you should use the scripts in `database_production`:
+
+- `create_lst_table`: creates the LST database (1 row per LST run) by dropping some columns from the parent one (AB, FDP) and adding columns for the NSB value (NaN by default), the lstchain versions (one column per version, False by default) and the error codes (NaN by default). Launched as `python create_lst_table.py`
+
+- `nsb_level`: evaluates, for every LST run, the respective NSB value (i.e., the median over the NSB estimated by lstchain for approx. 25 sub-runs per run). This script launches a set of jobs (one per run; each job calls the `LSTnsb` script) and each job produces an output txt file containing a string like `date,run,NSB`; the titles of these files indicate both the run number and the NSB range (0.5=(0,0.75), 1.0=(0.75, 1.25), ..., 2.5=(2.25,2.75), 3.0=(2.75,3.25), `high`=(3.25,Infinity)). To limit the number of simultaneous jobs running on SLURM, you should always provide a begin and an end date (format YYYY_MM_DD) in the options. Launched as `python nsb_level.py -c config_general.yaml -b begin_date -e end_date`
+
+- `LSTnsb`: called by `nsb_level`, it gathers all the subruns for a run, evaluates the NSB for approx. 25 of them (using the lstchain `calculate_noise_parameters` function), evaluates the median over these values and the approximate NSB level (0.5, 1.0, 1.5, ..., 2.5, 3.0, `high`), and then creates one txt file per run. These files contain the value of the NSB (i.e., the median over the subruns) and are needed to fill the `nsb` column of the database
+
+- `nsb_to_h5`: this script reads the txt files created by `nsb_level` to know the NSB value for each run. This value is used to fill the `nsb` column of the database at the location of the respective run number. It also puts '000' as an error code in case the NSB is NaN (i.e., not evaluated for the LST run), and '001' in case NSB>3.0 (too high an NSB to be processed with a standard analysis!). Launched as `python nsb_to_h5.py`
+
+- `lstchain_version`: this script loops over all the rows of the database, extracts the date and run number from the table, and looks for the data saved on the IT cluster (i.e., which version of lstchain has been used to process a run). For each run, it sets the lstchain_0.9 (lstchain_0.10) cell to True if this run has been processed up to DL1 with lstchain v0.9 (v0.10). It sets error code '002' in case neither of the two versions has been used to process the run. 
Launched as `python lstchain_version.py` + +Error codes: + +- 000: no NSB + +- 001: NSB>3.0 + +- 002: neither 0.9 nor 0.10 lstchain version diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 1ffcd0f3..5fcb89bd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -1,7 +1,7 @@ from .coincident_events import configfile_coincidence, linking_bash_lst -from .LSTnsb import nsb +from .database_production.LSTnsb import nsb from .merging_runs import cleaning, merge, mergeMC, split_train_test -from .nsb_level import bash_scripts +from .database_production.nsb_level import bash_scripts from .setting_up_config_and_dir import ( config_file_gen, directories_generator, diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py similarity index 100% rename from magicctapipe/scripts/lst1_magic/semi_automatic_scripts/LSTnsb.py rename to magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py similarity index 100% rename from magicctapipe/scripts/lst1_magic/semi_automatic_scripts/create_LST_table.py rename to magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py similarity index 100% rename from magicctapipe/scripts/lst1_magic/semi_automatic_scripts/lstchain_version.py rename to magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py similarity index 100% rename from magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_level.py rename to magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py similarity index 100% rename from magicctapipe/scripts/lst1_magic/semi_automatic_scripts/nsb_to_h5.py rename to magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py From 12a7cb8fbc86a373ac4791fe21d85ec61b2984b1 Mon Sep 17 00:00:00 2001 From: joannaWojS <167758973+joannaWojS@users.noreply.github.com> Date: Mon, 29 Apr 2024 16:59:26 +0200 Subject: [PATCH 038/236] Update check_MAGIC_runs.py --- .../lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index e30877e7..f9a4cad2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -20,9 +20,8 @@ def check_run_ID(path, filename, first_run, last_run, date, 
source): #'Y' because we have to be sure that the function counts right filename. if Y in filename: - # Extract numbers from filename and check range - run_ids = [int(s) for s in re.findall(r'\d+', filename)] - matched = False + # Extract run_ids from filename and check range + run_ids = [int(filename.split("_")[2].split(".")[0])] magic_runs = [] for id in run_ids: From 3388b659bed1af651f57222d8708b726dd106806 Mon Sep 17 00:00:00 2001 From: Raniere Date: Tue, 30 Apr 2024 15:16:31 +0200 Subject: [PATCH 039/236] Updated tutorial --- magicctapipe/scripts/lst1_magic/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 9b49ad6a..7255be8c 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -33,8 +33,8 @@ Authorized institute server (Client) → ssh connection to CTALaPalma → ``` git clone -b Torino_auto_MCP https://github.com/cta-observatory/magic-cta-pipe.git cd magic-cta-pipe -conda env create -n magic-lst -f environment.yml -conda activate magic-lst +mamba env create -n magic-lst -f environment.yml +mamba activate magic-lst pip install . ``` From 0076095b7b602fcaa5599a4209e1d1561395b48d Mon Sep 17 00:00:00 2001 From: Raniere Date: Tue, 30 Apr 2024 15:35:36 +0200 Subject: [PATCH 040/236] Update README.md --- magicctapipe/scripts/lst1_magic/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 7255be8c..4b68b472 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -40,7 +40,7 @@ pip install . ## Analysis -During the analysis, some files are automatically produced by the scripts and are saved on your working directory. These files are necessary for the subsequent steps in the analysis chain. It is furthermore mandatory that you always launch the scripts from the same working directory since the output files stored there can be correctly read as input files at the subsequent analysis steps. +During the analysis, some files are automatically produced by the scripts and are saved in your working directory. These files are necessary for the subsequent steps in the analysis chain. It is therefore mandatory to always launch the scripts from the same working directory so that the output files stored there can be correctly assigned as input files at the subsequent analysis steps. ### DL0 to DL1 @@ -48,7 +48,7 @@ In this step, we will convert the MAGIC and Monte Carlo (MC) Data Level (DL) 0 t In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open the magic-lst environment with the command `conda activate magic-lst` and update the file `config_general.yaml` according to your analysis. 
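All the semi-automatic scripts parse this configuration file with the same small pattern; a minimal sketch (the two keys shown are among those discussed below, the values come from your own setup):

```python
import yaml

# Same pattern as in the scripts: open the configuration file in binary
# mode and parse it with the safe loader.
with open("config_general.yaml", "rb") as f:
    config = yaml.safe_load(f)

env_name = config["general"]["env_name"]          # conda/mamba environment used by the jobs
NSB_matching = config["general"]["NSB_matching"]  # switches between the two analysis branches
print(env_name, NSB_matching)
```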
-The file `config_general.yaml` must contain the telescope IDs and the directories with the MC data, as shown below, as well as the data selection and some information on the night sky background (NSB) level and software versions: +The file `config_general.yaml` must contain the telescope IDs, the directories with the MC data (mandatory only if NSB_matching = False), the data selection, and some information on the night sky background (NSB) level and software versions: ``` mc_tel_ids: @@ -95,11 +95,11 @@ general: ``` -Now that the configuration file is ready, lets create a list with all the MAGIC+LST1 runs for the chosen time window (or list of nights): +Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the time window (or list of nights) defined on the config_general.yaml file: > $ list_from_h5 -c config_general.yaml -At this point we can convert the MAGIC data into DL1 format with the following command: +At this point, we can convert the MAGIC data into DL1 format with the following command: > $ setting_up_config_and_dir -c config_general.yaml The output in the terminal will be something like this: From 5416209a59026fa26da0392da69bf29c0c72021f Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Apr 2024 13:48:38 +0000 Subject: [PATCH 041/236] bug --- .../scripts/lst1_magic/semi_automatic_scripts/__init__.py | 4 ---- .../database_production/__init__.py | 8 ++++++++ 2 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 5fcb89bd..1b4856d6 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -1,7 +1,5 @@ from .coincident_events import configfile_coincidence, linking_bash_lst -from .database_production.LSTnsb import nsb from .merging_runs import cleaning, merge, mergeMC, split_train_test -from .database_production.nsb_level import bash_scripts from .setting_up_config_and_dir import ( config_file_gen, directories_generator, @@ -11,12 +9,10 @@ from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo __all__ = [ - "nsb", "cleaning", "split_train_test", "merge", "mergeMC", - "bash_scripts", "config_file_gen", "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py new file mode 100644 index 00000000..cd61131b --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py @@ -0,0 +1,8 @@ + +from .LSTnsb import nsb +from .nsb_level import bash_scripts + +__all__ = [ + "nsb", + "bash_scripts", +] From a0c0529ab3220c2de6073bbf0c4ac00ebaf84da2 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Apr 2024 14:05:48 +0000 Subject: [PATCH 042/236] multi-source merging --- .../semi_automatic_scripts/merging_runs.py | 373 +++++++++--------- 1 file changed, 188 insertions(+), 185 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 388a8f69..400b8467 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py 
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -218,147 +218,143 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): ) else: - ST_list = [ - os.path.basename(x) for x in glob.glob(f"{target_dir}/v{__version__}/DL1/*") - ] - - for p in ST_list: - process_name = f'merging_{target_dir.split("/")[-2:][1]}' + + process_name = f'merging_{target_dir.split("/")[-2:][1]}' - MAGIC_DL1_dir = f"{target_dir}/v{__version__}/DL1/{p}" + MAGIC_DL1_dir = f"v{__version__}/{target_dir}/DL1/{p}" - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( - f"{MAGIC_DL1_dir}/M2" - ): - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged"): - os.mkdir(f"{MAGIC_DL1_dir}/Merged") - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - "#SBATCH -N 1\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - ] - with open(f"{source}_Merge_MAGIC_{identification}_{p}.sh", "w") as f: - f.writelines(lines) - if identification == "0_subruns": + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( + f"{MAGIC_DL1_dir}/M2" + ): + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged") + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + ] + with open(f"{source}_Merge_MAGIC_{identification}_{p}.sh", "w") as f: + f.writelines(lines) + if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1"): - dates = [ + if os.path.exists(f"{MAGIC_DL1_dir}/M1"): + dates = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") + ] + for i in dates: + runs = [ os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") + for x in glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/*") ] - for i in dates: - runs = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/*") - ] - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}"): + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}" + ) # Creating a merged directory for the respective night + for r in runs: + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}" + ): os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}" - ) # Creating a merged directory for the respective night - for r in runs: - if not os.path.exists( f"{MAGIC_DL1_dir}/Merged/{i}/{r}" - ): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}" - ) # Creating a merged directory for the respective run - if not os.path.exists( + ) # Creating a merged directory for the respective run + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + ): + os.mkdir( f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ) # Creating a merged directory for the respective run + ) # Creating a merged directory for the respective run - f.write( - f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M1_{i}_{r}.log \n" - ) + f.write( + f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M1_{i}_{r}.log \n" + ) + + if os.path.exists(f"{MAGIC_DL1_dir}/M2"): + dates = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M2/*") + ] - if os.path.exists(f"{MAGIC_DL1_dir}/M2"): - dates = [ + for i in 
dates: + runs = [ os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M2/*") + for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") ] - - for i in dates: - runs = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") - ] - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}"): + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}" + ) # Creating a merged directory for the respective night + for r in runs: + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}" + ): os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}" - ) # Creating a merged directory for the respective night - for r in runs: - if not os.path.exists( f"{MAGIC_DL1_dir}/Merged/{i}/{r}" + ) # Creating a merged directory for the respective run + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + ): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + ) # Creating a merged directory for the respective run + + f.write( + f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M2_{i}_{r}.log \n" + ) + elif identification == "1_M1M2": + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( + f"{MAGIC_DL1_dir}/M2" + ): + dates = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") + ] + for i in dates: + runs = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") + ] + for r in runs: + if ( + len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0 + ) and ( + len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) + ) > 0: + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/{i}/Merged" ): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}" - ) # Creating a merged directory for the respective run + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i}/Merged") if not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" ): os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ) # Creating a merged directory for the respective run - + f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" + ) f.write( - f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M2_{i}_{r}.log \n" + f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}.log \n" ) - elif identification == "1_M1M2": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( - f"{MAGIC_DL1_dir}/M2" + else: + dates = [ + os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") + ] + for i in dates: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): + continue + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i}"): + os.mkdir( + f"{MAGIC_DL1_dir}/Merged/Merged_{i}" + ) # Creating a merged directory for each night + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs" ): - dates = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") - ] - for i in dates: - runs = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") - ] - for r in runs: - if ( - len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0 - ) and ( - len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) - ) > 0: - if not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/{i}/Merged" - ): - os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i}/Merged") - if not os.path.exists( - 
f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" - ): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" - ) - f.write( - f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}.log \n" - ) - else: - dates = [ - os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") - ] - for i in dates: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): - continue - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/Merged_{i}" - ) # Creating a merged directory for each night - if not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs" - ): - os.mkdir(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") - f.write( - f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}.log \n" - ) + os.mkdir(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") + f.write( + f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}.log \n" + ) def mergeMC(target_dir, identification, env_name, cwd): @@ -456,82 +452,89 @@ def main(): MAGIC_runs = np.genfromtxt(MAGIC_runs_and_dates, dtype=str, delimiter=",") NSB_match = config["general"]["NSB_matching"] train_fraction = float(config["general"]["proton_train_fraction"]) - source = config["directories"]["target_name"] - + env_name = config["general"]["env_name"] + source = config["data_selection"]["source_name_output"] + source_list = [] + if source is not None: + source_list = json.load("list_sources.dat") + + else: + source_list.append(source) + for source_name in source_list: # Below we run the analysis on the MC data - if not NSB_match: - if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): - # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): - if not os.path.exists(f"{target_dir}/DL1/MC/protons_test"): - print("***** Splitting protons into 'train' and 'test' datasets...") - split_train_test(target_dir, train_fraction) - - print("***** Generating merge_MC bashscripts...") - mergeMC( - target_dir, "protons", env_name, cwd - ) # generating the bash script to merge the files - mergeMC( - target_dir, "gammadiffuse", env_name, cwd - ) # generating the bash script to merge the files - mergeMC( - target_dir, "gammas", env_name, cwd - ) # generating the bash script to merge the files - mergeMC(target_dir, "protons_test", env_name, cwd) - - print("***** Running merge_hdf_files.py on the MC data files...") - - # Below we run the bash scripts to merge the MC files - list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh")) + if not NSB_match: + if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): + # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): + if not os.path.exists(f"{target_dir}/DL1/MC/protons_test"): + print("***** Splitting protons into 'train' and 'test' datasets...") + split_train_test(target_dir, train_fraction) + + print("***** Generating merge_MC bashscripts...") + mergeMC( + target_dir, "protons", env_name, cwd + ) # generating the bash script to merge the files + mergeMC( + target_dir, "gammadiffuse", env_name, cwd + ) # 
generating the bash script to merge the files + mergeMC( + target_dir, "gammas", env_name, cwd + ) # generating the bash script to merge the files + mergeMC(target_dir, "protons_test", env_name, cwd) + + print("***** Running merge_hdf_files.py on the MC data files...") + + # Below we run the bash scripts to merge the MC files + list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh")) + + for n, run in enumerate(list_of_merging_scripts): + if n == 0: + launch_jobs = f"merging{n}=$(sbatch --parsable {run})" + else: + launch_jobs = ( + f"{launch_jobs} && merging{n}=$(sbatch --parsable {run})" + ) + + os.system(launch_jobs) + # Below we run the analysis on the MAGIC data + if ( + (args.analysis_type == "onlyMAGIC") + or (args.analysis_type == "doEverything") + or (NSB_match) + ): + print("***** Generating merge_MAGIC bashscripts...") + merge( + target_dir, "0_subruns", MAGIC_runs, env_name, source, NSB_match + ) # generating the bash script to merge the subruns + merge( + target_dir, "1_M1M2", MAGIC_runs, env_name, source, NSB_match + ) # generating the bash script to merge the M1 and M2 runs + merge( + target_dir, "2_nights", MAGIC_runs, env_name, source, NSB_match + ) # generating the bash script to merge all runs per night + + print("***** Running merge_hdf_files.py on the MAGIC data files...") + + # Below we run the bash scripts to merge the MAGIC files + list_of_merging_scripts = np.sort(glob.glob(f"{source}_Merge_MAGIC_*.sh")) + if len(list_of_merging_scripts) < 1: + logger.warning("no bash scripts") + return for n, run in enumerate(list_of_merging_scripts): if n == 0: launch_jobs = f"merging{n}=$(sbatch --parsable {run})" else: - launch_jobs = ( - f"{launch_jobs} && merging{n}=$(sbatch --parsable {run})" - ) + launch_jobs = f"{launch_jobs} && merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})" os.system(launch_jobs) - # Below we run the analysis on the MAGIC data - if ( - (args.analysis_type == "onlyMAGIC") - or (args.analysis_type == "doEverything") - or (NSB_match) - ): - print("***** Generating merge_MAGIC bashscripts...") - merge( - target_dir, "0_subruns", MAGIC_runs, env_name, source, NSB_match - ) # generating the bash script to merge the subruns - merge( - target_dir, "1_M1M2", MAGIC_runs, env_name, source, NSB_match - ) # generating the bash script to merge the M1 and M2 runs - merge( - target_dir, "2_nights", MAGIC_runs, env_name, source, NSB_match - ) # generating the bash script to merge all runs per night - - print("***** Running merge_hdf_files.py on the MAGIC data files...") - - # Below we run the bash scripts to merge the MAGIC files - list_of_merging_scripts = np.sort(glob.glob(f"{source}_Merge_MAGIC_*.sh")) - if len(list_of_merging_scripts) < 1: - logger.warning("no bash scripts") - return - for n, run in enumerate(list_of_merging_scripts): - if n == 0: - launch_jobs = f"merging{n}=$(sbatch --parsable {run})" - else: - launch_jobs = f"{launch_jobs} && merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})" - - os.system(launch_jobs) - - print(f"Process name: merging_{target_dir.split('/')[-2:][1]}") - print( - f"To check the jobs submitted to the cluster, type: squeue -n merging_{target_dir.split('/')[-2:][1]}" - ) - print("This process will take about 10 to 30 min to run.") + print(f"Process name: merging_{target_dir.split('/')[-2:][1]}") + print( + f"To check the jobs submitted to the cluster, type: squeue -n merging_{target_dir.split('/')[-2:][1]}" + ) + print("This process will take about 10 to 30 min to run.") if 
__name__ == "__main__": From 361f20a1f4ac5082a50cd7b6057ff19d018b09da Mon Sep 17 00:00:00 2001 From: Raniere Date: Tue, 30 Apr 2024 16:09:54 +0200 Subject: [PATCH 043/236] Updates on list_from_h5.py --- .../semi_automatic_scripts/list_from_h5.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index c8cdfd4a..d1c48370 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -81,10 +81,9 @@ def clear_files(source_in, source_out, df): else: source_list.append(source_out) - print("Source list: ", source_list) joblib.dump(source_list, "list_sources.dat") + print("Cleaning pre-existing *_LST_runs.txt and *_MAGIC_runs.txt files") for source_name in source_list: - print("Target name: ", source_name) file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", @@ -113,10 +112,10 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis skip_MAGIC : list List of the MAGIC runs to be ignored. is_LST : bool - If you are looking for LST runs, set to True. For MAGIC set False. + If you are looking for LST runs, set it to True. For MAGIC set False. M1_run_list : list If you are looking for MAGIC runs, pass the list of MAGIC-1 runs here, and the MAGIC-2 database as df. - Only the runs both in the list and in the dataframe (i.e., stereo MAGIC observations) will be saved in the output txt files + Only the runs both in the list and in the data frame (i.e., stereo MAGIC observations) will be saved in the output txt files """ source_list = [] @@ -125,9 +124,9 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis else: source_list.append(source_out) - - print("List of sources: ", source_list) + for source_name in source_list: + print("Target name: ", source_name) file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", @@ -140,7 +139,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis df_source = df[df["source"] == source_in] if is_LST: - print("LST") + print("Finding LST runs...") LST_run = df_source["LST1_run"].tolist() # List with runs as strings LST_date = df_source["date_LST"].tolist() for k in range(len(df_source)): @@ -159,7 +158,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis run_listed.append(int(LST_run[k])) if not is_LST: - print("MAGIC") + print("Finding MAGIC runs...") MAGIC_date = df_source["date_MAGIC"].tolist() M2_run = df_source["Run ID"].tolist() for k in range(len(df_source)): @@ -181,7 +180,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis def main(): """ - This function is automatically called whe script is launched. + This function is automatically called when the script is launched. It calls the functions above to create the files XXXXXX_LST_runs.txt and XXXXX_MAGIC_runs.txt for the desired targets. 
""" From ba7a5af8c480996323f1fb224b8a572521ebe352 Mon Sep 17 00:00:00 2001 From: Raniere Date: Tue, 30 Apr 2024 16:22:30 +0200 Subject: [PATCH 044/236] Update list_from_h5.py --- .../scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py | 1 - 1 file changed, 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index d1c48370..e5c3c57b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -126,7 +126,6 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis source_list.append(source_out) for source_name in source_list: - print("Target name: ", source_name) file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", From a5a738e47588f0f54a1e2a12ac6ea05069bcc992 Mon Sep 17 00:00:00 2001 From: Raniere Date: Tue, 30 Apr 2024 16:41:00 +0200 Subject: [PATCH 045/236] Updates in setting_up_config_and_dir.py --- .../setting_up_config_and_dir.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 34b07555..241b40ff 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -223,7 +223,7 @@ def lists_and_bash_gen_MAGIC( Directory to store the results telescope_ids : list List of the telescope IDs (set by the user) - MAGIC_runs : str + MAGIC_runs : array MAGIC dates and runs to be processed source : str Name of the target @@ -408,7 +408,7 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): Directory to store the results telescope_ids : list List of the telescope IDs (set by the user) - MAGIC_runs : str + MAGIC_runs : array MAGIC dates and runs to be processed NSB_match : bool If real data are matched to pre-processed MCs or not @@ -544,10 +544,7 @@ def main(): SimTel_version = config["general"]["SimTel_version"] env_name = config["general"]["env_name"] NSB_match = config["general"]["NSB_matching"] - MAGIC_runs_and_dates = config["general"]["MAGIC_runs"] - MAGIC_runs = np.genfromtxt( - MAGIC_runs_and_dates, dtype=str, delimiter="," - ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" + # LST_runs_and_dates = config["general"]["LST_runs"] MC_gammas = str(Path(config["directories"]["MC_gammas"])) @@ -567,6 +564,11 @@ def main(): for source_name in source_list: target_dir = str(Path(config["directories"]["workspace_dir"]) / source_name) + MAGIC_runs_and_dates = f'{source_name}_MAGIC_runs.txt' + MAGIC_runs = np.genfromtxt( + MAGIC_runs_and_dates, dtype=str, delimiter="," + ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" + noise_value = [0, 0, 0] if not NSB_match: nsb = config["general"]["NSB_MC"] From 3ca346428f7c8b24f9768b7470b8f3edbf10026d Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Apr 2024 14:42:41 +0000 Subject: [PATCH 046/236] bug fix --- .../lst1_magic/semi_automatic_scripts/merging_runs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 400b8467..25b791ba 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -448,8 +448,7 @@ def main(): target_dir = f'{Path(config["directories"]["workspace_dir"])}/{config["directories"]["target_name"]}' - MAGIC_runs_and_dates = config["general"]["MAGIC_runs"] - MAGIC_runs = np.genfromtxt(MAGIC_runs_and_dates, dtype=str, delimiter=",") + NSB_match = config["general"]["NSB_matching"] train_fraction = float(config["general"]["proton_train_fraction"]) @@ -464,6 +463,8 @@ def main(): source_list.append(source) for source_name in source_list: # Below we run the analysis on the MC data + MAGIC_runs_and_dates = f'{source_name}_MAGIC_runs.txt' + MAGIC_runs = np.genfromtxt(MAGIC_runs_and_dates, dtype=str, delimiter=",") if not NSB_match: if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): From 3ef52dd6e2dc945375ad8479c0cc7370c63ca619 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Apr 2024 14:49:06 +0000 Subject: [PATCH 047/236] typo/bug --- .../semi_automatic_scripts/setting_up_config_and_dir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 241b40ff..dd89a29f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -15,7 +15,7 @@ """ import argparse import glob -import json +import joblib import logging import os @@ -557,7 +557,7 @@ def main(): source_list = [] if source is not None: - source_list = json.load("list_sources.dat") + source_list = joblib.load("list_sources.dat") else: source_list.append(source) From d0807baa2d36b7bd829c09a1ebbf2e6c25c84a48 Mon Sep 17 00:00:00 2001 From: Raniere Date: Tue, 30 Apr 2024 17:04:10 +0200 Subject: [PATCH 048/236] Update README.md --- magicctapipe/scripts/lst1_magic/README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 4b68b472..5d5fd200 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -74,8 +74,8 @@ data_selection: target_RA_deg : 83.629 # RA in degrees; Please set it to null if source_name_database=null. target_Dec_deg: 22.015 # Dec in degrees; Please set it to null if source_name_database=null. time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). - min : "2023_11_17" - max : "2024_03_03" + min : "2021_01_17" + max : "2022_12_03" date_list : ['2020_12_15','2021_03_11'] # LST list of days to be processed (only if time_range=False), format: YYYY_MM_DD. skip_LST_runs: [3216,3217] # LST runs to ignore. skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. @@ -99,6 +99,14 @@ Now that the configuration file is ready, let's create a list with all the MAGIC > $ list_from_h5 -c config_general.yaml +The output in the terminal should look like this: +``` +Cleaning pre-existing *_LST_runs.txt and *_MAGIC_runs.txt files +Finding LST runs... +Finding MAGIC runs... 
+``` +And it will save the files TARGET_LST_runs.txt, TARGET_MAGIC_runs.txt, and list_sources.dat in your working directory. + At this point, we can convert the MAGIC data into DL1 format with the following command: > $ setting_up_config_and_dir -c config_general.yaml From 240dbe295881f2b71c7b817b6942e34370b4d757 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Apr 2024 15:45:26 +0000 Subject: [PATCH 049/236] fix directories --- .../setting_up_config_and_dir.py | 159 +++++++++--------- 1 file changed, 84 insertions(+), 75 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index dd89a29f..f92d4a3b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -40,7 +40,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(ids, target_dir, noise_value, NSB_match): +def config_file_gen(ids, target_dir, noise_value, NSB_match, source_name): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -86,8 +86,11 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match): conf["LST"] = LST_config conf["MAGIC"] = MAGIC_config - - with open(f"{target_dir}/config_DL0_to_DL1.yaml", "w") as f: + if not NSB_match: + file_name=f"{target_dir}/{source_name}/config_DL0_to_DL1.yaml" + else: + file_name=f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" + with open(file_name, "w") as f: lines = [ "mc_tel_ids:", f"\n LST-1: {ids[0]}", @@ -103,7 +106,7 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match): def lists_and_bash_generator( - particle_type, target_dir, MC_path, SimTel_version, focal_length, env_name + particle_type, target_dir, MC_path, SimTel_version, focal_length, env_name, source_name ): """ @@ -130,17 +133,17 @@ def lists_and_bash_generator( if MC_path == "": return - process_name = target_dir.split("/")[-2:][1] + process_name = source_name list_of_nodes = glob.glob(f"{MC_path}/node*") with open( - f"{target_dir}/list_nodes_{particle_type}_complete.txt", "w" + f"{target_dir}/{source_name}/list_nodes_{particle_type}_complete.txt", "w" ) as f: # creating list_nodes_gammas_complete.txt for i in list_of_nodes: f.write(f"{i}/output_{SimTel_version}\n") with open( - f"{target_dir}/list_folder_{particle_type}.txt", "w" + f"{target_dir}/{source_name}/list_folder_{particle_type}.txt", "w" ) as f: # creating list_folder_gammas.txt for i in list_of_nodes: f.write(f'{i.split("/")[-1]}\n') @@ -160,17 +163,17 @@ def lists_and_bash_generator( "ulimit -a\n\n", "while read -r -u 3 lineA && read -r -u 4 lineB\n", "do\n", - f" cd {target_dir}/DL1/MC/{particle_type}\n", + f" cd {target_dir}/{source_name}/DL1/MC/{particle_type}\n", " mkdir $lineB\n", " cd $lineA\n", " ls -lR *.gz |wc -l\n", - f" ls *.gz > {target_dir}/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n", + f" ls *.gz > {target_dir}/{source_name}/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n", ' string=$lineA"/"\n', - f" export file={target_dir}/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n\n", + f" export file={target_dir}/{source_name}/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n\n", " cat $file | while read line; do echo $string${line}" - + f" >>{target_dir}/DL1/MC/{particle_type}/$lineB/list_dl0_ok.txt; done\n\n", + + f" >>{target_dir}/{source_name}/DL1/MC/{particle_type}/$lineB/list_dl0_ok.txt; done\n\n", ' echo 
"folder $lineB and node $lineA"\n', - f'done 3<"{target_dir}/list_nodes_{particle_type}_complete.txt" 4<"{target_dir}/list_folder_{particle_type}.txt"\n', + f'done 3<"{target_dir}/{source_name}/list_nodes_{particle_type}_complete.txt" 4<"{target_dir}/{source_name}/list_folder_{particle_type}.txt"\n', "", ] f.writelines(lines_of_config_file) @@ -193,17 +196,17 @@ def lists_and_bash_generator( "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n", - f"cd {target_dir}/DL1/MC/{particle_type}\n\n", - f"export INF={target_dir}\n", + f"cd {target_dir}/{source_name}/DL1/MC/{particle_type}\n\n", + f"export INF={target_dir}/{source_name}\n", f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "cd $SAMPLE\n\n", - f"export LOG={target_dir}/DL1/MC/{particle_type}" + f"export LOG={target_dir}/{source_name}/DL1/MC/{particle_type}" + "/simtel_{$SAMPLE}_all.log\n", "cat list_dl0_ok.txt | while read line\n", "do\n", - f" cd {target_dir}/../\n", - f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {target_dir}/DL1/MC/{particle_type}/$SAMPLE --config-file {target_dir}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", + f" cd {target_dir}/{source_name}/../\n", + f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {target_dir}/{source_name}/DL1/MC/{particle_type}/$SAMPLE --config-file {target_dir}/{source_name}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", "done\n", "", ] @@ -232,7 +235,7 @@ def lists_and_bash_gen_MAGIC( NSB_match : bool If real data are matched to pre-processed MCs or not """ - process_name = f'{target_dir.split("/")[-2:][1]}' + process_name = source lines = [ "#!/bin/sh\n\n", "#SBATCH -p short\n", @@ -245,6 +248,7 @@ def lists_and_bash_gen_MAGIC( with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: f.writelines(lines) if NSB_match: + if (len(MAGIC_runs) == 2) and (len(MAGIC_runs[0]) == 10): MAGIC = MAGIC_runs @@ -256,7 +260,7 @@ def lists_and_bash_gen_MAGIC( if telescope_ids[-1] > 0: lines = [ f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1=v{__version__}/{target_dir}/DL1/M2/{i[0]}/{i[1]}/logs \n", + f"export OUT1={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs \n", f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", ] f.writelines(lines) @@ -265,7 +269,7 @@ def lists_and_bash_gen_MAGIC( if telescope_ids[-2] > 0: lines = [ f'export IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1=v{__version__}/{target_dir}/DL1/M1/{i[0]}/{i[1]}/logs \n", + f"export OUT1={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs \n", f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", ] f.writelines(lines) @@ -274,7 +278,7 @@ def lists_and_bash_gen_MAGIC( for i in MAGIC_runs: lines = [ f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/DL1/Observations/M2/{i[0]}/{i[1]}\n", + f"export OUT1={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}\n", f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", ] f.writelines(lines) @@ -283,7 +287,7 @@ def lists_and_bash_gen_MAGIC( for i in MAGIC_runs: lines = [ f'export 
IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/DL1/Observations/M1/{i[0]}/{i[1]}\n", + f"export OUT1={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}\n", f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", ] f.writelines(lines) @@ -308,11 +312,11 @@ def lists_and_bash_gen_MAGIC( "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", - f"export OUTPUTDIR=v{__version__}/{target_dir}/DL1/M2/{i[0]}/{i[1]}\n", + f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: f.writelines(lines) @@ -333,11 +337,11 @@ def lists_and_bash_gen_MAGIC( "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", - f"export OUTPUTDIR=v{__version__}/{target_dir}/DL1/M1/{i[0]}/{i[1]}\n", + f"export OUTPUTDIR=v{__version__}/{target_dir}/{source}/DL1/M1/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: f.writelines(lines) @@ -360,12 +364,12 @@ def lists_and_bash_gen_MAGIC( "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", - f"export OUTPUTDIR={target_dir}/DL1/Observations/M2/{i[0]}/{i[1]}\n", - f"cd {target_dir}/../\n", + f"export OUTPUTDIR={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}\n", + f"cd {target_dir}/{source}/../\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", "", ] f.writelines(lines) @@ -386,18 +390,18 @@ def lists_and_bash_gen_MAGIC( "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", - f"export OUTPUTDIR={target_dir}/DL1/Observations/M1/{i[0]}/{i[1]}\n", - f"cd {target_dir}/../\n", + f"export OUTPUTDIR={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}\n", + f"cd {target_dir}/{source}/../\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR 
--config-file {target_dir}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", "", ] f.writelines(lines) -def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): +def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name): """ Here we create all subdirectories for a given workspace and target name. @@ -415,39 +419,39 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): """ if NSB_match: - if not os.path.exists(f"v{__version__}"): - os.mkdir(f"v{__version__}") - if not os.path.exists(f"v{__version__}/{target_dir}"): - os.mkdir(f"v{__version__}/{target_dir}") - if not os.path.exists(f"v{__version__}/{target_dir}/DL1"): - os.mkdir(f"v{__version__}/{target_dir}/DL1") - dl1_dir = str(f"v{__version__}/{target_dir}/DL1") + if not os.path.exists(f"{target_dir}/v{__version__}"): + os.mkdir(f"{target_dir}/v{__version__}") + if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): + os.mkdir(f"{target_dir}/v{__version__}/{source_name}") + if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}/DL1"): + os.mkdir(f"{target_dir}/v{__version__}/{source_name}/DL1") + dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") else: if not os.path.exists(target_dir): os.mkdir(target_dir) - os.mkdir(f"{target_dir}/DL1") - os.mkdir(f"{target_dir}/DL1/Observations") - os.mkdir(f"{target_dir}/DL1/MC") - os.mkdir(f"{target_dir}/DL1/MC/gammas") - os.mkdir(f"{target_dir}/DL1/MC/gammadiffuse") - os.mkdir(f"{target_dir}/DL1/MC/electrons") - os.mkdir(f"{target_dir}/DL1/MC/protons") - os.mkdir(f"{target_dir}/DL1/MC/helium") + os.mkdir(f"{target_dir}/{source_name}/DL1") + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/gammas") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/gammadiffuse") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/electrons") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/protons") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/helium") else: overwrite = input( f'MC directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? 
[only "y" or "n"]: ' ) if overwrite == "y": - os.system(f"rm -r {target_dir}") + os.system(f"rm -r {target_dir}/{source_name}") os.mkdir(target_dir) - os.mkdir(f"{target_dir}/DL1") - os.mkdir(f"{target_dir}/DL1/Observations") - os.mkdir(f"{target_dir}/DL1/MC") - os.mkdir(f"{target_dir}/DL1/MC/gammas") - os.mkdir(f"{target_dir}/DL1/MC/gammadiffuse") - os.mkdir(f"{target_dir}/DL1/MC/electrons") - os.mkdir(f"{target_dir}/DL1/MC/protons") - os.mkdir(f"{target_dir}/DL1/MC/helium") + os.mkdir(f"{target_dir}/{source_name}/DL1") + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/gammas") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/gammadiffuse") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/electrons") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/protons") + os.mkdir(f"{target_dir}/{source_name}/DL1/MC/helium") else: print("Directory not modified.") @@ -488,24 +492,24 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match): os.mkdir(f"{dl1_dir}/M1/{i[0]}/{i[1]}/logs") else: if telescope_ids[-1] > 0: - if not os.path.exists(f"{target_dir}/DL1/Observations/M2"): - os.mkdir(f"{target_dir}/DL1/Observations/M2") + if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M2"): + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2") for i in MAGIC_runs: - if not os.path.exists(f"{target_dir}/DL1/Observations/M2/{i[0]}"): - os.mkdir(f"{target_dir}/DL1/Observations/M2/{i[0]}") - os.mkdir(f"{target_dir}/DL1/Observations/M2/{i[0]}/{i[1]}") + if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}"): + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}") + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}/{i[1]}") else: - os.mkdir(f"{target_dir}/DL1/Observations/M2/{i[0]}/{i[1]}") + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}/{i[1]}") if telescope_ids[-2] > 0: - if not os.path.exists(f"{target_dir}/DL1/Observations/M1"): - os.mkdir(f"{target_dir}/DL1/Observations/M1") + if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M1"): + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1") for i in MAGIC_runs: - if not os.path.exists(f"{target_dir}/DL1/Observations/M1/{i[0]}"): - os.mkdir(f"{target_dir}/DL1/Observations/M1/{i[0]}") - os.mkdir(f"{target_dir}/DL1/Observations/M1/{i[0]}/{i[1]}") + if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}"): + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}") + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}/{i[1]}") else: - os.mkdir(f"{target_dir}/DL1/Observations/M1/{i[0]}/{i[1]}") + os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}/{i[1]}") def main(): @@ -562,7 +566,7 @@ def main(): else: source_list.append(source) for source_name in source_list: - target_dir = str(Path(config["directories"]["workspace_dir"]) / source_name) + target_dir = Path(config["directories"]["workspace_dir"]) MAGIC_runs_and_dates = f'{source_name}_MAGIC_runs.txt' MAGIC_runs = np.genfromtxt( @@ -579,17 +583,17 @@ def main(): # TODO: fix here above print("*** Converting DL0 into DL1 data ***") - print(f'Process name: {target_dir.split("/")[-2:][1]}') + print(f'Process name: {source_name}') print( - f'To check the jobs submitted to the cluster, type: squeue -n {target_dir.split("/")[-2:][1]}' + f'To check the jobs submitted to the cluster, type: squeue -n {source_name}' ) print("This 
process will take about 10 min to run if the IT cluster is free.") directories_generator( - target_dir, telescope_ids, MAGIC_runs, NSB_match + target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - telescope_ids, target_dir, noise_value, NSB_match + telescope_ids, target_dir, noise_value, NSB_match, source_name ) # TODO: fix here if not NSB_match: @@ -604,6 +608,7 @@ def main(): SimTel_version, focal_length, env_name, + source_name, ) # gammas lists_and_bash_generator( "electrons", @@ -612,6 +617,7 @@ def main(): SimTel_version, focal_length, env_name, + source_name, ) # electrons lists_and_bash_generator( "helium", @@ -620,6 +626,7 @@ def main(): SimTel_version, focal_length, env_name, + source_name, ) # helium lists_and_bash_generator( "protons", @@ -628,6 +635,7 @@ def main(): SimTel_version, focal_length, env_name, + source_name, ) # protons lists_and_bash_generator( "gammadiffuse", @@ -636,6 +644,7 @@ def main(): SimTel_version, focal_length, env_name, + source_name, ) # gammadiffuse # Here we do the MC DL0 to DL1 conversion: @@ -658,7 +667,7 @@ def main(): or (NSB_match) ): lists_and_bash_gen_MAGIC( - target_dir, telescope_ids, MAGIC_runs, source_name, env_name, NSB_match + target_dir, telescope_ids, MAGIC_runs, source_name, env_name, NSB_match, ) # MAGIC real data if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh") From 13d6aedeaefb16abb0d6d0a3caa4eeb5e6f7ceca Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Apr 2024 16:46:25 +0000 Subject: [PATCH 050/236] directories --- .../semi_automatic_scripts/merging_runs.py | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 25b791ba..99fee5ed 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -33,6 +33,7 @@ import logging import os from pathlib import Path +import joblib import numpy as np import yaml @@ -68,7 +69,7 @@ def cleaning(list_of_nodes, cwd): print("Cleaning done.") -def split_train_test(target_dir, train_fraction): +def split_train_test(target_dir, train_fraction, source_name): """ This function splits the MC proton sample in 2, i.e. the "test" and the "train" subsamples. 
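In practice the split is node-wise: a fraction `train_fraction` (read from `proton_train_fraction` in `config_general.yaml`) of the proton node directories goes to `train`, the rest to `protons_test`. A rough sketch under those assumptions follows; the paths match the hunk just below, but the real function's bookkeeping details may differ:

```python
# Rough sketch of the node-wise proton train/test split; illustrative only,
# the directory layout follows this patch.
import glob
import os
import shutil

def split_train_test_sketch(target_dir, train_fraction, source_name):
    proton_dir = f"{target_dir}/{source_name}/DL1/MC/protons"
    test_dir = f"{target_dir}/{source_name}/DL1/MC/protons_test"
    nodes = sorted(glob.glob(f"{proton_dir}/node*"))  # one directory per MC node
    n_train = int(len(nodes) * train_fraction)  # e.g. 0.8 -> 80% of the nodes
    os.makedirs(f"{proton_dir}/train", exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)
    for node in nodes[:n_train]:  # training sample for the RFs
        shutil.move(node, f"{proton_dir}/train/")
    for node in nodes[n_train:]:  # independent test sample
        shutil.move(node, test_dir)
```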
@@ -83,7 +84,7 @@ def split_train_test(target_dir, train_fraction): Fraction of proton MC files to be used in the training RF dataset """ - proton_dir = f"{target_dir}/DL1/MC/protons" + proton_dir = f"{target_dir}/{source_name}/DL1/MC/protons" if not os.path.exists(f"{proton_dir}/train"): os.mkdir(f"{proton_dir}/train") @@ -147,9 +148,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): If real data are matched to pre-processed MCs or not """ - process_name = f"merging_{target_dir.split('/')[-2:][1]}" + process_name = f"merging_{source}" if not NSB_match: - MAGIC_DL1_dir = f"{target_dir}/DL1/Observations" + MAGIC_DL1_dir = f"{target_dir}/{source}/DL1/Observations" if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( f"{MAGIC_DL1_dir}/M2" ): @@ -219,9 +220,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): else: - process_name = f'merging_{target_dir.split("/")[-2:][1]}' + process_name = f'merging_{source}' - MAGIC_DL1_dir = f"v{__version__}/{target_dir}/DL1/{p}" + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( f"{MAGIC_DL1_dir}/M2" @@ -237,7 +238,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): "ulimit -s unlimited\n", "ulimit -a\n\n", ] - with open(f"{source}_Merge_MAGIC_{identification}_{p}.sh", "w") as f: + with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: f.writelines(lines) if identification == "0_subruns": @@ -357,7 +358,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): ) -def mergeMC(target_dir, identification, env_name, cwd): +def mergeMC(target_dir, identification, env_name, cwd, source_name): """ This function creates the bash scripts to run merge_hdf_files.py in all MC runs. 
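The `Merge_MC_*.sh` files this function writes are plain SLURM scripts; below is a minimal sketch of one of them, with the SBATCH directives copied from `merge()` above (`write_merge_mc_script` is a hypothetical helper, and the options actually emitted by `mergeMC()` may differ):

```python
# Minimal sketch: write one Merge_MC_*.sh SLURM script that runs
# merge_hdf_files on a DL1 MC sample (SBATCH lines as in merge() above).
import os

def write_merge_mc_script(target_dir, source_name, identification, env_name):
    mc_dir = f"{target_dir}/{source_name}/DL1/MC/{identification}"
    os.makedirs(f"{mc_dir}/Merged", exist_ok=True)
    lines = [
        "#!/bin/sh\n\n",
        "#SBATCH -p short\n",
        f"#SBATCH -J merging_{source_name}\n",
        "#SBATCH -N 1\n\n",
        "ulimit -l unlimited\n",
        "ulimit -s unlimited\n",
        "ulimit -a\n\n",
        f"time conda run -n {env_name} merge_hdf_files "
        f"--input-dir {mc_dir} --output-dir {mc_dir}/Merged\n",
    ]
    with open(f"Merge_MC_{identification}.sh", "w") as f:
        f.writelines(lines)
```

`main()` then submits the generated scripts as a `sbatch --parsable` dependency chain, as the `list_of_merging_scripts` loop further below shows.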
@@ -374,9 +375,9 @@ def mergeMC(target_dir, identification, env_name, cwd): Current working directory """ - process_name = f"merging_{target_dir.split('/')[-2:][1]}" + process_name = f"merging_{source_name}" - MC_DL1_dir = f"{target_dir}/DL1/MC" + MC_DL1_dir = f"{target_dir}/{source_name}/DL1/MC" if not os.path.exists(f"{MC_DL1_dir}/{identification}/Merged"): os.mkdir(f"{MC_DL1_dir}/{identification}/Merged") @@ -446,7 +447,7 @@ def main(): config = yaml.safe_load(f) cwd = os.getcwd() - target_dir = f'{Path(config["directories"]["workspace_dir"])}/{config["directories"]["target_name"]}' + target_dir = Path(config["directories"]["workspace_dir"]) NSB_match = config["general"]["NSB_matching"] @@ -457,7 +458,7 @@ def main(): source_list = [] if source is not None: - source_list = json.load("list_sources.dat") + source_list = joblib.load("list_sources.dat") else: source_list.append(source) @@ -468,21 +469,21 @@ def main(): if not NSB_match: if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): - if not os.path.exists(f"{target_dir}/DL1/MC/protons_test"): + if not os.path.exists(f"{target_dir}/{source_name}/DL1/MC/protons_test"): print("***** Splitting protons into 'train' and 'test' datasets...") - split_train_test(target_dir, train_fraction) + split_train_test(target_dir, train_fraction, source_name) print("***** Generating merge_MC bashscripts...") mergeMC( - target_dir, "protons", env_name, cwd + target_dir, "protons", env_name, cwd, source_name ) # generating the bash script to merge the files mergeMC( - target_dir, "gammadiffuse", env_name, cwd + target_dir, "gammadiffuse", env_name, cwd, source_name ) # generating the bash script to merge the files mergeMC( - target_dir, "gammas", env_name, cwd + target_dir, "gammas", env_name, cwd, source_name ) # generating the bash script to merge the files - mergeMC(target_dir, "protons_test", env_name, cwd) + mergeMC(target_dir, "protons_test", env_name, cwd, source_name) print("***** Running merge_hdf_files.py on the MC data files...") @@ -507,13 +508,13 @@ def main(): ): print("***** Generating merge_MAGIC bashscripts...") merge( - target_dir, "0_subruns", MAGIC_runs, env_name, source, NSB_match + target_dir, "0_subruns", MAGIC_runs, env_name, source_name, NSB_match ) # generating the bash script to merge the subruns merge( - target_dir, "1_M1M2", MAGIC_runs, env_name, source, NSB_match + target_dir, "1_M1M2", MAGIC_runs, env_name, source_name, NSB_match ) # generating the bash script to merge the M1 and M2 runs merge( - target_dir, "2_nights", MAGIC_runs, env_name, source, NSB_match + target_dir, "2_nights", MAGIC_runs, env_name, source_name, NSB_match ) # generating the bash script to merge all runs per night print("***** Running merge_hdf_files.py on the MAGIC data files...") @@ -531,9 +532,9 @@ def main(): os.system(launch_jobs) - print(f"Process name: merging_{target_dir.split('/')[-2:][1]}") + print(f"Process name: merging_{source_name}") print( - f"To check the jobs submitted to the cluster, type: squeue -n merging_{target_dir.split('/')[-2:][1]}" + f"To check the jobs submitted to the cluster, type: squeue -n merging_{source_name}" ) print("This process will take about 10 to 30 min to run.") From 50107a395932827f9e59779f806cb81fbe06855c Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 1 May 2024 17:49:15 +0000 Subject: [PATCH 051/236] coincidence updates --- .../coincident_events.py | 323 
+++++++++--------- .../setting_up_config_and_dir.py | 6 +- 2 files changed, 168 insertions(+), 161 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 94d4fb17..fb416f54 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -16,6 +16,7 @@ import glob import logging import os +import joblib from datetime import date as dtdt from datetime import timedelta from pathlib import Path @@ -32,7 +33,7 @@ logger.setLevel(logging.INFO) -def configfile_coincidence(ids, target_dir): +def configfile_coincidence(ids, target_dir, source_name, NSB_match): """ This function creates the configuration file needed for the event coincidence step @@ -50,12 +51,17 @@ def configfile_coincidence(ids, target_dir): 'event_coincidence:\n timestamp_type_lst: "dragon_time" # select "dragon_time", "tib_time" or "ucts_time"\n pre_offset_search: true\n n_pre_offset_search_events: 100\n window_half_width: "300 ns"\n', ' time_offset:\n start: "-10 us"\n stop: "0 us"\n', ] - with open(f"{target_dir}/config_coincidence.yaml", "w") as f: + if not NSB_match: + file_name=f"{target_dir}/{source_name}/config_coincidence.yaml" + else: + file_name=f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" + + with open(file_name, "w") as f: f.writelines(lines) def linking_bash_lst( - target_dir, LST_runs, nsb, date, source, LST_version, env_name, NSB_match + target_dir, LST_runs, source_name, LST_version, env_name, NSB_match ): """ @@ -71,7 +77,7 @@ def linking_bash_lst( NSB level date : numpy array Array of lists [date run] for all the LST runs (before the NSB splitting) - source : str + source_name : str Target name LST_version : str The lstchain version used to process the LST data @@ -81,127 +87,125 @@ def linking_bash_lst( If real data are matched to pre-processed MCs or not """ - ST_list = [ - os.path.basename(x) for x in glob.glob(f"{target_dir}/v{__version__}/DL1/*") - ] - + if (len(LST_runs) == 2) and (len(LST_runs[0]) == 10): LST = LST_runs LST_runs = [] LST_runs.append(LST) - if (len(date) == 2) and (len(date[0]) == 10): - dt = date - date = [] - date.append(dt) + if (len(LST_runs) == 2) and (len(LST_runs[0]) == 10): + dt = LST_runs + LST_runs = [] + LST_runs.append(dt) if NSB_match: - coincidence_DL1_dir = f"{target_dir}/v{__version__}" + coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident/"): os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident") - for p in ST_list: - MAGIC_DL1_dir = f"{target_dir}/v{__version__}/DL1/{p}" - if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident/{p}"): - os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident/{p}") - dates = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") - ] - for d in dates: - Y_M = int(d.split("_")[1]) - M_M = int(d.split("_")[2]) - D_M = int(d.split("_")[3]) - - day_MAGIC = dtdt(Y_M, M_M, D_M) - - delta = timedelta(days=1) - for i in LST_runs: - Y_L = i[0].split("_")[0] - M_L = i[0].split("_")[1] - D_L = i[0].split("_")[2] - day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) - if day_MAGIC == day_LST + delta: - if not os.path.exists( - f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}" - ): - os.mkdir( - f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}" - ) - - lstObsDir = ( - i[0].split("_")[0] + i[0].split("_")[1] 
+ i[0].split("_")[2] + + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/DL1" + if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident"): + os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident") + dates = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") + ] + + for d in dates: + Y_M = int(d.split("_")[1]) + M_M = int(d.split("_")[2]) + D_M = int(d.split("_")[3]) + + day_MAGIC = dtdt(Y_M, M_M, D_M) + + delta = timedelta(days=1) + for i in LST_runs: + Y_L = i[0].split("_")[0] + M_L = i[0].split("_")[1] + D_L = i[0].split("_")[2] + day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) + if day_MAGIC == day_LST + delta: + if not os.path.exists( + f"{coincidence_DL1_dir}/DL1Coincident/" + ): + os.mkdir( + f"{coincidence_DL1_dir}/DL1Coincident/" ) - inputdir = f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" - if not os.path.exists( - f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}" - ): - os.mkdir( - f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}" - ) - if not os.path.exists( - f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}/logs" - ): - os.mkdir( - f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}/logs" - ) + lstObsDir = ( + i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2] + ) - outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{p}/NSB{nsb}/{lstObsDir}" - list_of_subruns = np.sort( - glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5") + inputdir = f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" + if not os.path.exists( + f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" + ): + os.mkdir( + f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" ) - if os.path.exists(f"{outputdir}/logs/list_LST"): - with open( - f"{outputdir}/logs/list_LST", "a" - ) as LSTdataPathFile: - for subrun in list_of_subruns: - LSTdataPathFile.write( - f"{subrun}\n" - ) # If this files already exists, simply append the new information - else: - with open( - f"{outputdir}/logs/list_LST.txt", "w" - ) as f: # If the file list_LST.txt does not exist, it will be created here - for subrun in list_of_subruns: - f.write(f"{subrun}\n") - - if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): - continue - process_size = ( - len( - np.genfromtxt( - f"{outputdir}/logs/list_LST.txt", dtype="str" - ) - ) - - 1 + if not os.path.exists( + f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs" + ): + os.mkdir( + f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs" ) - if process_size < 0: - continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f'#SBATCH -J {target_dir.split("/")[-2:][1]}_coincidence_{nsb}\n', - f"#SBATCH --array=0-{process_size}\n", - "#SBATCH --mem=30g\n", - "#SBATCH -N 1\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"export INM={MAGIC_DL1_dir}/Merged/Merged_{str(Y_M).zfill(4)}_{str(M_M).zfill(2)}_{str(D_M).zfill(2)}\n", - f"export OUTPUTDIR={outputdir}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/config_coincidence.yaml >$LOG 2>&1", - ] + outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" + list_of_subruns = np.sort( + glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5") + ) + if os.path.exists(f"{outputdir}/logs/list_LST"): with open( - 
f"{source}_LST_coincident_{nsb}_{outputdir.split('/')[-1]}.sh", - "w", - ) as f: - f.writelines(lines) + f"{outputdir}/logs/list_LST", "a" + ) as LSTdataPathFile: + for subrun in list_of_subruns: + LSTdataPathFile.write( + f"{subrun}\n" + ) # If this files already exists, simply append the new information + else: + with open( + f"{outputdir}/logs/list_LST.txt", "w" + ) as f: # If the file list_LST.txt does not exist, it will be created here + for subrun in list_of_subruns: + f.write(f"{subrun}\n") + + if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): + continue + process_size = ( + len( + np.genfromtxt( + f"{outputdir}/logs/list_LST.txt", dtype="str" + ) + ) + - 1 + ) + + if process_size < 0: + continue + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f'#SBATCH -J {source_name}_coincidence\n', + f"#SBATCH --array=0-{process_size}\n", + "#SBATCH --mem=30g\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export INM={MAGIC_DL1_dir}/Merged/Merged_{str(Y_M).zfill(4)}_{str(M_M).zfill(2)}_{str(D_M).zfill(2)}\n", + f"export OUTPUTDIR={outputdir}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_TASK_ID}.log\n", + f"time conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1", + ] + with open( + f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", + "w", + ) as f: + f.writelines(lines) else: - coincidence_DL1_dir = f"{target_dir}/DL1/Observations" + coincidence_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations" if not os.path.exists(f"{coincidence_DL1_dir}/Coincident"): os.mkdir(f"{coincidence_DL1_dir}/Coincident") @@ -223,13 +227,13 @@ def linking_bash_lst( ) as f: # If the file list_LST.txt does not exist, it will be created here for subrun in list_of_subruns: f.write(f"{subrun}\n") - process_name = target_dir.split("/")[-2:][1] + process_name = source_name listOfNightsLST = np.sort( - glob.glob(f"{target_dir}/DL1/Observations/Coincident/*") + glob.glob(f"{target_dir}/{source_name}/DL1/Observations/Coincident/*") ) listOfNightsMAGIC = np.sort( - glob.glob(f"{target_dir}/DL1/Observations/Merged/Merged*") + glob.glob(f"{target_dir}/{source_name}/DL1/Observations/Merged/Merged*") ) for nightMAGIC, nightLST in zip(listOfNightsMAGIC, listOfNightsLST): @@ -252,7 +256,7 @@ def linking_bash_lst( "SAMPLE_LIST=($(<$OUTPUTDIR/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "export LOG=$OUTPUTDIR/coincidence_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/config_coincidence.yaml >$LOG 2>&1", + f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/{source_name}/config_coincidence.yaml >$LOG 2>&1", ] f.writelines(lines) @@ -280,55 +284,60 @@ def main(): config = yaml.safe_load(f) telescope_ids = list(config["mc_tel_ids"].values()) - target_dir = str( - Path(config["directories"]["workspace_dir"]) - / config["directories"]["target_name"] - ) + target_dir = Path(config["directories"]["workspace_dir"]) + NSB_match = config["general"]["NSB_matching"] env_name = config["general"]["env_name"] - source = 
config["directories"]["target_name"] LST_version = config["general"]["LST_version"] - print("***** Generating file config_coincidence.yaml...") - print("***** This file can be found in ", target_dir) - configfile_coincidence(telescope_ids, target_dir) - nsb = config["general"]["nsb"] - runs_all = config["general"]["LST_runs"] - date = np.genfromtxt(runs_all, dtype=str, delimiter=",") - if not NSB_match: - nsblvl = 0 - LST_runs_and_dates = config["general"]["LST_runs"] - LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",") - print("***** Linking the paths to LST data files...") - print("***** Generating the bashscript...") - linking_bash_lst( - target_dir, LST_runs, nsblvl, date, source, LST_version, env_name, NSB_match - ) # linking the data paths to the current working directory + source = config["data_selection"]["source_name_output"] - print("***** Submitting processess to the cluster...") - print(f"Process name: {target_dir.split('/')[-2:][1]}_coincidence") - print( - f"To check the jobs submitted to the cluster, type: squeue -n {target_dir.split('/')[-2:][1]}_coincidence" - ) + source_list = [] + if source is not None: + source_list = joblib.load("list_sources.dat") - # Below we run the bash scripts to find the coincident events - list_of_coincidence_scripts = np.sort(glob.glob("LST_coincident*.sh")) + else: + source_list.append(source) + for source_name in source_list: + + print("***** Generating file config_coincidence.yaml...") + configfile_coincidence(telescope_ids, target_dir, source_name, NSB_match) + + + LST_runs_and_dates = f'{source_name}_LST_runs.txt' + LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",") + + + if not NSB_match: + print("***** Linking the paths to LST data files...") + + print("***** Generating the bashscript...") + linking_bash_lst( + target_dir, LST_runs, source_name, LST_version, env_name, NSB_match + ) # linking the data paths to the current working directory + + print("***** Submitting processess to the cluster...") + print(f"Process name: {source_name}_coincidence") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_coincidence" + ) - for n, run in enumerate(list_of_coincidence_scripts): - if n == 0: - launch_jobs = f"coincidence{n}=$(sbatch --parsable {run})" - else: - launch_jobs = ( - f"{launch_jobs} && coincidence{n}=$(sbatch --parsable {run})" - ) + # Below we run the bash scripts to find the coincident events + list_of_coincidence_scripts = np.sort(glob.glob("LST_coincident*.sh")) - os.system(launch_jobs) - else: - for nsblvl in nsb: + for n, run in enumerate(list_of_coincidence_scripts): + if n == 0: + launch_jobs = f"coincidence{n}=$(sbatch --parsable {run})" + else: + launch_jobs = ( + f"{launch_jobs} && coincidence{n}=$(sbatch --parsable {run})" + ) + + os.system(launch_jobs) + else: + try: - LST_runs = np.genfromtxt( - f"{source}_LST_{nsblvl}_.txt", dtype=str, delimiter="," - ) + print("***** Linking the paths to LST data files...") @@ -336,9 +345,7 @@ def main(): linking_bash_lst( target_dir, LST_runs, - nsblvl, - date, - source, + source_name, LST_version, env_name, NSB_match, @@ -346,15 +353,15 @@ def main(): print("***** Submitting processess to the cluster...") print( - f'Process name: {target_dir.split("/")[-2:][1]}_coincidence_{nsblvl}' + f'Process name: {source_name}_coincidence' ) print( - f'To check the jobs submitted to the cluster, type: squeue -n {target_dir.split("/")[-2:][1]}_coincidence_{nsblvl}' + f'To check the jobs submitted to the cluster, type: 
squeue -n {source_name}_coincidence' ) # Below we run the bash scripts to find the coincident events list_of_coincidence_scripts = np.sort( - glob.glob(f"{source}_LST_coincident_{nsblvl}*.sh") + glob.glob(f"{source_name}_LST_coincident*.sh") ) if len(list_of_coincidence_scripts) < 1: continue diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index f92d4a3b..dc3b2693 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -316,7 +316,7 @@ def lists_and_bash_gen_MAGIC( "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: f.writelines(lines) @@ -337,11 +337,11 @@ def lists_and_bash_gen_MAGIC( "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", - f"export OUTPUTDIR=v{__version__}/{target_dir}/{source}/DL1/M1/{i[0]}/{i[1]}\n", + f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: f.writelines(lines) From 36590ed9d7d0042fb0b08182983e5ee0b7dba6ec Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 2 May 2024 08:48:41 +0000 Subject: [PATCH 052/236] stereo --- .../coincident_events.py | 13 +- .../semi_automatic_scripts/stereo_events.py | 262 +++++++++--------- 2 files changed, 129 insertions(+), 146 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index fb416f54..17fb4574 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -103,9 +103,8 @@ def linking_bash_lst( if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident/"): os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident") - MAGIC_DL1_dir = f"{target_dir}/v{__version__}/DL1" - if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident"): - os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident") + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" + dates = [ os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") @@ -125,13 +124,7 @@ def linking_bash_lst( D_L = i[0].split("_")[2] day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) if 
day_MAGIC == day_LST + delta:
-                        if not os.path.exists(
-                            f"{coincidence_DL1_dir}/DL1Coincident/"
-                        ):
-                            os.mkdir(
-                                f"{coincidence_DL1_dir}/DL1Coincident/"
-                            )
-
+
                         lstObsDir = (
                             i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2]
                         )
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
index 5557351d..3abdc6b8 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
@@ -21,6 +21,7 @@
 import logging
 import os
 from pathlib import Path
+import joblib
 
 import numpy as np
 import yaml
@@ -34,7 +35,7 @@
 logger.setLevel(logging.INFO)
 
 
-def configfile_stereo(ids, target_dir):
+def configfile_stereo(ids, target_dir, source_name, NSB_match):
 
     """
     This function creates the configuration file needed for the event stereo step
@@ -46,8 +47,11 @@
     target_dir : str
         Path to the working directory
     """
-
-    with open(f"{target_dir}/config_stereo.yaml", "w") as f:
+    if not NSB_match:
+        file_name=f"{target_dir}/{source_name}/config_stereo.yaml"
+    else:
+        file_name=f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml"
+    with open(file_name, "w") as f:
         lines = [
             f"mc_tel_ids:\n LST-1: {ids[0]}\n LST-2: {ids[1]}\n LST-3: {ids[2]}\n LST-4: {ids[3]}\n MAGIC-I: {ids[4]}\n MAGIC-II: {ids[5]}\n\n",
             'stereo_reco:\n quality_cuts: "(intensity > 50) & (width > 0)"\n theta_uplim: "6 arcmin"\n',
         ]
         f.writelines(lines)
 
 
-def bash_stereo(target_dir, nsb, source, env_name, NSB_match):
+def bash_stereo(target_dir, source, env_name, NSB_match):
 
     """
     This function generates the bashscript for running the stereo analysis.
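
The branch added to configfile_stereo above encodes a convention used throughout this patch series: NSB-matched analyses keep their files under a versioned root, plain analyses do not. A minimal sketch of that path logic (hypothetical helper and arguments; it only assumes that `__version__` is the magicctapipe version string these modules already import):

from pathlib import Path

def analysis_root(target_dir, source_name, version, nsb_match):
    # NSB-matched: <workspace>/v<version>/<source>/...
    # otherwise:   <workspace>/<source>/...
    if nsb_match:
        return Path(target_dir) / f"v{version}" / source_name
    return Path(target_dir) / source_name

# illustrative use:
# analysis_root("/fefs/aswg/workspace", "CrabNebula", "0.4.1", True) / "config_stereo.yaml"

The same two-way split appears above for config_coincidence.yaml and below for config_DL0_to_DL1.yaml and the DL1 output trees.
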
@@ -75,17 +79,17 @@ def bash_stereo(target_dir, nsb, source, env_name, NSB_match): If real data are matched to pre-processed MCs or not """ - process_name = target_dir.split("/")[-2:][1] + process_name = source if not NSB_match: - if not os.path.exists(f"{target_dir}/DL1/Observations/Coincident_stereo"): - os.mkdir(f"{target_dir}/DL1/Observations/Coincident_stereo") + if not os.path.exists(f"{target_dir}/{source}/DL1/Observations/Coincident_stereo"): + os.mkdir(f"{target_dir}/{source}/DL1/Observations/Coincident_stereo") listOfNightsLST = np.sort( - glob.glob(f"{target_dir}/DL1/Observations/Coincident/*") + glob.glob(f"{target_dir}/{source}/DL1/Observations/Coincident/*") ) for nightLST in listOfNightsLST: - stereoDir = f"{target_dir}/DL1/Observations/Coincident_stereo/{nightLST.split('/')[-1]}" + stereoDir = f"{target_dir}/{source}/DL1/Observations/Coincident_stereo/{nightLST.split('/')[-1]}" if not os.path.exists(stereoDir): os.mkdir(stereoDir) @@ -111,85 +115,65 @@ def bash_stereo(target_dir, nsb, source, env_name, NSB_match): "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_stereo.yaml >$LOG 2>&1", + f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_stereo.yaml >$LOG 2>&1", ] f.writelines(lines) else: - if not os.path.exists(f"{target_dir}/v{__version__}/DL1CoincidentStereo"): - os.mkdir(f"{target_dir}/v{__version__}/DL1CoincidentStereo") + if not os.path.exists(f"{target_dir}/v{__version__}/{source}/DL1CoincidentStereo"): + os.mkdir(f"{target_dir}/v{__version__}/{source}/DL1CoincidentStereo") - ST_list = [ - os.path.basename(x) - for x in glob.glob(f"{target_dir}/v{__version__}/DL1Coincident/*") - ] + - for p in ST_list: - if not os.path.exists( - f"{target_dir}/v{__version__}/DL1CoincidentStereo/{p}" - ): - os.mkdir(f"{target_dir}/v{__version__}/DL1CoincidentStereo/{p}") - - if ( - not os.path.exists( - f"{target_dir}/v{__version__}/DL1CoincidentStereo/{p}/NSB{nsb}" - ) - ) and ( - os.path.exists( - f"{target_dir}/v{__version__}/DL1Coincident/{p}/NSB{nsb}" - ) - ): - os.mkdir( - f"{target_dir}/v{__version__}/DL1CoincidentStereo/{p}/NSB{nsb}" - ) - listOfNightsLST = np.sort( - glob.glob(f"{target_dir}/v{__version__}/DL1Coincident/{p}/NSB{nsb}/*") - ) - for nightLST in listOfNightsLST: - stereoDir = f'{target_dir}/v{__version__}/DL1CoincidentStereo/{p}/NSB{nsb}/{nightLST.split("/")[-1]}' - if not os.path.exists(stereoDir): - os.mkdir(stereoDir) - if not os.path.exists(f"{stereoDir}/logs"): - os.mkdir(f"{stereoDir}/logs") - if not os.listdir(f"{nightLST}"): - continue - if len(os.listdir(nightLST)) < 2: - continue - os.system( - f"ls {nightLST}/*LST*.h5 > {stereoDir}/logs/list_coin_{nsb}.txt" - ) # generating a list with the DL1 coincident data files. 
- process_size = ( - len( - np.genfromtxt( - f"{stereoDir}/logs/list_coin_{nsb}.txt", dtype="str" - ) + + listOfNightsLST = np.sort( + glob.glob(f"{target_dir}/v{__version__}/{source}/DL1Coincident/*") + ) + for nightLST in listOfNightsLST: + stereoDir = f'{target_dir}/v{__version__}/{source}/DL1CoincidentStereo/{nightLST.split("/")[-1]}' + if not os.path.exists(stereoDir): + os.mkdir(stereoDir) + if not os.path.exists(f"{stereoDir}/logs"): + os.mkdir(f"{stereoDir}/logs") + if not os.listdir(f"{nightLST}"): + continue + if len(os.listdir(nightLST)) < 2: + continue + os.system( + f"ls {nightLST}/*LST*.h5 > {stereoDir}/logs/list_coin.txt" + ) # generating a list with the DL1 coincident data files. + process_size = ( + len( + np.genfromtxt( + f"{stereoDir}/logs/list_coin.txt", dtype="str" ) - - 1 ) - if process_size < 0: - continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}_stereo_{nsb}\n", - f"#SBATCH --array=0-{process_size}\n", - "#SBATCH -N 1\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"export INPUTDIR={nightLST}\n", - f"export OUTPUTDIR={stereoDir}\n", - f"SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin_{nsb}.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_stereo.yaml >$LOG 2>&1", - ] - with open( - f"{source}_StereoEvents_{nsb}_{nightLST.split('/')[-1]}.sh", "w" - ) as f: - f.writelines(lines) + - 1 + ) + if process_size < 0: + continue + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + f"#SBATCH -J {process_name}_stereo\n", + f"#SBATCH --array=0-{process_size}\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"export INPUTDIR={nightLST}\n", + f"export OUTPUTDIR={stereoDir}\n", + f"SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_TASK_ID}.log\n", + f"time conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1", + ] + with open( + f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w" + ) as f: + f.writelines(lines) -def bash_stereoMC(target_dir, identification, env_name): +def bash_stereoMC(target_dir, identification, env_name, source): """ This function generates the bashscript for running the stereo analysis. 
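
In both branches of bash_stereo above, `process_size` is the number of input files minus one: the SLURM directive `--array=0-N` is inclusive at both ends, so N = k - 1 launches exactly k tasks, and a negative value flags an empty list, which the scripts skip. A short sketch of the counting, assuming a plain-text list with one DL1 path per line (as produced by the `ls ... >` redirection, and matching the readlines() form a later commit in this series adopts):

# list_coin.txt: one coincident DL1 file path per line
with open("list_coin.txt") as f:
    n_files = len(f.readlines())

process_size = n_files - 1  # "#SBATCH --array=0-{process_size}" -> task IDs 0 .. n_files - 1
# inside the generated job script each array task then picks its own input:
#   SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}
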
@@ -204,12 +188,12 @@ def bash_stereoMC(target_dir, identification, env_name): Name of the environment """ - process_name = target_dir.split("/")[-2:][1] + process_name = source - if not os.path.exists(f"{target_dir}/DL1/MC/{identification}/Merged/StereoMerged"): - os.mkdir(f"{target_dir}/DL1/MC/{identification}/Merged/StereoMerged") + if not os.path.exists(f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged"): + os.mkdir(f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged") - inputdir = f"{target_dir}/DL1/MC/{identification}/Merged" + inputdir = f"{target_dir}/{source}/DL1/MC/{identification}/Merged" os.system( f"ls {inputdir}/dl1*.h5 > {inputdir}/list_coin.txt" @@ -232,7 +216,7 @@ def bash_stereoMC(target_dir, identification, env_name): "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_stereo.yaml >$LOG 2>&1", + f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_stereo.yaml >$LOG 2>&1", ] f.writelines(lines) @@ -269,56 +253,40 @@ def main(): ) as f: # "rb" mode opens the file in binary format for reading config = yaml.safe_load(f) - target_dir = f'{Path(config["directories"]["workspace_dir"])}/{config["directories"]["target_name"]}' + target_dir = Path(config["directories"]["workspace_dir"]) env_name = config["general"]["env_name"] - source = config["directories"]["target_name"] + NSB_match = config["general"]["NSB_matching"] telescope_ids = list(config["mc_tel_ids"].values()) + source = config["data_selection"]["source_name_output"] - print("***** Generating file config_stereo.yaml...") - print("***** This file can be found in ", target_dir) - configfile_stereo(telescope_ids, target_dir) - - # Below we run the analysis on the MC data - if ( - (args.analysis_type == "onlyMC") - or (args.analysis_type == "doEverything") - and not NSB_match - ): - print("***** Generating the bashscript for MCs...") - bash_stereoMC(target_dir, "gammadiffuse", env_name) - bash_stereoMC(target_dir, "gammas", env_name) - bash_stereoMC(target_dir, "protons", env_name) - bash_stereoMC(target_dir, "protons_test", env_name) - - list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) - - for n, run in enumerate(list_of_stereo_scripts): - if n == 0: - launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" - else: - launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable --dependency=afterany:$stereo{n-1} {run})" - - os.system(launch_jobs) - - # Below we run the analysis on the real data - if not NSB_match: - nsb = 0 + source_list = [] + if source is not None: + source_list = joblib.load("list_sources.dat") + + else: + source_list.append(source) + for source_name in source_list: + + + print("***** Generating file config_stereo.yaml...") + configfile_stereo(telescope_ids, target_dir, source_name, NSB_match) + + # Below we run the analysis on the MC data if ( - (args.analysis_type == "onlyReal") + (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything") - or NSB_match + and not NSB_match ): - print("***** Generating the bashscript for real data...") - bash_stereo(target_dir, nsb, source, env_name, NSB_match) + print("***** Generating the bashscript for MCs...") + bash_stereoMC(target_dir, "gammadiffuse", env_name, source_name) + 
bash_stereoMC(target_dir, "gammas", env_name, source_name) + bash_stereoMC(target_dir, "protons", env_name, source_name) + bash_stereoMC(target_dir, "protons_test", env_name, source_name) + + list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) - list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_real_*.sh")) - print("***** Submitting processes to the cluster...") - print(f"Process name: {target_dir.split('/')[-2:][1]}_stereo") - print( - f"To check the jobs submitted to the cluster, type: squeue -n {target_dir.split('/')[-2:][1]}_stereo" - ) for n, run in enumerate(list_of_stereo_scripts): if n == 0: launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" @@ -327,28 +295,50 @@ def main(): os.system(launch_jobs) - else: - listnsb = np.sort(glob.glob(f"{source}_LST_*_.txt")) - nsb = [] - for f in listnsb: - nsb.append(f.split("_")[-2]) + # Below we run the analysis on the real data + if not NSB_match: + + if ( + (args.analysis_type == "onlyReal") + or (args.analysis_type == "doEverything") + or NSB_match + ): + print("***** Generating the bashscript for real data...") + bash_stereo(target_dir, source_name, env_name, NSB_match) + + list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_real_*.sh")) + print("***** Submitting processes to the cluster...") + print(f"Process name: {source_name}_stereo") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo" + ) + for n, run in enumerate(list_of_stereo_scripts): + if n == 0: + launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" + else: + launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable --dependency=afterany:$stereo{n-1} {run})" - for nsblvl in nsb: + os.system(launch_jobs) + + else: + + + print("***** Generating the bashscript...") - bash_stereo(target_dir, nsblvl, source, env_name, NSB_match) + bash_stereo(target_dir, source_name, env_name, NSB_match) print("***** Submitting processess to the cluster...") - print(f'Process name: {target_dir.split("/")[-2:][1]}_stereo_{nsblvl}') + print(f'Process name: {source_name}_stereo') print( - f'To check the jobs submitted to the cluster, type: squeue -n {target_dir.split("/")[-2:][1]}_stereo_{nsblvl}' + f'To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo' ) # Below we run the bash scripts to find the stereo events list_of_stereo_scripts = np.sort( - glob.glob(f"{source}_StereoEvents_{nsblvl}*.sh") + glob.glob(f"{source_name}_StereoEvents*.sh") ) if len(list_of_stereo_scripts) < 1: - continue + return for n, run in enumerate(list_of_stereo_scripts): if n == 0: launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" From 1370c5746d63eb2f6b817dc383df5acb430b65d7 Mon Sep 17 00:00:00 2001 From: joannaWojS <167758973+joannaWojS@users.noreply.github.com> Date: Thu, 2 May 2024 10:54:52 +0200 Subject: [PATCH 053/236] Update check_MAGIC_runs.py --- .../check_MAGIC_runs.py | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index f9a4cad2..e4d3f8f5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -12,8 +12,27 @@ import os import re -file_path = '/fefs/aswg/workspace/joanna.wojtowicz/data/magic_first_and_last_runs.csv' -df = pd.read_csv(file_path,sep='\t', dtype={'Date (LST conv.)': str, 'Source': str, 'First 
run': int, 'Last run': int}) +def table_first_last_run(df): + df_selected_data = df.iloc[:, [2, 1, 5, 6]] + df_selected_data.columns = ['DATE','source', 'MAGIC_first_run', 'MAGIC_last_run'] + grouped_data = df_selected_data.groupby(['DATE', 'source']) + + result_table = [] + + for (date, source), group in grouped_data: + First_run = group['MAGIC_first_run'].min() + Last_run = group['MAGIC_last_run'].max() + + result_table.append({ + 'Date (LST conv.)': date, + 'Source': source, + 'First run': First_run, + 'Last run': Last_run + }) + + result = pd.DataFrame(result_table) + + return(result) def check_run_ID(path, filename, first_run, last_run, date, source): Y = f'_Y_{source}' @@ -75,11 +94,13 @@ def check_directory(date, source, first_run, last_run, tel_id): for result, count in results_count.items(): print(f"M{tel_id} \t {result} \t {count}") +df = pd.read_hdf( '/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5', key='str/table') +database = table_first_last_run(df) tel_id = [1, 2] for tel in tel_id: print() print(f"Telescope ID \t Date (LST convention) \t Source \t Run ID \t Subruns") - for index, row in df.iterrows(): + for index, row in database.iterrows(): check_directory(row['Date (LST conv.)'], row['Source'], row['First run'], row['Last run'], tel) From ff5d52c4664522de1c2dc3b5c8883dd9640b843e Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 3 May 2024 06:55:18 +0000 Subject: [PATCH 054/236] pre-commit + both h5 + minor fixes --- .../coincident_events.py | 69 ++++++++--------- .../database_production/__init__.py | 1 - .../database_production/create_LST_table.py | 6 +- .../semi_automatic_scripts/list_from_h5.py | 2 +- .../semi_automatic_scripts/merging_runs.py | 54 ++++++-------- .../setting_up_config_and_dir.py | 74 ++++++++++++++----- .../semi_automatic_scripts/stereo_events.py | 57 +++++++------- 7 files changed, 140 insertions(+), 123 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 17fb4574..da2b1cfc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -16,11 +16,11 @@ import glob import logging import os -import joblib from datetime import date as dtdt from datetime import timedelta from pathlib import Path +import joblib import numpy as np import yaml @@ -44,6 +44,10 @@ def configfile_coincidence(ids, target_dir, source_name, NSB_match): List of telescope IDs target_dir : str Path to the working directory + source_name : str + Name of the target source + NSB_match : bool + If real data are matched to pre-processed MCs or not """ lines = [ @@ -52,10 +56,10 @@ def configfile_coincidence(ids, target_dir, source_name, NSB_match): ' time_offset:\n start: "-10 us"\n stop: "0 us"\n', ] if not NSB_match: - file_name=f"{target_dir}/{source_name}/config_coincidence.yaml" - else: - file_name=f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" - + file_name = f"{target_dir}/{source_name}/config_coincidence.yaml" + else: + file_name = f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" + with open(file_name, "w") as f: f.writelines(lines) @@ -72,11 +76,7 @@ def linking_bash_lst( target_dir : str Path to the working directory LST_runs : matrix of strings - This matrix is imported from config_general.yaml and tells the function where to find the LST data and link them to our 
working directory - nsb : int - NSB level - date : numpy array - Array of lists [date run] for all the LST runs (before the NSB splitting) + This matrix ([['date','run'],['date','run']...]) is imported from *_LST_runs.txt files and tells the function where to find the LST data and link them to our working directory source_name : str Target name LST_version : str @@ -87,7 +87,6 @@ def linking_bash_lst( If real data are matched to pre-processed MCs or not """ - if (len(LST_runs) == 2) and (len(LST_runs[0]) == 10): LST = LST_runs @@ -102,14 +101,13 @@ def linking_bash_lst( coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident/"): os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident") - + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" - + dates = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") + os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") ] - + for d in dates: Y_M = int(d.split("_")[1]) M_M = int(d.split("_")[2]) @@ -124,18 +122,18 @@ def linking_bash_lst( D_L = i[0].split("_")[2] day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) if day_MAGIC == day_LST + delta: - + lstObsDir = ( i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2] ) - inputdir = f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" + inputdir = ( + f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" + ) if not os.path.exists( f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" ): - os.mkdir( - f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" - ) + os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}") if not os.path.exists( f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs" ): @@ -148,9 +146,7 @@ def linking_bash_lst( glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5") ) if os.path.exists(f"{outputdir}/logs/list_LST"): - with open( - f"{outputdir}/logs/list_LST", "a" - ) as LSTdataPathFile: + with open(f"{outputdir}/logs/list_LST", "a") as LSTdataPathFile: for subrun in list_of_subruns: LSTdataPathFile.write( f"{subrun}\n" @@ -166,9 +162,7 @@ def linking_bash_lst( continue process_size = ( len( - np.genfromtxt( - f"{outputdir}/logs/list_LST.txt", dtype="str" - ) + np.genfromtxt(f"{outputdir}/logs/list_LST.txt", dtype="str") ) - 1 ) @@ -178,7 +172,7 @@ def linking_bash_lst( lines = [ "#!/bin/sh\n\n", "#SBATCH -p short\n", - f'#SBATCH -J {source_name}_coincidence\n', + f"#SBATCH -J {source_name}_coincidence\n", f"#SBATCH --array=0-{process_size}\n", "#SBATCH --mem=30g\n", "#SBATCH -N 1\n\n", @@ -278,7 +272,7 @@ def main(): telescope_ids = list(config["mc_tel_ids"].values()) target_dir = Path(config["directories"]["workspace_dir"]) - + NSB_match = config["general"]["NSB_matching"] env_name = config["general"]["env_name"] LST_version = config["general"]["LST_version"] @@ -295,12 +289,10 @@ def main(): print("***** Generating file config_coincidence.yaml...") configfile_coincidence(telescope_ids, target_dir, source_name, NSB_match) - - - LST_runs_and_dates = f'{source_name}_LST_runs.txt' + + LST_runs_and_dates = f"{source_name}_LST_runs.txt" LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",") - - + if not NSB_match: print("***** Linking the paths to LST data files...") @@ -328,9 +320,8 @@ def main(): os.system(launch_jobs) else: - + try: - print("***** Linking the paths to LST data files...") @@ -345,11 +336,9 @@ def main(): ) # linking the data paths to current working directory print("***** Submitting processess to the cluster...") + 
print(f"Process name: {source_name}_coincidence") print( - f'Process name: {source_name}_coincidence' - ) - print( - f'To check the jobs submitted to the cluster, type: squeue -n {source_name}_coincidence' + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_coincidence" ) # Below we run the bash scripts to find the coincident events diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py index cd61131b..59ca9c10 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py @@ -1,4 +1,3 @@ - from .LSTnsb import nsb from .nsb_level import bash_scripts diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index 39842aba..b713dfde 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -19,7 +19,11 @@ def main(): df = pd.read_hdf( "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5", key="/str" ) # TODO: put this file in a shared folder - + df2 = pd.read_hdf( + "/home/alessio.berti/MAGIC-LST_common/runfile/simultaneous_obs_summary.h5", + key="/str", + ) # TODO: put this file in a shared folder + df = pd.concat([df, df2]).drop_duplicates(subset="LST1_run", keep="first") needed_cols = [ "source", "DATE", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index e5c3c57b..27db6811 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -124,7 +124,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis else: source_list.append(source_out) - + for source_name in source_list: file_list = [ f"{source_name}_LST_runs.txt", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 99fee5ed..e83a7b71 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -33,8 +33,8 @@ import logging import os from pathlib import Path -import joblib +import joblib import numpy as np import yaml from tqdm import tqdm @@ -82,6 +82,8 @@ def split_train_test(target_dir, train_fraction, source_name): Path to the working directory train_fraction : float Fraction of proton MC files to be used in the training RF dataset + source_name : str + Name of the target source """ proton_dir = f"{target_dir}/{source_name}/DL1/MC/protons" @@ -219,8 +221,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): ) else: - - process_name = f'merging_{source}' + + process_name = f"merging_{source}" MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" @@ -244,8 +246,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if os.path.exists(f"{MAGIC_DL1_dir}/M1"): dates = [ - os.path.basename(x) - for x in 
glob.glob(f"{MAGIC_DL1_dir}/M1/*") + os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") ] for i in dates: runs = [ @@ -257,9 +258,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f"{MAGIC_DL1_dir}/Merged/{i}" ) # Creating a merged directory for the respective night for r in runs: - if not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}" - ): + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/{r}"): os.mkdir( f"{MAGIC_DL1_dir}/Merged/{i}/{r}" ) # Creating a merged directory for the respective run @@ -276,8 +275,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if os.path.exists(f"{MAGIC_DL1_dir}/M2"): dates = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M2/*") + os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M2/*") ] for i in dates: @@ -290,9 +288,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f"{MAGIC_DL1_dir}/Merged/{i}" ) # Creating a merged directory for the respective night for r in runs: - if not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}" - ): + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/{r}"): os.mkdir( f"{MAGIC_DL1_dir}/Merged/{i}/{r}" ) # Creating a merged directory for the respective run @@ -311,8 +307,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f"{MAGIC_DL1_dir}/M2" ): dates = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") + os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") ] for i in dates: runs = [ @@ -320,9 +315,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") ] for r in runs: - if ( - len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0 - ) and ( + if (len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0) and ( len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) ) > 0: if not os.path.exists( @@ -332,9 +325,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if not os.path.exists( f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" ): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" - ) + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs") f.write( f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}.log \n" ) @@ -349,9 +340,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.mkdir( f"{MAGIC_DL1_dir}/Merged/Merged_{i}" ) # Creating a merged directory for each night - if not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs" - ): + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs"): os.mkdir(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") f.write( f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}.log \n" @@ -373,6 +362,8 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): Name of the environment cwd : Path Current working directory + source_name : str + Name of the target source """ process_name = f"merging_{source_name}" @@ -449,10 +440,9 @@ def main(): target_dir = Path(config["directories"]["workspace_dir"]) - NSB_match = config["general"]["NSB_matching"] train_fraction = float(config["general"]["proton_train_fraction"]) - + env_name = config["general"]["env_name"] source 
= config["data_selection"]["source_name_output"] @@ -463,13 +453,17 @@ def main(): else: source_list.append(source) for source_name in source_list: - # Below we run the analysis on the MC data - MAGIC_runs_and_dates = f'{source_name}_MAGIC_runs.txt' + # Below we run the analysis on the MC data + MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt(MAGIC_runs_and_dates, dtype=str, delimiter=",") if not NSB_match: - if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): + if (args.analysis_type == "onlyMC") or ( + args.analysis_type == "doEverything" + ): # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): - if not os.path.exists(f"{target_dir}/{source_name}/DL1/MC/protons_test"): + if not os.path.exists( + f"{target_dir}/{source_name}/DL1/MC/protons_test" + ): print("***** Splitting protons into 'train' and 'test' datasets...") split_train_test(target_dir, train_fraction, source_name) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index dc3b2693..9d82acb8 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -15,13 +15,13 @@ """ import argparse import glob -import joblib import logging import os # import time from pathlib import Path +import joblib import numpy as np import yaml @@ -55,6 +55,8 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match, source_name): List of the noise correction values for LST NSB_match : bool If real data are matched to pre-processed MCs or not + source_name : str + Name of the target source """ """ @@ -87,9 +89,9 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match, source_name): conf["MAGIC"] = MAGIC_config if not NSB_match: - file_name=f"{target_dir}/{source_name}/config_DL0_to_DL1.yaml" - else: - file_name=f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" + file_name = f"{target_dir}/{source_name}/config_DL0_to_DL1.yaml" + else: + file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" with open(file_name, "w") as f: lines = [ "mc_tel_ids:", @@ -106,7 +108,13 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match, source_name): def lists_and_bash_generator( - particle_type, target_dir, MC_path, SimTel_version, focal_length, env_name, source_name + particle_type, + target_dir, + MC_path, + SimTel_version, + focal_length, + env_name, + source_name, ): """ @@ -128,6 +136,8 @@ def lists_and_bash_generator( Focal length to be used to process MCs (e.g., 'nominal') env_name : str Name of the environment + source_name : str + Name of the target source """ if MC_path == "": @@ -401,7 +411,9 @@ def lists_and_bash_gen_MAGIC( f.writelines(lines) -def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name): +def directories_generator( + target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name +): """ Here we create all subdirectories for a given workspace and target name. 
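
The hunks that follow spell the observation tree out with per-level exists()/mkdir() pairs. The layout being created is <workspace>/<source>/DL1/Observations/<M1|M2>/<date>/<run>; an equivalent compact sketch (not the patch's implementation, with illustrative arguments) using os.makedirs, which creates missing parents and tolerates existing directories:

import os

def make_observation_dirs(target_dir, source, magic_runs):
    # magic_runs: rows like ("2020_11_19", "5093174") read from <source>_MAGIC_runs.txt
    for tel in ("M1", "M2"):
        for night, run_id in magic_runs:
            path = f"{target_dir}/{source}/DL1/Observations/{tel}/{night}/{run_id}"
            os.makedirs(path, exist_ok=True)  # no per-level existence checks needed

make_observation_dirs("/tmp/workspace", "CrabNebula", [("2020_11_19", "5093174")])
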
@@ -416,6 +428,8 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match, sour MAGIC dates and runs to be processed NSB_match : bool If real data are matched to pre-processed MCs or not + source_name : str + Name of the target source """ if NSB_match: @@ -495,21 +509,37 @@ def directories_generator(target_dir, telescope_ids, MAGIC_runs, NSB_match, sour if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M2"): os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2") for i in MAGIC_runs: - if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}"): - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}") - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}/{i[1]}") + if not os.path.exists( + f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}" + ): + os.mkdir( + f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}" + ) + os.mkdir( + f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}/{i[1]}" + ) else: - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}/{i[1]}") + os.mkdir( + f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}/{i[1]}" + ) if telescope_ids[-2] > 0: if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M1"): os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1") for i in MAGIC_runs: - if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}"): - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}") - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}/{i[1]}") + if not os.path.exists( + f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}" + ): + os.mkdir( + f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}" + ) + os.mkdir( + f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}/{i[1]}" + ) else: - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}/{i[1]}") + os.mkdir( + f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}/{i[1]}" + ) def main(): @@ -548,7 +578,6 @@ def main(): SimTel_version = config["general"]["SimTel_version"] env_name = config["general"]["env_name"] NSB_match = config["general"]["NSB_matching"] - # LST_runs_and_dates = config["general"]["LST_runs"] MC_gammas = str(Path(config["directories"]["MC_gammas"])) @@ -568,11 +597,11 @@ def main(): for source_name in source_list: target_dir = Path(config["directories"]["workspace_dir"]) - MAGIC_runs_and_dates = f'{source_name}_MAGIC_runs.txt' + MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( MAGIC_runs_and_dates, dtype=str, delimiter="," ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - + noise_value = [0, 0, 0] if not NSB_match: nsb = config["general"]["NSB_MC"] @@ -583,9 +612,9 @@ def main(): # TODO: fix here above print("*** Converting DL0 into DL1 data ***") - print(f'Process name: {source_name}') + print(f"Process name: {source_name}") print( - f'To check the jobs submitted to the cluster, type: squeue -n {source_name}' + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}" ) print("This process will take about 10 min to run if the IT cluster is free.") @@ -667,7 +696,12 @@ def main(): or (NSB_match) ): lists_and_bash_gen_MAGIC( - target_dir, telescope_ids, MAGIC_runs, source_name, env_name, NSB_match, + target_dir, + telescope_ids, + MAGIC_runs, + source_name, + env_name, + NSB_match, ) # MAGIC real data if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): list_of_MAGIC_runs = 
glob.glob(f"{source_name}_MAGIC-*.sh") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 3abdc6b8..4e8f59f2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -21,8 +21,8 @@ import logging import os from pathlib import Path -import joblib +import joblib import numpy as np import yaml @@ -46,11 +46,15 @@ def configfile_stereo(ids, target_dir, source_name, NSB_match): List of telescope IDs target_dir : str Path to the working directory + source_name : str + Name of the target source + NSB_match : bool + If real data are matched to pre-processed MCs or not """ if not NSB_match: - file_name=f"{target_dir}/{source_name}/config_stereo.yaml" - else: - file_name=f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" + file_name = f"{target_dir}/{source_name}/config_stereo.yaml" + else: + file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" with open(file_name, "w") as f: lines = [ f"mc_tel_ids:\n LST-1: {ids[0]}\n LST-2: {ids[1]}\n LST-3: {ids[2]}\n LST-4: {ids[3]}\n MAGIC-I: {ids[4]}\n MAGIC-II: {ids[5]}\n\n", @@ -69,8 +73,6 @@ def bash_stereo(target_dir, source, env_name, NSB_match): ---------- target_dir : str Path to the working directory - nsb : float - NSB level of the LST run(s) source : str Target name env_name : str @@ -81,7 +83,9 @@ def bash_stereo(target_dir, source, env_name, NSB_match): process_name = source if not NSB_match: - if not os.path.exists(f"{target_dir}/{source}/DL1/Observations/Coincident_stereo"): + if not os.path.exists( + f"{target_dir}/{source}/DL1/Observations/Coincident_stereo" + ): os.mkdir(f"{target_dir}/{source}/DL1/Observations/Coincident_stereo") listOfNightsLST = np.sort( @@ -119,12 +123,11 @@ def bash_stereo(target_dir, source, env_name, NSB_match): ] f.writelines(lines) else: - if not os.path.exists(f"{target_dir}/v{__version__}/{source}/DL1CoincidentStereo"): + if not os.path.exists( + f"{target_dir}/v{__version__}/{source}/DL1CoincidentStereo" + ): os.mkdir(f"{target_dir}/v{__version__}/{source}/DL1CoincidentStereo") - - - listOfNightsLST = np.sort( glob.glob(f"{target_dir}/v{__version__}/{source}/DL1Coincident/*") ) @@ -142,12 +145,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"ls {nightLST}/*LST*.h5 > {stereoDir}/logs/list_coin.txt" ) # generating a list with the DL1 coincident data files. 
process_size = ( - len( - np.genfromtxt( - f"{stereoDir}/logs/list_coin.txt", dtype="str" - ) - ) - - 1 + len(np.genfromtxt(f"{stereoDir}/logs/list_coin.txt", dtype="str")) - 1 ) if process_size < 0: continue @@ -162,14 +160,12 @@ def bash_stereo(target_dir, source, env_name, NSB_match): "ulimit -a\n\n", f"export INPUTDIR={nightLST}\n", f"export OUTPUTDIR={stereoDir}\n", - f"SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_TASK_ID}.log\n", f"time conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1", ] - with open( - f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w" - ) as f: + with open(f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w") as f: f.writelines(lines) @@ -183,14 +179,18 @@ def bash_stereoMC(target_dir, identification, env_name, source): target_dir : str Path to the working directory identification : str - Particle name. Options: protons, gammadiffuse + Particle name. Options: protons, gammadiffuse, gammas, protons_test env_name : str Name of the environment + source : str + Name of the target source """ process_name = source - if not os.path.exists(f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged"): + if not os.path.exists( + f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged" + ): os.mkdir(f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged") inputdir = f"{target_dir}/{source}/DL1/MC/{identification}/Merged" @@ -256,7 +256,7 @@ def main(): target_dir = Path(config["directories"]["workspace_dir"]) env_name = config["general"]["env_name"] - + NSB_match = config["general"]["NSB_matching"] telescope_ids = list(config["mc_tel_ids"].values()) source = config["data_selection"]["source_name_output"] @@ -269,7 +269,6 @@ def main(): source_list.append(source) for source_name in source_list: - print("***** Generating file config_stereo.yaml...") configfile_stereo(telescope_ids, target_dir, source_name, NSB_match) @@ -297,7 +296,7 @@ def main(): # Below we run the analysis on the real data if not NSB_match: - + if ( (args.analysis_type == "onlyReal") or (args.analysis_type == "doEverything") @@ -321,16 +320,14 @@ def main(): os.system(launch_jobs) else: - - print("***** Generating the bashscript...") bash_stereo(target_dir, source_name, env_name, NSB_match) print("***** Submitting processess to the cluster...") - print(f'Process name: {source_name}_stereo') + print(f"Process name: {source_name}_stereo") print( - f'To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo' + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo" ) # Below we run the bash scripts to find the stereo events From 0c45a2dc8127d05f612810311cf10235aaf6543c Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 3 May 2024 07:05:01 +0000 Subject: [PATCH 055/236] SLURM fixes --- .../semi_automatic_scripts/coincident_events.py | 6 +++--- .../database_production/nsb_level.py | 2 +- .../semi_automatic_scripts/merging_runs.py | 6 +++--- .../setting_up_config_and_dir.py | 14 +++++++------- .../semi_automatic_scripts/stereo_events.py | 8 ++++---- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index da2b1cfc..7f85bcfc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -174,8 +174,8 @@ def linking_bash_lst( "#SBATCH -p short\n", f"#SBATCH -J {source_name}_coincidence\n", f"#SBATCH --array=0-{process_size}\n", - "#SBATCH --mem=30g\n", - "#SBATCH -N 1\n\n", + "#SBATCH --mem=8g\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -234,7 +234,7 @@ def linking_bash_lst( "#SBATCH -p short\n", f"#SBATCH -J {process_name}_coincidence\n", f"#SBATCH --array=0-{process_size}%50\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 88c1ebcc..55391fad 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -42,7 +42,7 @@ def bash_scripts(run, date, config, env_name): "#!/bin/sh\n\n", "#SBATCH -p short,long\n", "#SBATCH -J nsb\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index e83a7b71..7b6e2301 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -164,7 +164,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): "#!/bin/sh\n\n", "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -235,7 +235,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): "#!/bin/sh\n\n", "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -392,7 +392,7 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{process_size}%50\n", "#SBATCH --mem=7g\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 9d82acb8..cd9f362c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -167,7 +167,7 @@ def lists_and_bash_generator( "#!/bin/sh\n\n", "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -202,7 +202,7 @@ def lists_and_bash_generator( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}%50\n", "#SBATCH --mem=10g\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s 
unlimited\n", "ulimit -a\n", @@ -250,7 +250,7 @@ def lists_and_bash_gen_MAGIC( "#!/bin/sh\n\n", "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n", @@ -318,7 +318,7 @@ def lists_and_bash_gen_MAGIC( "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -343,7 +343,7 @@ def lists_and_bash_gen_MAGIC( "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -370,7 +370,7 @@ def lists_and_bash_gen_MAGIC( "#SBATCH -p long\n", f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -396,7 +396,7 @@ def lists_and_bash_gen_MAGIC( "#SBATCH -p long\n", f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 4e8f59f2..6dc1cb09 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -110,7 +110,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): "#SBATCH -p short\n", f"#SBATCH -J {process_name}_stereo\n", f"#SBATCH --array=0-{process_size}%100\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -154,7 +154,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): "#SBATCH -p short\n", f"#SBATCH -J {process_name}_stereo\n", f"#SBATCH --array=0-{process_size}\n", - "#SBATCH -N 1\n\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -206,8 +206,8 @@ def bash_stereoMC(target_dir, identification, env_name, source): "#SBATCH -p xxl\n", f"#SBATCH -J {process_name}_stereo\n", f"#SBATCH --array=0-{process_size}%100\n", - "#SBATCH --mem=30g\n", - "#SBATCH -N 1\n\n", + "#SBATCH --mem=8g\n", + "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", From 672fa0ce89b7e5c81389d5b6443778aa9212d4ea Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 3 May 2024 07:53:15 +0000 Subject: [PATCH 056/236] bug --- .../lst1_magic/semi_automatic_scripts/merging_runs.py | 4 ++-- .../semi_automatic_scripts/setting_up_config_and_dir.py | 8 +++++--- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 7b6e2301..642a4240 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -514,10 +514,10 @@ def main(): print("***** Running merge_hdf_files.py on the MAGIC data files...") # Below we run the bash scripts to merge the MAGIC files - list_of_merging_scripts = np.sort(glob.glob(f"{source}_Merge_MAGIC_*.sh")) + 
list_of_merging_scripts = np.sort(glob.glob(f"{source_name}_Merge_MAGIC_*.sh")) if len(list_of_merging_scripts) < 1: logger.warning("no bash scripts") - return + continue for n, run in enumerate(list_of_merging_scripts): if n == 0: launch_jobs = f"merging{n}=$(sbatch --parsable {run})" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index cd9f362c..9945fd7c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -319,6 +319,7 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", + "#SBATCH --mem 2g\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -326,7 +327,7 @@ def lists_and_bash_gen_MAGIC( "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: f.writelines(lines) @@ -344,6 +345,7 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", + "#SBATCH --mem 2g\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -351,7 +353,7 @@ def lists_and_bash_gen_MAGIC( "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: f.writelines(lines) @@ -709,7 +711,7 @@ def main(): print( "Warning: no bash script has been produced. 
Please check the provided MAGIC_runs.txt and the MAGIC calibrated data" ) - return + continue for n, run in enumerate(list_of_MAGIC_runs): if n == 0: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 6dc1cb09..4b3177e2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -335,7 +335,7 @@ def main(): glob.glob(f"{source_name}_StereoEvents*.sh") ) if len(list_of_stereo_scripts) < 1: - return + continue for n, run in enumerate(list_of_stereo_scripts): if n == 0: launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" From 8ec28e6b4c7a7f05dd64388ce56367a3561efea4 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 3 May 2024 14:14:13 +0000 Subject: [PATCH 057/236] some fixes --- .../coincident_events.py | 24 +++---- .../database_production/nsb_level.py | 5 +- .../semi_automatic_scripts/merging_runs.py | 70 ++++++++++++++----- .../setting_up_config_and_dir.py | 24 +++++-- .../semi_automatic_scripts/stereo_events.py | 28 +++++--- 5 files changed, 104 insertions(+), 47 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 7f85bcfc..8def3618 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -160,12 +160,8 @@ def linking_bash_lst( if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): continue - process_size = ( - len( - np.genfromtxt(f"{outputdir}/logs/list_LST.txt", dtype="str") - ) - - 1 - ) + with open(f"{outputdir}/logs/list_LST.txt", "r") as f: + process_size = len(f.readlines()) - 1 if process_size < 0: continue @@ -176,6 +172,8 @@ def linking_bash_lst( f"#SBATCH --array=0-{process_size}\n", "#SBATCH --mem=8g\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={outputdir}/logs/slurm-%x.%j.out" + f"#SBATCH --error={outputdir}/logs/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -183,8 +181,8 @@ def linking_bash_lst( f"export OUTPUTDIR={outputdir}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1", + "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1", ] with open( f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", @@ -224,9 +222,8 @@ def linking_bash_lst( ) for nightMAGIC, nightLST in zip(listOfNightsMAGIC, listOfNightsLST): - process_size = ( - len(np.genfromtxt(f"{nightLST}/list_LST.txt", dtype="str")) - 1 - ) + with open(f"{nightLST}/list_LST.txt", "r") as f: + process_size = len(f.readlines()) - 1 with open(f"LST_coincident_{nightLST.split('/')[-1]}.sh", "w") as f: lines = [ @@ -234,7 +231,10 @@ def linking_bash_lst( "#SBATCH -p short\n", f"#SBATCH -J 
{process_name}_coincidence\n", f"#SBATCH --array=0-{process_size}%50\n", + "#SBATCH --mem=8g\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={nightLST}/slurm-%x.%j.out" + f"#SBATCH --error={nightLST}/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -242,7 +242,7 @@ def linking_bash_lst( f"export OUTPUTDIR={nightLST}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/coincidence_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/coincidence_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/{source_name}/config_coincidence.yaml >$LOG 2>&1", ] f.writelines(lines) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 55391fad..7ae1e7cc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -43,10 +43,13 @@ def bash_scripts(run, date, config, env_name): "#SBATCH -p short,long\n", "#SBATCH -J nsb\n", "#SBATCH -n 1\n\n", + "#SBATCH --output=slurm-nsb-%x.%j.out" + "#SBATCH --error=slurm-nsb-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", - f"time conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}.log 2>&1 \n\n", + f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}_" + + "${SLURM_JOB_ID}.log 2>&1 \n\n", ] with open(f"nsb_{date}_run_{run}.sh", "w") as f: f.writelines(lines) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 642a4240..5050d403 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -165,6 +165,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={MAGIC_DL1_dir}/Merged/slurm-%x.%j.out" + f"#SBATCH --error={MAGIC_DL1_dir}/Merged/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -182,8 +184,12 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.mkdir( f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" ) # Creating a merged directory for the respective run + os.system( + f'find {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} -type f -name "*.h5" -size -3k -delete' + ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/merge_M1_{i[0]}_{i[1]}_" + + "${SLURM_JOB_ID}.log\n" ) if os.path.exists(f"{MAGIC_DL1_dir}/M2"): @@ -196,8 +202,12 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.mkdir( f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" ) # Creating a merged directory for the respective run + os.system( + f'find {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} -type f -name "*.h5" 
-size -3k -delete' + ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/merge_M2_{i[0]}_{i[1]}_" + + "${SLURM_JOB_ID}.log\n" ) elif identification == "1_M1M2": @@ -208,7 +218,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged"): os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged") f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/merge_{i[0]}_{[1]}_" + + "${SLURM_JOB_ID}.log\n" ) else: for i in MAGIC_runs: @@ -217,7 +228,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}" ) # Creating a merged directory for each night f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i[0]} \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i[0]} >{MAGIC_DL1_dir}/Merged/Merged_{i[0]}/merge_night_{i[0]}_" + + "${SLURM_JOB_ID}.log \n" ) else: @@ -236,6 +248,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={MAGIC_DL1_dir}/Merged/slurm-%x.%j.out" + f"#SBATCH --error={MAGIC_DL1_dir}/Merged/slurm-%x.%j.err" + "#SBATCH --mem 2g\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -268,9 +283,12 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.mkdir( f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" ) # Creating a merged directory for the respective run - + os.system( + f'find {MAGIC_DL1_dir}/M1/{i}/{r} -type f -name "*.h5" -size -3k -delete' + ) f.write( - f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M1_{i}_{r}.log \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M1_{i}_{r}_" + + "${SLURM_JOB_ID}.log \n" ) if os.path.exists(f"{MAGIC_DL1_dir}/M2"): @@ -298,9 +316,12 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.mkdir( f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" ) # Creating a merged directory for the respective run - + os.system( + f'find {MAGIC_DL1_dir}/M2/{i}/{r} -type f -name "*.h5" -size -3k -delete' + ) f.write( - f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M2_{i}_{r}.log \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M2_{i}_{r}_" + + "${SLURM_JOB_ID}.log \n" ) elif 
identification == "1_M1M2": if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( @@ -316,8 +337,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): ] for r in runs: if (len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0) and ( - len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) - ) > 0: + len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) > 0 + ): if not os.path.exists( f"{MAGIC_DL1_dir}/Merged/{i}/Merged" ): @@ -327,7 +348,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): ): os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs") f.write( - f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}.log \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}_{r}_" + + "${SLURM_JOB_ID}.log \n" ) else: dates = [ @@ -340,11 +362,19 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.mkdir( f"{MAGIC_DL1_dir}/Merged/Merged_{i}" ) # Creating a merged directory for each night - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs"): - os.mkdir(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") - f.write( - f"time conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}.log \n" - ) + if ( + len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*M1*.h5")) > 0 + ) and ( + len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*M2*.h5")) > 0 + ): + if not os.path.exists( + f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs" + ): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" + + "${SLURM_JOB_ID}.log \n" + ) def mergeMC(target_dir, identification, env_name, cwd, source_name): @@ -393,13 +423,15 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): f"#SBATCH --array=0-{process_size}%50\n", "#SBATCH --mem=7g\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={MC_DL1_dir}/{identification}/Merged/slurm-%x.%j.out" + f"#SBATCH --error={MC_DL1_dir}/{identification}/Merged/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", f"export LOG={MC_DL1_dir}/{identification}/Merged" - + "/merged_${SLURM_ARRAY_TASK_ID}.log\n", + + "/merged_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", ] f.writelines(lines_bash_file) @@ -514,7 +546,9 @@ def main(): print("***** Running merge_hdf_files.py on the MAGIC data files...") # Below we run the bash scripts to merge the MAGIC files - list_of_merging_scripts = np.sort(glob.glob(f"{source_name}_Merge_MAGIC_*.sh")) + list_of_merging_scripts = np.sort( + glob.glob(f"{source_name}_Merge_MAGIC_*.sh") + ) if len(list_of_merging_scripts) < 1: logger.warning("no bash scripts") continue diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 9945fd7c..4f2969e5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -168,6 +168,8 @@ def lists_and_bash_generator( "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-linkMC-%x.%j.out" + f"#SBATCH --error={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-linkMC-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -202,6 +204,8 @@ def lists_and_bash_generator( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}%50\n", "#SBATCH --mem=10g\n", + f"#SBATCH --output={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%j.out" + f"#SBATCH --error={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%j.err" "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", @@ -212,7 +216,7 @@ def lists_and_bash_generator( "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "cd $SAMPLE\n\n", f"export LOG={target_dir}/{source_name}/DL1/MC/{particle_type}" - + "/simtel_{$SAMPLE}_all.log\n", + + "/simtel_{$SAMPLE}_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}_all.log\n", "cat list_dl0_ok.txt | while read line\n", "do\n", f" cd {target_dir}/{source_name}/../\n", @@ -251,6 +255,8 @@ def lists_and_bash_gen_MAGIC( "#SBATCH -p short\n", f"#SBATCH -J {process_name}\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j.out" + f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n", @@ -319,6 +325,8 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%j.out" + f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%j.err" "#SBATCH --mem 2g\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", @@ -326,7 +334,7 @@ def lists_and_bash_gen_MAGIC( f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: @@ -345,6 +353,8 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%j.out" + f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%j.err" "#SBATCH --mem 2g\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", @@ -352,7 +362,7 @@ def lists_and_bash_gen_MAGIC( f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - 
"export LOG=$OUTPUTDIR/logs/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: @@ -373,6 +383,8 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%j.out" + f"#SBATCH --error={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -380,7 +392,7 @@ def lists_and_bash_gen_MAGIC( f"cd {target_dir}/{source}/../\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/real_0_1_task_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", "", ] @@ -399,6 +411,8 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%j.out" + f"#SBATCH --error={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -406,7 +420,7 @@ def lists_and_bash_gen_MAGIC( f"cd {target_dir}/{source}/../\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/real_0_1_task_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", "", ] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 4b3177e2..6bd82f32 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -100,9 +100,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match): os.system( f"ls {nightLST}/*LST*.h5 > {nightLST}/list_coin.txt" ) # generating a list with the DL1 coincident data files. 
- process_size = ( - len(np.genfromtxt(f"{nightLST}/list_coin.txt", dtype="str")) - 1 - ) + with open(f"{nightLST}/list_coin.txt", "r") as f: + process_size = len(f.readlines()) - 1 with open(f"StereoEvents_real_{nightLST.split('/')[-1]}.sh", "w") as f: lines = [ @@ -111,6 +110,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"#SBATCH -J {process_name}_stereo\n", f"#SBATCH --array=0-{process_size}%100\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={stereoDir}/logs/slurm-%x.%j.out" + f"#SBATCH --error={stereoDir}/logs/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -118,7 +119,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"export OUTPUTDIR={stereoDir}\n", "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/stereo_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_stereo.yaml >$LOG 2>&1", ] f.writelines(lines) @@ -144,9 +145,9 @@ def bash_stereo(target_dir, source, env_name, NSB_match): os.system( f"ls {nightLST}/*LST*.h5 > {stereoDir}/logs/list_coin.txt" ) # generating a list with the DL1 coincident data files. - process_size = ( - len(np.genfromtxt(f"{stereoDir}/logs/list_coin.txt", dtype="str")) - 1 - ) + with open(f"{stereoDir}/logs/list_coin.txt", "r") as f: + process_size = len(f.readlines()) - 1 + if process_size < 0: continue lines = [ @@ -155,6 +156,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"#SBATCH -J {process_name}_stereo\n", f"#SBATCH --array=0-{process_size}\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={stereoDir}/logs/slurm-%x.%j.out" + f"#SBATCH --error={stereoDir}/logs/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -162,8 +165,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"export OUTPUTDIR={stereoDir}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_TASK_ID}.log\n", - f"time conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1", + "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1", ] with open(f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w") as f: f.writelines(lines) @@ -198,7 +201,8 @@ def bash_stereoMC(target_dir, identification, env_name, source): os.system( f"ls {inputdir}/dl1*.h5 > {inputdir}/list_coin.txt" ) # generating a list with the DL1 coincident data files. 
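# The "if process_size < 0: continue" guard introduced above covers the other
# edge case: an empty list file. readlines() returns [] there, so process_size
# becomes -1 and the resulting "#SBATCH --array=0--1" would not be a valid
# Slurm range. Illustrative sketch (stand-in file name):
with open("empty_list.txt", "w"):
    pass  # create an empty list file

with open("empty_list.txt", "r") as sketch:
    process_size = len(sketch.readlines()) - 1
if process_size < 0:
    print("no input files listed; skip this night")  # mirrors the continue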
- process_size = len(np.genfromtxt(f"{inputdir}/list_coin.txt", dtype="str")) - 1 + with open(f"{inputdir}/list_coin.txt", "r") as f: + process_size = len(f.readlines()) - 1 with open(f"StereoEvents_MC_{identification}.sh", "w") as f: lines = [ @@ -208,6 +212,8 @@ def bash_stereoMC(target_dir, identification, env_name, source): f"#SBATCH --array=0-{process_size}%100\n", "#SBATCH --mem=8g\n", "#SBATCH -n 1\n\n", + f"#SBATCH --output={inputdir}/StereoMerged/logs/slurm-%x.%j.out" + f"#SBATCH --error={inputdir}/StereoMerged/logs/slurm-%x.%j.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -215,7 +221,7 @@ def bash_stereoMC(target_dir, identification, env_name, source): f"export OUTPUTDIR={inputdir}/StereoMerged\n", "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/stereo_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_stereo.yaml >$LOG 2>&1", ] f.writelines(lines) From 818551d41a0c4d7b039f1e88395f8e9d6f52d569 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 3 May 2024 15:53:44 +0000 Subject: [PATCH 058/236] bug in slurm --- .../coincident_events.py | 12 ++++---- .../semi_automatic_scripts/merging_runs.py | 6 ++-- .../setting_up_config_and_dir.py | 30 +++++++++---------- .../semi_automatic_scripts/stereo_events.py | 18 +++++------ 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 8def3618..27aeb362 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -172,8 +172,8 @@ def linking_bash_lst( f"#SBATCH --array=0-{process_size}\n", "#SBATCH --mem=8g\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={outputdir}/logs/slurm-%x.%j.out" - f"#SBATCH --error={outputdir}/logs/slurm-%x.%j.err" + f"#SBATCH --output={outputdir}/logs/slurm-%x.%A_%a.out" + f"#SBATCH --error={outputdir}/logs/slurm-%x.%A_%a.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -181,7 +181,7 @@ def linking_bash_lst( f"export OUTPUTDIR={outputdir}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1", ] with open( @@ -233,8 +233,8 @@ def linking_bash_lst( f"#SBATCH --array=0-{process_size}%50\n", "#SBATCH --mem=8g\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={nightLST}/slurm-%x.%j.out" - f"#SBATCH --error={nightLST}/slurm-%x.%j.err" + f"#SBATCH --output={nightLST}/slurm-%x.%A_%a.out" + f"#SBATCH --error={nightLST}/slurm-%x.%A_%a.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -242,7 +242,7 @@ def linking_bash_lst( f"export OUTPUTDIR={nightLST}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export 
LOG=$OUTPUTDIR/coincidence_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/{source_name}/config_coincidence.yaml >$LOG 2>&1", ] f.writelines(lines) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 5050d403..64cfbff2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -423,15 +423,15 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): f"#SBATCH --array=0-{process_size}%50\n", "#SBATCH --mem=7g\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={MC_DL1_dir}/{identification}/Merged/slurm-%x.%j.out" - f"#SBATCH --error={MC_DL1_dir}/{identification}/Merged/slurm-%x.%j.err" + f"#SBATCH --output={MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a.out" + f"#SBATCH --error={MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", f"export LOG={MC_DL1_dir}/{identification}/Merged" - + "/merged_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + + "/merged_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", ] f.writelines(lines_bash_file) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 4f2969e5..7ea67a4c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -204,8 +204,8 @@ def lists_and_bash_generator( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}%50\n", "#SBATCH --mem=10g\n", - f"#SBATCH --output={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%j.out" - f"#SBATCH --error={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%j.err" + f"#SBATCH --output={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%A_%a.out" + f"#SBATCH --error={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%A_%a.err" "#SBATCH -n 1\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", @@ -216,7 +216,7 @@ def lists_and_bash_generator( "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "cd $SAMPLE\n\n", f"export LOG={target_dir}/{source_name}/DL1/MC/{particle_type}" - + "/simtel_{$SAMPLE}_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}_all.log\n", + + "/simtel_{$SAMPLE}_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}_all.log\n", "cat list_dl0_ok.txt | while read line\n", "do\n", f" cd {target_dir}/{source_name}/../\n", @@ -325,8 +325,8 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%j.out" - f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%j.err" + f"#SBATCH 
--output={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a.out" + f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a.err" "#SBATCH --mem 2g\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", @@ -334,7 +334,7 @@ def lists_and_bash_gen_MAGIC( f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: @@ -353,8 +353,8 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%j.out" - f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%j.err" + f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a.out" + f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a.err" "#SBATCH --mem 2g\n\n", "ulimit -l unlimited\n", "ulimit -s unlimited\n", @@ -362,7 +362,7 @@ def lists_and_bash_gen_MAGIC( f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", ] with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: @@ -383,8 +383,8 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%j.out" - f"#SBATCH --error={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%j.err" + f"#SBATCH --output={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%A_%a.out" + f"#SBATCH --error={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%A_%a.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -392,7 +392,7 @@ def lists_and_bash_gen_MAGIC( f"cd {target_dir}/{source}/../\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/real_0_1_task_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", "", ] @@ -411,8 +411,8 @@ def lists_and_bash_gen_MAGIC( f"#SBATCH -J {process_name}\n", f"#SBATCH --array=0-{number_of_nodes}\n", "#SBATCH -n 1\n\n", - f"#SBATCH 
--output={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%j.out" - f"#SBATCH --error={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%j.err" + f"#SBATCH --output={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%A_%a.out" + f"#SBATCH --error={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%A_%a.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -420,7 +420,7 @@ def lists_and_bash_gen_MAGIC( f"cd {target_dir}/{source}/../\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/real_0_1_task_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", "", ] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 6bd82f32..0cdb5938 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -110,8 +110,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"#SBATCH -J {process_name}_stereo\n", f"#SBATCH --array=0-{process_size}%100\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={stereoDir}/logs/slurm-%x.%j.out" - f"#SBATCH --error={stereoDir}/logs/slurm-%x.%j.err" + f"#SBATCH --output={stereoDir}/logs/slurm-%x.%A_%a.out" + f"#SBATCH --error={stereoDir}/logs/slurm-%x.%A_%a.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -119,7 +119,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"export OUTPUTDIR={stereoDir}\n", "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/stereo_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_stereo.yaml >$LOG 2>&1", ] f.writelines(lines) @@ -156,8 +156,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"#SBATCH -J {process_name}_stereo\n", f"#SBATCH --array=0-{process_size}\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={stereoDir}/logs/slurm-%x.%j.out" - f"#SBATCH --error={stereoDir}/logs/slurm-%x.%j.err" + f"#SBATCH --output={stereoDir}/logs/slurm-%x.%A_%a.out" + f"#SBATCH --error={stereoDir}/logs/slurm-%x.%A_%a.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -165,7 +165,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f"export OUTPUTDIR={stereoDir}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1", ] with open(f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w") as f: @@ -212,8 +212,8 @@ def bash_stereoMC(target_dir, identification, env_name, source): f"#SBATCH 
--array=0-{process_size}%100\n", "#SBATCH --mem=8g\n", "#SBATCH -n 1\n\n", - f"#SBATCH --output={inputdir}/StereoMerged/logs/slurm-%x.%j.out" - f"#SBATCH --error={inputdir}/StereoMerged/logs/slurm-%x.%j.err" + f"#SBATCH --output={inputdir}/StereoMerged/logs/slurm-%x.%A_%a.out" + f"#SBATCH --error={inputdir}/StereoMerged/logs/slurm-%x.%A_%a.err" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -221,7 +221,7 @@ def bash_stereoMC(target_dir, identification, env_name, source): f"export OUTPUTDIR={inputdir}/StereoMerged\n", "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/stereo_${SLURM_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_stereo.yaml >$LOG 2>&1", ] f.writelines(lines) From 04e54d067d431c6e857680ceb37dc190009c6d6f Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 8 May 2024 13:59:20 +0000 Subject: [PATCH 059/236] moved production of slurm script lines to a separate function --- .../semi_automatic_scripts/__init__.py | 2 + .../semi_automatic_scripts/clusters.py | 40 ++++++ .../setting_up_config_and_dir.py | 133 +++++++----------- 3 files changed, 95 insertions(+), 80 deletions(-) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 1b4856d6..56df1adc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -1,3 +1,4 @@ +from .clusters import slurm_lines from .coincident_events import configfile_coincidence, linking_bash_lst from .merging_runs import cleaning, merge, mergeMC, split_train_test from .setting_up_config_and_dir import ( @@ -22,4 +23,5 @@ "configfile_stereo", "bash_stereo", "bash_stereoMC", + "slurm_lines", ] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py new file mode 100644 index 00000000..c8b8fb56 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py @@ -0,0 +1,40 @@ +""" +Module for generating bash script lines for running analysis in different clusters +""" + + +def slurm_lines(p, J, array=None, mem=None, out_err=None): + """ + Function for creating the general lines that slurm scripts are starting with. 
+ + Parameters + ---------- + p : str + Name of the queue + J : str + Job name + array : None or int + If not none array of jobs from 0 to array will be made + mem : None or str + Requested memory + out_err : None or str + If the output should be written to a specific output file + + Returns + ------- + list + List of strings + """ + lines = [ + "#!/bin/sh\n\n", + f"#SBATCH -p {p}\n", + f"#SBATCH -J {J}\n", + f"#SBATCH --array=0-{array}\n" if array is not None else "", + "#SBATCH -n 1\n\n", + f"#SBATCH --output={out_err}.out\n" if out_err is not None else "", + f"#SBATCH --error={out_err}.err\n\n" if out_err is not None else "", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + ] + return lines diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 7ea67a4c..c8df0330 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -27,6 +27,7 @@ from magicctapipe import __version__ from magicctapipe.io import resource_file +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines __all__ = [ "config_file_gen", @@ -163,16 +164,12 @@ def lists_and_bash_generator( #################################################################################### with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: - lines_of_config_file = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-linkMC-%x.%j.out" - f"#SBATCH --error={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-linkMC-%x.%j.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + p="short", + J=process_name, + out_err=f"{target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-linkMC-%x.%j", + ) + lines_of_config_file = slurm + [ "while read -r -u 3 lineA && read -r -u 4 lineB\n", "do\n", f" cd {target_dir}/{source_name}/DL1/MC/{particle_type}\n", @@ -198,18 +195,14 @@ def lists_and_bash_generator( number_of_nodes = len(number_of_nodes) - 1 with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: - lines_of_config_file = [ - "#!/bin/sh\n\n", - "#SBATCH -p xxl\n", - f"#SBATCH -J {process_name}\n", - f"#SBATCH --array=0-{number_of_nodes}%50\n", - "#SBATCH --mem=10g\n", - f"#SBATCH --output={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%A_%a.out" - f"#SBATCH --error={target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%A_%a.err" - "#SBATCH -n 1\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n", + slurm = slurm_lines( + p="xxl", + J=process_name, + array=number_of_nodes, + mem="10g", + out_err=f"{target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%A_%a", + ) + lines_of_config_file = slurm + [ f"cd {target_dir}/{source_name}/DL1/MC/{particle_type}\n\n", f"export INF={target_dir}/{source_name}\n", f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", @@ -250,17 +243,12 @@ def lists_and_bash_gen_MAGIC( If real data are matched to pre-processed MCs or not """ process_name = source - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j.out" - f"#SBATCH 
--error={target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n", - ] + lines = slurm_lines( + p="short", + J=process_name, + out_err=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", + ) + with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: f.writelines(lines) if NSB_match: @@ -319,18 +307,14 @@ def lists_and_bash_gen_MAGIC( number_of_nodes = len(number_of_nodes) - 1 if number_of_nodes < 0: continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a.out" - f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a.err" - "#SBATCH --mem 2g\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + p="short", + J=process_name, + array=number_of_nodes, + mem="2g", + out_err=f"{target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", + ) + lines = slurm + [ f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", @@ -347,18 +331,14 @@ def lists_and_bash_gen_MAGIC( number_of_nodes = len(number_of_nodes) - 1 if number_of_nodes < 0: continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a.out" - f"#SBATCH --error={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a.err" - "#SBATCH --mem 2g\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + p="short", + J=process_name, + array=number_of_nodes, + mem="2g", + out_err=f"{target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", + ) + lines = slurm + [ f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", @@ -377,17 +357,14 @@ def lists_and_bash_gen_MAGIC( number_of_nodes = len(number_of_nodes) - 1 with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p long\n", - f"#SBATCH -J {process_name}\n", - f"#SBATCH --array=0-{number_of_nodes}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%A_%a.out" - f"#SBATCH --error={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + p="long", + J=process_name, + array=number_of_nodes, + mem="2g", + out_err=f"{target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%A_%a", + ) + lines = slurm + [ f"export OUTPUTDIR={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}\n", f"cd {target_dir}/{source}/../\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", @@ -405,17 +382,14 @@ def lists_and_bash_gen_MAGIC( number_of_nodes = len(number_of_nodes) - 1 with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p long\n", - f"#SBATCH -J {process_name}\n", - f"#SBATCH 
--array=0-{number_of_nodes}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%A_%a.out" - f"#SBATCH --error={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + p="long", + J=process_name, + array=number_of_nodes, + mem="2g", + out_err=f"{target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%A_%a", + ) + lines = slurm + [ f"export OUTPUTDIR={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}\n", f"cd {target_dir}/{source}/../\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", @@ -702,7 +676,6 @@ def main(): launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" else: launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - os.system(launch_jobs_MC) # Below we run the analysis on the MAGIC data From 27f234c3966de3db74b391025965ebaf828d0c01 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 8 May 2024 14:02:25 +0000 Subject: [PATCH 060/236] added acccidently removed new line --- .../semi_automatic_scripts/setting_up_config_and_dir.py | 1 + 1 file changed, 1 insertion(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index c8df0330..d88a61c8 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -676,6 +676,7 @@ def main(): launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" else: launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + os.system(launch_jobs_MC) # Below we run the analysis on the MAGIC data From 151d55b30fe3cd0a9f36384104fea94309ea8251 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 8 May 2024 14:25:49 +0000 Subject: [PATCH 061/236] using bash slurm lines generated in clusters.py --- .../coincident_events.py | 41 ++++++-------- .../semi_automatic_scripts/merging_runs.py | 55 +++++++------------ 2 files changed, 37 insertions(+), 59 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 27aeb362..2864d684 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -25,6 +25,7 @@ import yaml from magicctapipe import __version__ +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines __all__ = ["configfile_coincidence", "linking_bash_lst"] @@ -165,18 +166,14 @@ def linking_bash_lst( if process_size < 0: continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {source_name}_coincidence\n", - f"#SBATCH --array=0-{process_size}\n", - "#SBATCH --mem=8g\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={outputdir}/logs/slurm-%x.%A_%a.out" - f"#SBATCH --error={outputdir}/logs/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - 
"ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + p="short", + J=f"{source_name}_coincidence", + array=process_size, + mem="8g", + out_err=f"{outputdir}/logs/slurm-%x.%A_%a", + ) + lines = slurm + [ f"export INM={MAGIC_DL1_dir}/Merged/Merged_{str(Y_M).zfill(4)}_{str(M_M).zfill(2)}_{str(D_M).zfill(2)}\n", f"export OUTPUTDIR={outputdir}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", @@ -226,18 +223,14 @@ def linking_bash_lst( process_size = len(f.readlines()) - 1 with open(f"LST_coincident_{nightLST.split('/')[-1]}.sh", "w") as f: - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}_coincidence\n", - f"#SBATCH --array=0-{process_size}%50\n", - "#SBATCH --mem=8g\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={nightLST}/slurm-%x.%A_%a.out" - f"#SBATCH --error={nightLST}/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + p="short", + J=f"{process_name}_coincidence", + array=process_size, + mem="8g", + out_err=f"{nightLST}/slurm-%x.%A_%a", + ) + lines = slurm + [ f"export INM={nightMAGIC}\n", f"export OUTPUTDIR={nightLST}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/list_LST.txt))\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 64cfbff2..77977d67 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -40,6 +40,7 @@ from tqdm import tqdm from magicctapipe import __version__ +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines __all__ = ["cleaning", "split_train_test", "merge", "mergeMC"] @@ -160,17 +161,11 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.mkdir(f"{MAGIC_DL1_dir}/Merged") with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={MAGIC_DL1_dir}/Merged/slurm-%x.%j.out" - f"#SBATCH --error={MAGIC_DL1_dir}/Merged/slurm-%x.%j.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - ] + lines = slurm_lines( + p="short", + J=process_name, + out_err=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", + ) f.writelines(lines) if identification == "0_subruns": @@ -243,18 +238,12 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): ): if not os.path.exists(f"{MAGIC_DL1_dir}/Merged"): os.mkdir(f"{MAGIC_DL1_dir}/Merged") - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={MAGIC_DL1_dir}/Merged/slurm-%x.%j.out" - f"#SBATCH --error={MAGIC_DL1_dir}/Merged/slurm-%x.%j.err" - "#SBATCH --mem 2g\n\n", - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - ] + lines = slurm_lines( + p="short", + J=process_name, + mem="2g", + out_err=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", + ) with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: f.writelines(lines) if identification == "0_subruns": @@ -416,18 +405,14 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): cleaning(list_of_nodes, cwd) # This will delete the (possibly) failed runs. 
with open(f"Merge_MC_{identification}.sh", "w") as f: - lines_bash_file = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}\n", - f"#SBATCH --array=0-{process_size}%50\n", - "#SBATCH --mem=7g\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a.out" - f"#SBATCH --error={MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + p="short", + array=process_size, + mem="7g", + J=process_name, + out_err=f"{MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a", + ) + lines_bash_file = slurm + [ f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", f"export LOG={MC_DL1_dir}/{identification}/Merged" From 76ed2e2af28c161fb6290cd595b17eda42ec7311 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 8 May 2024 14:51:04 +0000 Subject: [PATCH 062/236] Database updates --- .../database_production/create_LST_table.py | 22 +++++--- .../database_production/lstchain_version.py | 53 +++++++++++++------ .../database_production/nsb_level.py | 35 ++++++++++-- .../database_production/nsb_to_h5.py | 19 ++++--- .../semi_automatic_scripts/list_from_h5.py | 9 ++-- 5 files changed, 99 insertions(+), 39 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index b713dfde..f7211932 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -34,13 +34,18 @@ def main(): ] df_cut = df[needed_cols] - df_cut["nsb"] = np.repeat(np.nan, len(df_cut)) - - df_cut["lstchain_0.9"] = np.zeros(len(df_cut), dtype=bool) - - df_cut["lstchain_0.10"] = np.zeros(len(df_cut), dtype=bool) - - df_cut["error_code"] = np.repeat(np.nan, len(df_cut)) + df_cut=df_cut.assign(nsb = np.nan) + df_cut=df_cut.assign(lstchain_versions = '[]') + df_cut=df_cut.assign(last_lstchain_file = '') + df_cut=df_cut.assign(processed_lstchain_file = '') + df_cut=df_cut.assign(error_code_nsb = -1) + + df_cut=df_cut.assign(error_code_coincidence = -1) + df_cut=df_cut.assign(error_code_stereo = -1) + + + + if os.path.isfile( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5" ): @@ -53,11 +58,12 @@ def main(): ) df_cut = df_cut.sort_values(by=["DATE", "source"]) # TODO check if fine with update and nsb - + df_cut=df_cut.reset_index(drop=True) df_cut.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w", + min_itemsize={'lstchain_versions':20, 'last_lstchain_file':90,'processed_lstchain_file':90} ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py index 7386d66a..3a2a4b74 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py @@ -6,7 +6,9 @@ import os import pandas as pd - +import numpy as np +import glob +from string import ascii_letters def main(): @@ -18,31 +20,48 @@ def main(): "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", 
key="joint_obs", ) + + for i, row in df_LST.iterrows(): - lst_9 = False - lst_10 = False + version=[] run = row["LST1_run"] run = format(int(run), "05d") date = row["DATE"] - - if os.path.isfile( - f"/fefs/aswg/data/real/DL1/{date}/v0.9/tailcut84/dl1_LST-1.Run{run}.h5" - ): - lst_9 = True - if os.path.isfile( - f"/fefs/aswg/data/real/DL1/{date}/v0.10/tailcut84/dl1_LST-1.Run{run}.h5" - ): - lst_10 = True - if (lst_9 == False) and (lst_10 == False): - df_LST.at[i, "error_code"] = "002" - df_LST.at[i, "lstchain_0.9"] = lst_9 - df_LST.at[i, "lstchain_0.10"] = lst_10 - + directories_version=[i.split('/')[-1] for i in glob.glob(f"/fefs/aswg/data/real/DL1/{date}/v*")] + + v_number=np.sort([float(i.replace('v0.','').rstrip(ascii_letters).split('_')[0]) for i in directories_version]).tolist() + + + + v_number=[str(i).replace('.0','') for i in v_number] + + for vers in v_number: + + if os.path.isfile( + f"/fefs/aswg/data/real/DL1/{date}/v0.{vers}/tailcut84/dl1_LST-1.Run{run}.h5" + ): + if vers not in version: + version.append(vers) + + version=list(version) + + + version=[f'v0.{i}'for i in version] + + if len(version)>0: + + df_LST.loc[i,'last_lstchain_file']=f"/fefs/aswg/data/real/DL1/{date}/{version[-1]}/tailcut84/dl1_LST-1.Run{run}.h5" + else: + df_LST.loc[i,'last_lstchain_file']=f"/fefs/aswg/data/real/DL1/{date}/{version}/tailcut84/dl1_LST-1.Run{run}.h5" + + df_LST.loc[i, "lstchain_versions"] = str(version) + df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w", + min_itemsize={'lstchain_versions':20, 'last_lstchain_file':90,'processed_lstchain_file':90} ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 7ae1e7cc..dfc70ec3 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -96,6 +96,9 @@ def main(): "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", ) + lstchain_version=config["general"]["LST_version"] + + min = datetime.strptime(args.begin_date, "%Y_%m_%d") max = datetime.strptime(args.end_date, "%Y_%m_%d") @@ -105,11 +108,17 @@ def main(): df_LST = df_LST[df_LST["date"] <= max] df_LST = df_LST.drop(columns="date") - run_LST = df_LST["LST1_run"] - date_LST = df_LST["DATE"] + print("***** Generating bashscripts...") - for run_number, date in zip(run_LST, date_LST): + for i, row in df_LST.iterrows(): + if lstchain_version not in str(row['lstchain_versions'].replace(']','').replace('[','').split(',')): + continue + run_number = row["LST1_run"] + date = row["DATE"] + df_LST.loc[i,'processed_lstchain_file']=f"/fefs/aswg/data/real/DL1/{date}/{lstchain_version}/tailcut84/dl1_LST-1.Run{run_number}.h5" + df_LST.loc[i,'error_code_nsb']=np.nan bash_scripts(run_number, date, args.config_file, env_name) + print("Process name: nsb") print("To check the jobs submitted to the cluster, type: squeue -n nsb") list_of_bash_scripts = np.sort(glob.glob("nsb_*_run_*.sh")) @@ -118,7 +127,23 @@ def main(): print( "Warning: no bash script has been produced to evaluate the NSB level for the provided LST runs. 
Please check the input list" ) - return + return print("Update database and launch jobs") + df_old = pd.read_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", + ) + df_LST = pd.concat([df_LST, df_old]).drop_duplicates( + subset="LST1_run", keep="first" + ) + df_LST = df_LST.sort_values(by=["DATE", "source","LST1_run"]) + + + df_LST.to_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", + mode="w", + min_itemsize={'lstchain_versions':20, 'last_lstchain_file':90,'processed_lstchain_file':90} + ) for n, run in enumerate(list_of_bash_scripts): if n == 0: launch_jobs = f"nsb{n}=$(sbatch --parsable {run})" @@ -126,6 +151,8 @@ def main(): launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})" os.system(launch_jobs) + + if __name__ == "__main__": diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index 2aec74cd..c83380b0 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -60,10 +60,12 @@ def main(): df_new = df_new.sort_values(by=["DATE", "source", "LST1_run"]) - df_new["error_code"] = df_new["error_code"].replace("000", np.nan) - - df_new["error_code"] = np.where(df_new["nsb"] <= 3.0, df_new["error_code"], "001") - df_new["error_code"] = np.where(df_new["nsb"].notna(), df_new["error_code"], "000") + df_new.loc[df_new['error_code_nsb'].isna(),"error_code_nsb"] = '1' + + df_new.loc[df_new['nsb'].notna(),"error_code_nsb"] = '0' + df_new.loc[df_new['nsb'] > 3.0,"error_code_nsb"] = '2' + + df_new = df_new[ [ "source", @@ -73,9 +75,12 @@ def main(): "MAGIC_trigger", "MAGIC_HV", "nsb", - "lstchain_0.9", - "lstchain_0.10", - "error_code", + "lstchain_versions", + "last_lstchain_file", + "processed_lstchain_file", + "error_code_nsb", + "error_code_coincidence", + "error_code_stereo", ] ] df_new.to_hdf( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 27db6811..e889dc5d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -210,21 +210,24 @@ def main(): "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", ) # TODO: put this file in a shared folder - + df_LST(subset=["LST1_run"], inplace=True) df_LST = split_lst_date(df_LST) df_LST = df_LST.astype( {"YY_LST": int, "MM_LST": int, "DD_LST": int, "nsb": float, "LST1_run": int} ) stereo = True + lstchain_version=config["general"]["LST_version"] + mask=(df_LST['processed_lstchain_file'].str.split('/')[-3]==lstchain_version) + df_LST=df_LST[mask] if source_in is None: df_LST.query( - f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & nsb <=3.0 & error_code.isnull()', + f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & nsb <=3.0 & error_code_nsb==0', inplace=True, ) else: df_LST.query( - f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & nsb <=3.0 & error_code.isnull()', + f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & nsb <=3.0 & error_code_nsb==0', inplace=True, ) From 
32f770ad2f891fd7eff241a2187731efcbc345c9 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 8 May 2024 16:30:30 +0000 Subject: [PATCH 063/236] added missing --mem line --- .../scripts/lst1_magic/semi_automatic_scripts/clusters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py index c8b8fb56..0eb08483 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py @@ -30,6 +30,7 @@ def slurm_lines(p, J, array=None, mem=None, out_err=None): f"#SBATCH -p {p}\n", f"#SBATCH -J {J}\n", f"#SBATCH --array=0-{array}\n" if array is not None else "", + f"#SBATCH --mem {mem}\n" if mem is not None else "", "#SBATCH -n 1\n\n", f"#SBATCH --output={out_err}.out\n" if out_err is not None else "", f"#SBATCH --error={out_err}.err\n\n" if out_err is not None else "", From 6adee2a8decc4a968d3b1f6e865000d82153b39b Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 9 May 2024 10:48:10 +0000 Subject: [PATCH 064/236] fixed search for lstchain (last) versions --- .../database_production/lstchain_version.py | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py index 3a2a4b74..8673978a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py @@ -8,7 +8,8 @@ import pandas as pd import numpy as np import glob -from string import ascii_letters + +lstchain_versions=['v0.9','v0.10'] def main(): @@ -30,33 +31,34 @@ def main(): date = row["DATE"] directories_version=[i.split('/')[-1] for i in glob.glob(f"/fefs/aswg/data/real/DL1/{date}/v*")] - v_number=np.sort([float(i.replace('v0.','').rstrip(ascii_letters).split('_')[0]) for i in directories_version]).tolist() - - - v_number=[str(i).replace('.0','') for i in v_number] + - for vers in v_number: + for vers in directories_version: if os.path.isfile( - f"/fefs/aswg/data/real/DL1/{date}/v0.{vers}/tailcut84/dl1_LST-1.Run{run}.h5" + f"/fefs/aswg/data/real/DL1/{date}/{vers}/tailcut84/dl1_LST-1.Run{run}.h5" ): if vers not in version: version.append(vers) - + version=list(version) + df_LST.loc[i, "lstchain_versions"] = str(version) + max_version=None - - version=[f'v0.{i}'for i in version] - - if len(version)>0: + for j in range(len(lstchain_versions)): + - df_LST.loc[i,'last_lstchain_file']=f"/fefs/aswg/data/real/DL1/{date}/{version[-1]}/tailcut84/dl1_LST-1.Run{run}.h5" - else: - df_LST.loc[i,'last_lstchain_file']=f"/fefs/aswg/data/real/DL1/{date}/{version}/tailcut84/dl1_LST-1.Run{run}.h5" + if lstchain_versions[j] in version: + + max_version=lstchain_versions[j] - df_LST.loc[i, "lstchain_versions"] = str(version) - + if max_version is None: + raise ValueError('issue with lstchain versions') + name=f"/fefs/aswg/data/real/DL1/{date}/{max_version}/tailcut84/dl1_LST-1.Run{run}.h5" + + df_LST.loc[i,'last_lstchain_file']= name + df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", From e49b9a08f884508f8e90969e1ed9da1a8765f80c Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 9 May 2024 10:54:42 +0000 Subject: [PATCH 
065/236] fix setup

---
 setup.cfg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index e0fb4723..00d2074f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -97,9 +97,9 @@ console_scripts =
     merge_hdf_files = magicctapipe.scripts.lst1_magic.merge_hdf_files:main
     coincident_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.coincident_events:main
     list_from_h5 = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.list_from_h5:main
-    LSTnsb = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.LSTnsb:main
+    LSTnsb = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.LSTnsb:main
     merging_runs = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.merging_runs:main
-    nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.nsb_level:main
+    nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_level:main
     setting_up_config_and_dir = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.setting_up_config_and_dir:main
     stereo_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.stereo_events:main

From f0549b259b65049574d5fc608c4318e53b1fc155 Mon Sep 17 00:00:00 2001
From: Julian Sitarek
Date: Thu, 9 May 2024 19:53:13 +0000
Subject: [PATCH 066/236] added simple error tracking and removed duplication
 of code

Every job that ends stores its return code in a log file, with an additional
copy in the errors file if the return code is != 0.
---
 .../setting_up_config_and_dir.py              | 57 +++++++------------
 1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py
index d88a61c8..4e9a306a 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py
@@ -296,37 +296,12 @@ def lists_and_bash_gen_MAGIC(
             ]
             f.writelines(lines)
     if NSB_match:
-
-        if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0):
-            for i in MAGIC_runs:
-
-                if telescope_ids[-1] > 0:
-                    number_of_nodes = glob.glob(
-                        f'/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root'
-                    )
-                    number_of_nodes = len(number_of_nodes) - 1
-                    if number_of_nodes < 0:
-                        continue
-                    slurm = slurm_lines(
-                        p="short",
-                        J=process_name,
-                        array=number_of_nodes,
-                        mem="2g",
-                        out_err=f"{target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a",
-                    )
-                    lines = slurm + [
-                        f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}\n",
-                        "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n",
-                        "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n",
-                        "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n",
-                        f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n",
-                    ]
-                    with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f:
-                        f.writelines(lines)
-
-                if telescope_ids[-2] > 0:
+        for magic in [1, 2]:
+            # if 1 then magic is second from last, if 2 then last
+            if telescope_ids[magic - 3] > 0:
+                for i in MAGIC_runs:
                     number_of_nodes = glob.glob(
-
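#                   (On the loop above: telescope_ids lists M1 and M2 last,
#                    so telescope_ids[magic - 3] selects index -2 for magic=1
#                    and -1 for magic=2, i.e. the same checks as the removed
#                    per-telescope branches.)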
f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' ) number_of_nodes = len(number_of_nodes) - 1 if number_of_nodes < 0: @@ -336,16 +311,24 @@ def lists_and_bash_gen_MAGIC( J=process_name, array=number_of_nodes, mem="2g", - out_err=f"{target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", + out_err=f"{target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", ) lines = slurm + [ - f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}\n", + f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + "rc=$?\n", + 'if [ "$rc" -ne "0" ]; then\n', + " echo $SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID} $rc >> $OUTPUTDIR/logs/list_failed.log\n", + "fi\n", + "echo $SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID} $rc >> $OUTPUTDIR/logs/list_return.log\n", ] - with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: + with open( + f"{source}_MAGIC-" + "I" * magic + f"_dl0_to_dl1_run_{i[1]}.sh", + "w", + ) as f: f.writelines(lines) else: if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): @@ -677,7 +660,7 @@ def main(): else: launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - os.system(launch_jobs_MC) + # os.system(launch_jobs_MC) # Below we run the analysis on the MAGIC data if ( @@ -701,11 +684,9 @@ def main(): ) continue + launch_jobs = f"linking=$(sbatch --parsable {source_name}_linking_MAGIC_data_paths.sh)" for n, run in enumerate(list_of_MAGIC_runs): - if n == 0: - launch_jobs = f"linking=$(sbatch --parsable {source_name}_linking_MAGIC_data_paths.sh) && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" - else: - launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" + launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" os.system(launch_jobs) From 6076d2389b72e39362dc36667737632382304a0b Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 9 May 2024 19:57:02 +0000 Subject: [PATCH 067/236] uncommented a forgotten line --- .../semi_automatic_scripts/setting_up_config_and_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 4e9a306a..b677aaa1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -660,7 +660,7 @@ def main(): else: launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - # os.system(launch_jobs_MC) + os.system(launch_jobs_MC) # Below we run the analysis on the MAGIC data if ( From df69c176a09e4c99f385ddd069ec3edeff1e1406 Mon Sep 17 00:00:00 2001 
From: Elisa-Visentin Date: Fri, 10 May 2024 10:09:50 +0000 Subject: [PATCH 068/236] bug fix --- .../database_production/__init__.py | 3 ++ .../database_production/lstchain_version.py | 28 ++++++++------- .../database_production/nsb_level.py | 36 +++++++++++++------ setup.cfg | 1 + 4 files changed, 45 insertions(+), 23 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py index 59ca9c10..b3f006ee 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py @@ -1,7 +1,10 @@ from .LSTnsb import nsb from .nsb_level import bash_scripts +from .lstchain_version import version_lstchain, lstchain_versions __all__ = [ "nsb", "bash_scripts", + "version_lstchain", + "lstchain_versions" ] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py index 8673978a..d03ae717 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py @@ -10,19 +10,8 @@ import glob lstchain_versions=['v0.9','v0.10'] - -def main(): - - """ - Main function - """ - - df_LST = pd.read_hdf( - "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", - key="joint_obs", - ) - - +__all__ = ["version_lstchain"] +def version_lstchain(df_LST): for i, row in df_LST.iterrows(): version=[] @@ -58,6 +47,19 @@ def main(): name=f"/fefs/aswg/data/real/DL1/{date}/{max_version}/tailcut84/dl1_LST-1.Run{run}.h5" df_LST.loc[i,'last_lstchain_file']= name +def main(): + + """ + Main function + """ + + df_LST = pd.read_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", + ) + + version_lstchain(df_LST) + df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index dfc70ec3..d582e2a0 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -13,7 +13,7 @@ import numpy as np import pandas as pd import yaml - +from .lstchain_version import lstchain_versions __all__ = ["bash_scripts"] logger = logging.getLogger(__name__) @@ -96,7 +96,7 @@ def main(): "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", ) - lstchain_version=config["general"]["LST_version"] + lstchain_v=config["general"]["LST_version"] @@ -111,11 +111,26 @@ def main(): print("***** Generating bashscripts...") for i, row in df_LST.iterrows(): - if lstchain_version not in str(row['lstchain_versions'].replace(']','').replace('[','').split(',')): + + #list_v=list(str(row['lstchain_versions'].replace('"]','').replace('["','').split('",')).rstrip('"]').lstrip('["')) + list_v = [eval(i) for i in row['lstchain_versions'].strip("][").split(', ')] + #list_v=list_v..rstrip('"]').lstrip('["') + + if str(lstchain_v) not in list_v: continue + if len(list_v)>1: + common_v=[i for i in 
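# (Note: iterating a set has no guaranteed order, so common_v[-1] below is
#  not necessarily the newest common version; a deterministic pick, e.g.
#  max(common_v, key=lstchain_versions.index), assuming lstchain_versions is
#  ordered oldest to newest, would avoid that. Editorial sketch only.)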
(set(lstchain_versions).intersection(list_v))] + + max_common=common_v + if len(common_v)>1: + max_common=common_v[-1] + + if lstchain_v!=max_common: + continue run_number = row["LST1_run"] date = row["DATE"] - df_LST.loc[i,'processed_lstchain_file']=f"/fefs/aswg/data/real/DL1/{date}/{lstchain_version}/tailcut84/dl1_LST-1.Run{run_number}.h5" + + df_LST.loc[i,'processed_lstchain_file']=f"/fefs/aswg/data/real/DL1/{date}/{lstchain_v}/tailcut84/dl1_LST-1.Run{run_number}.h5" df_LST.loc[i,'error_code_nsb']=np.nan bash_scripts(run_number, date, args.config_file, env_name) @@ -138,12 +153,7 @@ def main(): df_LST = df_LST.sort_values(by=["DATE", "source","LST1_run"]) - df_LST.to_hdf( - "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", - key="joint_obs", - mode="w", - min_itemsize={'lstchain_versions':20, 'last_lstchain_file':90,'processed_lstchain_file':90} - ) + for n, run in enumerate(list_of_bash_scripts): if n == 0: launch_jobs = f"nsb{n}=$(sbatch --parsable {run})" @@ -151,6 +161,12 @@ def main(): launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})" os.system(launch_jobs) + df_LST.to_hdf( + "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", + key="joint_obs", + mode="w", + min_itemsize={'lstchain_versions':20, 'last_lstchain_file':90,'processed_lstchain_file':90} + ) diff --git a/setup.cfg b/setup.cfg index 00d2074f..6861e3e2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -100,6 +100,7 @@ console_scripts = LSTnsb = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.LSTnsb:main merging_runs = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.merging_runs:main nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_level:main + lstchain_version = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.lstchain_version:main setting_up_config_and_dir = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.setting_up_config_and_dir:main stereo_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.stereo_events:main From 4f99db62d1a9197c48df542d29b059986ec53278 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 10 May 2024 14:31:15 +0000 Subject: [PATCH 069/236] bug + linter --- .../database_production/__init__.py | 9 +-- .../database_production/create_LST_table.py | 29 ++++---- .../database_production/lstchain_version.py | 66 +++++++++++-------- .../database_production/nsb_level.py | 59 +++++++++-------- .../database_production/nsb_to_h5.py | 19 +++--- .../semi_automatic_scripts/merging_runs.py | 10 +-- 6 files changed, 101 insertions(+), 91 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py index b3f006ee..f37b4346 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py @@ -1,10 +1,5 @@ +from .lstchain_version import lstchain_versions, version_lstchain from .LSTnsb import nsb from .nsb_level import bash_scripts -from .lstchain_version import version_lstchain, lstchain_versions -__all__ = [ - "nsb", - "bash_scripts", - "version_lstchain", - "lstchain_versions" -] +__all__ = ["nsb", "bash_scripts", "version_lstchain", "lstchain_versions"] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index f7211932..734d410c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -34,18 +34,15 @@ def main(): ] df_cut = df[needed_cols] - df_cut=df_cut.assign(nsb = np.nan) - df_cut=df_cut.assign(lstchain_versions = '[]') - df_cut=df_cut.assign(last_lstchain_file = '') - df_cut=df_cut.assign(processed_lstchain_file = '') - df_cut=df_cut.assign(error_code_nsb = -1) - - df_cut=df_cut.assign(error_code_coincidence = -1) - df_cut=df_cut.assign(error_code_stereo = -1) - - - - + df_cut = df_cut.assign(nsb=np.nan) + df_cut = df_cut.assign(lstchain_versions="[]") + df_cut = df_cut.assign(last_lstchain_file="") + df_cut = df_cut.assign(processed_lstchain_file="") + df_cut = df_cut.assign(error_code_nsb=-1) + + df_cut = df_cut.assign(error_code_coincidence=-1) + df_cut = df_cut.assign(error_code_stereo=-1) + if os.path.isfile( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5" ): @@ -58,12 +55,16 @@ def main(): ) df_cut = df_cut.sort_values(by=["DATE", "source"]) # TODO check if fine with update and nsb - df_cut=df_cut.reset_index(drop=True) + df_cut = df_cut.reset_index(drop=True) df_cut.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w", - min_itemsize={'lstchain_versions':20, 'last_lstchain_file':90,'processed_lstchain_file':90} + min_itemsize={ + "lstchain_versions": 20, + "last_lstchain_file": 90, + "processed_lstchain_file": 90, + }, ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py index d03ae717..b946ff39 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py @@ -3,50 +3,59 @@ """ +import glob import os import pandas as pd -import numpy as np -import glob -lstchain_versions=['v0.9','v0.10'] +lstchain_versions = ["v0.9", "v0.10"] __all__ = ["version_lstchain"] + + def version_lstchain(df_LST): + """ + Evaluates all the versions used to process a given file and the last version of a file + + Parameters + ---------- + df_LST : :class:`pandas.DataFrame` + Dataframe of the LST-1 observations. 
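+
+    Raises
+    ------
+    ValueError
+        If none of the supported lstchain versions is found for a run.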
+ """ for i, row in df_LST.iterrows(): - version=[] + version = [] run = row["LST1_run"] run = format(int(run), "05d") date = row["DATE"] - directories_version=[i.split('/')[-1] for i in glob.glob(f"/fefs/aswg/data/real/DL1/{date}/v*")] - - - - + directories_version = [ + i.split("/")[-1] for i in glob.glob(f"/fefs/aswg/data/real/DL1/{date}/v*") + ] + for vers in directories_version: - + if os.path.isfile( f"/fefs/aswg/data/real/DL1/{date}/{vers}/tailcut84/dl1_LST-1.Run{run}.h5" ): if vers not in version: version.append(vers) - - version=list(version) + + version = list(version) df_LST.loc[i, "lstchain_versions"] = str(version) - max_version=None - + max_version = None + for j in range(len(lstchain_versions)): - - + if lstchain_versions[j] in version: - - max_version=lstchain_versions[j] - + + max_version = lstchain_versions[j] + if max_version is None: - raise ValueError('issue with lstchain versions') - name=f"/fefs/aswg/data/real/DL1/{date}/{max_version}/tailcut84/dl1_LST-1.Run{run}.h5" - - df_LST.loc[i,'last_lstchain_file']= name + raise ValueError("issue with lstchain versions") + name = f"/fefs/aswg/data/real/DL1/{date}/{max_version}/tailcut84/dl1_LST-1.Run{run}.h5" + + df_LST.loc[i, "last_lstchain_file"] = name + + def main(): """ @@ -57,15 +66,18 @@ def main(): "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", ) - + version_lstchain(df_LST) - - + df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w", - min_itemsize={'lstchain_versions':20, 'last_lstchain_file':90,'processed_lstchain_file':90} + min_itemsize={ + "lstchain_versions": 20, + "last_lstchain_file": 90, + "processed_lstchain_file": 90, + }, ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index d582e2a0..8f5608cc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -13,7 +13,9 @@ import numpy as np import pandas as pd import yaml + from .lstchain_version import lstchain_versions + __all__ = ["bash_scripts"] logger = logging.getLogger(__name__) @@ -96,9 +98,7 @@ def main(): "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", ) - lstchain_v=config["general"]["LST_version"] - - + lstchain_v = config["general"]["LST_version"] min = datetime.strptime(args.begin_date, "%Y_%m_%d") max = datetime.strptime(args.end_date, "%Y_%m_%d") @@ -108,30 +108,32 @@ def main(): df_LST = df_LST[df_LST["date"] <= max] df_LST = df_LST.drop(columns="date") - + print("***** Generating bashscripts...") for i, row in df_LST.iterrows(): - - #list_v=list(str(row['lstchain_versions'].replace('"]','').replace('["','').split('",')).rstrip('"]').lstrip('["')) - list_v = [eval(i) for i in row['lstchain_versions'].strip("][").split(', ')] - #list_v=list_v..rstrip('"]').lstrip('["') - + + # list_v=list(str(row['lstchain_versions'].replace('"]','').replace('["','').split('",')).rstrip('"]').lstrip('["')) + list_v = [eval(i) for i in row["lstchain_versions"].strip("][").split(", ")] + # list_v=list_v..rstrip('"]').lstrip('["') + if str(lstchain_v) not in list_v: continue - if len(list_v)>1: - common_v=[i for i in (set(lstchain_versions).intersection(list_v))] - - max_common=common_v - if len(common_v)>1: - max_common=common_v[-1] - - if 
lstchain_v!=max_common: + if len(list_v) > 1: + common_v = [i for i in (set(lstchain_versions).intersection(list_v))] + + max_common = common_v + if len(common_v) > 1: + max_common = common_v[-1] + + if lstchain_v != max_common: continue - run_number = row["LST1_run"] + run_number = row["LST1_run"] date = row["DATE"] - df_LST.loc[i,'processed_lstchain_file']=f"/fefs/aswg/data/real/DL1/{date}/{lstchain_v}/tailcut84/dl1_LST-1.Run{run_number}.h5" - df_LST.loc[i,'error_code_nsb']=np.nan + df_LST.loc[ + i, "processed_lstchain_file" + ] = f"/fefs/aswg/data/real/DL1/{date}/{lstchain_v}/tailcut84/dl1_LST-1.Run{run_number}.h5" + df_LST.loc[i, "error_code_nsb"] = np.nan bash_scripts(run_number, date, args.config_file, env_name) print("Process name: nsb") @@ -142,7 +144,8 @@ def main(): print( "Warning: no bash script has been produced to evaluate the NSB level for the provided LST runs. Please check the input list" ) - return print("Update database and launch jobs") + return + print("Update database and launch jobs") df_old = pd.read_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", @@ -150,10 +153,8 @@ def main(): df_LST = pd.concat([df_LST, df_old]).drop_duplicates( subset="LST1_run", keep="first" ) - df_LST = df_LST.sort_values(by=["DATE", "source","LST1_run"]) - - - + df_LST = df_LST.sort_values(by=["DATE", "source", "LST1_run"]) + for n, run in enumerate(list_of_bash_scripts): if n == 0: launch_jobs = f"nsb{n}=$(sbatch --parsable {run})" @@ -165,10 +166,12 @@ def main(): "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", mode="w", - min_itemsize={'lstchain_versions':20, 'last_lstchain_file':90,'processed_lstchain_file':90} + min_itemsize={ + "lstchain_versions": 20, + "last_lstchain_file": 90, + "processed_lstchain_file": 90, + }, ) - - if __name__ == "__main__": diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index c83380b0..381026ce 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -60,12 +60,11 @@ def main(): df_new = df_new.sort_values(by=["DATE", "source", "LST1_run"]) - df_new.loc[df_new['error_code_nsb'].isna(),"error_code_nsb"] = '1' - - df_new.loc[df_new['nsb'].notna(),"error_code_nsb"] = '0' - df_new.loc[df_new['nsb'] > 3.0,"error_code_nsb"] = '2' - - + df_new.loc[df_new["error_code_nsb"].isna(), "error_code_nsb"] = "1" + + df_new.loc[df_new["nsb"].notna(), "error_code_nsb"] = "0" + df_new.loc[df_new["nsb"] > 3.0, "error_code_nsb"] = "2" + df_new = df_new[ [ "source", @@ -76,11 +75,11 @@ def main(): "MAGIC_HV", "nsb", "lstchain_versions", - "last_lstchain_file", - "processed_lstchain_file", + "last_lstchain_file", + "processed_lstchain_file", "error_code_nsb", - "error_code_coincidence", - "error_code_stereo", + "error_code_coincidence", + "error_code_stereo", ] ] df_new.to_hdf( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 77977d67..eeefb53a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -345,17 +345,17 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): 
os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") ] for i in dates: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): + continue if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i}"): os.mkdir( f"{MAGIC_DL1_dir}/Merged/Merged_{i}" ) # Creating a merged directory for each night - if ( - len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*M1*.h5")) > 0 - ) and ( - len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*M2*.h5")) > 0 - ): + if len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) > 0: + + if not os.path.exists( f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs" ): From e04ded39586e92f2c81020712b88bba64ab02028 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Fri, 10 May 2024 22:29:06 +0000 Subject: [PATCH 070/236] added a script for statistics of job completion and errors it also does accounting of used CPU and memory --- .../semi_automatic_scripts/job_accounting.py | 178 ++++++++++++++++++ setup.cfg | 1 + 2 files changed, 179 insertions(+) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py new file mode 100644 index 00000000..4a435f10 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -0,0 +1,178 @@ +""" +This script does checks of status of jobs based on the log files generated during the execution. +It also does accounting of memory and CPU usage +""" +import argparse +import glob +from datetime import timedelta +from subprocess import PIPE, run + +import numpy as np +import yaml + +from magicctapipe import __version__ + +GREEN = "\033[32m" +YELLOW = "\033[33m" +RED = "\033[31m" +ENDC = "\033[0m" + + +def run_shell(command): + """ + Simple function to extract the output of a command run in a shell + + Parameters + ---------- + command : str + Command to be executed + + Returns + ---------- + list + List of lines returned by the program + """ + result = run(command, stdout=PIPE, stderr=PIPE, shell=True, universal_newlines=True) + return result.stdout + + +def main(): + """ + Function counts the number of jobs that should have been submitted, and checks the output of the logs to see how many finished successfully, and how many failed. 
+ """ + parser = argparse.ArgumentParser() + + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file config_general.yaml", + ) + + parser.add_argument( + "--data-level", + "-d", + dest="data_level", + type=str, + default="DL1/M1", + help="Data level to be checked", + ) + + parser.add_argument( + "--version", + "-v", + dest="version", + type=str, + default=__version__, + help="MCP version (used for subdirectory name)", + ) + + args = parser.parse_args() + with open(args.config_file, "r") as f: + config = yaml.safe_load(f) + + # TODO: those variables will be needed when more features are implemented + # source_in = config["data_selection"]["source_name_database"] + # source_out = config["data_selection"]["source_name_output"] + # timerange = config["data_selection"]["time_range"] + # skip_LST = config["data_selection"]["skip_LST_runs"] + # skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"] + work_dir = config["directories"]["workspace_dir"] + + print(f"Checking progress of jobs stored in {work_dir}") + dirs = sorted(glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/*/*")) + + all_todo = 0 + all_return = 0 + all_good = 0 + all_cpu = [] + all_mem = [] + for dir in dirs: + print(dir) + # fixme list_dl0.txt is only available for DL1/M[12] processing + list_dl0 = f"{dir}/logs/list_dl0.txt" + try: + with open(list_dl0, "r") as fp: + this_todo = len(fp.readlines()) + except IOError: + print(f"{RED}File {list_dl0} is missing{ENDC}") + this_todo = 0 + + list_return = f"{dir}/logs/list_return.log" + this_good = 0 + this_cpu = [] + this_mem = [] + try: + with open(list_return, "r") as fp: + returns = fp.readlines() + this_return = len(returns) + for line in returns: + file_in, slurm_id, task_id, rc = line[0:-1].split(" ") + if rc == "0": + this_good += 1 + # now check accounting + out = run_shell( + f'sacct --format="JobID,CPUTime,MaxRSS" --units=M -j {slurm_id}_{task_id}| tail -1' + ) + _, cpu, mem = out.split() + hh, mm, ss = (int(x) for x in str(cpu).split(":")) + delta = timedelta( + days=hh // 24, hours=hh % 24, minutes=mm, seconds=ss + ) + this_cpu.append(delta) + this_mem.append(float(mem[0:-1])) + else: + print(f"file {file_in} failed with error {rc}") + if len(this_cpu) > 0: + all_cpu += this_cpu + all_mem += this_mem + this_cpu = np.array(this_cpu) + this_mem = np.array(this_mem) + print( + f"CPU: median={np.median(this_cpu)}, max={this_cpu.max()}; memory [M]: median={np.median(this_mem)}, max={this_mem.max()}" + ) + + except IOError: + print(f"{RED}File {list_return} is missing{ENDC}") + this_return = 0 + + all_todo += this_todo + all_return += this_return + all_good += this_good + if this_good < this_return: + status = RED # there are errors in processing + elif this_return < this_todo: + status = YELLOW # waiting for jobs to finish (or lost jobs) + else: + status = GREEN # all done and ready + + if this_todo > 0: + print( + f"{status}to do: {this_todo}, finished: {this_return}, no errors: {this_good}{ENDC}" + ) + + print("SUMMARY") + if all_good < all_return: + status = RED # there are errors in processing + elif all_return < all_todo: + status = YELLOW # waiting for jobs to finish (or lost jobs) + else: + status = GREEN # all done and ready + + if all_todo > 0: + print( + f"{status}to do: {all_todo}, finished: {all_return}, no errors: {all_good}{ENDC}" + ) + + if len(all_cpu) > 0: + all_cpu = np.array(all_cpu) + all_mem = np.array(all_mem) + print( + f"CPU: 
median={np.median(all_cpu)}, max={all_cpu.max()}; memory [M]: median={np.median(all_mem)}, max={all_mem.max()}" + ) + + +if __name__ == "__main__": + main() diff --git a/setup.cfg b/setup.cfg index 00d2074f..82cacee8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -102,6 +102,7 @@ console_scripts = nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_level:main setting_up_config_and_dir = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.setting_up_config_and_dir:main stereo_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.stereo_events:main + job_accounting = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.job_accounting:main [tool:pytest] minversion=3.0 From f119906ffaa4f625eec1431dfd87ea176acf2995 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 13 May 2024 09:27:19 +0000 Subject: [PATCH 071/236] bug --- .../database_production/nsb_level.py | 8 ++++---- .../semi_automatic_scripts/list_from_h5.py | 20 +++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 8f5608cc..4b06c9a6 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -42,11 +42,11 @@ def bash_scripts(run, date, config, env_name): lines = [ "#!/bin/sh\n\n", - "#SBATCH -p short,long\n", + "#SBATCH -p long\n", "#SBATCH -J nsb\n", "#SBATCH -n 1\n\n", - "#SBATCH --output=slurm-nsb-%x.%j.out" - "#SBATCH --error=slurm-nsb-%x.%j.err" + f"#SBATCH --output=slurm-nsb_{run}-%x.%j.out\n" + f"#SBATCH --error=slurm-nsb_{run}-%x.%j.err\n" "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", @@ -160,7 +160,7 @@ def main(): launch_jobs = f"nsb{n}=$(sbatch --parsable {run})" else: launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})" - + os.system(launch_jobs) df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index e889dc5d..42c9683c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -210,27 +210,32 @@ def main(): "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", ) # TODO: put this file in a shared folder - df_LST(subset=["LST1_run"], inplace=True) + df_LST.dropna(subset=["LST1_run"], inplace=True) df_LST = split_lst_date(df_LST) df_LST = df_LST.astype( {"YY_LST": int, "MM_LST": int, "DD_LST": int, "nsb": float, "LST1_run": int} ) stereo = True - lstchain_version=config["general"]["LST_version"] - mask=(df_LST['processed_lstchain_file'].str.split('/')[-3]==lstchain_version) - df_LST=df_LST[mask] + lstchain_version = config["general"]["LST_version"] + + + processed_v=df_LST["processed_lstchain_file"].str.split("/").str[-3] + + mask = (processed_v== lstchain_version) + df_LST = df_LST[mask] + if source_in is None: df_LST.query( - f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & nsb <=3.0 & error_code_nsb==0', + f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & error_code_nsb=="0"', inplace=True, ) else: df_LST.query( - 
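# (error_code_nsb is written as a string by nsb_to_h5.py above, i.e. "0",
#  "1" or "2", hence the comparison against "0" rather than the integer 0.)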
f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & nsb <=3.0 & error_code_nsb==0', + f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & error_code_nsb=="0"', inplace=True, ) - + if range: min = str(config["data_selection"]["min"]) max = str(config["data_selection"]["max"]) @@ -248,7 +253,6 @@ def main(): df_LST = df_LST.reset_index() df_LST = df_LST.drop("index", axis=1) - clear_files(source_in, source_out, df_LST) list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) list_date_LST = np.unique(df_LST["date_LST"]) From e826703f68534a73b6fc1862f6c2de5aa15f029b Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 13 May 2024 09:52:36 +0000 Subject: [PATCH 072/236] exit if no runs (MAGIC or LST) --- .../lst1_magic/semi_automatic_scripts/list_from_h5.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 42c9683c..0dba68b2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -254,6 +254,9 @@ def main(): df_LST = df_LST.reset_index() df_LST = df_LST.drop("index", axis=1) clear_files(source_in, source_out, df_LST) + if len(df_LST)==0: + print('NO LST run found. Exiting...') + return list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) list_date_LST = np.unique(df_LST["date_LST"]) list_date_LST_low = [sub.replace("-", "_") for sub in list_date_LST] @@ -275,6 +278,9 @@ def main(): df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"}) M1_runs = df_MAGIC1["Run ID"].tolist() + if (len(M1_runs)==0) or (len(df_MAGIC2)==0): + print('NO MAGIC stereo run found. 
Exiting...') + return list_run(source_in, source_out, df_MAGIC2, skip_LST, skip_MAGIC, False, M1_runs) From 956ce3e5455aac86f3a4fce230d1ba10b9968167 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 13 May 2024 10:34:02 +0000 Subject: [PATCH 073/236] os.makedirs --- .../coincident_events.py | 41 ++---- .../semi_automatic_scripts/merging_runs.py | 127 +++++------------- .../semi_automatic_scripts/stereo_events.py | 24 +--- 3 files changed, 51 insertions(+), 141 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 2864d684..455e33bb 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -100,8 +100,7 @@ def linking_bash_lst( LST_runs.append(dt) if NSB_match: coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" - if not os.path.exists(f"{coincidence_DL1_dir}/DL1Coincident/"): - os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident") + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" @@ -131,16 +130,10 @@ def linking_bash_lst( inputdir = ( f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" ) - if not os.path.exists( - f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" - ): - os.mkdir(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}") - if not os.path.exists( + + os.makedirs( f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs" - ): - os.mkdir( - f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs" - ) + ) outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" list_of_subruns = np.sort( @@ -188,27 +181,21 @@ def linking_bash_lst( f.writelines(lines) else: coincidence_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations" - if not os.path.exists(f"{coincidence_DL1_dir}/Coincident"): - os.mkdir(f"{coincidence_DL1_dir}/Coincident") - + for i in LST_runs: lstObsDir = i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2] inputdir = f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" outputdir = f"{coincidence_DL1_dir}/Coincident/{lstObsDir}" list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) - if os.path.exists(f"{outputdir}/list_LST.txt"): - with open(f"{outputdir}/list_LST.txt", "a") as LSTdataPathFile: - for subrun in list_of_subruns: - LSTdataPathFile.write( - f"{subrun}\n" - ) # If this files already exists, simply append the new information - else: - os.mkdir(outputdir) - with open( - f"{outputdir}/list_LST.txt", "w" - ) as f: # If the file list_LST.txt does not exist, it will be created here - for subrun in list_of_subruns: - f.write(f"{subrun}\n") + os.makedirs(outputdir) + + + with open(f"{outputdir}/list_LST.txt", "a+") as LSTdataPathFile: + for subrun in list_of_subruns: + LSTdataPathFile.write( + f"{subrun}\n" + ) # If this files already exists, simply append the new information + process_name = source_name listOfNightsLST = np.sort( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index eeefb53a..f877ff61 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -89,26 +89,18 @@ def split_train_test(target_dir, train_fraction, source_name): proton_dir = f"{target_dir}/{source_name}/DL1/MC/protons" - if not 
os.path.exists(f"{proton_dir}/train"): - os.mkdir(f"{proton_dir}/train") - if not os.path.exists(f"{proton_dir}/../protons_test"): - os.mkdir(f"{proton_dir}/../protons_test") + list_of_dir = np.sort(glob.glob(f"{proton_dir}/node*{os.path.sep}")) for directory in tqdm( range(len(list_of_dir)) ): # tqdm allows us to print a progessbar in the terminal - if not os.path.exists( - f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}" - ): - os.mkdir(f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}") - if not os.path.exists( - f"{proton_dir}/../protons_test/{list_of_dir[directory].split('/')[-2]}" - ): - os.mkdir( - f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}' - ) + + os.makedirs(f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}") + os.makedirs( + f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}' + ) list_of_runs = np.sort( glob.glob(f'{proton_dir}/{list_of_dir[directory].split("/")[-2]}/*.h5') ) @@ -154,11 +146,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): process_name = f"merging_{source}" if not NSB_match: MAGIC_DL1_dir = f"{target_dir}/{source}/DL1/Observations" - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( - f"{MAGIC_DL1_dir}/M2" - ): - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged"): - os.mkdir(f"{MAGIC_DL1_dir}/Merged") + with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: lines = slurm_lines( @@ -169,16 +157,13 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f.writelines(lines) if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1"): + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists(f"{MAGIC_DL1_dir}/M2"): for i in MAGIC_runs: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i[0]}" - ) # Creating a merged directory for the respective night - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" - ) # Creating a merged directory for the respective run + + + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + ) # Creating a merged directory for the respective run os.system( f'find {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} -type f -name "*.h5" -size -3k -delete' ) @@ -187,16 +172,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): + "${SLURM_JOB_ID}.log\n" ) - if os.path.exists(f"{MAGIC_DL1_dir}/M2"): - for i in MAGIC_runs: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i[0]}" - ) # Creating a merged directory for the respective night - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" - ) # Creating a merged directory for the respective run + os.system( f'find {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} -type f -name "*.h5" -size -3k -delete' ) @@ -210,18 +186,16 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f"{MAGIC_DL1_dir}/M2" ): for i in MAGIC_runs: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged"): - os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged") + os.makedirs(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged") f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/merge_{i[0]}_{[1]}_" + "${SLURM_JOB_ID}.log\n" ) else: for i in MAGIC_runs: - 
if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}" - ) # Creating a merged directory for each night + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}" + ) # Creating a merged directory for each night f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i[0]} >{MAGIC_DL1_dir}/Merged/Merged_{i[0]}/merge_night_{i[0]}_" + "${SLURM_JOB_ID}.log \n" @@ -233,11 +207,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( - f"{MAGIC_DL1_dir}/M2" - ): - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged"): - os.mkdir(f"{MAGIC_DL1_dir}/Merged") + lines = slurm_lines( p="short", J=process_name, @@ -248,7 +218,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f.writelines(lines) if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1"): + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists(f"{MAGIC_DL1_dir}/M2"): dates = [ os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") ] @@ -257,21 +227,12 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/*") ] - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}" - ) # Creating a merged directory for the respective night + for r in runs: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/{r}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}" - ) # Creating a merged directory for the respective run - if not os.path.exists( + + os.makedirs( f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ) # Creating a merged directory for the respective run + ) # Creating a merged directory for the respective run os.system( f'find {MAGIC_DL1_dir}/M1/{i}/{r} -type f -name "*.h5" -size -3k -delete' ) @@ -280,7 +241,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): + "${SLURM_JOB_ID}.log \n" ) - if os.path.exists(f"{MAGIC_DL1_dir}/M2"): + dates = [ os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M2/*") ] @@ -290,21 +251,11 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") ] - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}" - ) # Creating a merged directory for the respective night + for r in runs: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/{r}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}" - ) # Creating a merged directory for the respective run - if not os.path.exists( + os.makedirs( f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ) # Creating a merged directory for the respective run + ) # Creating a merged directory for the respective run os.system( f'find {MAGIC_DL1_dir}/M2/{i}/{r} -type f -name "*.h5" -size -3k -delete' ) @@ -328,14 +279,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if (len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0) and ( len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) > 0 ): - if not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/{i}/Merged" - ): - os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i}/Merged") - if 
not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs" - ): - os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs") + os.makedirs(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs") f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}_{r}_" + "${SLURM_JOB_ID}.log \n" @@ -349,17 +293,11 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): continue - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/Merged_{i}"): - os.mkdir( - f"{MAGIC_DL1_dir}/Merged/Merged_{i}" - ) # Creating a merged directory for each night + if len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) > 0: - if not os.path.exists( - f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs" - ): - os.mkdir(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") + os.makedirs(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" + "${SLURM_JOB_ID}.log \n" @@ -388,8 +326,7 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): process_name = f"merging_{source_name}" MC_DL1_dir = f"{target_dir}/{source_name}/DL1/MC" - if not os.path.exists(f"{MC_DL1_dir}/{identification}/Merged"): - os.mkdir(f"{MC_DL1_dir}/{identification}/Merged") + os.makedirs(f"{MC_DL1_dir}/{identification}/Merged") if identification == "protons": list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/train/node*")) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 0cdb5938..a6211a1f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -83,10 +83,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): process_name = source if not NSB_match: - if not os.path.exists( - f"{target_dir}/{source}/DL1/Observations/Coincident_stereo" - ): - os.mkdir(f"{target_dir}/{source}/DL1/Observations/Coincident_stereo") + listOfNightsLST = np.sort( glob.glob(f"{target_dir}/{source}/DL1/Observations/Coincident/*") @@ -94,8 +91,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): for nightLST in listOfNightsLST: stereoDir = f"{target_dir}/{source}/DL1/Observations/Coincident_stereo/{nightLST.split('/')[-1]}" - if not os.path.exists(stereoDir): - os.mkdir(stereoDir) + os.makedirs(stereoDir) os.system( f"ls {nightLST}/*LST*.h5 > {nightLST}/list_coin.txt" @@ -124,20 +120,13 @@ def bash_stereo(target_dir, source, env_name, NSB_match): ] f.writelines(lines) else: - if not os.path.exists( - f"{target_dir}/v{__version__}/{source}/DL1CoincidentStereo" - ): - os.mkdir(f"{target_dir}/v{__version__}/{source}/DL1CoincidentStereo") - + listOfNightsLST = np.sort( glob.glob(f"{target_dir}/v{__version__}/{source}/DL1Coincident/*") ) for nightLST in listOfNightsLST: stereoDir = f'{target_dir}/v{__version__}/{source}/DL1CoincidentStereo/{nightLST.split("/")[-1]}' - if not os.path.exists(stereoDir): - os.mkdir(stereoDir) - if not os.path.exists(f"{stereoDir}/logs"): - os.mkdir(f"{stereoDir}/logs") + os.makedirs(f"{stereoDir}/logs") if not os.listdir(f"{nightLST}"): continue if len(os.listdir(nightLST)) < 2: @@ -191,10 +180,7 @@ 
def bash_stereoMC(target_dir, identification, env_name, source): process_name = source - if not os.path.exists( - f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged" - ): - os.mkdir(f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged") + os.makedirs(f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged") inputdir = f"{target_dir}/{source}/DL1/MC/{identification}/Merged" From 20637082ed8340100e3f91a96744a5d13ef73100 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 13 May 2024 11:44:12 +0000 Subject: [PATCH 074/236] fix doc --- .../semi_automatic_scripts/database_production/nsb_to_h5.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index 381026ce..c55d06e6 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -1,7 +1,7 @@ """ -Bash scripts to run LSTnsb.py on all the LST runs by using parallel jobs +Script to fill the 'nsb' column of the LST database. To be called after nsb_level.py -Usage: python nsb_level.py (-c config.yaml) +Usage: python nsb_to_h5.py """ import glob @@ -19,7 +19,7 @@ def collect_nsb(df_LST): """ - Here we split the LST runs in NSB-wise .txt files + Here we collect NSB values from .txt files and store them into the dataframe Parameters ---------- From e7ff655671dac3b8fde5e21ebdfea91486eb1650 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 13 May 2024 11:45:32 +0000 Subject: [PATCH 075/236] df_lst index --- .../semi_automatic_scripts/database_production/nsb_to_h5.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index c55d06e6..c1112e5f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -32,6 +32,7 @@ def collect_nsb(df_LST): Same dataframe as the input one, but with NSB values added in the 'nsb' column (for the runs processed by nsb_level.py) """ nsb_files = glob.glob("nsb_LST_*.txt") + df_LST = df_LST.set_index("LST1_run") for file_nsb in nsb_files: run = file_nsb.split("_")[3] run = run.split(".")[0] @@ -39,9 +40,9 @@ def collect_nsb(df_LST): with open(file_nsb) as ff: line_str = ff.readline().rstrip("\n") nsb = line_str.split(",")[2] - df_LST = df_LST.set_index("LST1_run") + df_LST.loc[run, "nsb"] = float(nsb) - df_LST = df_LST.reset_index() + df_LST = df_LST.reset_index() return df_LST From 6da633676f61522a32eef041406fc1a974916426 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 13 May 2024 12:18:14 +0000 Subject: [PATCH 076/236] remove skip --- .../semi_automatic_scripts/list_from_h5.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 0dba68b2..fbd92bd7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -142,38 
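# PATCH 075 hoists the set_index()/reset_index() pair out of the per-file
# loop: the frame is indexed by run once, each run's value is written with
# .loc, and the default index is restored at the end. Self-contained sketch
# (toy data; only the column names mirror the script):
import numpy as np
import pandas as pd

df_LST = pd.DataFrame({"LST1_run": ["02977", "03093"], "nsb": [np.nan, np.nan]})
per_run_nsb = {"02977": 0.75, "03093": 1.52}  # stand-in for the parsed nsb_LST_*.txt files

df_LST = df_LST.set_index("LST1_run")
for run, value in per_run_nsb.items():
    df_LST.loc[run, "nsb"] = float(value)
df_LST = df_LST.reset_index()
print(df_LST)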
+142,36 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis LST_run = df_source["LST1_run"].tolist() # List with runs as strings LST_date = df_source["date_LST"].tolist() for k in range(len(df_source)): - skip = False if np.isnan(LST_run[k]): continue if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in run_listed): - skip = True + continue - if skip is False: - with open(file_list[0], "a+") as f: - f.write( - f"{LST_date[k].replace('-','_')},{str(LST_run[k]).lstrip('0')}\n" - ) - run_listed.append(int(LST_run[k])) + + with open(file_list[0], "a+") as f: + f.write( + f"{LST_date[k].replace('-','_')},{str(LST_run[k]).lstrip('0')}\n" + ) + run_listed.append(int(LST_run[k])) if not is_LST: print("Finding MAGIC runs...") MAGIC_date = df_source["date_MAGIC"].tolist() M2_run = df_source["Run ID"].tolist() for k in range(len(df_source)): - skip = False if np.isnan(M2_run[k]): continue if (int(M2_run[k]) in skip_MAGIC) or (int(M2_run[k]) in run_listed): - skip = True + continue if int(M2_run[k]) not in M1_run_list: - skip = True + continue - if skip is False: - with open(file_list[1], "a+") as f: - f.write(f"{MAGIC_date[k].replace('-','_')},{int(M2_run[k])}\n") - run_listed.append(int(M2_run[k])) + + with open(file_list[1], "a+") as f: + f.write(f"{MAGIC_date[k].replace('-','_')},{int(M2_run[k])}\n") + run_listed.append(int(M2_run[k])) def main(): From 8999000c972dbb0125d2fcf99ecd46e51483a5d1 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 13 May 2024 12:34:26 +0000 Subject: [PATCH 077/236] linter --- .../coincident_events.py | 14 +++---- .../database_production/LSTnsb.py | 8 +++- .../database_production/nsb_level.py | 2 +- .../database_production/nsb_to_h5.py | 4 +- .../semi_automatic_scripts/list_from_h5.py | 23 +++++------ .../semi_automatic_scripts/merging_runs.py | 39 +++++++++---------- .../semi_automatic_scripts/stereo_events.py | 3 +- 7 files changed, 44 insertions(+), 49 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 455e33bb..1746cd3c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -100,7 +100,6 @@ def linking_bash_lst( LST_runs.append(dt) if NSB_match: coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" - MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" @@ -130,10 +129,8 @@ def linking_bash_lst( inputdir = ( f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" ) - - os.makedirs( - f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs" - ) + + os.makedirs(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs") outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" list_of_subruns = np.sort( @@ -181,21 +178,20 @@ def linking_bash_lst( f.writelines(lines) else: coincidence_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations" - + for i in LST_runs: lstObsDir = i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2] inputdir = f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" outputdir = f"{coincidence_DL1_dir}/Coincident/{lstObsDir}" list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) os.makedirs(outputdir) - - + with open(f"{outputdir}/list_LST.txt", "a+") as LSTdataPathFile: for subrun in list_of_subruns: LSTdataPathFile.write( f"{subrun}\n" ) # If this files already exists, simply append 
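# PATCH 076 replaces the 'skip = True' bookkeeping plus a trailing
# 'if skip is False:' block with early 'continue' statements, flattening the
# loop body. The same refactor in miniature (toy values, illustrative names):
import math

LST_run = [float("nan"), 2977.0, 3093.0]
skip_LST, run_listed = {3093}, []
for value in LST_run:
    if math.isnan(value):
        continue              # was: skip = True
    if int(value) in skip_LST or int(value) in run_listed:
        continue              # was: skip = True ... if skip is False: append
    run_listed.append(int(value))
print(run_listed)             # [2977]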
the new information - + process_name = source_name listOfNightsLST = np.sort( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 861dac64..28e49956 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -10,6 +10,8 @@ import yaml from lstchain.image.modifier import calculate_noise_parameters +from magicctapipe.io import resource_file + __all__ = ["nsb"] logger = logging.getLogger(__name__) @@ -118,12 +120,14 @@ def main(): nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] nsb_limit.insert(0, 0) conda_path = os.environ["CONDA_PREFIX"] - + lstchain_modified = config["general"]["lstchain_modified_config"] lst_config = ( str(conda_path) + "/lib/python3.11/site-packages/lstchain/data/lstchain_standard_config.json" ) - + if lstchain_modified: + lst_config = resource_file("lstchain_standard_config_modified.json") + print(lst_config) if NSB_match: LST_files = np.sort(glob.glob(f"nsb_LST_[0-9]*_{run_number}.txt")) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 4b06c9a6..883c1fee 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -160,7 +160,7 @@ def main(): launch_jobs = f"nsb{n}=$(sbatch --parsable {run})" else: launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})" - + os.system(launch_jobs) df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index c1112e5f..b5f87b6c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -1,7 +1,7 @@ """ Script to fill the 'nsb' column of the LST database. 
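# LSTnsb.py now switches between the stock lstchain configuration, resolved
# inside the active conda environment, and a modified copy shipped with
# magicctapipe, driven by one boolean from config_general.yaml. The selection
# logic in isolation (paths illustrative; nothing is read from disk here):
import os

conda_path = os.environ.get("CONDA_PREFIX", "/opt/conda")
lstchain_modified = True  # stand-in for config["general"]["lstchain_modified_config"]

lst_config = (
    f"{conda_path}/lib/python3.11/site-packages/lstchain/data/lstchain_standard_config.json"
)
if lstchain_modified:
    # in the patch this path comes from magicctapipe.io.resource_file(...)
    lst_config = "lstchain_standard_config_modified.json"
print(lst_config)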
To be called after nsb_level.py -Usage: python nsb_to_h5.py +Usage: python nsb_to_h5.py """ import glob @@ -40,7 +40,7 @@ def collect_nsb(df_LST): with open(file_nsb) as ff: line_str = ff.readline().rstrip("\n") nsb = line_str.split(",")[2] - + df_LST.loc[run, "nsb"] = float(nsb) df_LST = df_LST.reset_index() return df_LST diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index fbd92bd7..1629e738 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -148,7 +148,6 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis if (int(LST_run[k]) in skip_LST) or (int(LST_run[k]) in run_listed): continue - with open(file_list[0], "a+") as f: f.write( f"{LST_date[k].replace('-','_')},{str(LST_run[k]).lstrip('0')}\n" @@ -168,7 +167,6 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis if int(M2_run[k]) not in M1_run_list: continue - with open(file_list[1], "a+") as f: f.write(f"{MAGIC_date[k].replace('-','_')},{int(M2_run[k])}\n") run_listed.append(int(M2_run[k])) @@ -216,13 +214,12 @@ def main(): stereo = True lstchain_version = config["general"]["LST_version"] - - - processed_v=df_LST["processed_lstchain_file"].str.split("/").str[-3] - - mask = (processed_v== lstchain_version) + + processed_v = df_LST["processed_lstchain_file"].str.split("/").str[-3] + + mask = processed_v == lstchain_version df_LST = df_LST[mask] - + if source_in is None: df_LST.query( f'MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & error_code_nsb=="0"', @@ -233,7 +230,7 @@ def main(): f'source=="{source_in}"& MAGIC_trigger=="L3T" & MAGIC_HV=="Nominal" & MAGIC_stereo == {stereo} & error_code_nsb=="0"', inplace=True, ) - + if range: min = str(config["data_selection"]["min"]) max = str(config["data_selection"]["max"]) @@ -252,8 +249,8 @@ def main(): df_LST = df_LST.reset_index() df_LST = df_LST.drop("index", axis=1) clear_files(source_in, source_out, df_LST) - if len(df_LST)==0: - print('NO LST run found. Exiting...') + if len(df_LST) == 0: + print("NO LST run found. Exiting...") return list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) list_date_LST = np.unique(df_LST["date_LST"]) @@ -276,8 +273,8 @@ def main(): df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"}) M1_runs = df_MAGIC1["Run ID"].tolist() - if (len(M1_runs)==0) or (len(df_MAGIC2)==0): - print('NO MAGIC stereo run found. Exiting...') + if (len(M1_runs) == 0) or (len(df_MAGIC2) == 0): + print("NO MAGIC stereo run found. 
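# The lstchain-version cut added above keeps only runs whose processed DL1
# file lives under the requested release; the release tag is the
# third-from-last component of the file path. Sketch with a toy frame
# (paths are illustrative):
import pandas as pd

df_LST = pd.DataFrame(
    {
        "processed_lstchain_file": [
            "/fefs/aswg/data/real/DL1/20201120/v0.9.13/tailcut84/dl1_LST-1.Run02977.h5",
            "/fefs/aswg/data/real/DL1/20201120/v0.10.3/tailcut84/dl1_LST-1.Run03093.h5",
        ]
    }
)
lstchain_version = "v0.9.13"
processed_v = df_LST["processed_lstchain_file"].str.split("/").str[-3]
print(df_LST[processed_v == lstchain_version])  # keeps only the v0.9.13 row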
Exiting...") return list_run(source_in, source_out, df_MAGIC2, skip_LST, skip_MAGIC, False, M1_runs) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index f877ff61..971faba3 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -89,14 +89,12 @@ def split_train_test(target_dir, train_fraction, source_name): proton_dir = f"{target_dir}/{source_name}/DL1/MC/protons" - - list_of_dir = np.sort(glob.glob(f"{proton_dir}/node*{os.path.sep}")) for directory in tqdm( range(len(list_of_dir)) ): # tqdm allows us to print a progessbar in the terminal - + os.makedirs(f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}") os.makedirs( f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}' @@ -146,7 +144,6 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): process_name = f"merging_{source}" if not NSB_match: MAGIC_DL1_dir = f"{target_dir}/{source}/DL1/Observations" - with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: lines = slurm_lines( @@ -157,10 +154,11 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f.writelines(lines) if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists(f"{MAGIC_DL1_dir}/M2"): + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( + f"{MAGIC_DL1_dir}/M2" + ): for i in MAGIC_runs: - - + os.makedirs( f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" ) # Creating a merged directory for the respective run @@ -172,7 +170,6 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): + "${SLURM_JOB_ID}.log\n" ) - os.system( f'find {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} -type f -name "*.h5" -size -3k -delete' ) @@ -207,7 +204,6 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" - lines = slurm_lines( p="short", J=process_name, @@ -218,7 +214,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f.writelines(lines) if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists(f"{MAGIC_DL1_dir}/M2"): + if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( + f"{MAGIC_DL1_dir}/M2" + ): dates = [ os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") ] @@ -227,9 +225,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/*") ] - + for r in runs: - + os.makedirs( f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" ) # Creating a merged directory for the respective run @@ -241,7 +239,6 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): + "${SLURM_JOB_ID}.log \n" ) - dates = [ os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M2/*") ] @@ -251,7 +248,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") ] - + for r in runs: os.makedirs( f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" @@ -289,14 +286,16 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") ] for i in dates: - + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): - + continue - - if 
len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) > 0: - - + + if ( + len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) + > 0 + ): + os.makedirs(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index a6211a1f..0a6458a3 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -83,7 +83,6 @@ def bash_stereo(target_dir, source, env_name, NSB_match): process_name = source if not NSB_match: - listOfNightsLST = np.sort( glob.glob(f"{target_dir}/{source}/DL1/Observations/Coincident/*") @@ -120,7 +119,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): ] f.writelines(lines) else: - + listOfNightsLST = np.sort( glob.glob(f"{target_dir}/v{__version__}/{source}/DL1Coincident/*") ) From 493662b7b6607245b5b70ad4539f455fb24a7b52 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Mon, 13 May 2024 15:18:59 +0000 Subject: [PATCH 078/236] extensive refactoring for calib to DL1 processing for MAGIC data the cases when M1 and M2 or NSB_match yes/no were treated by a different loops are now mostly removed for the data and some unification was done (e.g. adding version into the output dirs of NSB_match=no case. The MC part still needs to be checked if there are no issues caused by changed directories. --- .../setting_up_config_and_dir.py | 277 ++++-------------- 1 file changed, 64 insertions(+), 213 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index b677aaa1..4c583d85 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -89,10 +89,7 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match, source_name): conf["LST"] = LST_config conf["MAGIC"] = MAGIC_config - if not NSB_match: - file_name = f"{target_dir}/{source_name}/config_DL0_to_DL1.yaml" - else: - file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" + file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" with open(file_name, "w") as f: lines = [ "mc_tel_ids:", @@ -249,6 +246,7 @@ def lists_and_bash_gen_MAGIC( out_err=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", ) + obs_tag = "" if NSB_match else "Observations" with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: f.writelines(lines) if NSB_match: @@ -259,129 +257,51 @@ def lists_and_bash_gen_MAGIC( MAGIC_runs = [] MAGIC_runs.append(MAGIC) - for i in MAGIC_runs: - - if telescope_ids[-1] > 0: + for i in MAGIC_runs: + for magic in [1, 2]: + # if 1 then magic is second from last, if 2 then last + if telescope_ids[magic - 3] > 0: lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/v{__version__}/{source}/DL1/M2/{i[0]}/{i[1]}/logs \n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", + f'export 
IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs \n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n\n", ] f.writelines(lines) - f.write("\n") - if telescope_ids[-2] > 0: - lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/v{__version__}/{source}/DL1/M1/{i[0]}/{i[1]}/logs \n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", - ] - f.writelines(lines) - else: - if telescope_ids[-1] > 0: - for i in MAGIC_runs: - lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}\n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", - ] - f.writelines(lines) - f.write("\n") - if telescope_ids[-2] > 0: - for i in MAGIC_runs: - lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}\n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n", - ] - f.writelines(lines) - if NSB_match: - for magic in [1, 2]: - # if 1 then magic is second from last, if 2 then last - if telescope_ids[magic - 3] > 0: - for i in MAGIC_runs: - number_of_nodes = glob.glob( - f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' - ) - number_of_nodes = len(number_of_nodes) - 1 - if number_of_nodes < 0: - continue - slurm = slurm_lines( - p="short", - J=process_name, - array=number_of_nodes, - mem="2g", - out_err=f"{target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", - ) - lines = slurm + [ - f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", - "rc=$?\n", - 'if [ "$rc" -ne "0" ]; then\n', - " echo $SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID} $rc >> $OUTPUTDIR/logs/list_failed.log\n", - "fi\n", - "echo $SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID} $rc >> $OUTPUTDIR/logs/list_return.log\n", - ] - with open( - f"{source}_MAGIC-" + "I" * magic + f"_dl0_to_dl1_run_{i[1]}.sh", - "w", - ) as f: - f.writelines(lines) - else: - if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + for magic in [1, 2]: + # if 1 then magic is second from last, if 2 then last + if telescope_ids[magic - 3] > 0: for i in MAGIC_runs: - if telescope_ids[-1] > 0: - number_of_nodes = glob.glob( - f'/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' - ) - number_of_nodes = len(number_of_nodes) - 1 - - with open(f"{source}_MAGIC-II_dl0_to_dl1_run_{i[1]}.sh", "w") as f: - slurm = slurm_lines( - p="long", - J=process_name, - array=number_of_nodes, - mem="2g", - 
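# The per-run bash scripts are assembled by prepending slurm_lines(...)
# headers to the command lines. A stripped-down sketch of such a helper,
# assuming only the keyword names it is called with throughout this series
# (the real implementation lives in semi_automatic_scripts.clusters):
def slurm_lines(p, J, array=None, mem=None, out_err=None):
    lines = ["#!/bin/sh\n\n", f"#SBATCH -p {p}\n", f"#SBATCH -J {J}\n"]
    if array is not None:
        lines.append(f"#SBATCH --array=0-{array}\n")
    if mem is not None:
        lines.append(f"#SBATCH --mem={mem}\n")
    if out_err is not None:
        lines += [f"#SBATCH --output={out_err}.out\n", f"#SBATCH --error={out_err}.err\n"]
    return lines + ["ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n"]

with open("example_job.sh", "w") as f:  # hypothetical output script
    f.writelines(slurm_lines(p="short", J="demo", array=9, mem="2g", out_err="slurm-%x.%A_%a"))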
out_err=f"{target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}/slurm-%x.%A_%a", - ) - lines = slurm + [ - f"export OUTPUTDIR={target_dir}/{source}/DL1/Observations/M2/{i[0]}/{i[1]}\n", - f"cd {target_dir}/{source}/../\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", - "", - ] - f.writelines(lines) - - if telescope_ids[-2] > 0: - number_of_nodes = glob.glob( - f'/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' - ) - number_of_nodes = len(number_of_nodes) - 1 - - with open(f"{source}_MAGIC-I_dl0_to_dl1_run_{i[1]}.sh", "w") as f: - slurm = slurm_lines( - p="long", - J=process_name, - array=number_of_nodes, - mem="2g", - out_err=f"{target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}/slurm-%x.%A_%a", - ) - lines = slurm + [ - f"export OUTPUTDIR={target_dir}/{source}/DL1/Observations/M1/{i[0]}/{i[1]}\n", - f"cd {target_dir}/{source}/../\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", - "", - ] - f.writelines(lines) + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + if number_of_nodes < 0: + continue + slurm = slurm_lines( + p="short", # was long for no NSB_match + J=process_name, + array=number_of_nodes, + mem="2g", + out_err=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match + ) + lines = slurm + [ # without version for no NSB_match + f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + "rc=$?\n", + 'if [ "$rc" -ne "0" ]; then\n', + " echo $SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID} $rc >> $OUTPUTDIR/logs/list_failed.log\n", + "fi\n", + "echo $SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID} $rc >> $OUTPUTDIR/logs/list_return.log\n", + ] + with open( + f"{source}_MAGIC-" + "I" * magic + f"_dl0_to_dl1_run_{i[1]}.sh", + "w", + ) as f: + f.writelines(lines) def directories_generator( @@ -406,39 +326,29 @@ def directories_generator( """ if NSB_match: - if not os.path.exists(f"{target_dir}/v{__version__}"): - os.mkdir(f"{target_dir}/v{__version__}") - if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): - os.mkdir(f"{target_dir}/v{__version__}/{source_name}") - if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}/DL1"): - 
os.mkdir(f"{target_dir}/v{__version__}/{source_name}/DL1") + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1") dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") else: - if not os.path.exists(target_dir): - os.mkdir(target_dir) - os.mkdir(f"{target_dir}/{source_name}/DL1") - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/gammas") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/gammadiffuse") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/electrons") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/protons") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/helium") + dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1/Observations") + dir_list = [ + "Observations", + "MC/gammas", + "MC/gammadiffuse", + "MC/electrons", + "MC/protons", + "MC/helium", + ] + if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): + for dir in dir_list: + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}") else: overwrite = input( - f'MC directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? [only "y" or "n"]: ' + f'MC&data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? [only "y" or "n"]: ' ) if overwrite == "y": - os.system(f"rm -r {target_dir}/{source_name}") - os.mkdir(target_dir) - os.mkdir(f"{target_dir}/{source_name}/DL1") - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/gammas") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/gammadiffuse") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/electrons") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/protons") - os.mkdir(f"{target_dir}/{source_name}/DL1/MC/helium") + os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") + for dir in dir_list: + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}") else: print("Directory not modified.") @@ -450,69 +360,10 @@ def directories_generator( MAGIC_runs = [] MAGIC_runs.append(MAGIC) - if NSB_match: - for i in MAGIC_runs: - - if telescope_ids[-1] > 0: - if not os.path.exists(f"{dl1_dir}"): - os.mkdir(f"{dl1_dir}") - if not os.path.exists(f"{dl1_dir}/M2"): - os.mkdir(f"{dl1_dir}/M2") - if not os.path.exists(f"{dl1_dir}/M2/{i[0]}"): - os.mkdir(f"{dl1_dir}/M2/{i[0]}") - - if not os.path.exists(f"{dl1_dir}/M2/{i[0]}/{i[1]}"): - os.mkdir(f"{dl1_dir}/M2/{i[0]}/{i[1]}") - if not os.path.exists(f"{dl1_dir}/M2/{i[0]}/{i[1]}/logs"): - os.mkdir(f"{dl1_dir}/M2/{i[0]}/{i[1]}/logs") - if telescope_ids[-2] > 0: - if not os.path.exists(f"{dl1_dir}"): - os.mkdir(f"{dl1_dir}") - if not os.path.exists(f"{dl1_dir}/M1"): - os.mkdir(f"{dl1_dir}/M1") - if not os.path.exists(f"{dl1_dir}/M1/{i[0]}"): - os.mkdir(f"{dl1_dir}/M1/{i[0]}") - - if not os.path.exists(f"{dl1_dir}/M1/{i[0]}/{i[1]}"): - os.mkdir(f"{dl1_dir}/M1/{i[0]}/{i[1]}") - if not os.path.exists(f"{dl1_dir}/M1/{i[0]}/{i[1]}/logs"): - os.mkdir(f"{dl1_dir}/M1/{i[0]}/{i[1]}/logs") - else: - if telescope_ids[-1] > 0: - if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M2"): - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M2") - for i in MAGIC_runs: - if not os.path.exists( - f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}" - ): - os.mkdir( - f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}" - ) - os.mkdir( - f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}/{i[1]}" - ) - else: - 
os.mkdir( - f"{target_dir}/{source_name}/DL1/Observations/M2/{i[0]}/{i[1]}" - ) - - if telescope_ids[-2] > 0: - if not os.path.exists(f"{target_dir}/{source_name}/DL1/Observations/M1"): - os.mkdir(f"{target_dir}/{source_name}/DL1/Observations/M1") - for i in MAGIC_runs: - if not os.path.exists( - f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}" - ): - os.mkdir( - f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}" - ) - os.mkdir( - f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}/{i[1]}" - ) - else: - os.mkdir( - f"{target_dir}/{source_name}/DL1/Observations/M1/{i[0]}/{i[1]}" - ) + for i in MAGIC_runs: + for magic in [1, 2]: + if telescope_ids[magic - 3] > 0: + os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs") def main(): @@ -592,7 +443,7 @@ def main(): print("This process will take about 10 min to run if the IT cluster is free.") directories_generator( - target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name + str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( telescope_ids, target_dir, noise_value, NSB_match, source_name From 048a24f84c663ce1de045308794621e15b936e62 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 13 May 2024 16:05:26 +0000 Subject: [PATCH 079/236] try to remove duplicated code + minor fixes --- .../coincident_events.py | 270 +++++++----------- 1 file changed, 104 insertions(+), 166 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 1746cd3c..51c593b1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -25,6 +25,7 @@ import yaml from magicctapipe import __version__ +from magicctapipe.io import resource_file from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines __all__ = ["configfile_coincidence", "linking_bash_lst"] @@ -51,18 +52,38 @@ def configfile_coincidence(ids, target_dir, source_name, NSB_match): If real data are matched to pre-processed MCs or not """ - lines = [ - f"mc_tel_ids:\n LST-1: {ids[0]}\n LST-2: {ids[1]}\n LST-3: {ids[2]}\n LST-4: {ids[3]}\n MAGIC-I: {ids[4]}\n MAGIC-II: {ids[5]}\n\n", - 'event_coincidence:\n timestamp_type_lst: "dragon_time" # select "dragon_time", "tib_time" or "ucts_time"\n pre_offset_search: true\n n_pre_offset_search_events: 100\n window_half_width: "300 ns"\n', - ' time_offset:\n start: "-10 us"\n stop: "0 us"\n', - ] + config_file = resource_file("config.yaml") + with open( + config_file, "rb" + ) as fc: # "rb" mode opens the file in binary format for reading + config_dict = yaml.safe_load(fc) + coincidence=config_dict['event_coincidence'] + + + conf = {} + conf["event_coincidence"] = coincidence + + if not NSB_match: file_name = f"{target_dir}/{source_name}/config_coincidence.yaml" else: file_name = f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" - with open(file_name, "w") as f: + lines = [ + "mc_tel_ids:", + f"\n LST-1: {ids[0]}", + f"\n LST-2: {ids[1]}", + f"\n LST-3: {ids[2]}", + f"\n LST-4: {ids[3]}", + f"\n MAGIC-I: {ids[4]}", + f"\n MAGIC-II: {ids[5]}", + "\n", + ] f.writelines(lines) + yaml.dump(conf, f, default_flow_style=False) + + + def linking_bash_lst( @@ -94,134 +115,78 @@ def linking_bash_lst( LST_runs = [] 
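# configfile_coincidence() now copies the 'event_coincidence' block out of
# the packaged config.yaml instead of hard-coding it line by line. The
# load/extract/dump round-trip in isolation (a toy dict stands in for
# magicctapipe.io.resource_file("config.yaml"); keys are illustrative only):
import yaml

config_dict = {"event_coincidence": {"example_key": "example value"}}
conf = {"event_coincidence": config_dict["event_coincidence"]}
with open("config_coincidence_example.yaml", "w") as f:
    f.write("mc_tel_ids:\n  LST-1: 1\n  MAGIC-I: 2\n  MAGIC-II: 3\n")  # toy IDs
    yaml.dump(conf, f, default_flow_style=False)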
LST_runs.append(LST) - if (len(LST_runs) == 2) and (len(LST_runs[0]) == 10): - dt = LST_runs - LST_runs = [] - LST_runs.append(dt) + if NSB_match: coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" - - dates = [ - os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") - ] - - for d in dates: - Y_M = int(d.split("_")[1]) - M_M = int(d.split("_")[2]) - D_M = int(d.split("_")[3]) - - day_MAGIC = dtdt(Y_M, M_M, D_M) - - delta = timedelta(days=1) - for i in LST_runs: - Y_L = i[0].split("_")[0] - M_L = i[0].split("_")[1] - D_L = i[0].split("_")[2] - day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) - if day_MAGIC == day_LST + delta: - - lstObsDir = ( - i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2] - ) - - inputdir = ( - f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" - ) - - os.makedirs(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs") - - outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" - list_of_subruns = np.sort( - glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5") - ) - if os.path.exists(f"{outputdir}/logs/list_LST"): - with open(f"{outputdir}/logs/list_LST", "a") as LSTdataPathFile: - for subrun in list_of_subruns: - LSTdataPathFile.write( - f"{subrun}\n" - ) # If this files already exists, simply append the new information - else: - with open( - f"{outputdir}/logs/list_LST.txt", "w" - ) as f: # If the file list_LST.txt does not exist, it will be created here - for subrun in list_of_subruns: - f.write(f"{subrun}\n") - - if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): - continue - with open(f"{outputdir}/logs/list_LST.txt", "r") as f: - process_size = len(f.readlines()) - 1 - - if process_size < 0: - continue - slurm = slurm_lines( - p="short", - J=f"{source_name}_coincidence", - array=process_size, - mem="8g", - out_err=f"{outputdir}/logs/slurm-%x.%A_%a", - ) - lines = slurm + [ - f"export INM={MAGIC_DL1_dir}/Merged/Merged_{str(Y_M).zfill(4)}_{str(M_M).zfill(2)}_{str(D_M).zfill(2)}\n", - f"export OUTPUTDIR={outputdir}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1", - ] - with open( - f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", - "w", - ) as f: - f.writelines(lines) else: coincidence_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations" + MAGIC_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations/" + + dates = [ + os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") + ] + + for d in dates: + Y_M, M_M, D_M = [int(x) for x in d.split("_")[1:]] + + day_MAGIC = dtdt(Y_M, M_M, D_M) + delta = timedelta(days=1) for i in LST_runs: - lstObsDir = i[0].split("_")[0] + i[0].split("_")[1] + i[0].split("_")[2] - inputdir = f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" - outputdir = f"{coincidence_DL1_dir}/Coincident/{lstObsDir}" - list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) - os.makedirs(outputdir) - - with open(f"{outputdir}/list_LST.txt", "a+") as LSTdataPathFile: - for subrun in list_of_subruns: - LSTdataPathFile.write( - f"{subrun}\n" - ) # If this files already exists, simply append the 
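# linking_bash_lst() pairs each merged MAGIC night with LST runs whose date
# label is one calendar day earlier, hence the timedelta(days=1) comparison
# in this hunk. Stand-alone illustration:
from datetime import datetime, timedelta

day_MAGIC = datetime(2021, 2, 14)
day_LST = datetime(2021, 2, 13)
if day_MAGIC == day_LST + timedelta(days=1):
    print("match: MAGIC night 2021_02_14 <-> LST night 2021_02_13")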
new information - - process_name = source_name - - listOfNightsLST = np.sort( - glob.glob(f"{target_dir}/{source_name}/DL1/Observations/Coincident/*") - ) - listOfNightsMAGIC = np.sort( - glob.glob(f"{target_dir}/{source_name}/DL1/Observations/Merged/Merged*") - ) - - for nightMAGIC, nightLST in zip(listOfNightsMAGIC, listOfNightsLST): - with open(f"{nightLST}/list_LST.txt", "r") as f: - process_size = len(f.readlines()) - 1 - - with open(f"LST_coincident_{nightLST.split('/')[-1]}.sh", "w") as f: + Y_L, M_L, D_L = [int(x) for x in i[0].split("_")] + + day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) + if day_MAGIC == day_LST + delta: + + lstObsDir = i[0].replace('_', '') + inputdir = ( + f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" + ) + + os.makedirs(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs") + + outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" + list_of_subruns = np.sort( + glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5") + ) + + with open(f"{outputdir}/logs/list_LST", "a+") as LSTdataPathFile: + for subrun in list_of_subruns: + LSTdataPathFile.write( + f"{subrun}\n" + ) + + + if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): + continue + with open(f"{outputdir}/logs/list_LST.txt", "r") as f: + process_size = len(f.readlines()) - 1 + + if process_size < 0: + continue slurm = slurm_lines( p="short", - J=f"{process_name}_coincidence", + J=f"{source_name}_coincidence", array=process_size, mem="8g", - out_err=f"{nightLST}/slurm-%x.%A_%a", + out_err=f"{outputdir}/logs/slurm-%x.%A_%a", ) lines = slurm + [ - f"export INM={nightMAGIC}\n", - f"export OUTPUTDIR={nightLST}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/list_LST.txt))\n", + f"export INM={MAGIC_DL1_dir}/Merged/Merged_{d}\n", + f"export OUTPUTDIR={outputdir}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/{source_name}/config_coincidence.yaml >$LOG 2>&1", + "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1", ] - f.writelines(lines) + with open( + f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", + "w", + ) as f: + f.writelines(lines) + def main(): @@ -269,13 +234,22 @@ def main(): LST_runs_and_dates = f"{source_name}_LST_runs.txt" LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",") - if not NSB_match: + + + + try: + print("***** Linking the paths to LST data files...") print("***** Generating the bashscript...") linking_bash_lst( - target_dir, LST_runs, source_name, LST_version, env_name, NSB_match - ) # linking the data paths to the current working directory + target_dir, + LST_runs, + source_name, + LST_version, + env_name, + NSB_match, + ) # linking the data paths to current working directory print("***** Submitting processess to the cluster...") print(f"Process name: {source_name}_coincidence") @@ -284,55 +258,19 @@ def main(): ) # Below we run the bash scripts to find the coincident events - list_of_coincidence_scripts = np.sort(glob.glob("LST_coincident*.sh")) - + list_of_coincidence_scripts = np.sort( + 
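# The submission loop concatenates all sbatch calls into a single
# '&&'-chained shell line, capturing each job id via --parsable. The
# string-building pattern on its own (script names are placeholders):
scripts = ["coincident_a.sh", "coincident_b.sh", "coincident_c.sh"]
launch_jobs = ""
for n, run in enumerate(scripts):
    launch_jobs += (" && " if n > 0 else "") + f"coincidence{n}=$(sbatch --parsable {run})"
print(launch_jobs)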
glob.glob(f"{source_name}_LST_coincident*.sh") + ) + if len(list_of_coincidence_scripts) < 1: + continue + launch_jobs = "" for n, run in enumerate(list_of_coincidence_scripts): - if n == 0: - launch_jobs = f"coincidence{n}=$(sbatch --parsable {run})" - else: - launch_jobs = ( - f"{launch_jobs} && coincidence{n}=$(sbatch --parsable {run})" - ) + launch_jobs += (" && " if n>0 else "") +f"coincidence{n}=$(sbatch --parsable {run})" os.system(launch_jobs) - else: - - try: - - print("***** Linking the paths to LST data files...") - - print("***** Generating the bashscript...") - linking_bash_lst( - target_dir, - LST_runs, - source_name, - LST_version, - env_name, - NSB_match, - ) # linking the data paths to current working directory - - print("***** Submitting processess to the cluster...") - print(f"Process name: {source_name}_coincidence") - print( - f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_coincidence" - ) - - # Below we run the bash scripts to find the coincident events - list_of_coincidence_scripts = np.sort( - glob.glob(f"{source_name}_LST_coincident*.sh") - ) - if len(list_of_coincidence_scripts) < 1: - continue - for n, run in enumerate(list_of_coincidence_scripts): - if n == 0: - launch_jobs = f"coincidence{n}=$(sbatch --parsable {run})" - else: - launch_jobs = f"{launch_jobs} && coincidence{n}=$(sbatch --parsable {run})" - - os.system(launch_jobs) - except OSError as exc: - print(exc) + except OSError as exc: + print(exc) if __name__ == "__main__": From d9d1a23a61e03e1db4352f67c9f2e0c4dae8ef90 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Mon, 13 May 2024 17:10:10 +0000 Subject: [PATCH 080/236] clean up of the code for running over MCs introduced version numbers and logs dir also for MC dirs introduced a loop over MC types to avoid code repetition --- .../setting_up_config_and_dir.py | 104 +++++++----------- 1 file changed, 41 insertions(+), 63 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 4c583d85..be8a280d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -140,18 +140,19 @@ def lists_and_bash_generator( if MC_path == "": return - + print(f"running {particle_type} from {MC_path}") process_name = source_name list_of_nodes = glob.glob(f"{MC_path}/node*") + dir1 = f"{target_dir}/v{__version__}/{source_name}" with open( - f"{target_dir}/{source_name}/list_nodes_{particle_type}_complete.txt", "w" + f"{dir1}/logs/list_nodes_{particle_type}_complete.txt", "w" ) as f: # creating list_nodes_gammas_complete.txt for i in list_of_nodes: - f.write(f"{i}/output_{SimTel_version}\n") + f.write(f"{i}/output{SimTel_version}\n") with open( - f"{target_dir}/{source_name}/list_folder_{particle_type}.txt", "w" + f"{dir1}/logs/list_folder_{particle_type}.txt", "w" ) as f: # creating list_folder_gammas.txt for i in list_of_nodes: f.write(f'{i.split("/")[-1]}\n') @@ -164,22 +165,23 @@ def lists_and_bash_generator( slurm = slurm_lines( p="short", J=process_name, - out_err=f"{target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-linkMC-%x.%j", + out_err=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-linkMC-%x.%j", ) lines_of_config_file = slurm + [ "while read -r -u 3 lineA && read -r -u 4 lineB\n", "do\n", - f" cd {target_dir}/{source_name}/DL1/MC/{particle_type}\n", + f" cd 
{dir1}/DL1/MC/{particle_type}\n", " mkdir $lineB\n", " cd $lineA\n", " ls -lR *.gz |wc -l\n", - f" ls *.gz > {target_dir}/{source_name}/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n", + f" mkdir -p {dir1}/DL1/MC/{particle_type}/$lineB/logs/\n", + f" ls *.gz > {dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n", ' string=$lineA"/"\n', - f" export file={target_dir}/{source_name}/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n\n", + f" export file={dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n\n", " cat $file | while read line; do echo $string${line}" - + f" >>{target_dir}/{source_name}/DL1/MC/{particle_type}/$lineB/list_dl0_ok.txt; done\n\n", + + f" >>{dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", ' echo "folder $lineB and node $lineA"\n', - f'done 3<"{target_dir}/{source_name}/list_nodes_{particle_type}_complete.txt" 4<"{target_dir}/{source_name}/list_folder_{particle_type}.txt"\n', + f'done 3<"{dir1}/logs/list_nodes_{particle_type}_complete.txt" 4<"{dir1}/logs/list_folder_{particle_type}.txt"\n', "", ] f.writelines(lines_of_config_file) @@ -197,20 +199,20 @@ def lists_and_bash_generator( J=process_name, array=number_of_nodes, mem="10g", - out_err=f"{target_dir}/{source_name}/DL1/MC/{particle_type}/slurm-%x.%A_%a", + out_err=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-%x.%A_%a", ) lines_of_config_file = slurm + [ - f"cd {target_dir}/{source_name}/DL1/MC/{particle_type}\n\n", - f"export INF={target_dir}/{source_name}\n", + f"cd {dir1}/DL1/MC/{particle_type}\n\n", + f"export INF={dir1}/logs\n", f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "cd $SAMPLE\n\n", - f"export LOG={target_dir}/{source_name}/DL1/MC/{particle_type}" + f"export LOG={dir1}/DL1/MC/{particle_type}/logs" + "/simtel_{$SAMPLE}_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}_all.log\n", "cat list_dl0_ok.txt | while read line\n", "do\n", - f" cd {target_dir}/{source_name}/../\n", - f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {target_dir}/{source_name}/DL1/MC/{particle_type}/$SAMPLE --config-file {target_dir}/{source_name}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", + f" cd {dir1}/../\n", + f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/MC/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", "done\n", "", ] @@ -339,16 +341,20 @@ def directories_generator( "MC/helium", ] if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/logs") for dir in dir_list: - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}") + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs") else: overwrite = input( f'MC&data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? 
[only "y" or "n"]: ' ) if overwrite == "y": os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/logs") for dir in dir_list: - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}") + os.makedirs( + f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs" + ) else: print("Directory not modified.") @@ -454,51 +460,23 @@ def main(): if (args.analysis_type == "onlyMC") or ( args.analysis_type == "doEverything" ): - lists_and_bash_generator( - "gammas", - target_dir, - MC_gammas, - SimTel_version, - focal_length, - env_name, - source_name, - ) # gammas - lists_and_bash_generator( - "electrons", - target_dir, - MC_electrons, - SimTel_version, - focal_length, - env_name, - source_name, - ) # electrons - lists_and_bash_generator( - "helium", - target_dir, - MC_helium, - SimTel_version, - focal_length, - env_name, - source_name, - ) # helium - lists_and_bash_generator( - "protons", - target_dir, - MC_protons, - SimTel_version, - focal_length, - env_name, - source_name, - ) # protons - lists_and_bash_generator( - "gammadiffuse", - target_dir, - MC_gammadiff, - SimTel_version, - focal_length, - env_name, - source_name, - ) # gammadiffuse + to_process = { + "gammas": MC_gammas, + "electrons": MC_electrons, + "helium": MC_helium, + "protons": MC_protons, + "gammadiffuse": MC_gammadiff, + } + for particle in to_process.keys(): + lists_and_bash_generator( + particle, + target_dir, + to_process[particle], + SimTel_version, + focal_length, + env_name, + source_name, + ) # Here we do the MC DL0 to DL1 conversion: list_of_MC = glob.glob("linking_MC_*s.sh") From c69cb3c30bb3050ca804909feaab0cf9acd4cc81 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 14 May 2024 07:12:09 +0000 Subject: [PATCH 081/236] stereo events --- .../coincident_events.py | 35 +-- .../semi_automatic_scripts/stereo_events.py | 206 +++++++----------- 2 files changed, 93 insertions(+), 148 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 51c593b1..571826a0 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -57,13 +57,11 @@ def configfile_coincidence(ids, target_dir, source_name, NSB_match): config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) - coincidence=config_dict['event_coincidence'] + coincidence = config_dict["event_coincidence"] - conf = {} conf["event_coincidence"] = coincidence - if not NSB_match: file_name = f"{target_dir}/{source_name}/config_coincidence.yaml" else: @@ -83,9 +81,6 @@ def configfile_coincidence(ids, target_dir, source_name, NSB_match): yaml.dump(conf, f, default_flow_style=False) - - - def linking_bash_lst( target_dir, LST_runs, source_name, LST_version, env_name, NSB_match ): @@ -115,7 +110,6 @@ def linking_bash_lst( LST_runs = [] LST_runs.append(LST) - if NSB_match: coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" @@ -124,9 +118,7 @@ def linking_bash_lst( coincidence_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations" MAGIC_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations/" - dates = [ - os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*") - ] + dates = [os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*")] for d in dates: Y_M, 
M_M, D_M = [int(x) for x in d.split("_")[1:]] @@ -136,11 +128,11 @@ def linking_bash_lst( delta = timedelta(days=1) for i in LST_runs: Y_L, M_L, D_L = [int(x) for x in i[0].split("_")] - + day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) if day_MAGIC == day_LST + delta: - lstObsDir = i[0].replace('_', '') + lstObsDir = i[0].replace("_", "") inputdir = ( f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" ) @@ -148,16 +140,11 @@ def linking_bash_lst( os.makedirs(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs") outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" - list_of_subruns = np.sort( - glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5") - ) - + list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) + with open(f"{outputdir}/logs/list_LST", "a+") as LSTdataPathFile: for subrun in list_of_subruns: - LSTdataPathFile.write( - f"{subrun}\n" - ) - + LSTdataPathFile.write(f"{subrun}\n") if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): continue @@ -186,7 +173,6 @@ def linking_bash_lst( "w", ) as f: f.writelines(lines) - def main(): @@ -234,9 +220,6 @@ def main(): LST_runs_and_dates = f"{source_name}_LST_runs.txt" LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",") - - - try: print("***** Linking the paths to LST data files...") @@ -265,7 +248,9 @@ def main(): continue launch_jobs = "" for n, run in enumerate(list_of_coincidence_scripts): - launch_jobs += (" && " if n>0 else "") +f"coincidence{n}=$(sbatch --parsable {run})" + launch_jobs += ( + " && " if n > 0 else "" + ) + f"coincidence{n}=$(sbatch --parsable {run})" os.system(launch_jobs) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 0a6458a3..519aee3f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -27,6 +27,7 @@ import yaml from magicctapipe import __version__ +from magicctapipe.io import resource_file __all__ = ["configfile_stereo", "bash_stereo", "bash_stereoMC"] @@ -51,17 +52,34 @@ def configfile_stereo(ids, target_dir, source_name, NSB_match): NSB_match : bool If real data are matched to pre-processed MCs or not """ + + config_file = resource_file("config.yaml") + with open( + config_file, "rb" + ) as fc: # "rb" mode opens the file in binary format for reading + config_dict = yaml.safe_load(fc) + coincidence = config_dict["stereo_reco"] + + conf = {} + conf["stereo_reco"] = coincidence + if not NSB_match: file_name = f"{target_dir}/{source_name}/config_stereo.yaml" else: file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" with open(file_name, "w") as f: lines = [ - f"mc_tel_ids:\n LST-1: {ids[0]}\n LST-2: {ids[1]}\n LST-3: {ids[2]}\n LST-4: {ids[3]}\n MAGIC-I: {ids[4]}\n MAGIC-II: {ids[5]}\n\n", - 'stereo_reco:\n quality_cuts: "(intensity > 50) & (width > 0)"\n theta_uplim: "6 arcmin"\n', + "mc_tel_ids:", + f"\n LST-1: {ids[0]}", + f"\n LST-2: {ids[1]}", + f"\n LST-3: {ids[2]}", + f"\n LST-4: {ids[3]}", + f"\n MAGIC-I: {ids[4]}", + f"\n MAGIC-II: {ids[5]}", + "\n", ] - f.writelines(lines) + yaml.dump(conf, f, default_flow_style=False) def bash_stereo(target_dir, source, env_name, NSB_match): @@ -82,82 +100,53 @@ def bash_stereo(target_dir, source, env_name, NSB_match): """ process_name = source - if not NSB_match: - listOfNightsLST = np.sort( - glob.glob(f"{target_dir}/{source}/DL1/Observations/Coincident/*") - ) + 
if NSB_match: + coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}" - for nightLST in listOfNightsLST: - stereoDir = f"{target_dir}/{source}/DL1/Observations/Coincident_stereo/{nightLST.split('/')[-1]}" - os.makedirs(stereoDir) - - os.system( - f"ls {nightLST}/*LST*.h5 > {nightLST}/list_coin.txt" - ) # generating a list with the DL1 coincident data files. - with open(f"{nightLST}/list_coin.txt", "r") as f: - process_size = len(f.readlines()) - 1 - - with open(f"StereoEvents_real_{nightLST.split('/')[-1]}.sh", "w") as f: - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}_stereo\n", - f"#SBATCH --array=0-{process_size}%100\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={stereoDir}/logs/slurm-%x.%A_%a.out" - f"#SBATCH --error={stereoDir}/logs/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"export INPUTDIR={nightLST}\n", - f"export OUTPUTDIR={stereoDir}\n", - "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_stereo.yaml >$LOG 2>&1", - ] - f.writelines(lines) else: + coincidence_DL1_dir = f"{target_dir}/{source}/DL1/Observations" - listOfNightsLST = np.sort( - glob.glob(f"{target_dir}/v{__version__}/{source}/DL1Coincident/*") + listOfNightsLST = np.sort(glob.glob(f"{coincidence_DL1_dir}/DL1Coincident/*")) + + for nightLST in listOfNightsLST: + stereoDir = ( + f"{coincidence_DL1_dir}/DL1CoincidentStereo/{nightLST.split('/')[-1]}" ) - for nightLST in listOfNightsLST: - stereoDir = f'{target_dir}/v{__version__}/{source}/DL1CoincidentStereo/{nightLST.split("/")[-1]}' - os.makedirs(f"{stereoDir}/logs") - if not os.listdir(f"{nightLST}"): - continue - if len(os.listdir(nightLST)) < 2: - continue - os.system( - f"ls {nightLST}/*LST*.h5 > {stereoDir}/logs/list_coin.txt" - ) # generating a list with the DL1 coincident data files. - with open(f"{stereoDir}/logs/list_coin.txt", "r") as f: - process_size = len(f.readlines()) - 1 - - if process_size < 0: - continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}_stereo\n", - f"#SBATCH --array=0-{process_size}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={stereoDir}/logs/slurm-%x.%A_%a.out" - f"#SBATCH --error={stereoDir}/logs/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"export INPUTDIR={nightLST}\n", - f"export OUTPUTDIR={stereoDir}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1", - ] - with open(f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w") as f: - f.writelines(lines) + os.makedirs(f"{stereoDir}/logs") + if not os.listdir(f"{nightLST}"): + continue + if len(os.listdir(nightLST)) < 2: + continue + + os.system( + f"ls {nightLST}/*LST*.h5 > {stereoDir}/logs/list_coin.txt" + ) # generating a list with the DL1 coincident data files. 
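# list_coin.txt holds one coincident DL1 file per line; the job array below
# is sized as (number of lines - 1) because SLURM array indices start at 0
# and ${SLURM_ARRAY_TASK_ID} indexes SAMPLE_LIST directly.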
+
+        with open(f"{stereoDir}/logs/list_coin.txt", "r") as f:
+            process_size = len(f.readlines()) - 1
+
+        if process_size < 0:
+            continue
+        lines = [
+            "#!/bin/sh\n\n",
+            "#SBATCH -p short\n",
+            f"#SBATCH -J {process_name}_stereo\n",
+            f"#SBATCH --array=0-{process_size}\n",
+            "#SBATCH -n 1\n\n",
+            f"#SBATCH --output={stereoDir}/logs/slurm-%x.%A_%a.out\n",
+            f"#SBATCH --error={stereoDir}/logs/slurm-%x.%A_%a.err\n\n",
+            "ulimit -l unlimited\n",
+            "ulimit -s unlimited\n",
+            "ulimit -a\n\n",
+            f"export INPUTDIR={nightLST}\n",
+            f"export OUTPUTDIR={stereoDir}\n",
+            "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n",
+            "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n",
+            "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n",
+            f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1",
+        ]
+        with open(f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w") as f:
+            f.writelines(lines)
 
 
 def bash_stereoMC(target_dir, identification, env_name, source):
@@ -286,56 +275,27 @@ def main():
             os.system(launch_jobs)
 
     # Below we run the analysis on the real data
-    if not NSB_match:
-
-        if (
-            (args.analysis_type == "onlyReal")
-            or (args.analysis_type == "doEverything")
-            or NSB_match
-        ):
-            print("***** Generating the bashscript for real data...")
-            bash_stereo(target_dir, source_name, env_name, NSB_match)
-
-            list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_real_*.sh"))
-            print("***** Submitting processes to the cluster...")
-            print(f"Process name: {source_name}_stereo")
-            print(
-                f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo"
-            )
-            for n, run in enumerate(list_of_stereo_scripts):
-                if n == 0:
-                    launch_jobs = f"stereo{n}=$(sbatch --parsable {run})"
-                else:
-                    launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable --dependency=afterany:$stereo{n-1} {run})"
-
-            os.system(launch_jobs)
-
-    else:
-
-        print("***** Generating the bashscript...")
-        bash_stereo(target_dir, source_name, env_name, NSB_match)
-
-        print("***** Submitting processess to the cluster...")
-        print(f"Process name: {source_name}_stereo")
-        print(
-            f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo"
-        )
-
-        # Below we run the bash scripts to find the stereo events
-        list_of_stereo_scripts = np.sort(
-            glob.glob(f"{source_name}_StereoEvents*.sh")
-        )
-        if len(list_of_stereo_scripts) < 1:
-            continue
-        for n, run in enumerate(list_of_stereo_scripts):
-            if n == 0:
-                launch_jobs = f"stereo{n}=$(sbatch --parsable {run})"
-            else:
-                launch_jobs = (
-                    f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})"
-                )
-        os.system(launch_jobs)
+    print("***** Generating the bash script...")
+    bash_stereo(target_dir, source_name, env_name, NSB_match)
+
+    print("***** Submitting processes to the cluster...")
+    print(f"Process name: {source_name}_stereo")
+    print(
+        f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo"
+    )
+
+    # Below we run the bash scripts to find the stereo events
+    list_of_stereo_scripts = np.sort(glob.glob(f"{source_name}_StereoEvents*.sh"))
+    if len(list_of_stereo_scripts) < 1:
+        continue
+    for n, run in enumerate(list_of_stereo_scripts):
+        if n == 0:
+            launch_jobs = f"stereo{n}=$(sbatch --parsable {run})"
+        else:
+            launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})"
+
+    os.system(launch_jobs)
 
 
 if __name__ == "__main__":
From 5cfb27324ea9cfc422a3507f09e930a98e47c7ed Mon 
Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 14 May 2024 12:40:29 +0000 Subject: [PATCH 082/236] fixed paths for MC generation --- .../setting_up_config_and_dir.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index be8a280d..113a9d7c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -192,7 +192,6 @@ def lists_and_bash_generator( number_of_nodes = glob.glob(f"{MC_path}/node*") number_of_nodes = len(number_of_nodes) - 1 - with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: slurm = slurm_lines( p="xxl", @@ -209,7 +208,7 @@ def lists_and_bash_generator( "cd $SAMPLE\n\n", f"export LOG={dir1}/DL1/MC/{particle_type}/logs" + "/simtel_{$SAMPLE}_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}_all.log\n", - "cat list_dl0_ok.txt | while read line\n", + "cat logs/list_dl0_ok.txt | while read line\n", "do\n", f" cd {dir1}/../\n", f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/MC/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", @@ -410,11 +409,11 @@ def main(): NSB_match = config["general"]["NSB_matching"] # LST_runs_and_dates = config["general"]["LST_runs"] - MC_gammas = str(Path(config["directories"]["MC_gammas"])) - MC_electrons = str(Path(config["directories"]["MC_electrons"])) - MC_helium = str(Path(config["directories"]["MC_helium"])) - MC_protons = str(Path(config["directories"]["MC_protons"])) - MC_gammadiff = str(Path(config["directories"]["MC_gammadiff"])) + MC_gammas = config["directories"]["MC_gammas"] + MC_electrons = config["directories"]["MC_electrons"] + MC_helium = config["directories"]["MC_helium"] + MC_protons = config["directories"]["MC_protons"] + MC_gammadiff = config["directories"]["MC_gammadiff"] focal_length = config["general"]["focal_length"] source = config["data_selection"]["source_name_output"] From 979a19d0aff6056294352d8f0874af5f879dfbc5 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 14 May 2024 15:56:22 +0000 Subject: [PATCH 083/236] added a check if the list of directories is empty with the list of available versions and data levels --- .../lst1_magic/semi_automatic_scripts/job_accounting.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 4a435f10..6c1d0944 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -79,10 +79,19 @@ def main(): # timerange = config["data_selection"]["time_range"] # skip_LST = config["data_selection"]["skip_LST_runs"] # skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"] + NSB_matching = config["general"]["NSB_matching"] work_dir = config["directories"]["workspace_dir"] print(f"Checking progress of jobs stored in {work_dir}") dirs = sorted(glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/*/*")) + if dirs == []: + versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] + print("Error, no directories found") + print(f"for path {work_dir} found in {args.config_file} 
this is available") + print(f"Versions {versions}") + tag = "" if NSB_matching else "/Observations" + print(f"Supported data types: DL1{tag}/M1, DL1{tag}/M2") + exit(1) all_todo = 0 all_return = 0 From abcb9052df330ade2223614b677aaedea1e5a2b2 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 14 May 2024 16:14:31 +0000 Subject: [PATCH 084/236] added an option to skip CPU and memory accounting it makes the script much faster if one is only interested in counting how many jobs are done/without errors also simplified .split() command --- .../semi_automatic_scripts/job_accounting.py | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 6c1d0944..f9c54d9f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -69,6 +69,12 @@ def main(): help="MCP version (used for subdirectory name)", ) + parser.add_argument( + "--no-accounting", + action="store_true", + help="No CPU/Memory usage check (faster)", + ) + args = parser.parse_args() with open(args.config_file, "r") as f: config = yaml.safe_load(f) @@ -118,20 +124,21 @@ def main(): returns = fp.readlines() this_return = len(returns) for line in returns: - file_in, slurm_id, task_id, rc = line[0:-1].split(" ") + file_in, slurm_id, task_id, rc = line.split() if rc == "0": this_good += 1 # now check accounting - out = run_shell( - f'sacct --format="JobID,CPUTime,MaxRSS" --units=M -j {slurm_id}_{task_id}| tail -1' - ) - _, cpu, mem = out.split() - hh, mm, ss = (int(x) for x in str(cpu).split(":")) - delta = timedelta( - days=hh // 24, hours=hh % 24, minutes=mm, seconds=ss - ) - this_cpu.append(delta) - this_mem.append(float(mem[0:-1])) + if not args.no_accounting: + out = run_shell( + f'sacct --format="JobID,CPUTime,MaxRSS" --units=M -j {slurm_id}_{task_id}| tail -1' + ) + _, cpu, mem = out.split() + hh, mm, ss = (int(x) for x in str(cpu).split(":")) + delta = timedelta( + days=hh // 24, hours=hh % 24, minutes=mm, seconds=ss + ) + this_cpu.append(delta) + this_mem.append(float(mem[0:-1])) else: print(f"file {file_in} failed with error {rc}") if len(this_cpu) > 0: @@ -162,7 +169,7 @@ def main(): f"{status}to do: {this_todo}, finished: {this_return}, no errors: {this_good}{ENDC}" ) - print("SUMMARY") + print("\nSUMMARY") if all_good < all_return: status = RED # there are errors in processing elif all_return < all_todo: From 5ba5003866732afc87bf771da93efe8c7efbe611 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 14 May 2024 16:39:27 +0000 Subject: [PATCH 085/236] renamed parameters of slurm_lines --- .../semi_automatic_scripts/clusters.py | 16 ++++++------- .../coincident_events.py | 6 ++--- .../semi_automatic_scripts/merging_runs.py | 18 +++++++------- .../setting_up_config_and_dir.py | 24 +++++++++---------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py index 0eb08483..25c1e1f6 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py @@ -3,21 +3,21 @@ """ -def slurm_lines(p, J, array=None, mem=None, out_err=None): +def slurm_lines(queue, job_name, array=None, mem=None, out_name=None): """ Function 
for creating the general lines that slurm scripts are starting with. Parameters ---------- - p : str + queue : str Name of the queue - J : str + job_name : str Job name array : None or int If not none array of jobs from 0 to array will be made mem : None or str Requested memory - out_err : None or str + out_name : None or str If the output should be written to a specific output file Returns @@ -27,13 +27,13 @@ def slurm_lines(p, J, array=None, mem=None, out_err=None): """ lines = [ "#!/bin/sh\n\n", - f"#SBATCH -p {p}\n", - f"#SBATCH -J {J}\n", + f"#SBATCH -p {queue}\n", + f"#SBATCH -J {job_name}\n", f"#SBATCH --array=0-{array}\n" if array is not None else "", f"#SBATCH --mem {mem}\n" if mem is not None else "", "#SBATCH -n 1\n\n", - f"#SBATCH --output={out_err}.out\n" if out_err is not None else "", - f"#SBATCH --error={out_err}.err\n\n" if out_err is not None else "", + f"#SBATCH --output={out_name}.out\n" if out_name is not None else "", + f"#SBATCH --error={out_name}.err\n\n" if out_name is not None else "", "ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 571826a0..27c48da4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -154,11 +154,11 @@ def linking_bash_lst( if process_size < 0: continue slurm = slurm_lines( - p="short", - J=f"{source_name}_coincidence", + queue="short", + job_name=f"{source_name}_coincidence", array=process_size, mem="8g", - out_err=f"{outputdir}/logs/slurm-%x.%A_%a", + out_name=f"{outputdir}/logs/slurm-%x.%A_%a", ) lines = slurm + [ f"export INM={MAGIC_DL1_dir}/Merged/Merged_{d}\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 971faba3..604b2e5b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -147,9 +147,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: lines = slurm_lines( - p="short", - J=process_name, - out_err=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", + queue="short", + job_name=process_name, + out_name=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", ) f.writelines(lines) @@ -205,10 +205,10 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" lines = slurm_lines( - p="short", - J=process_name, + queue="short", + job_name=process_name, mem="2g", - out_err=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", + out_name=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", ) with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: f.writelines(lines) @@ -342,11 +342,11 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): with open(f"Merge_MC_{identification}.sh", "w") as f: slurm = slurm_lines( - p="short", + queue="short", array=process_size, mem="7g", - J=process_name, - out_err=f"{MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a", + job_name=process_name, + out_name=f"{MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a", ) lines_bash_file = slurm + [ f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", diff --git 
a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 113a9d7c..4d6110b7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -163,9 +163,9 @@ def lists_and_bash_generator( with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: slurm = slurm_lines( - p="short", - J=process_name, - out_err=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-linkMC-%x.%j", + queue="short", + job_name=process_name, + out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-linkMC-%x.%j", ) lines_of_config_file = slurm + [ "while read -r -u 3 lineA && read -r -u 4 lineB\n", @@ -194,11 +194,11 @@ def lists_and_bash_generator( number_of_nodes = len(number_of_nodes) - 1 with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: slurm = slurm_lines( - p="xxl", - J=process_name, + queue="xxl", + job_name=process_name, array=number_of_nodes, mem="10g", - out_err=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-%x.%A_%a", + out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-%x.%A_%a", ) lines_of_config_file = slurm + [ f"cd {dir1}/DL1/MC/{particle_type}\n\n", @@ -242,9 +242,9 @@ def lists_and_bash_gen_MAGIC( """ process_name = source lines = slurm_lines( - p="short", - J=process_name, - out_err=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", + queue="short", + job_name=process_name, + out_name=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", ) obs_tag = "" if NSB_match else "Observations" @@ -280,11 +280,11 @@ def lists_and_bash_gen_MAGIC( if number_of_nodes < 0: continue slurm = slurm_lines( - p="short", # was long for no NSB_match - J=process_name, + queue="short", # was long for no NSB_match + job_name=process_name, array=number_of_nodes, mem="2g", - out_err=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match + out_name=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match ) lines = slurm + [ # without version for no NSB_match f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", From 97d62258764fde17797c7b8fe178f07ac8f29900 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 14 May 2024 16:48:21 +0000 Subject: [PATCH 086/236] changes in the docstring --- .../scripts/lst1_magic/semi_automatic_scripts/clusters.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py index 25c1e1f6..8f66417b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py @@ -1,6 +1,7 @@ """ Module for generating bash script lines for running analysis in different clusters """ +__all__ = ["slurm_lines"] def slurm_lines(queue, job_name, array=None, mem=None, out_name=None): @@ -16,14 +17,14 @@ def slurm_lines(queue, job_name, array=None, mem=None, out_name=None): array : None or int If not none array of jobs from 0 to array will be made mem : None or str - Requested memory + Requested memory. 
If None cluster default (5 GB) will be used out_name : None or str If the output should be written to a specific output file Returns ------- list - List of strings + List of strings to submit a SLURM job. """ lines = [ "#!/bin/sh\n\n", From a21a155277173775d159784ece7ed958e2b4a756 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 14 May 2024 16:59:21 +0000 Subject: [PATCH 087/236] removed outdated note about gammapy 0.19 --- magicctapipe/scripts/lst1_magic/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 5d5fd200..527f4df1 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -208,7 +208,7 @@ Since the DL3 may have only a few MBs, it is typically convenient to download it We prepared a [Jupyter Notebook](https://github.com/ranieremenezes/magic-cta-pipe/blob/master/magicctapipe/scripts/lst1_magic/SED_and_LC_from_DL3.ipynb) that quickly creates a counts map, a significance curve, an SED, and a light curve. You can give it a try. -The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs. Note that the notebooks run with gammapy v0.20 or higher, while the gammapy version adopted in the MAGIC+LST-1 pipeline is v0.19. +The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs. ## For mainteiners (creation of MAGIC adn LST databases) From cee1cef3bc1cf3e013231ff9c0b39af71e5fb31e Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 15 May 2024 11:46:34 +0000 Subject: [PATCH 088/236] refactoring of the code to avoid duplication of code for M1 and M2 --- .../semi_automatic_scripts/merging_runs.py | 149 +++++++----------- 1 file changed, 61 insertions(+), 88 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 604b2e5b..4ca77211 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -16,16 +16,16 @@ Workingdir/DL1/MC/PARTICLE/Merged Usage: -$ python merging_runs_and_splitting_training_samples.py (-c config.yaml) +$ merging_runs (-c config.yaml) If you want to merge only the MAGIC or only the MC data, you can do as follows: Only MAGIC: -$ python merging_runs_and_splitting_training_samples.py --analysis-type onlyMAGIC (-c config.yaml) +$ merging_runs --analysis-type onlyMAGIC (-c config.yaml) Only MC: -$ python merging_runs_and_splitting_training_samples.py --analysis-type onlyMC (-c config.yaml) +$ merging_runs --analysis-type onlyMC (-c config.yaml) """ import argparse @@ -142,48 +142,46 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): """ process_name = f"merging_{source}" + + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" if not NSB_match: - MAGIC_DL1_dir = f"{target_dir}/{source}/DL1/Observations" + MAGIC_DL1_dir += "Observations/" - with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: - lines = slurm_lines( - queue="short", - job_name=process_name, - out_name=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", - ) - f.writelines(lines) + lines = slurm_lines( + queue="short", + 
job_name=process_name, + mem="2g", + out_name=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", + ) + with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: + f.writelines(lines) + if not NSB_match: if identification == "0_subruns": if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( f"{MAGIC_DL1_dir}/M2" ): for i in MAGIC_runs: - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}", exist_ok=True ) # Creating a merged directory for the respective run - os.system( - f'find {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} -type f -name "*.h5" -size -3k -delete' - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/merge_M1_{i[0]}_{i[1]}_" - + "${SLURM_JOB_ID}.log\n" - ) - - os.system( - f'find {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} -type f -name "*.h5" -size -3k -delete' - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/merge_M2_{i[0]}_{i[1]}_" - + "${SLURM_JOB_ID}.log\n" - ) + for magic in [1, 2]: + os.system( + f'find {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' + ) + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/merge_M{magic}_{i[0]}_{i[1]}_" + + "${SLURM_JOB_ID}.log\n" + ) elif identification == "1_M1M2": if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( f"{MAGIC_DL1_dir}/M2" ): for i in MAGIC_runs: - os.makedirs(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged") + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged", exist_ok=True + ) f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/merge_{i[0]}_{[1]}_" + "${SLURM_JOB_ID}.log\n" @@ -191,75 +189,43 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): else: for i in MAGIC_runs: os.makedirs( - f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}" + f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}", exist_ok=True ) # Creating a merged directory for each night f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i[0]} >{MAGIC_DL1_dir}/Merged/Merged_{i[0]}/merge_night_{i[0]}_" + "${SLURM_JOB_ID}.log \n" ) - else: - - process_name = f"merging_{source}" - - MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" - - lines = slurm_lines( - queue="short", - job_name=process_name, - mem="2g", - out_name=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", - ) - with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: - f.writelines(lines) + else: if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( f"{MAGIC_DL1_dir}/M2" ): - dates = [ - os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") - ] - for i in dates: - runs = [ + for magic in [1, 2]: + dates = [ os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/*") + for x in glob.glob(f"{MAGIC_DL1_dir}/M{magic}/*") ] - for r in runs: - - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ) # Creating a merged directory for the respective run - os.system( - f'find {MAGIC_DL1_dir}/M1/{i}/{r} 
-type f -name "*.h5" -size -3k -delete' - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M1/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M1_{i}_{r}_" - + "${SLURM_JOB_ID}.log \n" - ) - - dates = [ - os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M2/*") - ] - - for i in dates: - runs = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") - ] + for i in dates: + runs = [ + os.path.basename(x) + for x in glob.glob(f"{MAGIC_DL1_dir}/M{2}/{i}/*") + ] + + for r in runs: + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs", + exist_ok=True, + ) # Creating a merged directory for the respective run + os.system( + f'find {MAGIC_DL1_dir}/M{magic}/{i}/{r} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' + ) + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M{magic}_{i}_{r}_" + + "${SLURM_JOB_ID}.log \n" + ) - for r in runs: - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" - ) # Creating a merged directory for the respective run - os.system( - f'find {MAGIC_DL1_dir}/M2/{i}/{r} -type f -name "*.h5" -size -3k -delete' - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M2/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M2_{i}_{r}_" - + "${SLURM_JOB_ID}.log \n" - ) elif identification == "1_M1M2": if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( f"{MAGIC_DL1_dir}/M2" @@ -276,7 +242,10 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if (len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0) and ( len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) > 0 ): - os.makedirs(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs") + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs", + exist_ok=True, + ) f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}_{r}_" + "${SLURM_JOB_ID}.log \n" @@ -296,7 +265,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): > 0 ): - os.makedirs(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs", exist_ok=True + ) f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" + "${SLURM_JOB_ID}.log \n" @@ -325,7 +296,7 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): process_name = f"merging_{source_name}" MC_DL1_dir = f"{target_dir}/{source_name}/DL1/MC" - os.makedirs(f"{MC_DL1_dir}/{identification}/Merged") + os.makedirs(f"{MC_DL1_dir}/{identification}/Merged", exist_ok=True) if identification == "protons": list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/train/node*")) @@ -408,7 +379,9 @@ def main(): for source_name in source_list: # Below we run the analysis on the MC data MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" - MAGIC_runs = np.genfromtxt(MAGIC_runs_and_dates, dtype=str, delimiter=",") + MAGIC_runs = np.genfromtxt( + MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 + ) if not NSB_match: if (args.analysis_type == "onlyMC") or ( args.analysis_type == "doEverything" From 
adae025d4a29b5feb9462e6ed7b9a4244dc7c8ff Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 15 May 2024 14:55:00 +0000 Subject: [PATCH 089/236] refactoring for the case of NSB_match set to true --- .../semi_automatic_scripts/merging_runs.py | 116 ++++++++---------- 1 file changed, 49 insertions(+), 67 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 4ca77211..734d4a35 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -170,7 +170,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f'find {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/merge_M{magic}_{i[0]}_{i[1]}_" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs/merge_M{magic}_{i[0]}_{i[1]}_" + "${SLURM_JOB_ID}.log\n" ) @@ -198,80 +198,62 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): else: if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( - f"{MAGIC_DL1_dir}/M2" - ): - for magic in [1, 2]: - dates = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M{magic}/*") - ] - - for i in dates: - runs = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M{2}/{i}/*") - ] - - for r in runs: - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs", - exist_ok=True, - ) # Creating a merged directory for the respective run - os.system( - f'find {MAGIC_DL1_dir}/M{magic}/{i}/{r} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/{r} >{MAGIC_DL1_dir}/Merged/{i}/{r}/logs/merge_M{magic}_{i}_{r}_" - + "${SLURM_JOB_ID}.log \n" - ) + for magic in [1, 2]: + for i in MAGIC_runs: + # Here is a difference w.r.t. original code. 
If only one telescope data are available they will be merged now for this telescope + if os.path.exists(f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}"): + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs", + exist_ok=True, + ) # Creating a merged directory for the respective run + os.system( + f'find {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' + ) + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs/merge_M{magic}_{i[0]}_{i[1]}_" + + "${SLURM_JOB_ID}.log\n" + ) + else: + print( + f"ERROR: {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} does not exist" + ) elif identification == "1_M1M2": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( - f"{MAGIC_DL1_dir}/M2" - ): - dates = [ - os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") - ] - for i in dates: - runs = [ - os.path.basename(x) - for x in glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/*") - ] - for r in runs: - if (len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0) and ( - len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) > 0 - ): - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs", - exist_ok=True, - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}_{r}_" - + "${SLURM_JOB_ID}.log \n" - ) + for i in MAGIC_runs: + if os.path.exists( + f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}" + ) & os.path.exists(f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}"): + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/logs", + exist_ok=True, + ) + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/logs/merge_{i[0]}_{i[1]}_" + + "${SLURM_JOB_ID}.log \n" + ) + else: + print( + f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" + ) else: - dates = [ - os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/M1/*") - ] + dates = np.unique(MAGIC_runs.T[0]) for i in dates: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): - continue - if ( - len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) - > 0 - ): + # if ( + # len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) + # > 0 + # ): # this is strange, those files should not be there yet at the moment of creating of this script + # runs = MAGIC_runs.T[1][MAGIC_runs.T[0]==i] - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs", exist_ok=True - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" - + "${SLURM_JOB_ID}.log \n" - ) + os.makedirs( + f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs", exist_ok=True + ) + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" + + "${SLURM_JOB_ID}.log \n" + ) def mergeMC(target_dir, identification, env_name, cwd, source_name): From cec4833ed95facdfc8ee905caab9f377b2b65f50 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 15 May 2024 15:02:18 +0000 Subject: [PATCH 090/236] refactoring to unify the code 
for NSB_match yes and no --- .../semi_automatic_scripts/merging_runs.py | 123 ++++++------------ 1 file changed, 40 insertions(+), 83 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 734d4a35..d5cff4ba 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -156,104 +156,61 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: f.writelines(lines) - if not NSB_match: - if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( - f"{MAGIC_DL1_dir}/M2" - ): - for i in MAGIC_runs: - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}", exist_ok=True - ) # Creating a merged directory for the respective run - for magic in [1, 2]: - os.system( - f'find {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs/merge_M{magic}_{i[0]}_{i[1]}_" - + "${SLURM_JOB_ID}.log\n" - ) - - elif identification == "1_M1M2": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( - f"{MAGIC_DL1_dir}/M2" - ): - for i in MAGIC_runs: - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged", exist_ok=True - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/merge_{i[0]}_{[1]}_" - + "${SLURM_JOB_ID}.log\n" - ) - else: + if identification == "0_subruns": + for magic in [1, 2]: for i in MAGIC_runs: - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}", exist_ok=True - ) # Creating a merged directory for each night - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i[0]} >{MAGIC_DL1_dir}/Merged/Merged_{i[0]}/merge_night_{i[0]}_" - + "${SLURM_JOB_ID}.log \n" - ) - - else: - if identification == "0_subruns": - for magic in [1, 2]: - for i in MAGIC_runs: - # Here is a difference w.r.t. original code. If only one telescope data are available they will be merged now for this telescope - if os.path.exists(f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}"): - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs", - exist_ok=True, - ) # Creating a merged directory for the respective run - os.system( - f'find {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs/merge_M{magic}_{i[0]}_{i[1]}_" - + "${SLURM_JOB_ID}.log\n" - ) - else: - print( - f"ERROR: {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} does not exist" - ) - - elif identification == "1_M1M2": - for i in MAGIC_runs: - if os.path.exists( - f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}" - ) & os.path.exists(f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}"): + # Here is a difference w.r.t. original code. 
If only one telescope data are available they will be merged now for this telescope + if os.path.exists(f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}"): os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/logs", + f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs", exist_ok=True, + ) # Creating a merged directory for the respective run + os.system( + f'find {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/logs/merge_{i[0]}_{i[1]}_" - + "${SLURM_JOB_ID}.log \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs/merge_M{magic}_{i[0]}_{i[1]}_" + + "${SLURM_JOB_ID}.log\n" ) else: print( - f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" + f"ERROR: {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} does not exist" ) - else: - dates = np.unique(MAGIC_runs.T[0]) - for i in dates: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): - continue - - # if ( - # len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) - # > 0 - # ): # this is strange, those files should not be there yet at the moment of creating of this script - # runs = MAGIC_runs.T[1][MAGIC_runs.T[0]==i] + elif identification == "1_M1M2": + for i in MAGIC_runs: + if os.path.exists(f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}") & os.path.exists( + f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}" + ): os.makedirs( - f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs", exist_ok=True + f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/logs", + exist_ok=True, ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/logs/merge_{i[0]}_{i[1]}_" + "${SLURM_JOB_ID}.log \n" ) + else: + print( + f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" + ) + else: + dates = np.unique(MAGIC_runs.T[0]) + for i in dates: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): + continue + + # if ( + # len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) + # > 0 + # ): # this is strange, those files should not be there yet at the moment of creating of this script + # runs = MAGIC_runs.T[1][MAGIC_runs.T[0]==i] + + os.makedirs(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs", exist_ok=True) + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" + + "${SLURM_JOB_ID}.log \n" + ) def mergeMC(target_dir, identification, env_name, cwd, source_name): From e3f8b9e820cfd6c50e476a1f8a2b48c03a8c33ee Mon Sep 17 00:00:00 2001 From: Federico Devoto Date: Thu, 16 May 2024 09:14:17 +0000 Subject: [PATCH 091/236] Fix bug sources --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 3 ++- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 3 ++- .../semi_automatic_scripts/setting_up_config_and_dir.py | 3 ++- 
.../scripts/lst1_magic/semi_automatic_scripts/stereo_events.py | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 27c48da4..2084d1c2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -204,10 +204,11 @@ def main(): env_name = config["general"]["env_name"] LST_version = config["general"]["LST_version"] + source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] source_list = [] - if source is not None: + if source_in is None: source_list = joblib.load("list_sources.dat") else: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 604b2e5b..0060a1c3 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -397,10 +397,11 @@ def main(): train_fraction = float(config["general"]["proton_train_fraction"]) env_name = config["general"]["env_name"] + source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] source_list = [] - if source is not None: + if source_in is None: source_list = joblib.load("list_sources.dat") else: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 4d6110b7..4094ee8f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -415,10 +415,11 @@ def main(): MC_protons = config["directories"]["MC_protons"] MC_gammadiff = config["directories"]["MC_gammadiff"] focal_length = config["general"]["focal_length"] + source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] source_list = [] - if source is not None: + if source_in is None: source_list = joblib.load("list_sources.dat") else: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 519aee3f..b3b6601c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -239,10 +239,11 @@ def main(): NSB_match = config["general"]["NSB_matching"] telescope_ids = list(config["mc_tel_ids"].values()) + source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] source_list = [] - if source is not None: + if source_in is None: source_list = joblib.load("list_sources.dat") else: From 12bd08f06231a60534c5d59ae1aefee586cdc124 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 16 May 2024 10:10:49 +0000 Subject: [PATCH 092/236] added error management for merging scripts also some refactoring in the setting up, mergi and coincidence scripts --- .../semi_automatic_scripts/clusters.py | 28 +++++++++- .../coincident_events.py | 14 ++--- .../semi_automatic_scripts/merging_runs.py | 56 +++++++++++-------- 
.../setting_up_config_and_dir.py | 50 +++++++---------- 4 files changed, 86 insertions(+), 62 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py index 8f66417b..54d14922 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/clusters.py @@ -1,7 +1,7 @@ """ Module for generating bash script lines for running analysis in different clusters """ -__all__ = ["slurm_lines"] +__all__ = ["slurm_lines", "rc_lines"] def slurm_lines(queue, job_name, array=None, mem=None, out_name=None): @@ -40,3 +40,29 @@ def slurm_lines(queue, job_name, array=None, mem=None, out_name=None): "ulimit -a\n\n", ] return lines + + +def rc_lines(store, out): + """ + Function for creating the general lines for error tracking. + + Parameters + ---------- + store : str + String what to store in addition to $rc + out : str + Base name for the log files with return codes, all output will go into {out}_return.log, only errors to {out}_failed.log + + Returns + ------- + list + List of strings to attach to a shell script + """ + lines = [ + "rc=$?\n", + 'if [ "$rc" -ne "0" ]; then\n', + f" echo {store} $rc >> {out}_failed.log\n", + "fi\n", + f"echo {store} $rc >> {out}_return.log\n", + ] + return lines diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 27c48da4..b81cc932 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -104,19 +104,15 @@ def linking_bash_lst( If real data are matched to pre-processed MCs or not """ - if (len(LST_runs) == 2) and (len(LST_runs[0]) == 10): - LST = LST_runs - - LST_runs = [] - LST_runs.append(LST) - if NSB_match: coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" else: - coincidence_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations" - MAGIC_DL1_dir = f"{target_dir}/{source_name}/DL1/Observations/" + coincidence_DL1_dir = ( + f"{target_dir}/v{__version__}/{source_name}/DL1/Observations" + ) + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1/Observations/" dates = [os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*")] @@ -218,7 +214,7 @@ def main(): configfile_coincidence(telescope_ids, target_dir, source_name, NSB_match) LST_runs_and_dates = f"{source_name}_LST_runs.txt" - LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",") + LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",", ndmin=2) try: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index d5cff4ba..a043f9f1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -40,7 +40,10 @@ from tqdm import tqdm from magicctapipe import __version__ -from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( + rc_lines, + slurm_lines, +) __all__ = ["cleaning", "split_train_test", "merge", "mergeMC"] @@ -151,8 +154,9 @@ def merge(target_dir, 
identification, MAGIC_runs, env_name, source, NSB_match): queue="short", job_name=process_name, mem="2g", - out_name=f"{MAGIC_DL1_dir}/Merged/slurm-%x.%j", + out_name=f"{MAGIC_DL1_dir}/Merged/logs/slurm-%x.%j", ) + os.makedirs(f"{MAGIC_DL1_dir}/Merged/logs", exist_ok=True) with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: f.writelines(lines) @@ -160,36 +164,38 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): for magic in [1, 2]: for i in MAGIC_runs: # Here is a difference w.r.t. original code. If only one telescope data are available they will be merged now for this telescope - if os.path.exists(f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}"): - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs", - exist_ok=True, - ) # Creating a merged directory for the respective run + indir = f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}" + if os.path.exists(f"{indir}"): + outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + os.makedirs(f"{outdir}/logs", exist_ok=True) os.system( - f'find {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' + f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} >{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}/logs/merge_M{magic}_{i[0]}_{i[1]}_" - + "${SLURM_JOB_ID}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) - else: - print( - f"ERROR: {MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]} does not exist" + rc = rc_lines( + store=f"{indir} ${{SLURM_JOB_ID}}", out="{outdir}/logs/list" ) + f.writelines(rc) + else: + print(f"ERROR: {indir} does not exist") elif identification == "1_M1M2": for i in MAGIC_runs: if os.path.exists(f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}") & os.path.exists( f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}" ): - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/logs", - exist_ok=True, - ) + indir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged" + os.makedirs(f"{outdir}/logs", exist_ok=True) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/logs/merge_{i[0]}_{i[1]}_" - + "${SLURM_JOB_ID}.log \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) + rc = rc_lines( + store=f"{indir} ${{SLURM_JOB_ID}}", out="{outdir}/logs/list" + ) + f.writelines(rc) else: print( f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" @@ -205,12 +211,16 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): # > 0 # ): # this is strange, those files should not be there yet at the moment of creating of this script # runs = MAGIC_runs.T[1][MAGIC_runs.T[0]==i] - - os.makedirs(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs", exist_ok=True) + indir = f"{MAGIC_DL1_dir}/Merged/{i}/Merged" + outdir = f"{MAGIC_DL1_dir}/Merged/Merged_{i}" + os.makedirs(f"{outdir}/logs", exist_ok=True) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" - + 
"${SLURM_JOB_ID}.log \n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log\n" + ) + rc = rc_lines( + store=f"{indir} ${{SLURM_JOB_ID}}", out="{outdir}/logs/list" ) + f.writelines(rc) def mergeMC(target_dir, identification, env_name, cwd, source_name): diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 4d6110b7..994cf21d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -27,7 +27,10 @@ from magicctapipe import __version__ from magicctapipe.io import resource_file -from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( + rc_lines, + slurm_lines, +) __all__ = [ "config_file_gen", @@ -206,8 +209,7 @@ def lists_and_bash_generator( f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "cd $SAMPLE\n\n", - f"export LOG={dir1}/DL1/MC/{particle_type}/logs" - + "/simtel_{$SAMPLE}_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}_all.log\n", + f"export LOG={dir1}/DL1/MC/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", "cat logs/list_dl0_ok.txt | while read line\n", "do\n", f" cd {dir1}/../\n", @@ -250,14 +252,6 @@ def lists_and_bash_gen_MAGIC( obs_tag = "" if NSB_match else "Observations" with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: f.writelines(lines) - if NSB_match: - - if (len(MAGIC_runs) == 2) and (len(MAGIC_runs[0]) == 10): - MAGIC = MAGIC_runs - - MAGIC_runs = [] - MAGIC_runs.append(MAGIC) - for i in MAGIC_runs: for magic in [1, 2]: # if 1 then magic is second from last, if 2 then last @@ -286,18 +280,21 @@ def lists_and_bash_gen_MAGIC( mem="2g", out_name=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match ) - lines = slurm + [ # without version for no NSB_match - f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", - "rc=$?\n", - 'if [ "$rc" -ne "0" ]; then\n', - " echo $SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID} $rc >> $OUTPUTDIR/logs/list_failed.log\n", - "fi\n", - "echo $SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID} $rc >> $OUTPUTDIR/logs/list_return.log\n", - ] + rc = rc_lines( + store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", + out="$OUTPUTDIR/logs/list", + ) + lines = ( + slurm + + [ # without version for no NSB_match + f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} magic_calib_to_dl1 
--input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + ] + + rc + ) with open( f"{source}_MAGIC-" + "I" * magic + f"_dl0_to_dl1_run_{i[1]}.sh", "w", @@ -360,11 +357,6 @@ def directories_generator( ########################################### # MAGIC ########################################### - if (len(MAGIC_runs) == 2) and (len(MAGIC_runs[0]) == 10): - MAGIC = MAGIC_runs - - MAGIC_runs = [] - MAGIC_runs.append(MAGIC) for i in MAGIC_runs: for magic in [1, 2]: if telescope_ids[magic - 3] > 0: @@ -428,7 +420,7 @@ def main(): MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( - MAGIC_runs_and_dates, dtype=str, delimiter="," + MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" noise_value = [0, 0, 0] From c829703920f922db678202e8d199ec8b1a2306cc Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 16 May 2024 12:33:03 +0000 Subject: [PATCH 093/236] added support for job_accounting running over merging files --- .../semi_automatic_scripts/job_accounting.py | 34 ++++++++++++++----- .../semi_automatic_scripts/merging_runs.py | 15 ++++---- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index f9c54d9f..b8114eeb 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -89,14 +89,17 @@ def main(): work_dir = config["directories"]["workspace_dir"] print(f"Checking progress of jobs stored in {work_dir}") - dirs = sorted(glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/*/*")) + dirs = sorted( + glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/[0-9]*/[M0-9]*") + + glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/Merged_[0-9]*") + ) if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] print("Error, no directories found") print(f"for path {work_dir} found in {args.config_file} this is available") print(f"Versions {versions}") tag = "" if NSB_matching else "/Observations" - print(f"Supported data types: DL1{tag}/M1, DL1{tag}/M2") + print(f"Supported data types: DL1{tag}/M1, DL1{tag}/M2, DL1{tag}/Merged") exit(1) all_todo = 0 @@ -124,21 +127,31 @@ def main(): returns = fp.readlines() this_return = len(returns) for line in returns: - file_in, slurm_id, task_id, rc = line.split() + line = line.split() + file_in = line[0] + slurm_id = f"{line[1]}_{line[2]}" if len(line) == 4 else line[1] + rc = line[-1] if rc == "0": this_good += 1 # now check accounting if not args.no_accounting: out = run_shell( - f'sacct --format="JobID,CPUTime,MaxRSS" --units=M -j {slurm_id}_{task_id}| tail -1' - ) - _, cpu, mem = out.split() + f'sacct --format="JobID,CPUTime,MaxRSS" --units=M -j {slurm_id}| tail -1' + ).split() + if len(out) == 3: + _, cpu, mem = out + else: + cpu = out[1] + mem = None hh, mm, ss = (int(x) for x in str(cpu).split(":")) delta = timedelta( days=hh // 24, hours=hh % 24, minutes=mm, seconds=ss ) this_cpu.append(delta) - this_mem.append(float(mem[0:-1])) + if mem is not None: + this_mem.append(float(mem[0:-1])) + else: + print("Memory usage information is missing") else: print(f"file {file_in} failed with error {rc}") if len(this_cpu) > 0: @@ -146,8 +159,13 @@ def main(): 
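The list_return.log lines tallied here are written by the rc_lines helper from clusters.py, which this excerpt never shows. Judging from the inline "rc=$?" shell blocks that these patches replace with rc_lines calls, it presumably expands to something like the sketch below; the exact implementation may differ. Array jobs pass store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", giving four whitespace-separated fields once the return code is appended, while the merge jobs added in this patch pass store=f"{indir} ${{SLURM_JOB_ID}}", giving three, which is why the parser above branches on len(line).

def rc_lines(store, out):
    # Assumed shape: shell lines appended after each payload command,
    # logging "<store> <rc>" to {out}_return.log and, on failure, also
    # to {out}_failed.log, the files that job_accounting.py reads back.
    return [
        "rc=$?\n",
        'if [ "$rc" -ne "0" ]; then\n',
        f"    echo {store} $rc >> {out}_failed.log\n",
        "fi\n",
        f"echo {store} $rc >> {out}_return.log\n",
    ]

With out="$OUTPUTDIR/logs/list" this lands in logs/list_return.log and logs/list_failed.log, matching the paths used throughout the series.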
all_mem += this_mem this_cpu = np.array(this_cpu) this_mem = np.array(this_mem) + mem_info = ( + f"memory [M]: median={np.median(this_mem)}, max={this_mem.max()}" + if len(this_mem) + else "" + ) print( - f"CPU: median={np.median(this_cpu)}, max={this_cpu.max()}; memory [M]: median={np.median(this_mem)}, max={this_mem.max()}" + f"CPU: median={np.median(this_cpu)}, max={this_cpu.max()}; {mem_info}" ) except IOError: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index a043f9f1..abe653a1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -175,9 +175,11 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", out="{outdir}/logs/list" + store=f"{indir} ${{SLURM_JOB_ID}}", + out=f"{outdir}/logs/list", ) f.writelines(rc) + os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") else: print(f"ERROR: {indir} does not exist") @@ -193,9 +195,10 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", out="{outdir}/logs/list" + store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" ) f.writelines(rc) + os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") else: print( f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" @@ -206,11 +209,6 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): continue - # if ( - # len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) - # > 0 - # ): # this is strange, those files should not be there yet at the moment of creating of this script - # runs = MAGIC_runs.T[1][MAGIC_runs.T[0]==i] indir = f"{MAGIC_DL1_dir}/Merged/{i}/Merged" outdir = f"{MAGIC_DL1_dir}/Merged/Merged_{i}" os.makedirs(f"{outdir}/logs", exist_ok=True) @@ -218,9 +216,10 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log\n" ) rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", out="{outdir}/logs/list" + store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" ) f.writelines(rc) + os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") def mergeMC(target_dir, identification, env_name, cwd, source_name): From 6917646d9de1b0983bf0cdb4c040109fe0d6353e Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 16 May 2024 13:33:01 +0000 Subject: [PATCH 094/236] Fixed script + deleted useless file --- .../semi_automatic_scripts/config_h5.yaml | 13 --------- .../database_production/nsb_level.py | 29 ++++++++++--------- 2 files changed, 16 insertions(+), 26 deletions(-) delete mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml 
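A note on the accounting above: sacct --format="JobID,CPUTime,MaxRSS" --units=M is assumed to print CPUTime as HH:MM:SS, with the hour field free to exceed 24; the D-HH:MM:SS day notation is not handled. A self-contained sketch of that conversion (the function name is illustrative, not part of the patch):

from datetime import timedelta

def cputime_to_timedelta(cpu):
    # sacct CPUTime, e.g. "26:05:30" -> 1 day, 2:05:30
    hh, mm, ss = (int(x) for x in cpu.split(":"))
    return timedelta(days=hh // 24, hours=hh % 24, minutes=mm, seconds=ss)

assert cputime_to_timedelta("26:05:30") == timedelta(days=1, hours=2, minutes=5, seconds=30)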
deleted file mode 100644 index 7c355b81..00000000 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_h5.yaml +++ /dev/null @@ -1,13 +0,0 @@ -data_selection_and_lists: - time_range : True #Search for all runs in a LST range (e.g., 2020_01_01 -> 2022_01_01) - min : "2023_11_17" - max : "2024_03_03" - date_list : ['2020_12_15','2021_03_11'] - #LST list of days to be processed (time_range=False), format: YYYY_MM_DD - skipped_LST_runs: [3216,3217] - skipped_MAGIC_runs: [5094658] #MAGIC and LST runs not to be processed (why?) - source_name_database: CrabNebula #MUST BE THE SAME AS IN THE DATABASE; null to process all sources in time range - source_name_output: 'Crabtest' #Name to be used in the analysis (will be used in the name of the txt output files), - #best idea: the same as in the analysis config_general.yaml, - #WARNING: ONLY ALPHANUMERIC, NO SPECIAL CHARACTERS (special characters can interfere with the analysis scripts); - #Used only if a source name (not null) set above \ No newline at end of file diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 883c1fee..e9716590 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -112,28 +112,30 @@ def main(): print("***** Generating bashscripts...") for i, row in df_LST.iterrows(): - # list_v=list(str(row['lstchain_versions'].replace('"]','').replace('["','').split('",')).rstrip('"]').lstrip('["')) + list_v = [eval(i) for i in row["lstchain_versions"].strip("][").split(", ")] - # list_v=list_v..rstrip('"]').lstrip('["') - + if str(lstchain_v) not in list_v: continue - if len(list_v) > 1: - common_v = [i for i in (set(lstchain_versions).intersection(list_v))] + + + common_v = [i for i in (set(lstchain_versions).intersection(list_v))] - max_common = common_v - if len(common_v) > 1: - max_common = common_v[-1] + max_common = common_v[0] + + + if lstchain_v != str(max_common): - if lstchain_v != max_common: - continue + continue + run_number = row["LST1_run"] date = row["DATE"] - + df_LST.loc[ i, "processed_lstchain_file" - ] = f"/fefs/aswg/data/real/DL1/{date}/{lstchain_v}/tailcut84/dl1_LST-1.Run{run_number}.h5" + ] = f"/fefs/aswg/data/real/DL1/{date}/{max_common}/tailcut84/dl1_LST-1.Run{run_number}.h5" df_LST.loc[i, "error_code_nsb"] = np.nan + bash_scripts(run_number, date, args.config_file, env_name) print("Process name: nsb") @@ -162,6 +164,7 @@ def main(): launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})" os.system(launch_jobs) + df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", @@ -172,7 +175,7 @@ def main(): "processed_lstchain_file": 90, }, ) - + if __name__ == "__main__": main() From 87423b3622d798ed514bf93155c9801fb6dd06a2 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 16 May 2024 13:49:21 +0000 Subject: [PATCH 095/236] minor fix --- .../database_production/LSTnsb.py | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 28e49956..8eea7007 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -19,7 +19,7 @@ logger.setLevel(logging.INFO) -def nsb(run_list, simtel, lst_config, run_number): +def nsb(run_list, simtel, lst_config, run_number, denominator): """ Here we compute the NSB value for a run based on a subset of subruns. @@ -42,7 +42,7 @@ def nsb(run_list, simtel, lst_config, run_number): """ noise = [] - denominator = 25 + if len(run_list) == 0: logger.warning( "There is no subrun matching the provided run number. Check the list of the LST runs (LST_runs.txt)" @@ -51,7 +51,7 @@ def nsb(run_list, simtel, lst_config, run_number): if len(run_list) < denominator: mod = 1 else: - mod = int(len(run_list) / denominator) + mod = len(run_list) // denominator failed = 0 for ii in range(0, len(run_list)): subrun = run_list[ii].split(".")[-2] @@ -65,7 +65,7 @@ def nsb(run_list, simtel, lst_config, run_number): except IndexError: failed = failed + 1 if len(run_list) > denominator: - mod = int(len(run_list) / (denominator + failed)) + mod = (len(run_list) - ii) // (denominator -len(noise)) logger.warning( f"Subrun {subrun} caused an error in the NSB level evaluation for run {run_number}. Check reports before using it" ) @@ -101,7 +101,14 @@ def main(): type=str, help="Day of the run to be processed", ) - + parser.add_argument( + "--denominator", + "-s", + dest="denominator", + type=int, + default=25, + help="Number of subruns to be processed", + ) args = parser.parse_args() with open( args.config_file, "rb" @@ -110,6 +117,7 @@ def main(): NSB_match = config["general"]["NSB_matching"] run_number = args.run date = args.day + denominator = args.denominator simtel = "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" nsb_list = config["general"]["nsb"] @@ -130,35 +138,21 @@ def main(): print(lst_config) if NSB_match: LST_files = np.sort(glob.glob(f"nsb_LST_[0-9]*_{run_number}.txt")) - - if len(LST_files) > 1: - logger.info( - f"Run {run_number} classified in more than one NSB bin. Removing all these files and evaluating it again" - ) - for kk in LST_files: - os.remove(kk) - LST_files = [] + if len(LST_files) == 1: logger.info(f"Run {run_number} already processed") return else: LST_files = np.sort(glob.glob(f"nsb_LST_nsb_*{run_number}*.txt")) - - if len(LST_files) > 1: - logger.warning( - f"More than one files exists for run {run_number}. Removing all these files and evaluating it again." 
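The reworked stride above is meant to still yield roughly denominator NSB measurements per run when some subruns fail: at first every (len(run_list) // denominator)-th subrun is sampled, and after a failure the stride is re-derived from the subruns left and the values still missing. A hedged restatement of that logic; the max(1, ...) guard is an addition here for safety, the script itself instead breaks out of the loop when the stride reaches zero:

def sampling_stride(n_subruns, n_target, idx=0, n_collected=0):
    # Stride between sampled subruns so that about n_target NSB values
    # are collected overall; after a failure, recompute from the subruns
    # left (n_subruns - idx) and the values still missing
    # (n_target - n_collected).
    if n_subruns < n_target:
        return 1
    return max(1, (n_subruns - idx) // (n_target - n_collected))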
- ) - for repeated_files in LST_files: - os.remove(repeated_files) - LST_files = [] - elif len(LST_files) == 1: + + if len(LST_files) == 1: logger.info(f"Run {run_number} already processed.") return # date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] inputdir = f"/fefs/aswg/data/real/DL1/{date}/{lst_version}/{lst_tailcut}" run_list = np.sort(glob.glob(f"{inputdir}/dl1*Run*{run_number}.*.h5")) - noise = nsb(run_list, simtel, lst_config, run_number) + noise = nsb(run_list, simtel, lst_config, run_number, denominator) if len(noise) == 0: logger.warning( "No NSB value could be evaluated: check the observation logs (observation problems, car flashes...)" From 2111e953926c8c42c104d9a5dc6dbe08a3d0692b Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 16 May 2024 16:58:27 +0000 Subject: [PATCH 096/236] added counting of total CPU time it works even if some files were produced with the same job --- .../semi_automatic_scripts/job_accounting.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index b8114eeb..9b6df96b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -107,6 +107,8 @@ def main(): all_good = 0 all_cpu = [] all_mem = [] + total_time = 0 + all_jobs = [] for dir in dirs: print(dir) # fixme list_dl0.txt is only available for DL1/M[12] processing @@ -140,18 +142,25 @@ def main(): ).split() if len(out) == 3: _, cpu, mem = out - else: + elif ( + len(out) == 2 + ): # MaxRSS sometimes is missing in the output cpu = out[1] mem = None - hh, mm, ss = (int(x) for x in str(cpu).split(":")) - delta = timedelta( - days=hh // 24, hours=hh % 24, minutes=mm, seconds=ss - ) - this_cpu.append(delta) + print("Memory usage information is missing") + else: + print("Unexpected sacct output: {out}") + if cpu is not None: + hh, mm, ss = (int(x) for x in str(cpu).split(":")) + delta = timedelta( + days=hh // 24, hours=hh % 24, minutes=mm, seconds=ss + ) + if slurm_id not in all_jobs: + total_time += delta.total_seconds() / 3600 + all_jobs += [slurm_id] + this_cpu.append(delta) if mem is not None: this_mem.append(float(mem[0:-1])) - else: - print("Memory usage information is missing") else: print(f"file {file_in} failed with error {rc}") if len(this_cpu) > 0: @@ -204,7 +213,7 @@ def main(): all_cpu = np.array(all_cpu) all_mem = np.array(all_mem) print( - f"CPU: median={np.median(all_cpu)}, max={all_cpu.max()}; memory [M]: median={np.median(all_mem)}, max={all_mem.max()}" + f"CPU: median={np.median(all_cpu)}, max={all_cpu.max()}, total={total_time:.2f} CPU hrs; memory [M]: median={np.median(all_mem)}, max={all_mem.max()}" ) From 725fdb356b540d5d7e8ffc926f5be7277bcab628 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 17 May 2024 07:01:09 +0000 Subject: [PATCH 097/236] minor fix --- .../lst1_magic/semi_automatic_scripts/config_general.yaml | 1 + .../semi_automatic_scripts/database_production/nsb_level.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 986da015..5038100e 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -32,6 +32,7 @@ general: LST_version : "v0.10" LST_tailcut : "tailcut84" focal_length : "effective" + lstchain_modified_config : true proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] env_name : magic-lst # name of the conda environment to be used to process data. diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index e9716590..5ac6a043 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -119,9 +119,9 @@ def main(): continue - common_v = [i for i in (set(lstchain_versions).intersection(list_v))] + common_v = [value for value in lstchain_versions if value in list_v] - max_common = common_v[0] + max_common = common_v[-1] if lstchain_v != str(max_common): From 9f1389df8860f05b1025ffb50efbf7a4044b5d5c Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 17 May 2024 07:57:51 +0000 Subject: [PATCH 098/236] linter --- .../database_production/LSTnsb.py | 10 ++++++---- .../database_production/nsb_level.py | 19 ++++++++----------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 8eea7007..85181279 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -34,6 +34,8 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): LST configuration file (cf. lstchain) run_number : int LST run number + denominator : int + Number of subruns to be used to evaluate NSB for a run Returns ------- @@ -42,7 +44,7 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): """ noise = [] - + if len(run_list) == 0: logger.warning( "There is no subrun matching the provided run number. Check the list of the LST runs (LST_runs.txt)" @@ -65,7 +67,7 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): except IndexError: failed = failed + 1 if len(run_list) > denominator: - mod = (len(run_list) - ii) // (denominator -len(noise)) + mod = (len(run_list) - ii) // (denominator - len(noise)) logger.warning( f"Subrun {subrun} caused an error in the NSB level evaluation for run {run_number}. 
Check reports before using it" ) @@ -138,13 +140,13 @@ def main(): print(lst_config) if NSB_match: LST_files = np.sort(glob.glob(f"nsb_LST_[0-9]*_{run_number}.txt")) - + if len(LST_files) == 1: logger.info(f"Run {run_number} already processed") return else: LST_files = np.sort(glob.glob(f"nsb_LST_nsb_*{run_number}*.txt")) - + if len(LST_files) == 1: logger.info(f"Run {run_number} already processed.") return diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 5ac6a043..64ffabcd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -112,30 +112,27 @@ def main(): print("***** Generating bashscripts...") for i, row in df_LST.iterrows(): - list_v = [eval(i) for i in row["lstchain_versions"].strip("][").split(", ")] - + if str(lstchain_v) not in list_v: continue - - + common_v = [value for value in lstchain_versions if value in list_v] max_common = common_v[-1] - - + if lstchain_v != str(max_common): continue - + run_number = row["LST1_run"] date = row["DATE"] - + df_LST.loc[ i, "processed_lstchain_file" ] = f"/fefs/aswg/data/real/DL1/{date}/{max_common}/tailcut84/dl1_LST-1.Run{run_number}.h5" df_LST.loc[i, "error_code_nsb"] = np.nan - + bash_scripts(run_number, date, args.config_file, env_name) print("Process name: nsb") @@ -164,7 +161,7 @@ def main(): launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})" os.system(launch_jobs) - + df_LST.to_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", key="joint_obs", @@ -175,7 +172,7 @@ def main(): "processed_lstchain_file": 90, }, ) - + if __name__ == "__main__": main() From 50407be71d794ed02cb38bd53acbcd75521f23d3 Mon Sep 17 00:00:00 2001 From: FedericoDevoto757 Date: Fri, 17 May 2024 09:45:14 +0000 Subject: [PATCH 099/236] Bug --- .../semi_automatic_scripts/merging_runs.py | 36 ++++++++----------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 0060a1c3..3b7dbb43 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -95,9 +95,9 @@ def split_train_test(target_dir, train_fraction, source_name): range(len(list_of_dir)) ): # tqdm allows us to print a progessbar in the terminal - os.makedirs(f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}") + os.makedirs(f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}", exist_ok = True) os.makedirs( - f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}' + f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}', exist_ok = True ) list_of_runs = np.sort( glob.glob(f'{proton_dir}/{list_of_dir[directory].split("/")[-2]}/*.h5') @@ -160,7 +160,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): for i in MAGIC_runs: os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}", exist_ok = True ) # Creating a merged directory for the respective run os.system( f'find {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} -type f -name "*.h5" -size -3k -delete' @@ -183,7 +183,7 @@ def merge(target_dir, identification, MAGIC_runs, 
env_name, source, NSB_match): f"{MAGIC_DL1_dir}/M2" ): for i in MAGIC_runs: - os.makedirs(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged") + os.makedirs(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged", exist_ok = True) f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i[0]}/Merged/merge_{i[0]}_{[1]}_" + "${SLURM_JOB_ID}.log\n" @@ -191,7 +191,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): else: for i in MAGIC_runs: os.makedirs( - f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}" + f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}", exist_ok = True ) # Creating a merged directory for each night f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i[0]} >{MAGIC_DL1_dir}/Merged/Merged_{i[0]}/merge_night_{i[0]}_" @@ -213,7 +213,6 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: f.writelines(lines) if identification == "0_subruns": - if os.path.exists(f"{MAGIC_DL1_dir}/M1") & os.path.exists( f"{MAGIC_DL1_dir}/M2" ): @@ -227,9 +226,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): ] for r in runs: - os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs", exist_ok = True ) # Creating a merged directory for the respective run os.system( f'find {MAGIC_DL1_dir}/M1/{i}/{r} -type f -name "*.h5" -size -3k -delete' @@ -251,7 +249,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): for r in runs: os.makedirs( - f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs" + f"{MAGIC_DL1_dir}/Merged/{i}/{r}/logs", exist_ok = True ) # Creating a merged directory for the respective run os.system( f'find {MAGIC_DL1_dir}/M2/{i}/{r} -type f -name "*.h5" -size -3k -delete' @@ -276,7 +274,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): if (len(glob.glob(f"{MAGIC_DL1_dir}/M1/{i}/{r}")) > 0) and ( len(glob.glob(f"{MAGIC_DL1_dir}/M2/{i}/{r}")) > 0 ): - os.makedirs(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs") + os.makedirs(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/logs", exist_ok = True) f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/{r} --output-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --run-wise >{MAGIC_DL1_dir}/Merged/{i}/Merged/logs/merge_{i}_{r}_" + "${SLURM_JOB_ID}.log \n" @@ -291,16 +289,12 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): continue - if ( - len(glob.glob(f"{MAGIC_DL1_dir}/Merged/{i}/Merged/*MAGIC*.h5")) - > 0 - ): - - os.makedirs(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" - + "${SLURM_JOB_ID}.log \n" - ) + + os.makedirs(f"{MAGIC_DL1_dir}/Merged/Merged_{i}/logs") + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {MAGIC_DL1_dir}/Merged/{i}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i} >{MAGIC_DL1_dir}/Merged/Merged_{i}/logs/merge_night_{i}_" + + "${SLURM_JOB_ID}.log \n" + ) def mergeMC(target_dir, identification, env_name, cwd, source_name): @@ -325,7 +319,7 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): process_name = 
f"merging_{source_name}" MC_DL1_dir = f"{target_dir}/{source_name}/DL1/MC" - os.makedirs(f"{MC_DL1_dir}/{identification}/Merged") + os.makedirs(f"{MC_DL1_dir}/{identification}/Merged", exist_ok = True) if identification == "protons": list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/train/node*")) From 5e357deb525ccde0127c848de8e5a9f9236573a5 Mon Sep 17 00:00:00 2001 From: FedericoDevoto757 Date: Fri, 17 May 2024 10:16:45 +0000 Subject: [PATCH 100/236] Bug --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 71ba7c6e..20444fbe 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -133,12 +133,12 @@ def linking_bash_lst( f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" ) - os.makedirs(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs") + os.makedirs(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs", exist_ok=True) outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) - with open(f"{outputdir}/logs/list_LST", "a+") as LSTdataPathFile: + with open(f"{outputdir}/logs/list_LST.txt", "a+") as LSTdataPathFile: for subrun in list_of_subruns: LSTdataPathFile.write(f"{subrun}\n") From 21fbf4c9d5463abfd2e6125dd3fe2df07524ac02 Mon Sep 17 00:00:00 2001 From: FedericoDevoto757 Date: Fri, 17 May 2024 13:03:15 +0000 Subject: [PATCH 101/236] Bug fix --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 20444fbe..89643029 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -157,7 +157,7 @@ def linking_bash_lst( out_name=f"{outputdir}/logs/slurm-%x.%A_%a", ) lines = slurm + [ - f"export INM={MAGIC_DL1_dir}/Merged/Merged_{d}\n", + f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", f"export OUTPUTDIR={outputdir}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", From a31fb3f3e3769cd48995e1aa48e7a1e4208a22ce Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 23 May 2024 09:56:18 +0000 Subject: [PATCH 102/236] introduced error logging for coincidence script --- .../coincident_events.py | 37 +++++++++++++------ .../semi_automatic_scripts/job_accounting.py | 12 ++++-- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 89643029..0bafd112 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -26,7 +26,10 @@ from magicctapipe import __version__ from magicctapipe.io import resource_file -from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( 
+ rc_lines, + slurm_lines, +) __all__ = ["configfile_coincidence", "linking_bash_lst"] @@ -63,7 +66,7 @@ def configfile_coincidence(ids, target_dir, source_name, NSB_match): conf["event_coincidence"] = coincidence if not NSB_match: - file_name = f"{target_dir}/{source_name}/config_coincidence.yaml" + file_name = f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" else: file_name = f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" with open(file_name, "w") as f: @@ -133,9 +136,9 @@ def linking_bash_lst( f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" ) - os.makedirs(f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}/logs", exist_ok=True) - outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" + os.makedirs(f"{outputdir}/logs", exist_ok=True) + list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) with open(f"{outputdir}/logs/list_LST.txt", "a+") as LSTdataPathFile: @@ -156,14 +159,23 @@ def linking_bash_lst( mem="8g", out_name=f"{outputdir}/logs/slurm-%x.%A_%a", ) - lines = slurm + [ - f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", - f"export OUTPUTDIR={outputdir}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1", - ] + rc = rc_lines( + store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", + out="$OUTPUTDIR/logs/list", + ) + + lines = ( + slurm + + [ + f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", + f"export OUTPUTDIR={outputdir}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1\n", + ] + + rc + ) with open( f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", "w", @@ -249,6 +261,7 @@ def main(): " && " if n > 0 else "" ) + f"coincidence{n}=$(sbatch --parsable {run})" + # print (launch_jobs) os.system(launch_jobs) except OSError as exc: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 9b6df96b..600bb1c4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -4,6 +4,7 @@ """ import argparse import glob +import os from datetime import timedelta from subprocess import PIPE, run @@ -92,6 +93,7 @@ def main(): dirs = sorted( glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/[0-9]*/[M0-9]*") + glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/Merged_[0-9]*") + + glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/" + "[0-9]" * 8) ) if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] @@ -111,12 +113,14 @@ def main(): all_jobs = [] for dir in dirs: print(dir) - # fixme list_dl0.txt is only available for DL1/M[12] processing - list_dl0 = f"{dir}/logs/list_dl0.txt" - try: + list_dl0 = "" + 
for file in ["list_dl0.txt", "list_LST.txt"]: + if os.path.exists(f"{dir}/logs/{file}"): + list_dl0 = f"{dir}/logs/{file}" + if list_dl0 != "": with open(list_dl0, "r") as fp: this_todo = len(fp.readlines()) - except IOError: + else: print(f"{RED}File {list_dl0} is missing{ENDC}") this_todo = 0 From ddfa1bc922ec09a5ec087b95e11433145a7bfee1 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 23 May 2024 09:58:18 +0000 Subject: [PATCH 103/236] removed forgotten print --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 0bafd112..4a030b31 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -261,7 +261,6 @@ def main(): " && " if n > 0 else "" ) + f"coincidence{n}=$(sbatch --parsable {run})" - # print (launch_jobs) os.system(launch_jobs) except OSError as exc: From f52e02430f92e841b0267e7bc2a82694bc6582fe Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 23 May 2024 10:52:29 +0000 Subject: [PATCH 104/236] error logging for stereo reconstruction script --- .../semi_automatic_scripts/job_accounting.py | 9 +- .../semi_automatic_scripts/stereo_events.py | 93 +++++++++---------- 2 files changed, 49 insertions(+), 53 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 600bb1c4..e8a011b6 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -101,7 +101,9 @@ def main(): print(f"for path {work_dir} found in {args.config_file} this is available") print(f"Versions {versions}") tag = "" if NSB_matching else "/Observations" - print(f"Supported data types: DL1{tag}/M1, DL1{tag}/M2, DL1{tag}/Merged") + print( + f"Supported data types: DL1{tag}/M1, DL1{tag}/M2, DL1{tag}/Merged, DL1Coincident, DL1CoincidentStereo" + ) exit(1) all_todo = 0 @@ -114,14 +116,15 @@ def main(): for dir in dirs: print(dir) list_dl0 = "" - for file in ["list_dl0.txt", "list_LST.txt"]: + ins = ["list_dl0.txt", "list_LST.txt", "list_coin.txt"] + for file in ins: if os.path.exists(f"{dir}/logs/{file}"): list_dl0 = f"{dir}/logs/{file}" if list_dl0 != "": with open(list_dl0, "r") as fp: this_todo = len(fp.readlines()) else: - print(f"{RED}File {list_dl0} is missing{ENDC}") + print(f"{RED}No {ins} files {ENDC}") this_todo = 0 list_return = f"{dir}/logs/list_return.log" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index b3b6601c..79bc76fd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -28,6 +28,10 @@ from magicctapipe import __version__ from magicctapipe.io import resource_file +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( + rc_lines, + slurm_lines, +) __all__ = ["configfile_stereo", "bash_stereo", "bash_stereoMC"] @@ -58,15 +62,9 @@ def configfile_stereo(ids, target_dir, source_name, NSB_match): config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = 
yaml.safe_load(fc) - coincidence = config_dict["stereo_reco"] + conf = {"stereo_reco": config_dict["stereo_reco"]} - conf = {} - conf["stereo_reco"] = coincidence - - if not NSB_match: - file_name = f"{target_dir}/{source_name}/config_stereo.yaml" - else: - file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" + file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" with open(file_name, "w") as f: lines = [ "mc_tel_ids:", @@ -103,9 +101,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match): if NSB_match: coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}" - else: - coincidence_DL1_dir = f"{target_dir}/{source}/DL1/Observations" + coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/Observations" listOfNightsLST = np.sort(glob.glob(f"{coincidence_DL1_dir}/DL1Coincident/*")) @@ -113,7 +110,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): stereoDir = ( f"{coincidence_DL1_dir}/DL1CoincidentStereo/{nightLST.split('/')[-1]}" ) - os.makedirs(f"{stereoDir}/logs") + os.makedirs(f"{stereoDir}/logs", exist_ok=True) if not os.listdir(f"{nightLST}"): continue if len(os.listdir(nightLST)) < 2: @@ -127,24 +124,29 @@ def bash_stereo(target_dir, source, env_name, NSB_match): if process_size < 0: continue - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p short\n", - f"#SBATCH -J {process_name}_stereo\n", - f"#SBATCH --array=0-{process_size}\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={stereoDir}/logs/slurm-%x.%A_%a.out" - f"#SBATCH --error={stereoDir}/logs/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"export INPUTDIR={nightLST}\n", - f"export OUTPUTDIR={stereoDir}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1", - ] + + slurm = slurm_lines( + queue="short", + job_name=f"{process_name}_stereo", + array=process_size, + out_name=f"{stereoDir}/logs/slurm-%x.%A_%a", + ) + rc = rc_lines( + store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", + out="$OUTPUTDIR/logs/list", + ) + lines = ( + slurm + + [ + f"export INPUTDIR={nightLST}\n", + f"export OUTPUTDIR={stereoDir}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_coin.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_stereo.yaml >$LOG 2>&1\n", + ] + + rc + ) with open(f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w") as f: f.writelines(lines) @@ -168,9 +170,8 @@ def bash_stereoMC(target_dir, identification, env_name, source): process_name = source - os.makedirs(f"{target_dir}/{source}/DL1/MC/{identification}/Merged/StereoMerged") - inputdir = f"{target_dir}/{source}/DL1/MC/{identification}/Merged" + os.makedirs(f"{inputdir}/StereoMerged", exist_ok=True) os.system( f"ls {inputdir}/dl1*.h5 > {inputdir}/list_coin.txt" @@ -179,18 +180,14 @@ def bash_stereoMC(target_dir, identification, env_name, source): process_size = len(f.readlines()) - 1 with open(f"StereoEvents_MC_{identification}.sh", "w") as f: - lines = [ - "#!/bin/sh\n\n", - 
"#SBATCH -p xxl\n", - f"#SBATCH -J {process_name}_stereo\n", - f"#SBATCH --array=0-{process_size}%100\n", - "#SBATCH --mem=8g\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output={inputdir}/StereoMerged/logs/slurm-%x.%A_%a.out" - f"#SBATCH --error={inputdir}/StereoMerged/logs/slurm-%x.%A_%a.err" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", + slurm = slurm_lines( + queue="xxl", + job_name=f"{process_name}_stereo", + array=f"{process_size}%100", + mem="8g", + out_name=f"{inputdir}/StereoMerged/logs/slurm-%x.%A_%a", + ) + lines = slurm + [ f"export INPUTDIR={inputdir}\n", f"export OUTPUTDIR={inputdir}/StereoMerged\n", "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", @@ -242,12 +239,10 @@ def main(): source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] - source_list = [] if source_in is None: source_list = joblib.load("list_sources.dat") - else: - source_list.append(source) + source_list = [source] for source_name in source_list: print("***** Generating file config_stereo.yaml...") @@ -260,10 +255,8 @@ def main(): and not NSB_match ): print("***** Generating the bashscript for MCs...") - bash_stereoMC(target_dir, "gammadiffuse", env_name, source_name) - bash_stereoMC(target_dir, "gammas", env_name, source_name) - bash_stereoMC(target_dir, "protons", env_name, source_name) - bash_stereoMC(target_dir, "protons_test", env_name, source_name) + for part in ["gammadiffuse", "gammas", "protons", "protons_test"]: + bash_stereoMC(target_dir, part, env_name, source_name) list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) From d170ea9705e1f93ace7686fafbccaba7d4ba1c46 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 23 May 2024 15:51:05 +0000 Subject: [PATCH 105/236] changed name of output directory from DL1CoincidentStereo to DL1Stereo --- .../lst1_magic/semi_automatic_scripts/job_accounting.py | 2 +- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index e8a011b6..deaa97a7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -102,7 +102,7 @@ def main(): print(f"Versions {versions}") tag = "" if NSB_matching else "/Observations" print( - f"Supported data types: DL1{tag}/M1, DL1{tag}/M2, DL1{tag}/Merged, DL1Coincident, DL1CoincidentStereo" + f"Supported data types: DL1{tag}/M1, DL1{tag}/M2, DL1{tag}/Merged, DL1Coincident, DL1Stereo" ) exit(1) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 79bc76fd..1dd07e75 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -107,9 +107,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): listOfNightsLST = np.sort(glob.glob(f"{coincidence_DL1_dir}/DL1Coincident/*")) for nightLST in listOfNightsLST: - stereoDir = ( - f"{coincidence_DL1_dir}/DL1CoincidentStereo/{nightLST.split('/')[-1]}" - ) + stereoDir = f"{coincidence_DL1_dir}/DL1Stereo/{nightLST.split('/')[-1]}" os.makedirs(f"{stereoDir}/logs", exist_ok=True) if not os.listdir(f"{nightLST}"): continue From 
c1059e26cf26bc31436727b74c4c6e1c52a824fc Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 24 May 2024 07:45:47 +0000 Subject: [PATCH 106/236] fixed nsb --- .../lstchain_standard_config_modified.json | 289 ++++++++++++++++++ .../config_general.yaml | 3 +- .../database_production/LSTnsb.py | 36 +-- .../database_production/nsb_level.py | 55 ++-- 4 files changed, 335 insertions(+), 48 deletions(-) create mode 100644 magicctapipe/resources/lstchain_standard_config_modified.json diff --git a/magicctapipe/resources/lstchain_standard_config_modified.json b/magicctapipe/resources/lstchain_standard_config_modified.json new file mode 100644 index 00000000..7046f91c --- /dev/null +++ b/magicctapipe/resources/lstchain_standard_config_modified.json @@ -0,0 +1,289 @@ +{ + "source_config" : { + "EventSource": { + "allowed_tels": [1], + "max_events": null + }, + "LSTEventSource": { + "default_trigger_type": "ucts", + "allowed_tels": [1], + "min_flatfield_adc": 3000, + "min_flatfield_pixel_fraction": 0.8, + "calibrate_flatfields_and_pedestals": false, + "use_flatfield_heuristic": true, + "EventTimeCalculator": { + "dragon_reference_counter": null, + "dragon_reference_time": null + }, + "PointingSource":{ + "drive_report_path": null + }, + "LSTR0Corrections":{ + "calib_scale_high_gain":1.088, + "calib_scale_low_gain":1.004, + "drs4_pedestal_path": null, + "calibration_path": null, + "drs4_time_calibration_path": null + } + } + }, + + "events_filters": { + "intensity": [0, Infinity], + "width": [0, Infinity], + "length": [0, Infinity], + "wl": [0, Infinity], + "r": [0, Infinity], + "leakage_intensity_width_2": [0, Infinity] + }, + "n_training_events": { + "gamma_regressors": 1.0, + "gamma_tmp_regressors": 0.8, + "gamma_classifier": 0.2, + "proton_classifier": 1.0 + }, + + "tailcut": { + "picture_thresh":8, + "boundary_thresh":4, + "keep_isolated_pixels":false, + "min_number_picture_neighbors":2, + "use_only_main_island":false, + "delta_time": 2 + }, + "tailcuts_clean_with_pedestal_threshold": { + "picture_thresh":8, + "boundary_thresh":4, + "sigma":2.5, + "keep_isolated_pixels":false, + "min_number_picture_neighbors":2, + "use_only_main_island":false, + "delta_time": 2 + }, + "dynamic_cleaning": { + "apply": true, + "threshold": 267, + "fraction_cleaning_intensity": 0.03 + }, + + "random_forest_energy_regressor_args": { + "max_depth": 30, + "min_samples_leaf": 10, + "n_jobs": -1, + "n_estimators": 150, + "bootstrap": true, + "criterion": "squared_error", + "max_features": "auto", + "max_leaf_nodes": null, + "min_impurity_decrease": 0.0, + "min_samples_split": 10, + "min_weight_fraction_leaf": 0.0, + "oob_score": false, + "random_state": 42, + "warm_start": false + }, + + "random_forest_disp_regressor_args": { + "max_depth": 30, + "min_samples_leaf": 10, + "n_jobs": -1, + "n_estimators": 150, + "bootstrap": true, + "criterion": "squared_error", + "max_features": "auto", + "max_leaf_nodes": null, + "min_impurity_decrease": 0.0, + "min_samples_split": 10, + "min_weight_fraction_leaf": 0.0, + "oob_score": false, + "random_state": 42, + "warm_start": false + }, + + "random_forest_disp_classifier_args": { + "max_depth": 30, + "min_samples_leaf": 10, + "n_jobs": -1, + "n_estimators": 100, + "criterion": "gini", + "min_samples_split": 10, + "min_weight_fraction_leaf": 0.0, + "max_features": "auto", + "max_leaf_nodes": null, + "min_impurity_decrease": 0.0, + "bootstrap": true, + "oob_score": false, + "random_state": 42, + "warm_start": false, + "class_weight": null + }, + + 
"random_forest_particle_classifier_args": { + "max_depth": 30, + "min_samples_leaf": 10, + "n_jobs": -1, + "n_estimators": 100, + "criterion": "gini", + "min_samples_split": 10, + "min_weight_fraction_leaf": 0.0, + "max_features": "auto", + "max_leaf_nodes": null, + "min_impurity_decrease": 0.0, + "bootstrap": true, + "oob_score": false, + "random_state": 42, + "warm_start": false, + "class_weight": null + }, + + + "energy_regression_features": [ + "log_intensity", + "width", + "length", + "x", + "y", + "wl", + "skewness", + "kurtosis", + "time_gradient", + "leakage_intensity_width_2", + "sin_az_tel", + "alt_tel" + ], + + "disp_method": "disp_norm_sign", + + "disp_regression_features": [ + "log_intensity", + "width", + "length", + "wl", + "skewness", + "kurtosis", + "time_gradient", + "leakage_intensity_width_2", + "sin_az_tel", + "alt_tel" + ], + + "disp_classification_features": [ + "log_intensity", + "width", + "length", + "wl", + "skewness", + "kurtosis", + "time_gradient", + "leakage_intensity_width_2", + "sin_az_tel", + "alt_tel" + ], + + "particle_classification_features": [ + "log_intensity", + "width", + "length", + "x", + "y", + "wl", + "signed_skewness", + "kurtosis", + "signed_time_gradient", + "leakage_intensity_width_2", + "log_reco_energy", + "reco_disp_norm", + "reco_disp_sign", + "sin_az_tel", + "alt_tel" + ], + + "allowed_tels": [1], + "write_pe_image": false, + "mc_image_scaling_factor": 1, + "image_extractor": "LocalPeakWindowSum", + "image_extractor_for_muons": "GlobalPeakWindowSum", + "CameraCalibrator": { + "apply_waveform_time_shift": false + }, + "time_sampling_correction_path": "default", + "LocalPeakWindowSum":{ + "window_shift": 4, + "window_width": 8, + "apply_integration_correction": false + }, + "GlobalPeakWindowSum":{ + "window_shift": 4, + "window_width": 8, + "apply_integration_correction": false + }, + "timestamps_pointing":"ucts", + + "train_gamma_src_r_deg": [0, Infinity], + + "source_dependent": false, + "mc_nominal_source_x_deg": 0.4, + "mc_nominal_source_y_deg": 0.0, + + "volume_reducer":{ + "algorithm": null, + "parameters": { + } + }, + "calibration_product": "LSTCalibrationCalculator", + + "LSTCalibrationCalculator":{ + "systematic_correction_path": null, + "npe_median_cut_outliers": [-5,5], + "squared_excess_noise_factor": 1.222, + "flatfield_product": "FlasherFlatFieldCalculator", + "pedestal_product": "PedestalIntegrator", + "PedestalIntegrator":{ + "sample_size": 10000, + "sample_duration":100000, + "tel_id":1, + "time_sampling_correction_path": null, + "charge_median_cut_outliers": [-10,10], + "charge_std_cut_outliers": [-10,10], + "charge_product":"FixedWindowSum", + "FixedWindowSum":{ + "window_shift": 6, + "window_width":12, + "peak_index": 18, + "apply_integration_correction": false + } + }, + "FlasherFlatFieldCalculator":{ + "sample_size": 10000, + "sample_duration":100000, + "tel_id":1, + "time_sampling_correction_path": null, + "charge_product":"LocalPeakWindowSum", + "charge_median_cut_outliers": [-0.9,2], + "charge_std_cut_outliers": [-10,10], + "time_cut_outliers": [2,38], + "LocalPeakWindowSum":{ + "window_shift": 5, + "window_width":12, + "apply_integration_correction": false + } + } + }, + "waveform_nsb_tuning":{ + "nsb_tuning": false, + "nsb_tuning_ratio": 0.52, + "spe_location": "lstchain/data/SinglePhE_ResponseInPhE_expo2Gaus.dat" + }, + "write_interleaved_events":{ + "DataWriter": { + "overwrite": true, + "write_images": false, + "write_parameters": false, + "write_waveforms": true, + "transform_waveform": true, + 
"waveform_dtype": "uint16", + "waveform_offset": 400, + "waveform_scale": 80 + } + } +} diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 5038100e..864a9c7f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -32,7 +32,8 @@ general: LST_version : "v0.10" LST_tailcut : "tailcut84" focal_length : "effective" - lstchain_modified_config : true + simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB + lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] env_name : magic-lst # name of the conda environment to be used to process data. diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 85181279..8e78fdb1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -116,11 +116,10 @@ def main(): args.config_file, "rb" ) as f: # "rb" mode opens the file in binary format for reading config = yaml.safe_load(f) - NSB_match = config["general"]["NSB_matching"] run_number = args.run date = args.day denominator = args.denominator - simtel = "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" + simtel = config["general"]["simtel_nsb"] nsb_list = config["general"]["nsb"] lst_version = config["general"]["LST_version"] @@ -138,18 +137,13 @@ def main(): if lstchain_modified: lst_config = resource_file("lstchain_standard_config_modified.json") print(lst_config) - if NSB_match: - LST_files = np.sort(glob.glob(f"nsb_LST_[0-9]*_{run_number}.txt")) - if len(LST_files) == 1: - logger.info(f"Run {run_number} already processed") - return - else: - LST_files = np.sort(glob.glob(f"nsb_LST_nsb_*{run_number}*.txt")) + LST_files = np.sort(glob.glob(f"nsb_LST_*_{run_number}.txt")) if len(LST_files) == 1: - logger.info(f"Run {run_number} already processed.") + logger.info(f"Run {run_number} already processed") return + # date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] inputdir = f"/fefs/aswg/data/real/DL1/{date}/{lst_version}/{lst_tailcut}" @@ -162,18 +156,16 @@ def main(): return median_NSB = np.median(noise) logger.info(f"Run n. 
{run_number}, nsb median {median_NSB}") - if NSB_match: - for j in range(0, len(nsb_list)): - if (median_NSB < nsb_limit[j + 1]) & (median_NSB > nsb_limit[j]): - with open(f"nsb_LST_{nsb_list[j]}_{run_number}.txt", "a+") as f: - f.write(f"{date},{run_number},{median_NSB}\n") - if median_NSB > nsb_limit[-1]: - with open(f"nsb_LST_high_{run_number}.txt", "a+") as f: - f.write(f"{date},{run_number},{median_NSB}\n") - - else: - with open(f"nsb_LST_nsb_{run_number}.txt", "a+") as f: - f.write(f"{median_NSB}\n") + + for j in range(0, len(nsb_list)): + if (median_NSB < nsb_limit[j + 1]) & (median_NSB > nsb_limit[j]): + with open(f"nsb_LST_{nsb_list[j]}_{run_number}.txt", "a+") as f: + f.write(f"{date},{run_number},{median_NSB}\n") + if median_NSB > nsb_limit[-1]: + with open(f"nsb_LST_high_{run_number}.txt", "a+") as f: + f.write(f"{date},{run_number},{median_NSB}\n") + + if __name__ == "__main__": diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 64ffabcd..590f6205 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -23,7 +23,7 @@ logger.setLevel(logging.INFO) -def bash_scripts(run, date, config, env_name): +def bash_scripts(run, date, config, env_name, cluster): """Here we create the bash scripts (one per LST run) @@ -39,23 +39,25 @@ def bash_scripts(run, date, config, env_name): env_name : str Name of the environment """ - - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p long\n", - "#SBATCH -J nsb\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output=slurm-nsb_{run}-%x.%j.out\n" - f"#SBATCH --error=slurm-nsb_{run}-%x.%j.err\n" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}_" - + "${SLURM_JOB_ID}.log 2>&1 \n\n", - ] - with open(f"nsb_{date}_run_{run}.sh", "w") as f: - f.writelines(lines) - + if cluster == 'SLURM': + lines = [ + "#!/bin/sh\n\n", + "#SBATCH -p long\n", + "#SBATCH -J nsb\n", + "#SBATCH -n 1\n\n", + f"#SBATCH --output=slurm-nsb_{run}-%x.%j.out\n" + f"#SBATCH --error=slurm-nsb_{run}-%x.%j.err\n" + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}_" + + "${SLURM_JOB_ID}.log 2>&1 \n\n", + ] + with open(f"nsb_{date}_run_{run}.sh", "w") as f: + f.writelines(lines) + else: + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return def main(): @@ -93,6 +95,9 @@ def main(): config = yaml.safe_load(f) env_name = config["general"]["env_name"] + + cluster = config["general"]["cluster"] + df_LST = pd.read_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", @@ -133,15 +138,15 @@ def main(): ] = f"/fefs/aswg/data/real/DL1/{date}/{max_common}/tailcut84/dl1_LST-1.Run{run_number}.h5" df_LST.loc[i, "error_code_nsb"] = np.nan - bash_scripts(run_number, date, args.config_file, env_name) + bash_scripts(run_number, date, args.config_file, env_name, cluster) print("Process name: nsb") print("To check the jobs submitted to the cluster, type: squeue -n nsb") list_of_bash_scripts = np.sort(glob.glob("nsb_*_run_*.sh")) if len(list_of_bash_scripts) < 1: - print( - "Warning: no bash script has been produced to evaluate the NSB level for the 
provided LST runs. Please check the input list"
+        logger.warning(
+            "No bash script has been produced to evaluate the NSB level for the provided LST runs. Please check the input list"
         )
         return
     print("Update database and launch jobs")
@@ -154,11 +159,11 @@ def main():
     )
     df_LST = df_LST.sort_values(by=["DATE", "source", "LST1_run"])
 
+    launch_jobs = ""
     for n, run in enumerate(list_of_bash_scripts):
-        if n == 0:
-            launch_jobs = f"nsb{n}=$(sbatch --parsable {run})"
-        else:
-            launch_jobs = f"{launch_jobs} && nsb{n}=$(sbatch --parsable {run})"
+        launch_jobs += (
+            " && " if n > 0 else ""
+        ) + f"nsb{n}=$(sbatch --parsable {run})"
 
     os.system(launch_jobs)
 

From 9c0561ed6bb07f00f218e9701909d77948c3f5cd Mon Sep 17 00:00:00 2001
From: joannaWojS <167758973+joannaWojS@users.noreply.github.com>
Date: Fri, 24 May 2024 15:54:04 +0200
Subject: [PATCH 107/236] Update check_MAGIC_runs.py

---
 .../semi_automatic_scripts/check_MAGIC_runs.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
index e4d3f8f5..0e2cb261 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
@@ -34,18 +34,22 @@ def table_first_last_run(df):
     return(result)
 
 
-def check_run_ID(path, filename, first_run, last_run, date, source):
-    Y = f'_Y_{source}'
-    #'Y' because we have to be sure that the function counts right filename.
+def check_run_ID(path, filename, first_run, last_run, date, source, tel_id):
 
-    if Y in filename:
+    # We have to be sure that the function matches the right filename.
+    date_obs = filename.split("_")[0]
+    run = filename.split("_")[2].split(".")[0]
+    subrun = filename.split("_")[2].split(".")[1]
+    Y = f'{date_obs}_M{tel_id}_{run}.{subrun}_Y_{source}'
+    r = ".root"
+
+    if Y in filename and r in filename:
         # Extract run_ids from filename and check range
         run_ids = [int(filename.split("_")[2].split(".")[0])]
         magic_runs = []
 
         for id in run_ids:
             if first_run <= id <= last_run:
-                matched = True
                 magic_runs.append(f"{date} \t {source} \t {id}")
                 #print(f"{date} \t {source} \t {id}")
         return magic_runs
@@ -72,7 +76,7 @@ def check_directory(date, source, first_run, last_run, tel_id):
         for filename in files:
             if source in filename:
                 count_with_source += 1
-                results = check_run_ID(path, filename, first_run, last_run, date, source)
+                results = check_run_ID(path, filename, first_run, last_run, date, source, tel_id)
 
                 # We will see many results because a file with a run ID has subruns.
                 # We must count the same results to get information how many subruns we have.
                 for result in results:
@@ -85,6 +89,8 @@ def check_directory(date, source, first_run, last_run, tel_id):
         # Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files.
         if(date<'20220904' or date>'20221214'):
             print(f"No files found containing the source '{source}' on {date}, (M{tel_id})")
+        else:
+            print(f"M1 failure. 
No files found containing the source '{source}' on {date}.") if(tel_id == 2): print(f"No files found containing the source '{source}' on {date}, (M{tel_id})") From b12ba943d6ce3b6154efbbe3698a297ab1e77c84 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 27 May 2024 07:40:04 +0000 Subject: [PATCH 108/236] cluster type --- .../coincident_events.py | 134 +++++----- .../database_production/LSTnsb.py | 9 +- .../semi_automatic_scripts/merging_runs.py | 171 ++++++------- .../setting_up_config_and_dir.py | 235 +++++++++--------- 4 files changed, 284 insertions(+), 265 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 4a030b31..1d7bdede 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -85,7 +85,7 @@ def configfile_coincidence(ids, target_dir, source_name, NSB_match): def linking_bash_lst( - target_dir, LST_runs, source_name, LST_version, env_name, NSB_match + target_dir, LST_runs, source_name, LST_version, env_name, NSB_match, cluster ): """ @@ -118,70 +118,72 @@ def linking_bash_lst( MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1/Observations/" dates = [os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*")] - - for d in dates: - Y_M, M_M, D_M = [int(x) for x in d.split("_")[1:]] - - day_MAGIC = dtdt(Y_M, M_M, D_M) - - delta = timedelta(days=1) - for i in LST_runs: - Y_L, M_L, D_L = [int(x) for x in i[0].split("_")] - - day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) - if day_MAGIC == day_LST + delta: - - lstObsDir = i[0].replace("_", "") - inputdir = ( - f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" - ) - - outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" - os.makedirs(f"{outputdir}/logs", exist_ok=True) - - list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) - - with open(f"{outputdir}/logs/list_LST.txt", "a+") as LSTdataPathFile: - for subrun in list_of_subruns: - LSTdataPathFile.write(f"{subrun}\n") - - if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): - continue - with open(f"{outputdir}/logs/list_LST.txt", "r") as f: - process_size = len(f.readlines()) - 1 - - if process_size < 0: - continue - slurm = slurm_lines( - queue="short", - job_name=f"{source_name}_coincidence", - array=process_size, - mem="8g", - out_name=f"{outputdir}/logs/slurm-%x.%A_%a", - ) - rc = rc_lines( - store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", - out="$OUTPUTDIR/logs/list", - ) - - lines = ( - slurm - + [ - f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", - f"export OUTPUTDIR={outputdir}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1\n", - ] - + rc - ) - with open( - f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", - "w", - ) as f: - f.writelines(lines) - + if cluster == 'SLURM': + for d in dates: + Y_M, M_M, D_M = [int(x) for x in d.split("_")[1:]] + + day_MAGIC = dtdt(Y_M, M_M, D_M) + + delta = timedelta(days=1) + for i in LST_runs: + Y_L, M_L, D_L = [int(x) for x 
in i[0].split("_")] + + day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) + if day_MAGIC == day_LST + delta: + + lstObsDir = i[0].replace("_", "") + inputdir = ( + f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" + ) + + outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" + os.makedirs(f"{outputdir}/logs", exist_ok=True) + + list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) + + with open(f"{outputdir}/logs/list_LST.txt", "a+") as LSTdataPathFile: + for subrun in list_of_subruns: + LSTdataPathFile.write(f"{subrun}\n") + + if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): + continue + with open(f"{outputdir}/logs/list_LST.txt", "r") as f: + process_size = len(f.readlines()) - 1 + + if process_size < 0: + continue + slurm = slurm_lines( + queue="short", + job_name=f"{source_name}_coincidence", + array=process_size, + mem="8g", + out_name=f"{outputdir}/logs/slurm-%x.%A_%a", + ) + rc = rc_lines( + store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", + out="$OUTPUTDIR/logs/list", + ) + + lines = ( + slurm + + [ + f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", + f"export OUTPUTDIR={outputdir}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1\n", + ] + + rc + ) + with open( + f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", + "w", + ) as f: + f.writelines(lines) + else: + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return def main(): @@ -214,6 +216,7 @@ def main(): source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] + cluster = config["general"]["cluster"] source_list = [] if source_in is None: @@ -241,6 +244,7 @@ def main(): LST_version, env_name, NSB_match, + cluster ) # linking the data paths to current working directory print("***** Submitting processess to the cluster...") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 8e78fdb1..e7c23e8d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -120,7 +120,6 @@ def main(): date = args.day denominator = args.denominator simtel = config["general"]["simtel_nsb"] - nsb_list = config["general"]["nsb"] lst_version = config["general"]["LST_version"] lst_tailcut = config["general"]["LST_tailcut"] @@ -140,10 +139,10 @@ def main(): LST_files = np.sort(glob.glob(f"nsb_LST_*_{run_number}.txt")) - if len(LST_files) == 1: - logger.info(f"Run {run_number} already processed") - return - + if len(LST_files) == 1: + logger.info(f"Run {run_number} already processed") + return + # date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] inputdir = f"/fefs/aswg/data/real/DL1/{date}/{lst_version}/{lst_tailcut}" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index d98baf71..6b1959e3 100644 --- 
a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -123,7 +123,7 @@ def split_train_test(target_dir, train_fraction, source_name): os.system(f"rm -r {list_of_dir[directory]}") -def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): +def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, cluster): """ This function creates the bash scripts to run merge_hdf_files.py in all MAGIC subruns. @@ -149,80 +149,82 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match): MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" if not NSB_match: MAGIC_DL1_dir += "Observations/" - - lines = slurm_lines( - queue="short", - job_name=process_name, - mem="2g", - out_name=f"{MAGIC_DL1_dir}/Merged/logs/slurm-%x.%j", - ) - os.makedirs(f"{MAGIC_DL1_dir}/Merged/logs", exist_ok=True) - - with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: - f.writelines(lines) - if identification == "0_subruns": - for magic in [1, 2]: + if cluster == 'SLURM': + lines = slurm_lines( + queue="short", + job_name=process_name, + mem="2g", + out_name=f"{MAGIC_DL1_dir}/Merged/logs/slurm-%x.%j", + ) + os.makedirs(f"{MAGIC_DL1_dir}/Merged/logs", exist_ok=True) + + with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: + f.writelines(lines) + if identification == "0_subruns": + for magic in [1, 2]: + for i in MAGIC_runs: + # Here is a difference w.r.t. original code. If only one telescope data are available they will be merged now for this telescope + indir = f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}" + if os.path.exists(f"{indir}"): + outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + os.makedirs(f"{outdir}/logs", exist_ok=True) + os.system( + f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' + ) + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" + ) + rc = rc_lines( + store=f"{indir} ${{SLURM_JOB_ID}}", + out=f"{outdir}/logs/list", + ) + f.writelines(rc) + os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") + else: + print(f"ERROR: {indir} does not exist") + + elif identification == "1_M1M2": for i in MAGIC_runs: - # Here is a difference w.r.t. original code. 
If only one telescope data are available they will be merged now for this telescope - indir = f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}" - if os.path.exists(f"{indir}"): - outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + if os.path.exists(f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}") & os.path.exists( + f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}" + ): + indir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged" os.makedirs(f"{outdir}/logs", exist_ok=True) - os.system( - f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' - ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", - out=f"{outdir}/logs/list", + store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" ) f.writelines(rc) os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") else: - print(f"ERROR: {indir} does not exist") - - elif identification == "1_M1M2": - for i in MAGIC_runs: - if os.path.exists(f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}") & os.path.exists( - f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}" - ): - indir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" - outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged" + print( + f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" + ) + else: + dates = np.unique(MAGIC_runs.T[0]) + for i in dates: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): + continue + + indir = f"{MAGIC_DL1_dir}/Merged/{i}/Merged" + outdir = f"{MAGIC_DL1_dir}/Merged/Merged_{i}" os.makedirs(f"{outdir}/logs", exist_ok=True) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log\n" ) rc = rc_lines( store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" ) f.writelines(rc) os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") - else: - print( - f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" - ) - else: - dates = np.unique(MAGIC_runs.T[0]) - for i in dates: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): - continue - - indir = f"{MAGIC_DL1_dir}/Merged/{i}/Merged" - outdir = f"{MAGIC_DL1_dir}/Merged/Merged_{i}" - os.makedirs(f"{outdir}/logs", exist_ok=True) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log\n" - ) - rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" - ) - f.writelines(rc) - os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") - - -def mergeMC(target_dir, identification, env_name, cwd, source_name): + else: + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return + +def mergeMC(target_dir, identification, env_name, cwd, source_name, cluster): """ This function creates the bash scripts to run merge_hdf_files.py in all MC runs. 
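
(The hunks above apply the same guard to every script-writing helper: the scheduler declared in the config is checked first, and anything other than SLURM only logs a warning before returning, so no bash file is written. A minimal sketch of the pattern; the helper name `write_job_script` and its arguments are illustrative, not part of the package, and later commits in this series invert the check into an early return:)

```python
import logging

logger = logging.getLogger(__name__)


def write_job_script(cluster, filename, lines):
    # Guard used throughout these scripts: only SLURM submission is
    # implemented, so any other cluster type is rejected with a warning.
    if cluster != "SLURM":
        logger.warning(
            "Automatic processing not implemented for the cluster indicated in the config file"
        )
        return
    with open(filename, "w") as f:
        f.writelines(lines)
```
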
@@ -258,24 +260,26 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name): process_size = len(list_of_nodes) - 1 cleaning(list_of_nodes, cwd) # This will delete the (possibly) failed runs. - - with open(f"Merge_MC_{identification}.sh", "w") as f: - slurm = slurm_lines( - queue="short", - array=process_size, - mem="7g", - job_name=process_name, - out_name=f"{MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a", - ) - lines_bash_file = slurm + [ - f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - f"export LOG={MC_DL1_dir}/{identification}/Merged" - + "/merged_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", - ] - f.writelines(lines_bash_file) - + if cluster == 'SLURM': + with open(f"Merge_MC_{identification}.sh", "w") as f: + slurm = slurm_lines( + queue="short", + array=process_size, + mem="7g", + job_name=process_name, + out_name=f"{MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a", + ) + lines_bash_file = slurm + [ + f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + f"export LOG={MC_DL1_dir}/{identification}/Merged" + + "/merged_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", + ] + f.writelines(lines_bash_file) + else: + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return def main(): @@ -318,6 +322,7 @@ def main(): env_name = config["general"]["env_name"] source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] + cluster = config["general"]["cluster"] source_list = [] if source_in is None: @@ -344,15 +349,15 @@ def main(): print("***** Generating merge_MC bashscripts...") mergeMC( - target_dir, "protons", env_name, cwd, source_name + target_dir, "protons", env_name, cwd, source_name, cluster ) # generating the bash script to merge the files mergeMC( - target_dir, "gammadiffuse", env_name, cwd, source_name + target_dir, "gammadiffuse", env_name, cwd, source_name, cluster ) # generating the bash script to merge the files mergeMC( - target_dir, "gammas", env_name, cwd, source_name + target_dir, "gammas", env_name, cwd, source_name, cluster ) # generating the bash script to merge the files - mergeMC(target_dir, "protons_test", env_name, cwd, source_name) + mergeMC(target_dir, "protons_test", env_name, cwd, source_name, cluster) print("***** Running merge_hdf_files.py on the MC data files...") @@ -377,13 +382,13 @@ def main(): ): print("***** Generating merge_MAGIC bashscripts...") merge( - target_dir, "0_subruns", MAGIC_runs, env_name, source_name, NSB_match + target_dir, "0_subruns", MAGIC_runs, env_name, source_name, NSB_match, cluster ) # generating the bash script to merge the subruns merge( - target_dir, "1_M1M2", MAGIC_runs, env_name, source_name, NSB_match + target_dir, "1_M1M2", MAGIC_runs, env_name, source_name, NSB_match, cluster ) # generating the bash script to merge the M1 and M2 runs merge( - target_dir, "2_nights", MAGIC_runs, env_name, source_name, NSB_match + target_dir, "2_nights", MAGIC_runs, env_name, source_name, NSB_match, cluster ) # generating the bash script to merge all runs per night print("***** Running 
merge_hdf_files.py on the MAGIC data files...") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index dee6d8c5..d6374ff3 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -116,6 +116,7 @@ def lists_and_bash_generator( focal_length, env_name, source_name, + cluster, ): """ @@ -163,65 +164,68 @@ def lists_and_bash_generator( #################################################################################### # bash scripts that link the MC paths to each subdirectory. #################################################################################### - - with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: - slurm = slurm_lines( - queue="short", - job_name=process_name, - out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-linkMC-%x.%j", - ) - lines_of_config_file = slurm + [ - "while read -r -u 3 lineA && read -r -u 4 lineB\n", - "do\n", - f" cd {dir1}/DL1/MC/{particle_type}\n", - " mkdir $lineB\n", - " cd $lineA\n", - " ls -lR *.gz |wc -l\n", - f" mkdir -p {dir1}/DL1/MC/{particle_type}/$lineB/logs/\n", - f" ls *.gz > {dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n", - ' string=$lineA"/"\n', - f" export file={dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n\n", - " cat $file | while read line; do echo $string${line}" - + f" >>{dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", - ' echo "folder $lineB and node $lineA"\n', - f'done 3<"{dir1}/logs/list_nodes_{particle_type}_complete.txt" 4<"{dir1}/logs/list_folder_{particle_type}.txt"\n', - "", - ] - f.writelines(lines_of_config_file) - - ################################################################################################################ - # bash script that applies lst1_magic_mc_dl0_to_dl1.py to all MC data files. 
- ################################################################################################################ - - number_of_nodes = glob.glob(f"{MC_path}/node*") - number_of_nodes = len(number_of_nodes) - 1 - with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: - slurm = slurm_lines( - queue="xxl", - job_name=process_name, - array=number_of_nodes, - mem="10g", - out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-%x.%A_%a", - ) - lines_of_config_file = slurm + [ - f"cd {dir1}/DL1/MC/{particle_type}\n\n", - f"export INF={dir1}/logs\n", - f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "cd $SAMPLE\n\n", - f"export LOG={dir1}/DL1/MC/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", - "cat logs/list_dl0_ok.txt | while read line\n", - "do\n", - f" cd {dir1}/../\n", - f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/MC/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", - "done\n", - "", - ] - f.writelines(lines_of_config_file) + if cluster == 'SLURM': + with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: + slurm = slurm_lines( + queue="short", + job_name=process_name, + out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-linkMC-%x.%j", + ) + lines_of_config_file = slurm + [ + "while read -r -u 3 lineA && read -r -u 4 lineB\n", + "do\n", + f" cd {dir1}/DL1/MC/{particle_type}\n", + " mkdir $lineB\n", + " cd $lineA\n", + " ls -lR *.gz |wc -l\n", + f" mkdir -p {dir1}/DL1/MC/{particle_type}/$lineB/logs/\n", + f" ls *.gz > {dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n", + ' string=$lineA"/"\n', + f" export file={dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n\n", + " cat $file | while read line; do echo $string${line}" + + f" >>{dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", + ' echo "folder $lineB and node $lineA"\n', + f'done 3<"{dir1}/logs/list_nodes_{particle_type}_complete.txt" 4<"{dir1}/logs/list_folder_{particle_type}.txt"\n', + "", + ] + f.writelines(lines_of_config_file) + + ################################################################################################################ + # bash script that applies lst1_magic_mc_dl0_to_dl1.py to all MC data files. 
+ ################################################################################################################ + + number_of_nodes = glob.glob(f"{MC_path}/node*") + number_of_nodes = len(number_of_nodes) - 1 + with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: + slurm = slurm_lines( + queue="xxl", + job_name=process_name, + array=number_of_nodes, + mem="10g", + out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-%x.%A_%a", + ) + lines_of_config_file = slurm + [ + f"cd {dir1}/DL1/MC/{particle_type}\n\n", + f"export INF={dir1}/logs\n", + f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "cd $SAMPLE\n\n", + f"export LOG={dir1}/DL1/MC/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", + "cat logs/list_dl0_ok.txt | while read line\n", + "do\n", + f" cd {dir1}/../\n", + f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/MC/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", + "done\n", + "", + ] + f.writelines(lines_of_config_file) + else: + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return def lists_and_bash_gen_MAGIC( - target_dir, telescope_ids, MAGIC_runs, source, env_name, NSB_match + target_dir, telescope_ids, MAGIC_runs, source, env_name, NSB_match, cluster ): """ @@ -242,64 +246,68 @@ def lists_and_bash_gen_MAGIC( NSB_match : bool If real data are matched to pre-processed MCs or not """ - process_name = source - lines = slurm_lines( - queue="short", - job_name=process_name, - out_name=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", - ) + if cluster == 'SLURM': + process_name = source + lines = slurm_lines( + queue="short", + job_name=process_name, + out_name=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", + ) - obs_tag = "" if NSB_match else "Observations" - with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: - f.writelines(lines) - for i in MAGIC_runs: - for magic in [1, 2]: - # if 1 then magic is second from last, if 2 then last - if telescope_ids[magic - 3] > 0: - lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs \n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n\n", - ] - f.writelines(lines) - - for magic in [1, 2]: - # if 1 then magic is second from last, if 2 then last - if telescope_ids[magic - 3] > 0: + obs_tag = "" if NSB_match else "Observations" + with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: + f.writelines(lines) for i in MAGIC_runs: - number_of_nodes = glob.glob( - f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' - ) - number_of_nodes = len(number_of_nodes) - 1 - if number_of_nodes < 0: - continue - slurm = slurm_lines( - queue="short", # was long for no NSB_match - job_name=process_name, - array=number_of_nodes, - mem="2g", - out_name=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match - ) - rc = rc_lines( - store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", - out="$OUTPUTDIR/logs/list", - ) - lines = ( - slurm - + [ # without 
version for no NSB_match - f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", - ] - + rc - ) - with open( - f"{source}_MAGIC-" + "I" * magic + f"_dl0_to_dl1_run_{i[1]}.sh", - "w", - ) as f: - f.writelines(lines) + for magic in [1, 2]: + # if 1 then magic is second from last, if 2 then last + if telescope_ids[magic - 3] > 0: + lines = [ + f'export IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs \n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n\n", + ] + f.writelines(lines) + + for magic in [1, 2]: + # if 1 then magic is second from last, if 2 then last + if telescope_ids[magic - 3] > 0: + for i in MAGIC_runs: + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + if number_of_nodes < 0: + continue + slurm = slurm_lines( + queue="short", # was long for no NSB_match + job_name=process_name, + array=number_of_nodes, + mem="2g", + out_name=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match + ) + rc = rc_lines( + store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", + out="$OUTPUTDIR/logs/list", + ) + lines = ( + slurm + + [ # without version for no NSB_match + f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + ] + + rc + ) + with open( + f"{source}_MAGIC-" + "I" * magic + f"_dl0_to_dl1_run_{i[1]}.sh", + "w", + ) as f: + f.writelines(lines) + else: + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return def directories_generator( @@ -409,6 +417,7 @@ def main(): focal_length = config["general"]["focal_length"] source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] + cluster = config["general"]["cluster"] source_list = [] if source_in is None: @@ -468,6 +477,7 @@ def main(): focal_length, env_name, source_name, + cluster, ) # Here we do the MC DL0 to DL1 conversion: @@ -496,6 +506,7 @@ def main(): source_name, env_name, NSB_match, + cluster, ) # MAGIC real data if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh") From 5dfc8394f17a3b89d3151fb77bd43f7f59d49af8 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 27 May 2024 07:59:11 +0000 Subject: [PATCH 109/236] fixed readme and lists --- 
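
(Note on the per-run bash files generated by these scripts: each one pairs a `slurm_lines` header with a job-array body that indexes the input list by `SLURM_ARRAY_TASK_ID`. A condensed sketch is given below; `build_array_script` is a hypothetical helper, the header strings stand in for the output of `slurm_lines()`, and the real scripts also pass a `--config-file` and append a return-code block from `rc_lines()`:)

```python
def build_array_script(env_name, outputdir, n_jobs):
    # Condensed version of the job scripts written by
    # lists_and_bash_gen_MAGIC, for illustration only.
    header = [
        "#!/bin/sh\n\n",
        "#SBATCH -p short\n",
        f"#SBATCH --array=0-{n_jobs}\n",
        "#SBATCH --mem=2g\n\n",
    ]
    body = [
        f"export OUTPUTDIR={outputdir}\n",
        "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n",
        "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n",
        "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n",
        f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE "
        "--output-dir $OUTPUTDIR >$LOG 2>&1\n",
    ]
    return header + body
```
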
magicctapipe/scripts/lst1_magic/README.md | 33 +++++++------------ .../semi_automatic_scripts/list_from_h5.py | 12 +++++-- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 527f4df1..a8096044 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -12,26 +12,24 @@ MAGIC+LST analysis starts from MAGIC calibrated data (\_Y\_ files), LST data le Behind the scenes, the semi-automatic scripts will run: - `magic_calib_to_dl1` on real MAGIC data, to convert them into DL1 format. -- `merge_hdf_files.py` on MAGIC data to merge subruns and/or runs together. -- `lst1_magic_event_coincidence.py` to find coincident events between MAGIC and LST-1, starting from DL1 data. -- `lst1_magic_stereo_reco.py` to add stereo parameters to the DL1 data. -- `lst1_magic_train_rfs.py` to train the RFs (energy, direction, classification) on train gamma MCs and protons. -- `lst1_magic_dl1_stereo_to_dl2.py` to apply the RFs to stereo DL1 data (real and test MCs) and produce DL2 data. -- `lst1_magic_create_irf.py` to create the IRF. -- `lst1_magic_dl2_to_dl3.py` to create DL3 files, and `create_dl3_index_files.py` to create DL3 HDU and index files. +- `merge_hdf_files` on MAGIC data to merge subruns and/or runs together. +- `lst1_magic_event_coincidence` to find coincident events between MAGIC and LST-1, starting from DL1 data. +- `lst1_magic_stereo_reco` to add stereo parameters to the DL1 data. +- `lst1_magic_train_rfs` to train the RFs (energy, direction, classification) on train gamma MCs and protons. +- `lst1_magic_dl1_stereo_to_dl2` to apply the RFs to stereo DL1 data (real and test MCs) and produce DL2 data. +- `lst1_magic_create_irf` to create the IRF. +- `lst1_magic_dl2_to_dl3` to create DL3 files, and `create_dl3_index_files` to create DL3 HDU and index files. From DL3 on, the analysis is done with gammapy. ## Installation -1) The very first step to reduce MAGIC-LST data is to have remote access/credentials to the IT Container, so provide one. Once you have it, the connection steps are the following: +1) The very first step to reduce MAGIC-LST data is to have remote access/credentials to the IT Container. If you do not have it, please write an email to request it to , and the admin will send you the instructions to connect to the IT container. -Authorized institute server (Client) → ssh connection to CTALaPalma → ssh connection to cp01/02. - -2) Once connected to the IT Container, install magic-cta-pipe (e.g. in your home directory in the IT Container) with the following commands (if you have mamba installed, we recommend you to use it instead of conda. The installation process will be much faster.): +2) Once connected to the IT Container, install magic-cta-pipe (e.g. in your home directory in the IT Container) with the following commands (if you have mamba installed, we recommend you to use it instead of conda, so that the installation process will be much faster; if you don't have anaconda/miniconda/miniforge, please install one of them into your workspace directory): ``` -git clone -b Torino_auto_MCP https://github.com/cta-observatory/magic-cta-pipe.git +git clone https://github.com/cta-observatory/magic-cta-pipe.git cd magic-cta-pipe mamba env create -n magic-lst -f environment.yml mamba activate magic-lst @@ -105,7 +103,7 @@ Cleaning pre-existing *_LST_runs.txt and *_MAGIC_runs.txt files Finding LST runs... Finding MAGIC runs... 
``` -And it will save the files TARGET_LST_runs.txt, TARGET_MAGIC_runs.txt, and list_sources.dat in your working directory. +And it will save the files TARGET_LST_runs.txt, TARGET_MAGIC_runs.txt, and list_sources.dat in your working directory. In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s). At this point, we can convert the MAGIC data into DL1 format with the following command: > $ setting_up_config_and_dir -c config_general.yaml @@ -206,8 +204,6 @@ TBD. Since the DL3 may have only a few MBs, it is typically convenient to download it to your own computer at this point. It will be necessary to have astropy and gammapy (version > 0.20) installed before proceeding. -We prepared a [Jupyter Notebook](https://github.com/ranieremenezes/magic-cta-pipe/blob/master/magicctapipe/scripts/lst1_magic/SED_and_LC_from_DL3.ipynb) that quickly creates a counts map, a significance curve, an SED, and a light curve. You can give it a try. - The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs. @@ -225,10 +221,3 @@ To create and update the MAGIC and LST databases (from the one produced by AB an - `lstchain_version`: this scripts loop over all the rows of the database, estract date and run number from the table and look for the data saved in the IT (i.e., which version of lstchain has been used to process a run). For each run, it sets to True the lstchain_0.9(0.10) cell if this run has been processed up to DL1 with lstchain 0.9(0.10). It sets error code '002' in case none of the two versions has been used to process the run. Launched as `python lstchain_version.py` -Error codes: - -- 000: no NSB - -- 001: NSB>3.0 - -- 002: neither 0.9 nor 0.10 lstchain version diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 1629e738..1103d868 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -134,11 +134,16 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis run_listed = [] if source_in is None: df_source = df[df["source"] == source_name] + print('Source: ', source_name) else: df_source = df[df["source"] == source_in] + print('Source: ', source_in) if is_LST: print("Finding LST runs...") + if len(df_source) == 0: + print("NO LST run found. Exiting...") + continue LST_run = df_source["LST1_run"].tolist() # List with runs as strings LST_date = df_source["date_LST"].tolist() for k in range(len(df_source)): @@ -156,6 +161,9 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis if not is_LST: print("Finding MAGIC runs...") + if len(df_source) == 0: + print("NO MAGIC run found. Exiting...") + continue MAGIC_date = df_source["date_MAGIC"].tolist() M2_run = df_source["Run ID"].tolist() for k in range(len(df_source)): @@ -249,9 +257,7 @@ def main(): df_LST = df_LST.reset_index() df_LST = df_LST.drop("index", axis=1) clear_files(source_in, source_out, df_LST) - if len(df_LST) == 0: - print("NO LST run found. 
Exiting...") - return + list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) list_date_LST = np.unique(df_LST["date_LST"]) list_date_LST_low = [sub.replace("-", "_") for sub in list_date_LST] From 250713a51a12cab610efe74cdbae452e9d9c2d87 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 28 May 2024 07:38:51 +0000 Subject: [PATCH 110/236] merge DL1 stereo (draft) --- .../semi_automatic_scripts/merge_stereo.py | 133 ++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py new file mode 100644 index 00000000..de8ceadd --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -0,0 +1,133 @@ +import argparse +import glob +import logging +import os +from pathlib import Path +import joblib +import numpy as np +import yaml +from magicctapipe import __version__ +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( + rc_lines, + slurm_lines, +) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + + +def MergeStereo(target_dir, env_name, source, NSB_match, cluster): + """ + This function creates the bash scripts to run merge_hdf_files.py in all DL2 subruns. + + Parameters + ---------- + target_dir: str + Path to the working directory + """ + + process_name = source + if NSB_match: + stereo_DL1_dir = f"{target_dir}/v{__version__}/{source}" + else: + stereo_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/Observations" + + listOfNightsLST = np.sort(glob.glob(f"{stereo_DL1_dir}/DL1Stereo/*")) + if cluster == 'SLURM': + for nightLST in listOfNightsLST: + stereoMergeDir = ( + f"{stereo_DL1_dir}/DL1Stereo/{nightLST.split('/')[-1]}/Merged" + ) + os.makedirs(f"{stereoMergeDir}/logs", exist_ok=True) + if not os.listdir(f"{nightLST}"): + continue + if len(os.listdir(nightLST)) < 3: + continue + + + slurm = slurm_lines( + queue="short", + job_name=f"{process_name}_stereo_merge", + out_name=f"{stereoMergeDir}/logs/slurm-%x.%A_%a", + ) + rc = rc_lines( + store=f"{nightLST} ${{SLURM_JOB_ID}}", out=f"{stereoMergeDir}/logs/list" + ) + os.system(f"echo {nightLST} >> {stereoMergeDir}/logs/list_dl0.txt") + lines = ( + slurm + + [ + f"conda run -n {env_name} merge_hdf_files --input-dir {nightLST} --output-dir {stereoMergeDir} --run-wise >{stereoMergeDir}/logs/merge_{nightLST.split('/')[-1]}_${{SLURM_JOB_ID}}.log\n" + ] + + rc + ) + + with open(f"{source}_StereoMerge_{nightLST.split('/')[-1]}.sh", "w") as f: + f.writelines(lines) + else: + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return + + + +def main(): + """ + Here we read the config_general.yaml file and call the functions defined above. 
+ """ + + parser = argparse.ArgumentParser() + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file", + ) + + args = parser.parse_args() + with open( + args.config_file, "rb" + ) as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + + target_dir = Path(config["directories"]["workspace_dir"]) + + NSB_match = config["general"]["NSB_matching"] + env_name = config["general"]["env_name"] + + + source_in = config["data_selection"]["source_name_database"] + source = config["data_selection"]["source_name_output"] + cluster = config["general"]["cluster"] + + + source_list = [] + if source_in is None: + source_list = joblib.load("list_sources.dat") + + else: + source_list.append(source) + for source_name in source_list: + + print("***** Merging DL2 files run-wise...") + MergeStereo(target_dir, env_name, source, NSB_match, cluster) + + list_of_merge = glob.glob(f"{source_name}_StereoMerge_*.sh") + if len(list_of_merge) < 1: + print( + "Warning: no bash script has been produced" + ) + continue + + launch_jobs = "" + for n, run in enumerate(list_of_merge): + launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable {run})" + + os.system(launch_jobs) + + + +if __name__ == "__main__": + main() From e5f06243ba02b27fdd23ce75d22da6c7159148f0 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 28 May 2024 09:18:17 +0000 Subject: [PATCH 111/236] minor fixes --- .../database_production/create_LST_table.py | 2 +- .../semi_automatic_scripts/merging_runs.py | 32 ++++++++----------- .../semi_automatic_scripts/stereo_events.py | 2 +- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index 734d410c..5e250e5a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -1,7 +1,7 @@ """ Create a new h5 table from the one of joint observations. -Only the columns needed to produce the lists of LST runs to be processed are presenved, and two columns are added to store NSB level and error codes +Only the columns needed to produce the lists of LST runs to be processed are preserved, and two columns are added to store NSB level and error codes """ import os diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 6b1959e3..11d694ea 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -52,7 +52,7 @@ logger.setLevel(logging.INFO) -def cleaning(list_of_nodes, cwd): +def cleaning(list_of_nodes): """ This function looks for failed runs in each node and remove them. @@ -60,11 +60,10 @@ def cleaning(list_of_nodes, cwd): Parameters ---------- list_of_nodes : array of str - List of nodes where the function will look for failed runs. - cwd : Path - Current working directory + List of nodes where the function will look for failed runs. """ + cwd = os.getcwd() for i in tqdm(range(len(list_of_nodes)), desc="Cleaning failed runs"): os.chdir(list_of_nodes[i]) os.system('find . 
-type f -name "*.h5" -size -1k -delete') @@ -76,9 +75,9 @@ def cleaning(list_of_nodes, cwd): def split_train_test(target_dir, train_fraction, source_name): """ - This function splits the MC proton sample in 2, i.e. the "test" and the "train" subsamples. + This function splits the MC proton sample in 2, i.e. the "test" and the "train" subsamples, in case you want to make performance studies on MC. For regular analyses, you can/should use the whole MC sample for training. It generates 2 subdirectories in the directory .../DL1/MC/protons named "test" and "train" and creates sub-sub-directories with the names of all nodes. - For each node sub-sub-directory we move 80% of the .h5 files (if it is in the "test" subdirectory) or 20% of the .h5 files (if it is in the "train" subdirectory). + For each node sub-sub-directory we move `train_fraction` of the .h5 files to the "train" subdirectory and `1-train_fraction` of the .h5 files to the "test" subdirectory. Parameters ---------- @@ -105,8 +104,8 @@ def split_train_test(target_dir, train_fraction, source_name): list_of_runs = np.sort( glob.glob(f'{proton_dir}/{list_of_dir[directory].split("/")[-2]}/*.h5') ) - split_percent = int(len(list_of_runs) * train_fraction) - for j in list_of_runs[0:split_percent]: + number_train_runs = int(len(list_of_runs) * train_fraction) + for j in list_of_runs[0:number_train_runs]: os.system( f"mv {j} {proton_dir}/train/{list_of_dir[directory].split('/')[-2]}" ) @@ -224,7 +223,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c logger.warning('Automatic processing not implemented for the cluster indicated in the config file') return -def mergeMC(target_dir, identification, env_name, cwd, source_name, cluster): +def mergeMC(target_dir, identification, env_name, source_name, cluster): """ This function creates the bash scripts to run merge_hdf_files.py in all MC runs. @@ -237,8 +236,6 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name, cluster): Tells which batch to create. Options: protons, gammadiffuse env_name : str Name of the environment - cwd : Path - Current working directory source_name : str Name of the target source """ @@ -259,7 +256,7 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name, cluster): process_size = len(list_of_nodes) - 1 - cleaning(list_of_nodes, cwd) # This will delete the (possibly) failed runs. + cleaning(list_of_nodes) # This will delete the (possibly) failed runs. if cluster == 'SLURM': with open(f"Merge_MC_{identification}.sh", "w") as f: slurm = slurm_lines( @@ -284,7 +281,7 @@ def mergeMC(target_dir, identification, env_name, cwd, source_name, cluster): def main(): """ - Here we read the config_general.yaml file, split the pronton sample into "test" and "train", and merge the MAGIC files. + Here we read the config_general.yaml file, split the proton sample into "test" and "train", and merge the MAGIC files. 
""" parser = argparse.ArgumentParser() @@ -312,7 +309,6 @@ def main(): args.config_file, "rb" ) as f: # "rb" mode opens the file in binary format for reading config = yaml.safe_load(f) - cwd = os.getcwd() target_dir = Path(config["directories"]["workspace_dir"]) @@ -349,15 +345,15 @@ def main(): print("***** Generating merge_MC bashscripts...") mergeMC( - target_dir, "protons", env_name, cwd, source_name, cluster + target_dir, "protons", env_name, source_name, cluster ) # generating the bash script to merge the files mergeMC( - target_dir, "gammadiffuse", env_name, cwd, source_name, cluster + target_dir, "gammadiffuse", env_name, source_name, cluster ) # generating the bash script to merge the files mergeMC( - target_dir, "gammas", env_name, cwd, source_name, cluster + target_dir, "gammas", env_name, source_name, cluster ) # generating the bash script to merge the files - mergeMC(target_dir, "protons_test", env_name, cwd, source_name, cluster) + mergeMC(target_dir, "protons_test", env_name, source_name, cluster) print("***** Running merge_hdf_files.py on the MC data files...") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 79bc76fd..6bdaac95 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -264,7 +264,7 @@ def main(): if n == 0: launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" else: - launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable --dependency=afterany:$stereo{n-1} {run})" + launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" os.system(launch_jobs) From acafa7ed0894e3e166156184e3c6a423d001f79f Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 28 May 2024 09:24:47 +0000 Subject: [PATCH 112/236] MAGIC calib --- .../setting_up_config_and_dir.py | 23 ++++--------------- 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index d6374ff3..a1cdd083 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -4,7 +4,7 @@ "manager" that organizes the analysis process by: 1) Creating the necessary directories and subdirectories. 2) Generating all the bash script files that convert the -MAGIC files from DL0 to DL1. +MAGIC files from Calibrated (`_Y_`) to DL1. 3) Launching these jobs in the IT container. Notice that in this stage we only use MAGIC data. 
@@ -63,19 +63,6 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match, source_name): Name of the target source """ - """ - Here we create the configuration file needed for transforming DL0 into DL1 - - Parameters - ---------- - ids : list - Telescope IDs - target_dir : path - Directory to store the results - noise_value : list - Extra noise in dim and bright pixels, Extra bias in dim pixels - """ - config_file = resource_file("config.yaml") with open( config_file, "rb" @@ -264,7 +251,7 @@ def lists_and_bash_gen_MAGIC( lines = [ f'export IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', f"export OUT1={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs \n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_dl0.txt\n\n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_cal.txt\n\n", ] f.writelines(lines) @@ -293,7 +280,7 @@ def lists_and_bash_gen_MAGIC( slurm + [ # without version for no NSB_match f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_dl0.txt))\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_cal.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", @@ -301,7 +288,7 @@ def lists_and_bash_gen_MAGIC( + rc ) with open( - f"{source}_MAGIC-" + "I" * magic + f"_dl0_to_dl1_run_{i[1]}.sh", + f"{source}_MAGIC-" + "I" * magic + f"_cal_to_dl1_run_{i[1]}.sh", "w", ) as f: f.writelines(lines) @@ -442,7 +429,7 @@ def main(): noise_value = [nsb, noisebright, biasdim] # TODO: fix here above - print("*** Converting DL0 into DL1 data ***") + print("*** Converting Calibrated into DL1 data ***") print(f"Process name: {source_name}") print( f"To check the jobs submitted to the cluster, type: squeue -n {source_name}" From cde8a21bcbcccea2824e793c23697b6db03acbd9 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 29 May 2024 09:56:55 +0000 Subject: [PATCH 113/236] added list_cal.txt to the list of input files (since it is now used in setting up script) --- .../scripts/lst1_magic/semi_automatic_scripts/job_accounting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index deaa97a7..9f779329 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -116,7 +116,7 @@ def main(): for dir in dirs: print(dir) list_dl0 = "" - ins = ["list_dl0.txt", "list_LST.txt", "list_coin.txt"] + ins = ["list_dl0.txt", "list_LST.txt", "list_coin.txt", "list_cal.txt"] for file in ins: if os.path.exists(f"{dir}/logs/{file}"): list_dl0 = f"{dir}/logs/{file}" From f0fb1c3b49d0ad0bfae7c25180b8b9d0681d5500 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 30 May 2024 13:24:10 +0000 Subject: [PATCH 114/236] fix (check on source names) --- .../semi_automatic_scripts/list_from_h5.py | 20 ++++++++++--------- .../setting_up_config_and_dir.py | 14 ++++++------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git 
a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 1103d868..0e702835 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -60,7 +60,7 @@ def magic_date(df): return df -def clear_files(source_in, source_out, df): +def clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2): """ This function deletes any file named XXXX_LST_runs.txt and XXXX_MAGIC_runs.txt from the working directory. @@ -76,8 +76,8 @@ def clear_files(source_in, source_out, df): """ source_list = [] - if source_in is None: - source_list = np.unique(df["source"]) + if source_in is None: + source_list = np.intersect1d(np.intersect1d(np.unique(df_LST["source"]),np.unique(df_MAGIC1["Source"])),np.unique(df_MAGIC2["Source"])) else: source_list.append(source_out) @@ -256,12 +256,6 @@ def main(): df_LST = df_LST.reset_index() df_LST = df_LST.drop("index", axis=1) - clear_files(source_in, source_out, df_LST) - - list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) - list_date_LST = np.unique(df_LST["date_LST"]) - list_date_LST_low = [sub.replace("-", "_") for sub in list_date_LST] - df_MAGIC1 = pd.read_hdf( "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", key="MAGIC1/runs_M1", @@ -270,9 +264,17 @@ def main(): "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", key="MAGIC2/runs_M2", ) + list_date_LST = np.unique(df_LST["date_LST"]) + list_date_LST_low = [sub.replace("-", "_") for sub in list_date_LST] + + df_MAGIC1 = df_MAGIC1[df_MAGIC1["Date (LST convention)"].isin(list_date_LST_low)] df_MAGIC2 = df_MAGIC2[df_MAGIC2["Date (LST convention)"].isin(list_date_LST_low)] + clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2) + + list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) + df_MAGIC2 = magic_date(df_MAGIC2) df_MAGIC1 = magic_date(df_MAGIC1) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index a1cdd083..6cf64d30 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -216,7 +216,7 @@ def lists_and_bash_gen_MAGIC( ): """ - Below we create a bash script that links the the MAGIC data paths to each subdirectory. + Below we create a bash script that links the MAGIC data paths to each subdirectory. 
Parameters ---------- @@ -319,7 +319,7 @@ def directories_generator( """ if NSB_match: - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1") + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") else: dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1/Observations") @@ -332,19 +332,19 @@ def directories_generator( "MC/helium", ] if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/logs") + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/logs", exist_ok=True) for dir in dir_list: - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs") + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs", exist_ok=True) else: overwrite = input( f'MC&data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? [only "y" or "n"]: ' ) if overwrite == "y": os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/logs") + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/logs", exist_ok=True) for dir in dir_list: os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs" + f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs", exist_ok=True ) else: print("Directory not modified.") @@ -355,7 +355,7 @@ def directories_generator( for i in MAGIC_runs: for magic in [1, 2]: if telescope_ids[magic - 3] > 0: - os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs") + os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs", exist_ok=True) def main(): From fb862b7facbb319610cfcaf98e9836b2e846318c Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Fri, 31 May 2024 08:34:02 +0200 Subject: [PATCH 115/236] Update README.md --- magicctapipe/scripts/lst1_magic/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index a8096044..d3194925 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -93,6 +93,8 @@ general: ``` +WARNING: the `LST_version` parameter must match the `processed_lstchain_file` version in the LST database (i.e., generally the last available and processable version of a run), which is the one used to evaluate the NSB level + Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the time window (or list of nights) defined on the config_general.yaml file: > $ list_from_h5 -c config_general.yaml From 0e2bce396cb5e86a9570e36a28eb31e80c5baa3a Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Fri, 31 May 2024 08:34:27 +0200 Subject: [PATCH 116/236] Update config_general.yaml --- .../lst1_magic/semi_automatic_scripts/config_general.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 864a9c7f..51a52c12 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -29,7 +29,7 @@ data_selection: general: SimTel_version: "v1.4" - LST_version : "v0.10" + LST_version : "v0.10" # check the 
`processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" focal_length : "effective" simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB From 97639c0d2942d8ad6dfbdbdab673d887f007578a Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 31 May 2024 07:06:11 +0000 Subject: [PATCH 117/236] cluster type --- .../coincident_events.py | 129 +++++----- .../database_production/nsb_level.py | 38 +-- .../semi_automatic_scripts/merging_runs.py | 150 +++++------ .../setting_up_config_and_dir.py | 232 +++++++++--------- .../semi_automatic_scripts/stereo_events.py | 19 +- 5 files changed, 288 insertions(+), 280 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 1d7bdede..6fa8fea7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -118,73 +118,72 @@ def linking_bash_lst( MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1/Observations/" dates = [os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*")] - if cluster == 'SLURM': - for d in dates: - Y_M, M_M, D_M = [int(x) for x in d.split("_")[1:]] - - day_MAGIC = dtdt(Y_M, M_M, D_M) - - delta = timedelta(days=1) - for i in LST_runs: - Y_L, M_L, D_L = [int(x) for x in i[0].split("_")] - - day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) - if day_MAGIC == day_LST + delta: - - lstObsDir = i[0].replace("_", "") - inputdir = ( - f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" - ) - - outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" - os.makedirs(f"{outputdir}/logs", exist_ok=True) - - list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) - - with open(f"{outputdir}/logs/list_LST.txt", "a+") as LSTdataPathFile: - for subrun in list_of_subruns: - LSTdataPathFile.write(f"{subrun}\n") - - if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): - continue - with open(f"{outputdir}/logs/list_LST.txt", "r") as f: - process_size = len(f.readlines()) - 1 - - if process_size < 0: - continue - slurm = slurm_lines( - queue="short", - job_name=f"{source_name}_coincidence", - array=process_size, - mem="8g", - out_name=f"{outputdir}/logs/slurm-%x.%A_%a", - ) - rc = rc_lines( - store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", - out="$OUTPUTDIR/logs/list", - ) - - lines = ( - slurm - + [ - f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", - f"export OUTPUTDIR={outputdir}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1\n", - ] - + rc - ) - with open( - f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", - "w", - ) as f: - f.writelines(lines) - else: + if cluster != 'SLURM': logger.warning('Automatic processing not implemented for the cluster indicated in the config file') return - + for d in dates: + Y_M, M_M, D_M = [int(x) for x in d.split("_")[1:]] + + day_MAGIC = dtdt(Y_M, M_M, D_M) 
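
For readers skimming this hunk: the date arithmetic above encodes the convention that a MAGIC `Merged_*` night directory pairs with LST-1 runs taken on the previous calendar date. A minimal standalone sketch of that matching rule (an illustrative example with invented dates, not code from the repository):

```
from datetime import datetime, timedelta

def same_night(magic_dir, lst_date):
    """Return True if a MAGIC 'Merged_YYYY_MM_DD' directory and an LST
    date string 'YYYY_MM_DD' refer to the same observing night."""
    y_m, m_m, d_m = [int(x) for x in magic_dir.split("_")[1:]]
    y_l, m_l, d_l = [int(x) for x in lst_date.split("_")]
    # the MAGIC date convention runs one day ahead of the LST one
    return datetime(y_m, m_m, d_m) == datetime(y_l, m_l, d_l) + timedelta(days=1)

print(same_night("Merged_2021_01_16", "2021_01_15"))  # True
print(same_night("Merged_2021_01_16", "2021_01_16"))  # False
```
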
+ + delta = timedelta(days=1) + for i in LST_runs: + Y_L, M_L, D_L = [int(x) for x in i[0].split("_")] + + day_LST = dtdt(int(Y_L), int(M_L), int(D_L)) + if day_MAGIC == day_LST + delta: + + lstObsDir = i[0].replace("_", "") + inputdir = ( + f"/fefs/aswg/data/real/DL1/{lstObsDir}/{LST_version}/tailcut84" + ) + + outputdir = f"{coincidence_DL1_dir}/DL1Coincident/{lstObsDir}" + os.makedirs(f"{outputdir}/logs", exist_ok=True) + + list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) + + with open(f"{outputdir}/logs/list_LST.txt", "a+") as LSTdataPathFile: + for subrun in list_of_subruns: + LSTdataPathFile.write(f"{subrun}\n") + + if not os.path.exists(f"{outputdir}/logs/list_LST.txt"): + continue + with open(f"{outputdir}/logs/list_LST.txt", "r") as f: + process_size = len(f.readlines()) - 1 + + if process_size < 0: + continue + slurm = slurm_lines( + queue="short", + job_name=f"{source_name}_coincidence", + array=process_size, + mem="8g", + out_name=f"{outputdir}/logs/slurm-%x.%A_%a", + ) + rc = rc_lines( + store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", + out="$OUTPUTDIR/logs/list", + ) + + lines = ( + slurm + + [ + f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", + f"export OUTPUTDIR={outputdir}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "export LOG=$OUTPUTDIR/logs/coincidence_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} lst1_magic_event_coincidence --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source_name}/config_coincidence.yaml >$LOG 2>&1\n", + ] + + rc + ) + with open( + f"{source_name}_LST_coincident_{outputdir.split('/')[-1]}.sh", + "w", + ) as f: + f.writelines(lines) + def main(): """ diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 590f6205..e061cafd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -15,6 +15,10 @@ import yaml from .lstchain_version import lstchain_versions +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( + slurm_lines, +) + __all__ = ["bash_scripts"] @@ -39,26 +43,26 @@ def bash_scripts(run, date, config, env_name, cluster): env_name : str Name of the environment """ - if cluster == 'SLURM': - lines = [ - "#!/bin/sh\n\n", - "#SBATCH -p long\n", - "#SBATCH -J nsb\n", - "#SBATCH -n 1\n\n", - f"#SBATCH --output=slurm-nsb_{run}-%x.%j.out\n" - f"#SBATCH --error=slurm-nsb_{run}-%x.%j.err\n" - "ulimit -l unlimited\n", - "ulimit -s unlimited\n", - "ulimit -a\n\n", - f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}_" - + "${SLURM_JOB_ID}.log 2>&1 \n\n", - ] - with open(f"nsb_{date}_run_{run}.sh", "w") as f: - f.writelines(lines) - else: + if cluster != 'SLURM': logger.warning('Automatic processing not implemented for the cluster indicated in the config file') return + slurm = slurm_lines( + queue="long", + job_name="nsb", + out_name=f"slurm-nsb_{run}-%x.%j", + ) + lines = ( + slurm + + [ + f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}_", + "${SLURM_JOB_ID}.log 2>&1 \n\n", + ] + ) + + with open(f"nsb_{date}_run_{run}.sh", "w") as f: + f.writelines(lines) + def main(): """ diff 
--git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 11d694ea..4580b653 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -148,80 +148,80 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" if not NSB_match: MAGIC_DL1_dir += "Observations/" - if cluster == 'SLURM': - lines = slurm_lines( - queue="short", - job_name=process_name, - mem="2g", - out_name=f"{MAGIC_DL1_dir}/Merged/logs/slurm-%x.%j", - ) - os.makedirs(f"{MAGIC_DL1_dir}/Merged/logs", exist_ok=True) - - with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: - f.writelines(lines) - if identification == "0_subruns": - for magic in [1, 2]: - for i in MAGIC_runs: - # Here is a difference w.r.t. original code. If only one telescope data are available they will be merged now for this telescope - indir = f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}" - if os.path.exists(f"{indir}"): - outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" - os.makedirs(f"{outdir}/logs", exist_ok=True) - os.system( - f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' - ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" - ) - rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", - out=f"{outdir}/logs/list", - ) - f.writelines(rc) - os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") - else: - print(f"ERROR: {indir} does not exist") - - elif identification == "1_M1M2": + if cluster != 'SLURM': + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return + lines = slurm_lines( + queue="short", + job_name=process_name, + mem="2g", + out_name=f"{MAGIC_DL1_dir}/Merged/logs/slurm-%x.%j", + ) + os.makedirs(f"{MAGIC_DL1_dir}/Merged/logs", exist_ok=True) + + with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: + f.writelines(lines) + if identification == "0_subruns": + for magic in [1, 2]: for i in MAGIC_runs: - if os.path.exists(f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}") & os.path.exists( - f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}" - ): - indir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" - outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged" + # Here is a difference w.r.t. original code. 
If only one telescope data are available they will be merged now for this telescope + indir = f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}" + if os.path.exists(f"{indir}"): + outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" os.makedirs(f"{outdir}/logs", exist_ok=True) + os.system( + f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' + ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" + store=f"{indir} ${{SLURM_JOB_ID}}", + out=f"{outdir}/logs/list", ) f.writelines(rc) os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") else: - print( - f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" - ) - else: - dates = np.unique(MAGIC_runs.T[0]) - for i in dates: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): - continue - - indir = f"{MAGIC_DL1_dir}/Merged/{i}/Merged" - outdir = f"{MAGIC_DL1_dir}/Merged/Merged_{i}" + print(f"ERROR: {indir} does not exist") + + elif identification == "1_M1M2": + for i in MAGIC_runs: + if os.path.exists(f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}") & os.path.exists( + f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}" + ): + indir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" + outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged" os.makedirs(f"{outdir}/logs", exist_ok=True) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) rc = rc_lines( store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" ) f.writelines(rc) os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") - else: - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') - return + else: + print( + f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" + ) + else: + dates = np.unique(MAGIC_runs.T[0]) + for i in dates: + if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): + continue + + indir = f"{MAGIC_DL1_dir}/Merged/{i}/Merged" + outdir = f"{MAGIC_DL1_dir}/Merged/Merged_{i}" + os.makedirs(f"{outdir}/logs", exist_ok=True) + f.write( + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log\n" + ) + rc = rc_lines( + store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" + ) + f.writelines(rc) + os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") + def mergeMC(target_dir, identification, env_name, source_name, cluster): @@ -257,26 +257,26 @@ def mergeMC(target_dir, identification, env_name, source_name, cluster): process_size = len(list_of_nodes) - 1 cleaning(list_of_nodes) # This will delete the (possibly) failed runs. 
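
Throughout this commit the `#SBATCH` headers are produced by the `slurm_lines()` helper imported from `magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters`. Its implementation is not part of this diff, so the sketch below is only an approximation inferred from its call sites here and from the inline `#SBATCH` block it replaces in `nsb_level.py` in this same commit; the real helper may differ in detail:

```
def slurm_lines(queue, job_name, array=None, mem=None, out_name=None):
    """Hypothetical sketch of the clusters.slurm_lines helper."""
    lines = [
        "#!/bin/sh\n\n",
        f"#SBATCH -p {queue}\n",
        f"#SBATCH -J {job_name}\n",
    ]
    if array is not None:
        lines.append(f"#SBATCH --array=0-{array}\n")
    if mem is not None:
        lines.append(f"#SBATCH --mem={mem}\n")
    if out_name is not None:
        lines.append(f"#SBATCH --output={out_name}.out\n")
        lines.append(f"#SBATCH --error={out_name}.err\n")
    lines += ["ulimit -l unlimited\n", "ulimit -s unlimited\n", "ulimit -a\n\n"]
    return lines

print("".join(slurm_lines(queue="short", job_name="merging_Crab", mem="2g")))
```

Generating the header once and appending job-specific lines after it is what keeps the produced `*.sh` files consistent across the merging, coincidence and stereo steps.
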
- if cluster == 'SLURM': - with open(f"Merge_MC_{identification}.sh", "w") as f: - slurm = slurm_lines( - queue="short", - array=process_size, - mem="7g", - job_name=process_name, - out_name=f"{MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a", - ) - lines_bash_file = slurm + [ - f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - f"export LOG={MC_DL1_dir}/{identification}/Merged" - + "/merged_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", - ] - f.writelines(lines_bash_file) - else: + if cluster != 'SLURM': logger.warning('Automatic processing not implemented for the cluster indicated in the config file') return + with open(f"Merge_MC_{identification}.sh", "w") as f: + slurm = slurm_lines( + queue="short", + array=process_size, + mem="7g", + job_name=process_name, + out_name=f"{MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a", + ) + lines_bash_file = slurm + [ + f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + f"export LOG={MC_DL1_dir}/{identification}/Merged" + + "/merged_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", + ] + f.writelines(lines_bash_file) + def main(): diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 6cf64d30..479cbc03 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -151,64 +151,64 @@ def lists_and_bash_generator( #################################################################################### # bash scripts that link the MC paths to each subdirectory. #################################################################################### - if cluster == 'SLURM': - with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: - slurm = slurm_lines( - queue="short", - job_name=process_name, - out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-linkMC-%x.%j", - ) - lines_of_config_file = slurm + [ - "while read -r -u 3 lineA && read -r -u 4 lineB\n", - "do\n", - f" cd {dir1}/DL1/MC/{particle_type}\n", - " mkdir $lineB\n", - " cd $lineA\n", - " ls -lR *.gz |wc -l\n", - f" mkdir -p {dir1}/DL1/MC/{particle_type}/$lineB/logs/\n", - f" ls *.gz > {dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n", - ' string=$lineA"/"\n', - f" export file={dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n\n", - " cat $file | while read line; do echo $string${line}" - + f" >>{dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", - ' echo "folder $lineB and node $lineA"\n', - f'done 3<"{dir1}/logs/list_nodes_{particle_type}_complete.txt" 4<"{dir1}/logs/list_folder_{particle_type}.txt"\n', - "", - ] - f.writelines(lines_of_config_file) - - ################################################################################################################ - # bash script that applies lst1_magic_mc_dl0_to_dl1.py to all MC data files. 
- ################################################################################################################ - - number_of_nodes = glob.glob(f"{MC_path}/node*") - number_of_nodes = len(number_of_nodes) - 1 - with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: - slurm = slurm_lines( - queue="xxl", - job_name=process_name, - array=number_of_nodes, - mem="10g", - out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-%x.%A_%a", - ) - lines_of_config_file = slurm + [ - f"cd {dir1}/DL1/MC/{particle_type}\n\n", - f"export INF={dir1}/logs\n", - f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "cd $SAMPLE\n\n", - f"export LOG={dir1}/DL1/MC/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", - "cat logs/list_dl0_ok.txt | while read line\n", - "do\n", - f" cd {dir1}/../\n", - f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/MC/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", - "done\n", - "", - ] - f.writelines(lines_of_config_file) - else: + if cluster != 'SLURM': logger.warning('Automatic processing not implemented for the cluster indicated in the config file') return + with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: + slurm = slurm_lines( + queue="short", + job_name=process_name, + out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-linkMC-%x.%j", + ) + lines_of_config_file = slurm + [ + "while read -r -u 3 lineA && read -r -u 4 lineB\n", + "do\n", + f" cd {dir1}/DL1/MC/{particle_type}\n", + " mkdir $lineB\n", + " cd $lineA\n", + " ls -lR *.gz |wc -l\n", + f" mkdir -p {dir1}/DL1/MC/{particle_type}/$lineB/logs/\n", + f" ls *.gz > {dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n", + ' string=$lineA"/"\n', + f" export file={dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n\n", + " cat $file | while read line; do echo $string${line}" + + f" >>{dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", + ' echo "folder $lineB and node $lineA"\n', + f'done 3<"{dir1}/logs/list_nodes_{particle_type}_complete.txt" 4<"{dir1}/logs/list_folder_{particle_type}.txt"\n', + "", + ] + f.writelines(lines_of_config_file) + + ################################################################################################################ + # bash script that applies lst1_magic_mc_dl0_to_dl1.py to all MC data files. 
+ ################################################################################################################ + + number_of_nodes = glob.glob(f"{MC_path}/node*") + number_of_nodes = len(number_of_nodes) - 1 + with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: + slurm = slurm_lines( + queue="xxl", + job_name=process_name, + array=number_of_nodes, + mem="10g", + out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-%x.%A_%a", + ) + lines_of_config_file = slurm + [ + f"cd {dir1}/DL1/MC/{particle_type}\n\n", + f"export INF={dir1}/logs\n", + f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", + "cd $SAMPLE\n\n", + f"export LOG={dir1}/DL1/MC/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", + "cat logs/list_dl0_ok.txt | while read line\n", + "do\n", + f" cd {dir1}/../\n", + f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/MC/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", + "done\n", + "", + ] + f.writelines(lines_of_config_file) + def lists_and_bash_gen_MAGIC( @@ -233,70 +233,68 @@ def lists_and_bash_gen_MAGIC( NSB_match : bool If real data are matched to pre-processed MCs or not """ - if cluster == 'SLURM': - process_name = source - lines = slurm_lines( - queue="short", - job_name=process_name, - out_name=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", - ) - - obs_tag = "" if NSB_match else "Observations" - with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: - f.writelines(lines) - for i in MAGIC_runs: - for magic in [1, 2]: - # if 1 then magic is second from last, if 2 then last - if telescope_ids[magic - 3] > 0: - lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', - f"export OUT1={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs \n", - f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_cal.txt\n\n", - ] - f.writelines(lines) - - for magic in [1, 2]: - # if 1 then magic is second from last, if 2 then last - if telescope_ids[magic - 3] > 0: - for i in MAGIC_runs: - number_of_nodes = glob.glob( - f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' - ) - number_of_nodes = len(number_of_nodes) - 1 - if number_of_nodes < 0: - continue - slurm = slurm_lines( - queue="short", # was long for no NSB_match - job_name=process_name, - array=number_of_nodes, - mem="2g", - out_name=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match - ) - rc = rc_lines( - store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", - out="$OUTPUTDIR/logs/list", - ) - lines = ( - slurm - + [ # without version for no NSB_match - f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", - "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_cal.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", - "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", - ] - + rc - ) - with open( - f"{source}_MAGIC-" + "I" * 
magic + f"_cal_to_dl1_run_{i[1]}.sh", - "w", - ) as f: - f.writelines(lines) - else: + if cluster != 'SLURM': logger.warning('Automatic processing not implemented for the cluster indicated in the config file') return + process_name = source + lines = slurm_lines( + queue="short", + job_name=process_name, + out_name=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", + ) - + obs_tag = "" if NSB_match else "Observations" + with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: + f.writelines(lines) + for i in MAGIC_runs: + for magic in [1, 2]: + # if 1 then magic is second from last, if 2 then last + if telescope_ids[magic - 3] > 0: + lines = [ + f'export IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f"export OUT1={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs \n", + f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_cal.txt\n\n", + ] + f.writelines(lines) + + for magic in [1, 2]: + # if 1 then magic is second from last, if 2 then last + if telescope_ids[magic - 3] > 0: + for i in MAGIC_runs: + number_of_nodes = glob.glob( + f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + ) + number_of_nodes = len(number_of_nodes) - 1 + if number_of_nodes < 0: + continue + slurm = slurm_lines( + queue="short", # was long for no NSB_match + job_name=process_name, + array=number_of_nodes, + mem="2g", + out_name=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match + ) + rc = rc_lines( + store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", + out="$OUTPUTDIR/logs/list", + ) + lines = ( + slurm + + [ # without version for no NSB_match + f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", + "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_cal.txt))\n", + "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", + "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + f"conda run -n {env_name} magic_calib_to_dl1 --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/{source}/config_DL0_to_DL1.yaml >$LOG 2>&1\n", + ] + + rc + ) + with open( + f"{source}_MAGIC-" + "I" * magic + f"_cal_to_dl1_run_{i[1]}.sh", + "w", + ) as f: + f.writelines(lines) + def directories_generator( target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name ): diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 483c644f..4be5071c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -80,7 +80,7 @@ def configfile_stereo(ids, target_dir, source_name, NSB_match): yaml.dump(conf, f, default_flow_style=False) -def bash_stereo(target_dir, source, env_name, NSB_match): +def bash_stereo(target_dir, source, env_name, NSB_match, cluster): """ This function generates the bashscript for running the stereo analysis. 
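
The stereo hunks below apply the same refactoring used in the other scripts of this commit: rather than indenting the whole script-generation body under `if cluster == 'SLURM':` with a trailing `else`, each function now rejects unsupported clusters up front and returns early. A toy sketch of the pattern (illustrative only, not code from the repository):

```
import logging

logger = logging.getLogger(__name__)

def generate_jobs(cluster, runs):
    # early-return guard: nothing is created for unsupported clusters
    if cluster != "SLURM":
        logger.warning(
            "Automatic processing not implemented for the cluster "
            "indicated in the config file"
        )
        return
    for run in runs:  # the long body now sits one indentation level higher
        print(f"would write a bash script for run {run}")

generate_jobs("HTCondor", ["03265"])  # only warns
generate_jobs("SLURM", ["03265"])  # does the work
```
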
@@ -105,7 +105,9 @@ def bash_stereo(target_dir, source, env_name, NSB_match): coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/Observations" listOfNightsLST = np.sort(glob.glob(f"{coincidence_DL1_dir}/DL1Coincident/*")) - + if cluster != 'SLURM': + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return for nightLST in listOfNightsLST: stereoDir = f"{coincidence_DL1_dir}/DL1Stereo/{nightLST.split('/')[-1]}" os.makedirs(f"{stereoDir}/logs", exist_ok=True) @@ -149,7 +151,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match): f.writelines(lines) -def bash_stereoMC(target_dir, identification, env_name, source): +def bash_stereoMC(target_dir, identification, env_name, source, cluster): """ This function generates the bashscript for running the stereo analysis. @@ -176,7 +178,9 @@ def bash_stereoMC(target_dir, identification, env_name, source): ) # generating a list with the DL1 coincident data files. with open(f"{inputdir}/list_coin.txt", "r") as f: process_size = len(f.readlines()) - 1 - + if cluster != 'SLURM': + logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + return with open(f"StereoEvents_MC_{identification}.sh", "w") as f: slurm = slurm_lines( queue="xxl", @@ -237,6 +241,9 @@ def main(): source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] + cluster = config["general"]["cluster"] + + if source_in is None: source_list = joblib.load("list_sources.dat") else: @@ -254,7 +261,7 @@ def main(): ): print("***** Generating the bashscript for MCs...") for part in ["gammadiffuse", "gammas", "protons", "protons_test"]: - bash_stereoMC(target_dir, part, env_name, source_name) + bash_stereoMC(target_dir, part, env_name, source_name, cluster) list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) @@ -269,7 +276,7 @@ def main(): # Below we run the analysis on the real data print("***** Generating the bashscript...") - bash_stereo(target_dir, source_name, env_name, NSB_match) + bash_stereo(target_dir, source_name, env_name, NSB_match, cluster) print("***** Submitting processess to the cluster...") print(f"Process name: {source_name}_stereo") From 3506ce5432f0018010fcd617c835bcab2f6c1d12 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 31 May 2024 07:13:02 +0000 Subject: [PATCH 118/236] minor fixes --- .../coincident_events.py | 13 +++-- .../database_production/LSTnsb.py | 5 +- .../database_production/nsb_level.py | 37 ++++++------- .../semi_automatic_scripts/list_from_h5.py | 24 +++++---- .../semi_automatic_scripts/merging_runs.py | 53 ++++++++++++++----- .../setting_up_config_and_dir.py | 38 +++++++++---- .../semi_automatic_scripts/stereo_events.py | 21 +++++--- 7 files changed, 123 insertions(+), 68 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 6fa8fea7..9542e0cd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -105,6 +105,8 @@ def linking_bash_lst( Name of the conda environment NSB_match : bool If real data are matched to pre-processed MCs or not + cluster : str + Cluster system """ if NSB_match: @@ -118,8 +120,10 @@ def linking_bash_lst( MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1/Observations/" dates = 
[os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*")] - if cluster != 'SLURM': - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return for d in dates: Y_M, M_M, D_M = [int(x) for x in d.split("_")[1:]] @@ -183,7 +187,8 @@ def linking_bash_lst( "w", ) as f: f.writelines(lines) - + + def main(): """ @@ -243,7 +248,7 @@ def main(): LST_version, env_name, NSB_match, - cluster + cluster, ) # linking the data paths to current working directory print("***** Submitting processess to the cluster...") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index e7c23e8d..984eafb2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -143,7 +143,6 @@ def main(): logger.info(f"Run {run_number} already processed") return - # date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] inputdir = f"/fefs/aswg/data/real/DL1/{date}/{lst_version}/{lst_tailcut}" run_list = np.sort(glob.glob(f"{inputdir}/dl1*Run*{run_number}.*.h5")) @@ -155,7 +154,7 @@ def main(): return median_NSB = np.median(noise) logger.info(f"Run n. {run_number}, nsb median {median_NSB}") - + for j in range(0, len(nsb_list)): if (median_NSB < nsb_limit[j + 1]) & (median_NSB > nsb_limit[j]): with open(f"nsb_LST_{nsb_list[j]}_{run_number}.txt", "a+") as f: @@ -164,8 +163,6 @@ def main(): with open(f"nsb_LST_high_{run_number}.txt", "a+") as f: f.write(f"{date},{run_number},{median_NSB}\n") - - if __name__ == "__main__": main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index e061cafd..59b812ae 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -14,11 +14,9 @@ import pandas as pd import yaml -from .lstchain_version import lstchain_versions -from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( - slurm_lines, -) +from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines +from .lstchain_version import lstchain_versions __all__ = ["bash_scripts"] @@ -39,30 +37,30 @@ def bash_scripts(run, date, config, env_name, cluster): LST date config : str Name of the configuration file - env_name : str Name of the environment + cluster : str + Cluster system """ - if cluster != 'SLURM': - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return slurm = slurm_lines( queue="long", job_name="nsb", out_name=f"slurm-nsb_{run}-%x.%j", ) - lines = ( - slurm - + [ - f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}_", - "${SLURM_JOB_ID}.log 2>&1 \n\n", - ] - ) - + lines = slurm + [ + f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}_", + "${SLURM_JOB_ID}.log 2>&1 \n\n", + ] + with 
open(f"nsb_{date}_run_{run}.sh", "w") as f: f.writelines(lines) - + def main(): """ @@ -99,9 +97,8 @@ def main(): config = yaml.safe_load(f) env_name = config["general"]["env_name"] - - cluster = config["general"]["cluster"] + cluster = config["general"]["cluster"] df_LST = pd.read_hdf( "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5", @@ -165,9 +162,7 @@ def main(): launch_jobs = "" for n, run in enumerate(list_of_bash_scripts): - launch_jobs += ( - " && " if n > 0 else "" - ) + f"nsb{n}=$(sbatch --parsable {run})" + launch_jobs += (" && " if n > 0 else "") + f"nsb{n}=$(sbatch --parsable {run})" os.system(launch_jobs) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 0e702835..3dadc304 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -71,13 +71,20 @@ def clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2): Target name in the database. If None, it stands for all the sources observed in a pre-set time interval. source_out : str Name tag for the target. Used only if source_in is not None. - df : :class:`pandas.DataFrame` - Dataframe of the joint MAGIC+LST-1 observations based on the .h5 table. + df_LST : :class:`pandas.DataFrame` + LST-1 dataframe of the joint MAGIC+LST-1 observations. + df_MAGIC1 : :class:`pandas.DataFrame` + MAGIC-1 dataframe of the joint MAGIC+LST-1 observations. + df_MAGIC2 : :class:`pandas.DataFrame` + MAGIC-2 dataframe of the joint MAGIC+LST-1 observations. """ source_list = [] - if source_in is None: - source_list = np.intersect1d(np.intersect1d(np.unique(df_LST["source"]),np.unique(df_MAGIC1["Source"])),np.unique(df_MAGIC2["Source"])) + if source_in is None: + source_list = np.intersect1d( + np.intersect1d(np.unique(df_LST["source"]), np.unique(df_MAGIC1["Source"])), + np.unique(df_MAGIC2["Source"]), + ) else: source_list.append(source_out) @@ -134,10 +141,10 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis run_listed = [] if source_in is None: df_source = df[df["source"] == source_name] - print('Source: ', source_name) + print("Source: ", source_name) else: df_source = df[df["source"] == source_in] - print('Source: ', source_in) + print("Source: ", source_in) if is_LST: print("Finding LST runs...") @@ -267,14 +274,11 @@ def main(): list_date_LST = np.unique(df_LST["date_LST"]) list_date_LST_low = [sub.replace("-", "_") for sub in list_date_LST] - - df_MAGIC1 = df_MAGIC1[df_MAGIC1["Date (LST convention)"].isin(list_date_LST_low)] df_MAGIC2 = df_MAGIC2[df_MAGIC2["Date (LST convention)"].isin(list_date_LST_low)] clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2) - + list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) - df_MAGIC2 = magic_date(df_MAGIC2) df_MAGIC1 = magic_date(df_MAGIC1) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 4580b653..9740834d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -60,7 +60,7 @@ def cleaning(list_of_nodes): Parameters ---------- list_of_nodes : array of str - List of nodes where the function will look for failed runs. + List of nodes where the function will look for failed runs. 
""" cwd = os.getcwd() @@ -97,9 +97,12 @@ def split_train_test(target_dir, train_fraction, source_name): range(len(list_of_dir)) ): # tqdm allows us to print a progessbar in the terminal - os.makedirs(f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}", exist_ok = True) os.makedirs( - f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}', exist_ok = True + f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}", exist_ok=True + ) + os.makedirs( + f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}', + exist_ok=True, ) list_of_runs = np.sort( glob.glob(f'{proton_dir}/{list_of_dir[directory].split("/")[-2]}/*.h5') @@ -141,6 +144,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c Target name NSB_match : bool If real data are matched to pre-processed MCs or not + cluster : str + Cluster system """ process_name = f"merging_{source}" @@ -148,8 +153,10 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" if not NSB_match: MAGIC_DL1_dir += "Observations/" - if cluster != 'SLURM': - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return lines = slurm_lines( queue="short", @@ -221,7 +228,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c ) f.writelines(rc) os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") - + def mergeMC(target_dir, identification, env_name, source_name, cluster): @@ -238,6 +245,8 @@ def mergeMC(target_dir, identification, env_name, source_name, cluster): Name of the environment source_name : str Name of the target source + cluster : str + Cluster system """ process_name = f"merging_{source_name}" @@ -257,8 +266,10 @@ def mergeMC(target_dir, identification, env_name, source_name, cluster): process_size = len(list_of_nodes) - 1 cleaning(list_of_nodes) # This will delete the (possibly) failed runs. 
- if cluster != 'SLURM': - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return with open(f"Merge_MC_{identification}.sh", "w") as f: slurm = slurm_lines( @@ -276,7 +287,7 @@ def mergeMC(target_dir, identification, env_name, source_name, cluster): f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", ] f.writelines(lines_bash_file) - + def main(): @@ -378,13 +389,31 @@ def main(): ): print("***** Generating merge_MAGIC bashscripts...") merge( - target_dir, "0_subruns", MAGIC_runs, env_name, source_name, NSB_match, cluster + target_dir, + "0_subruns", + MAGIC_runs, + env_name, + source_name, + NSB_match, + cluster, ) # generating the bash script to merge the subruns merge( - target_dir, "1_M1M2", MAGIC_runs, env_name, source_name, NSB_match, cluster + target_dir, + "1_M1M2", + MAGIC_runs, + env_name, + source_name, + NSB_match, + cluster, ) # generating the bash script to merge the M1 and M2 runs merge( - target_dir, "2_nights", MAGIC_runs, env_name, source_name, NSB_match, cluster + target_dir, + "2_nights", + MAGIC_runs, + env_name, + source_name, + NSB_match, + cluster, ) # generating the bash script to merge all runs per night print("***** Running merge_hdf_files.py on the MAGIC data files...") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 479cbc03..c57bb91d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -127,6 +127,8 @@ def lists_and_bash_generator( Name of the environment source_name : str Name of the target source + cluster : str + Cluster system """ if MC_path == "": @@ -151,8 +153,10 @@ def lists_and_bash_generator( #################################################################################### # bash scripts that link the MC paths to each subdirectory. 
#################################################################################### - if cluster != 'SLURM': - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: slurm = slurm_lines( @@ -208,7 +212,6 @@ def lists_and_bash_generator( "", ] f.writelines(lines_of_config_file) - def lists_and_bash_gen_MAGIC( @@ -232,9 +235,13 @@ def lists_and_bash_gen_MAGIC( Name of the environment NSB_match : bool If real data are matched to pre-processed MCs or not + cluster : str + Cluster system """ - if cluster != 'SLURM': - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return process_name = source lines = slurm_lines( @@ -294,7 +301,8 @@ def lists_and_bash_gen_MAGIC( "w", ) as f: f.writelines(lines) - + + def directories_generator( target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name ): @@ -330,19 +338,27 @@ def directories_generator( "MC/helium", ] if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/logs", exist_ok=True) + os.makedirs( + f"{target_dir}/v{__version__}/{source_name}/logs", exist_ok=True + ) for dir in dir_list: - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs", exist_ok=True) + os.makedirs( + f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs", + exist_ok=True, + ) else: overwrite = input( f'MC&data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? 
[only "y" or "n"]: ' ) if overwrite == "y": os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/logs", exist_ok=True) + os.makedirs( + f"{target_dir}/v{__version__}/{source_name}/logs", exist_ok=True + ) for dir in dir_list: os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs", exist_ok=True + f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs", + exist_ok=True, ) else: print("Directory not modified.") @@ -462,7 +478,7 @@ def main(): focal_length, env_name, source_name, - cluster, + cluster, ) # Here we do the MC DL0 to DL1 conversion: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 4be5071c..f863d933 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -95,6 +95,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): Name of the environment NSB_match : bool If real data are matched to pre-processed MCs or not + cluster : str + Cluster system """ process_name = source @@ -105,8 +107,10 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/Observations" listOfNightsLST = np.sort(glob.glob(f"{coincidence_DL1_dir}/DL1Coincident/*")) - if cluster != 'SLURM': - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return for nightLST in listOfNightsLST: stereoDir = f"{coincidence_DL1_dir}/DL1Stereo/{nightLST.split('/')[-1]}" @@ -166,6 +170,8 @@ def bash_stereoMC(target_dir, identification, env_name, source, cluster): Name of the environment source : str Name of the target source + cluster : str + Cluster system """ process_name = source @@ -178,8 +184,10 @@ def bash_stereoMC(target_dir, identification, env_name, source, cluster): ) # generating a list with the DL1 coincident data files. 
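
The `process_size = len(f.readlines()) - 1` idiom just below (and in every script of this series) sizes a SLURM job array: `--array=0-N` is inclusive on both ends, so a list of k input files needs N = k - 1. A standalone illustration (file names invented):

```
sample_list = [
    "dl1_LST-1.Run03265.0000.h5",
    "dl1_LST-1.Run03265.0001.h5",
    "dl1_LST-1.Run03265.0002.h5",
]
process_size = len(sample_list) - 1
print(f"#SBATCH --array=0-{process_size}")  # 3 files -> tasks 0, 1 and 2
# at run time each array task then picks its own input, e.g.
# SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}
```
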
with open(f"{inputdir}/list_coin.txt", "r") as f: process_size = len(f.readlines()) - 1 - if cluster != 'SLURM': - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return with open(f"StereoEvents_MC_{identification}.sh", "w") as f: slurm = slurm_lines( @@ -243,7 +251,6 @@ def main(): cluster = config["general"]["cluster"] - if source_in is None: source_list = joblib.load("list_sources.dat") else: @@ -269,7 +276,9 @@ def main(): if n == 0: launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" else: - launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" + launch_jobs = ( + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" + ) os.system(launch_jobs) From ccc0e3ad69bc673b2d3d212c65bfc20614121096 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 31 May 2024 07:54:22 +0000 Subject: [PATCH 119/236] fixed lstchain config --- .../lstchain_standard_config_modified.json | 289 ------------------ .../database_production/LSTnsb.py | 19 +- .../database_production/nsb_level.py | 26 +- 3 files changed, 31 insertions(+), 303 deletions(-) delete mode 100644 magicctapipe/resources/lstchain_standard_config_modified.json diff --git a/magicctapipe/resources/lstchain_standard_config_modified.json b/magicctapipe/resources/lstchain_standard_config_modified.json deleted file mode 100644 index 7046f91c..00000000 --- a/magicctapipe/resources/lstchain_standard_config_modified.json +++ /dev/null @@ -1,289 +0,0 @@ -{ - "source_config" : { - "EventSource": { - "allowed_tels": [1], - "max_events": null - }, - "LSTEventSource": { - "default_trigger_type": "ucts", - "allowed_tels": [1], - "min_flatfield_adc": 3000, - "min_flatfield_pixel_fraction": 0.8, - "calibrate_flatfields_and_pedestals": false, - "use_flatfield_heuristic": true, - "EventTimeCalculator": { - "dragon_reference_counter": null, - "dragon_reference_time": null - }, - "PointingSource":{ - "drive_report_path": null - }, - "LSTR0Corrections":{ - "calib_scale_high_gain":1.088, - "calib_scale_low_gain":1.004, - "drs4_pedestal_path": null, - "calibration_path": null, - "drs4_time_calibration_path": null - } - } - }, - - "events_filters": { - "intensity": [0, Infinity], - "width": [0, Infinity], - "length": [0, Infinity], - "wl": [0, Infinity], - "r": [0, Infinity], - "leakage_intensity_width_2": [0, Infinity] - }, - "n_training_events": { - "gamma_regressors": 1.0, - "gamma_tmp_regressors": 0.8, - "gamma_classifier": 0.2, - "proton_classifier": 1.0 - }, - - "tailcut": { - "picture_thresh":8, - "boundary_thresh":4, - "keep_isolated_pixels":false, - "min_number_picture_neighbors":2, - "use_only_main_island":false, - "delta_time": 2 - }, - "tailcuts_clean_with_pedestal_threshold": { - "picture_thresh":8, - "boundary_thresh":4, - "sigma":2.5, - "keep_isolated_pixels":false, - "min_number_picture_neighbors":2, - "use_only_main_island":false, - "delta_time": 2 - }, - "dynamic_cleaning": { - "apply": true, - "threshold": 267, - "fraction_cleaning_intensity": 0.03 - }, - - "random_forest_energy_regressor_args": { - "max_depth": 30, - "min_samples_leaf": 10, - "n_jobs": -1, - "n_estimators": 150, - "bootstrap": true, - "criterion": "squared_error", - "max_features": "auto", - "max_leaf_nodes": null, - "min_impurity_decrease": 0.0, - "min_samples_split": 10, - "min_weight_fraction_leaf": 0.0, - "oob_score": false, - "random_state": 42, - "warm_start": false - }, 
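
A note on this deletion: the file removed here was a full copy of lstchain's standard configuration with a few edited values. After this commit the pipeline instead loads the packaged `lstchain_standard_config.json` from the active conda environment and flips the single switch it needs, as the `nsb_level.py` changes later in this commit show. A runnable sketch of that load-modify-dump pattern, with a throwaway stand-in file so it does not require a lstchain installation:

```
import json
import tempfile

# stand-in for lstchain_standard_config.json (content heavily abbreviated)
original = {"source_config": {"LSTEventSource": {"use_flatfield_heuristic": None}}}
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as src:
    json.dump(original, src)
    src_path = src.name

with open(src_path, "r") as f_lst:
    lst_dict = json.load(f_lst)
lst_dict["source_config"]["LSTEventSource"]["use_flatfield_heuristic"] = True

with open("lstchain.json", "w") as outfile:  # the copy handed to LSTnsb via -l
    json.dump(lst_dict, outfile)
print(json.dumps(lst_dict, indent=2))
```
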
- - "random_forest_disp_regressor_args": { - "max_depth": 30, - "min_samples_leaf": 10, - "n_jobs": -1, - "n_estimators": 150, - "bootstrap": true, - "criterion": "squared_error", - "max_features": "auto", - "max_leaf_nodes": null, - "min_impurity_decrease": 0.0, - "min_samples_split": 10, - "min_weight_fraction_leaf": 0.0, - "oob_score": false, - "random_state": 42, - "warm_start": false - }, - - "random_forest_disp_classifier_args": { - "max_depth": 30, - "min_samples_leaf": 10, - "n_jobs": -1, - "n_estimators": 100, - "criterion": "gini", - "min_samples_split": 10, - "min_weight_fraction_leaf": 0.0, - "max_features": "auto", - "max_leaf_nodes": null, - "min_impurity_decrease": 0.0, - "bootstrap": true, - "oob_score": false, - "random_state": 42, - "warm_start": false, - "class_weight": null - }, - - "random_forest_particle_classifier_args": { - "max_depth": 30, - "min_samples_leaf": 10, - "n_jobs": -1, - "n_estimators": 100, - "criterion": "gini", - "min_samples_split": 10, - "min_weight_fraction_leaf": 0.0, - "max_features": "auto", - "max_leaf_nodes": null, - "min_impurity_decrease": 0.0, - "bootstrap": true, - "oob_score": false, - "random_state": 42, - "warm_start": false, - "class_weight": null - }, - - - "energy_regression_features": [ - "log_intensity", - "width", - "length", - "x", - "y", - "wl", - "skewness", - "kurtosis", - "time_gradient", - "leakage_intensity_width_2", - "sin_az_tel", - "alt_tel" - ], - - "disp_method": "disp_norm_sign", - - "disp_regression_features": [ - "log_intensity", - "width", - "length", - "wl", - "skewness", - "kurtosis", - "time_gradient", - "leakage_intensity_width_2", - "sin_az_tel", - "alt_tel" - ], - - "disp_classification_features": [ - "log_intensity", - "width", - "length", - "wl", - "skewness", - "kurtosis", - "time_gradient", - "leakage_intensity_width_2", - "sin_az_tel", - "alt_tel" - ], - - "particle_classification_features": [ - "log_intensity", - "width", - "length", - "x", - "y", - "wl", - "signed_skewness", - "kurtosis", - "signed_time_gradient", - "leakage_intensity_width_2", - "log_reco_energy", - "reco_disp_norm", - "reco_disp_sign", - "sin_az_tel", - "alt_tel" - ], - - "allowed_tels": [1], - "write_pe_image": false, - "mc_image_scaling_factor": 1, - "image_extractor": "LocalPeakWindowSum", - "image_extractor_for_muons": "GlobalPeakWindowSum", - "CameraCalibrator": { - "apply_waveform_time_shift": false - }, - "time_sampling_correction_path": "default", - "LocalPeakWindowSum":{ - "window_shift": 4, - "window_width": 8, - "apply_integration_correction": false - }, - "GlobalPeakWindowSum":{ - "window_shift": 4, - "window_width": 8, - "apply_integration_correction": false - }, - "timestamps_pointing":"ucts", - - "train_gamma_src_r_deg": [0, Infinity], - - "source_dependent": false, - "mc_nominal_source_x_deg": 0.4, - "mc_nominal_source_y_deg": 0.0, - - "volume_reducer":{ - "algorithm": null, - "parameters": { - } - }, - "calibration_product": "LSTCalibrationCalculator", - - "LSTCalibrationCalculator":{ - "systematic_correction_path": null, - "npe_median_cut_outliers": [-5,5], - "squared_excess_noise_factor": 1.222, - "flatfield_product": "FlasherFlatFieldCalculator", - "pedestal_product": "PedestalIntegrator", - "PedestalIntegrator":{ - "sample_size": 10000, - "sample_duration":100000, - "tel_id":1, - "time_sampling_correction_path": null, - "charge_median_cut_outliers": [-10,10], - "charge_std_cut_outliers": [-10,10], - "charge_product":"FixedWindowSum", - "FixedWindowSum":{ - "window_shift": 6, - "window_width":12, - "peak_index": 
18, - "apply_integration_correction": false - } - }, - "FlasherFlatFieldCalculator":{ - "sample_size": 10000, - "sample_duration":100000, - "tel_id":1, - "time_sampling_correction_path": null, - "charge_product":"LocalPeakWindowSum", - "charge_median_cut_outliers": [-0.9,2], - "charge_std_cut_outliers": [-10,10], - "time_cut_outliers": [2,38], - "LocalPeakWindowSum":{ - "window_shift": 5, - "window_width":12, - "apply_integration_correction": false - } - } - }, - "waveform_nsb_tuning":{ - "nsb_tuning": false, - "nsb_tuning_ratio": 0.52, - "spe_location": "lstchain/data/SinglePhE_ResponseInPhE_expo2Gaus.dat" - }, - "write_interleaved_events":{ - "DataWriter": { - "overwrite": true, - "write_images": false, - "write_parameters": false, - "write_waveforms": true, - "transform_waveform": true, - "waveform_dtype": "uint16", - "waveform_offset": 400, - "waveform_scale": 80 - } - } -} diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 984eafb2..b97bcf71 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -103,6 +103,13 @@ def main(): type=str, help="Day of the run to be processed", ) + parser.add_argument( + "--lstchain-config", + "-l", + dest="lst_conf", + type=str, + help="lstchain configuration file", + ) parser.add_argument( "--denominator", "-s", @@ -119,6 +126,7 @@ def main(): run_number = args.run date = args.day denominator = args.denominator + lst_config = args.lst_conf simtel = config["general"]["simtel_nsb"] nsb_list = config["general"]["nsb"] lst_version = config["general"]["LST_version"] @@ -127,15 +135,8 @@ def main(): width.append(0.25) nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] nsb_limit.insert(0, 0) - conda_path = os.environ["CONDA_PREFIX"] - lstchain_modified = config["general"]["lstchain_modified_config"] - lst_config = ( - str(conda_path) - + "/lib/python3.11/site-packages/lstchain/data/lstchain_standard_config.json" - ) - if lstchain_modified: - lst_config = resource_file("lstchain_standard_config_modified.json") - print(lst_config) + + LST_files = np.sort(glob.glob(f"nsb_LST_*_{run_number}.txt")) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 59b812ae..0daee37d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -9,11 +9,12 @@ import logging import os from datetime import datetime - +import json import numpy as np import pandas as pd import yaml + from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines from .lstchain_version import lstchain_versions @@ -25,7 +26,7 @@ logger.setLevel(logging.INFO) -def bash_scripts(run, date, config, env_name, cluster): +def bash_scripts(run, date, config, env_name, cluster, lst_config): """Here we create the bash scripts (one per LST run) @@ -41,6 +42,8 @@ def bash_scripts(run, date, config, env_name, cluster): Name of the environment cluster : str Cluster system + lst_config : str + Configuration file lstchain """ if cluster != "SLURM": logger.warning( @@ -53,7 +56,7 @@ def bash_scripts(run, date, config, env_name, cluster): 
out_name=f"slurm-nsb_{run}-%x.%j", ) lines = slurm + [ - f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} > nsblog_{date}_{run}_", + f"conda run -n {env_name} LSTnsb -c {config} -i {run} -d {date} -l {lst_config} > nsblog_{date}_{run}_", "${SLURM_JOB_ID}.log 2>&1 \n\n", ] @@ -105,6 +108,19 @@ def main(): key="joint_obs", ) lstchain_v = config["general"]["LST_version"] + lstchain_modified = config["general"]["lstchain_modified_config"] + conda_path = os.environ["CONDA_PREFIX"] + lst_config_orig = ( + str(conda_path) + + "/lib/python3.11/site-packages/lstchain/data/lstchain_standard_config.json" + ) + with open(lst_config_orig, 'r') as f_lst: + lst_dict=json.load(f_lst) + if lstchain_modified: + lst_dict["source_config"]['LSTEventSource']['use_flatfield_heuristic'] = True + with open("lstchain.json", "w+") as outfile: + json.dump(lst_dict, outfile) + lst_config = "lstchain.json" min = datetime.strptime(args.begin_date, "%Y_%m_%d") max = datetime.strptime(args.end_date, "%Y_%m_%d") @@ -139,7 +155,7 @@ def main(): ] = f"/fefs/aswg/data/real/DL1/{date}/{max_common}/tailcut84/dl1_LST-1.Run{run_number}.h5" df_LST.loc[i, "error_code_nsb"] = np.nan - bash_scripts(run_number, date, args.config_file, env_name, cluster) + bash_scripts(run_number, date, args.config_file, env_name, cluster, lst_config) print("Process name: nsb") print("To check the jobs submitted to the cluster, type: squeue -n nsb") @@ -147,7 +163,7 @@ def main(): if len(list_of_bash_scripts) < 1: logger.warning( - "No bash script has been produced to evaluate the NSB level for the provided LST runs. Please check the input list" + "No bash script has been produced to evaluate the NSB level for the provided LST runs. Please check the input dates" ) return print("Update database and launch jobs") From 4280c4213928cbd83b584be521b817a01662f890 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 31 May 2024 08:06:52 +0000 Subject: [PATCH 120/236] minor fixes --- .../semi_automatic_scripts/merging_runs.py | 12 ++++++------ .../setting_up_config_and_dir.py | 4 ++-- .../semi_automatic_scripts/stereo_events.py | 5 +++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 9740834d..9c9553a5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -96,9 +96,9 @@ def split_train_test(target_dir, train_fraction, source_name): for directory in tqdm( range(len(list_of_dir)) ): # tqdm allows us to print a progessbar in the terminal - + node = list_of_dir[directory].split('/')[-2] os.makedirs( - f"{proton_dir}/train/{list_of_dir[directory].split('/')[-2]}", exist_ok=True + f"{proton_dir}/train/{node}", exist_ok=True ) os.makedirs( f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}', @@ -110,17 +110,17 @@ def split_train_test(target_dir, train_fraction, source_name): number_train_runs = int(len(list_of_runs) * train_fraction) for j in list_of_runs[0:number_train_runs]: os.system( - f"mv {j} {proton_dir}/train/{list_of_dir[directory].split('/')[-2]}" + f"mv {j} {proton_dir}/train/{node}" ) os.system( - f"cp {list_of_dir[directory]}*.txt {proton_dir}/train/{list_of_dir[directory].split('/')[-2]}" + f"cp {list_of_dir[directory]}*.txt {proton_dir}/train/{node}" ) os.system( - f"mv {list_of_dir[directory]}*.txt 
{proton_dir}/../protons_test/{list_of_dir[directory].split('/')[-2]}" + f"mv {list_of_dir[directory]}*.txt {proton_dir}/../protons_test/{node}" ) os.system( - f"mv {list_of_dir[directory]}*.h5 {proton_dir}/../protons_test/{list_of_dir[directory].split('/')[-2]}" + f"mv {list_of_dir[directory]}*.h5 {proton_dir}/../protons_test/{node}" ) os.system(f"rm -r {list_of_dir[directory]}") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index c57bb91d..70a9d686 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -258,7 +258,7 @@ def lists_and_bash_gen_MAGIC( # if 1 then magic is second from last, if 2 then last if telescope_ids[magic - 3] > 0: lines = [ - f'export IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}\n', + f'export IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].replace("_","/")}\n', f"export OUT1={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs \n", f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_cal.txt\n\n", ] @@ -269,7 +269,7 @@ def lists_and_bash_gen_MAGIC( if telescope_ids[magic - 3] > 0: for i in MAGIC_runs: number_of_nodes = glob.glob( - f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].split("_")[0]}/{i[0].split("_")[1]}/{i[0].split("_")[2]}/*{i[1]}.*_Y_*.root' + f'/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].replace("_","/")}/*{i[1]}.*_Y_*.root' ) number_of_nodes = len(number_of_nodes) - 1 if number_of_nodes < 0: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index f863d933..22559f1b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -113,7 +113,8 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): ) return for nightLST in listOfNightsLST: - stereoDir = f"{coincidence_DL1_dir}/DL1Stereo/{nightLST.split('/')[-1]}" + night=nightLST.split('/')[-1] + stereoDir = f"{coincidence_DL1_dir}/DL1Stereo/{night}" os.makedirs(f"{stereoDir}/logs", exist_ok=True) if not os.listdir(f"{nightLST}"): continue @@ -151,7 +152,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): ] + rc ) - with open(f"{source}_StereoEvents_{nightLST.split('/')[-1]}.sh", "w") as f: + with open(f"{source}_StereoEvents_{night}.sh", "w") as f: f.writelines(lines) From 0416d258be85b77786e4c01ba2663cfada3246dc Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 4 Jun 2024 12:57:46 +0000 Subject: [PATCH 121/236] Updated docs --- README.rst | 4 +- magicctapipe/scripts/lst1_magic/README.md | 108 +++++++----------- .../semi_automatic_scripts/merging_runs.py | 3 +- 3 files changed, 45 insertions(+), 70 deletions(-) diff --git a/README.rst b/README.rst index 7dfc8c27..75057f0e 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,9 @@ for the joint analysis. Installation for users ---------------------- -*magic-cta-pipe* and its dependencies may be installed using the *Anaconda* or *Miniconda* package system. 
We recommend creating a conda virtual environment
+The very first step to reduce MAGIC-LST data is to have remote access/credentials to the IT Container. If you do not have it, please write an email to request it to , and the admin will send you the instructions to connect to the IT container.
+
+*magic-cta-pipe* and its dependencies may be installed using the *Anaconda* or *Miniconda* package system (if you have mamba installed, we recommend using it instead of conda, so that the installation process will be much faster; if you don't have anaconda/miniconda/miniforge, please install one of them into your workspace directory). We recommend creating a conda virtual environment
 first, to isolate the installed version and dependencies from your master environment (this is optional).
 
 The following command will set up a conda virtual environment, add the necessary package channels, and install *magic-cta-pipe* and its dependencies::
 
diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md
index d3194925..2eee37e4 100644
--- a/magicctapipe/scripts/lst1_magic/README.md
+++ b/magicctapipe/scripts/lst1_magic/README.md
@@ -22,31 +22,18 @@ Behind the scenes, the semi-automatic scripts will run:
 
 From DL3 on, the analysis is done with gammapy.
 
-## Installation
-
-1) The very first step to reduce MAGIC-LST data is to have remote access/credentials to the IT Container. If you do not have it, please write an email to request it to , and the admin will send you the instructions to connect to the IT container.
-
-2) Once connected to the IT Container, install magic-cta-pipe (e.g. in your home directory in the IT Container) with the following commands (if you have mamba installed, we recommend you to use it instead of conda, so that the installation process will be much faster; if you don't have anaconda/miniconda/miniforge, please install one of them into your workspace directory):
-
-```
-git clone https://github.com/cta-observatory/magic-cta-pipe.git
-cd magic-cta-pipe
-mamba env create -n magic-lst -f environment.yml
-mamba activate magic-lst
-pip install .
-```
 
 ## Analysis
 
-During the analysis, some files are automatically produced by the scripts and are saved in your working directory. These files are necessary for the subsequent steps in the analysis chain. It is therefore mandatory to always launch the scripts from the same working directory so that the output files stored there can be correctly assigned as input files at the subsequent analysis steps.
+During the analysis, some files (i.e., bash scripts, lists of sources and runs) are automatically produced by the scripts and are saved in your working directory. These files are necessary for the subsequent steps in the analysis chain. It is therefore mandatory to always launch the scripts from the same working directory so that the output files stored there can be correctly assigned as input files at the subsequent analysis steps.
 
 ### DL0 to DL1
 
-In this step, we will convert the MAGIC and Monte Carlo (MC) Data Level (DL) 0 to DL1 (our goal is to reach DL3).
+In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3) and MC DL0 to DL1.
 
-In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open the magic-lst environment with the command `conda activate magic-lst` and update the file `config_general.yaml` according to your analysis.
+In your working IT Container directory (e.g. 
/fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_general.yaml` according to your analysis. -The file `config_general.yaml` must contain the telescope IDs, the directories with the MC data (mandatory only if NSB_matching = False), the data selection, and some information on the night sky background (NSB) level and software versions: +The file `config_general.yaml` must contain the telescope IDs, the directories with the MC data (ignored if you set NSB_matching = true), the data selection, and some information on the night sky background (NSB) level and software versions: ``` mc_tel_ids: @@ -58,7 +45,7 @@ mc_tel_ids: MAGIC-II: 3 directories: - workspace_dir : "/fefs/aswg/workspace/yourname/yourprojectname/" # Output directory where all the data products will be saved. + workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. # MC paths below are ignored if you set NSB_matching = true. MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" # set to "" if you don't want to process these Monte Carlo simulations. MC_electrons : "" @@ -69,23 +56,25 @@ directories: data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. source_name_output: 'Crabtest' # Name tag of your target. Used only if source_name_database != null. - target_RA_deg : 83.629 # RA in degrees; Please set it to null if source_name_database=null. - target_Dec_deg: 22.015 # Dec in degrees; Please set it to null if source_name_database=null. + target_RA_deg : 83.629 # RA in degrees; Set to null if source_name_database=null. + target_Dec_deg: 22.015 # Dec in degrees; set to null if source_name_database=null. time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). - min : "2021_01_17" - max : "2022_12_03" + min : "2023_11_17" + max : "2024_03_03" date_list : ['2020_12_15','2021_03_11'] # LST list of days to be processed (only if time_range=False), format: YYYY_MM_DD. skip_LST_runs: [3216,3217] # LST runs to ignore. skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. general: SimTel_version: "v1.4" - LST_version : "v0.10" + LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" focal_length : "effective" + simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB + lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] - env_name : auto_MCP_PR # name of the conda environment to be used to process data. + env_name : magic-lst # name of the conda environment to be used to process data. cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). NSB_matching : true # Set to false to process also the MCs. Set to true if adequate MC productions (DLx) are already available on the IT Container. NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true. 
@@ -93,7 +82,7 @@ general: ``` -WARNING: the `LST_version` parameter must match the `processed_lstchain_file` version in the LST database (i.e., generally the last available and processable version of a run), which is the one used to evaluate the NSB level +WARNING: Only the runs for which the `LST_version` parameter matches the `processed_lstchain_file` version in the LST database (i.e., the version used to evaluate the NSB level; generally the last available and processable version of a run) will be processed. Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the time window (or list of nights) defined on the config_general.yaml file: @@ -102,10 +91,12 @@ Now that the configuration file is ready, let's create a list with all the MAGIC The output in the terminal should look like this: ``` Cleaning pre-existing *_LST_runs.txt and *_MAGIC_runs.txt files +Source: XXX Finding LST runs... +Source: XXX Finding MAGIC runs... ``` -And it will save the files TARGET_LST_runs.txt, TARGET_MAGIC_runs.txt, and list_sources.dat in your working directory. In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s). +And it will save the files TARGET_LST_runs.txt, TARGET_MAGIC_runs.txt, and list_sources.dat in your current working directory. In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s). At this point, we can convert the MAGIC data into DL1 format with the following command: > $ setting_up_config_and_dir -c config_general.yaml @@ -113,36 +104,34 @@ At this point, we can convert the MAGIC data into DL1 format with the following The output in the terminal will be something like this: ``` *** Converting DL0 into DL1 data *** -Process name: yourprojectname -To check the jobs submitted to the cluster, type: squeue -n yourprojectname +Process name: {source} +To check the jobs submitted to the cluster, type: squeue -n {source} This process will take about 10 min to run if the IT cluster is free. ``` The command `setting_up_config_and_dir` does a series of things: -- Collects the txt files produced above into a single txt file per NSB bin (e.g., `LST_1.5_.txt`), whose content is a list of all the `date,runs` couples associated to this background value. -- According to the date of the selected MAGIC runs, each run is associated (actually, each day, but here it is done run-wise) to the corresponding MAGIC observation period ST_XXXX. -- Creates a directory with the target name within the directory `yourprojectname` and several subdirectories inside it that are necessary for the rest of the data reduction. The main directories are: + +- Creates a directory with the target name within the directory `yourprojectname/{MCP_version}` and several subdirectories inside it that are necessary for the rest of the data reduction. 
The main directories are:
 ```
-/fefs/aswg/workspace/yourname/yourprojectname/Crab/
-/fefs/aswg/workspace/yourname/yourprojectname/Crab/VERSION/DL1
-/fefs/aswg/workspace/yourname/yourprojectname/Crab/VERSION/DL1/ST{}
-/fefs/aswg/workspace/yourname/yourprojectname/Crab/VERSION/DL1/[subdirectories]
+/fefs/aswg/workspace/yourname/yourprojectname/VERSION/
+/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1
+/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1/[subdirectories]
 ```
 where [subdirectories] stands for several subdirectories containing the MAGIC subruns in the DL1 format.
 
-- Generates a configuration file called `config_DL0_to_DL1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/Crab/` created in the previous step.
+- Generates a configuration file called `config_DL0_to_DL1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/VERSION/{source}/` created in the previous step.
 - Links the MAGIC data addresses to their respective subdirectories defined in the previous steps.
 - Runs the script `magic_calib_to_dl1.py` for each one of the linked data files.
 
 You can check if this process is done with the following commands:
 
-> $ squeue -n yourprojectname_Crab
+> $ squeue -n {source}
 
 or
 
 > $ squeue -u your_user_name
 
-Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/Crab/VERSION/DL1/` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. The next step of the conversion from DL0 to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`):
+Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. The next step of the conversion from calibrated to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`):
 
 > $ merging_runs (-c config_general.yaml)
 
@@ -151,14 +140,13 @@ The output in the terminal will be something like this:
 ```
 ***** Generating merge_MAGIC bashscripts...
 ***** Running merge_hdf_files.py in the MAGIC data files...
-Process name: merging_Crab
-To check the jobs submitted to the cluster, type: squeue -n merging_Crab
-This process will take about 10 to 30 min to run.
+Process name: merging_{source}
+To check the jobs submitted to the cluster, type: squeue -n merging_{source}
 ```
 
 This script will merge the MAGIC data files in the following order:
 - MAGIC subruns are merged into single runs.
-- MAGIC I and II runs are merged (only if both telescopes are used, of course).
+- MAGIC I and II runs are merged (only if both telescopes are available, of course).
 - All runs in specific nights are merged, such that in the end we have only one datafile per night. 
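The three merging levels above can be pictured with a short standalone sketch. This is an editorial illustration only, not part of the patch: the night directory path and the `dl1_M1.Run*.h5` glob pattern are assumptions based on the directory layout and file naming described in this README.

```
# Illustrative sketch: group MAGIC DL1 subrun files by run, mirroring the
# first merging level (subruns -> runs); assumed layout DL1/M1/<date>/<run>.
from collections import defaultdict
from pathlib import Path

night_dir = Path("DL1/M1/2020_12_15")  # hypothetical night directory
runs = defaultdict(list)
for subrun in sorted(night_dir.glob("*/dl1_M1.Run*.h5")):
    run_id = subrun.name.split(".")[1]  # e.g. "Run05093174"
    runs[run_id].append(subrun)

for run_id, files in runs.items():
    # The second level would pair each M1 run with the matching M2 run,
    # and the third level would merge all runs of the night into one file.
    print(run_id, len(files), "subruns to merge")
```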
### Coincident events and stereo parameters on DL1
 
@@ -167,32 +155,17 @@ To find coincident events between MAGIC and LST, starting from DL1 data, we run
 
 > $ coincident_events (-c config_general.yaml)
 
-This script creates the file config_coincidence.yaml containing the telescope IDs and the following parameters:
-```
-event_coincidence:
-    timestamp_type_lst: "dragon_time"  # select "dragon_time", "tib_time" or "ucts_time"
-    pre_offset_search: true
-    n_pre_offset_search_events: 100
-    window_half_width: "300 ns"
-    time_offset:
-        start: "-10 us"
-        stop: "0 us
-```
+This script creates the file config_coincidence.yaml containing the telescope IDs and the coincidence parameters listed in the general config.yaml file (the one in magicctapipe/resources).
 
-Then, for each NSB level, it reads the corresponding txt file generated by the second script and, taking into account the day (and, as a consequence the MAGIC period of the joint MAGIC runs) links the LST data files for these runs to the output directory [...]DL1Coincident/ST{}/NSB{}, and runs the script lst1_magic_event_coincidence.py in all of them.
+Then, it matches LST and MAGIC dates and links the LST data files to the output directory [...]DL1Coincident; eventually, it runs the script lst1_magic_event_coincidence.py in all of them.
 
 Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 files by running:
 
 > $ stereo_events (-c config_general.yaml)
 
-This script creates the file config_stereo.yaml with the following parameters:
-```
-stereo_reco:
-    quality_cuts: "(intensity > 50) & (width > 0)"
-    theta_uplim: "6 arcmin"
-```
+This script creates the file config_stereo.yaml containing the telescope IDs and the stereo parameters listed in the general config.yaml file (the one in magicctapipe/resources).
 
-It then creates the output directories for the DL1 with stereo parameters [...]DL1CoincidentStereo/ST{}/NSB{}, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files are then saved in these directories.
+It then creates the output directories for the DL1 with stereo parameters [...]DL1Stereo, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files are then saved in these directories.
 
 ### Random forest and DL1 to DL2
 
@@ -204,22 +177,23 @@ TBD.
 
 ## High-level analysis
 
-Since the DL3 may have only a few MBs, it is typically convenient to download it to your own computer at this point. It will be necessary to have astropy and gammapy (version > 0.20) installed before proceeding.
+Since the DL3 may have only a few MBs, it is typically convenient to download it to your own computer at this point. It will be necessary to have astropy and gammapy (version >= 0.20) installed before proceeding.
 
 The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs. 
 
-## For mainteiners (creation of MAGIC adn LST databases)
+## For maintainers (creation of MAGIC and LST databases)
 
 To create and update the MAGIC and LST databases (from the one produced by AB and FDP) you should use the scripts in `database_production`
 
-- `create_lst_table`: creates the LST database (1 row per LST run) by dropping some columns from the parent one (AB, FDP) and adding columns for NSB value (NaN by default), lstchain version (one column per version, False by default) and error codes (NaN by default). Launched as `python create_lst_table.py`
+- `create_lst_table`: creates the LST database (1 row per LST run) by dropping some columns from the parent one (AB, FDP) and adding columns for NSB value (-1 by default), lstchain available versions, most recent lstchain version, processed file and error codes (-1 by default). Launched as `python create_lst_table.py`
+
+- `lstchain_version`: this script loops over all the rows of the database, extracts date and run number from the table and looks for the data saved in the IT (i.e., which version of lstchain has been used to process a run). It evaluates all the versions used to process a run and the most recent MCP-compatible one according to a hard-coded, ordered list. Launched as `python lstchain_version.py`
 
-- `nsb_level`: evaluates, for every LST run, the respective NSB value (i.e., the median over the NSB estimated by lstchain over approx. 25 sub-runs per run). This scripts launch a set of jobs (one per run; each job calls the `LSTnsb` script) and each jobs produces an output txt file containing a string like `date,run,NSB`; in the title of these files, both the run number and the NSB range are indicated (0.5=(0,0.75), 1.0=(0.75, 1.25),...., 2.5=(2.25,2.75), 3.0=(2.75,3.25), `high`=(3.25,Infinity) ). To limit the number of simultaneous jobs running on SLURM, you should always provide a begin and a end date (format YYYY_MM_DD) in the options. Launched as `python nsb_level.py -c config_general.yaml -b begin_date -e end_date`
+- `nsb_level`: evaluates, for the last (MCP compatible) version of every LST run, the respective NSB value (i.e., the median over the NSB estimated by lstchain over approx. 25 sub-runs per run). This script launches a set of jobs (one per run; each job calls the `LSTnsb` script) and each job produces an output txt file containing a string like `date,run,NSB`; in the title of these files, both the run number and the NSB range are indicated (0.5=(0,0.75), 1.0=(0.75, 1.25),...., 2.5=(2.25,2.75), 3.0=(2.75,3.25), `high`=(3.25,Infinity) ). To limit the number of simultaneous jobs running on SLURM, you should always provide a begin and an end date (format YYYY_MM_DD) in the options. Launched as `python nsb_level.py -c config_general.yaml -b begin_date -e end_date`
 
 - `LSTnsb`: called by `nsb_level`, it gathers all the subruns for a run, evaluates the NSB for approx. 25 of them (using the lstchain `calculate_noise_parameters` function), evaluates the median over these values and the approximate NSB level (0.5, 1.0, 1.5, ...., 2.5, 3.0, `high`) and then creates one txt file per run. These files contain the value of the NSB (i.e., the median over subruns) and are needed to fill the database `nsb` column
 
-- `nsb_to_h5`: this script reads the txt files created by `nsb_level` to know the NSB value for each run. This value is used to fill the `nsb` column of the database at the location of the respective run number. It also put '000' as an error code in case the NSB is NaN (i.e., not evaluated for the LST run), '001' in case NSB>3.0 (too high NSB to be processed with a standard analysis!). Launched as `python nsb_to_h5.py`
+- `nsb_to_h5`: this script reads the txt files created by `nsb_level` to know the NSB value for each run. This value is used to fill the `nsb` column of the database at the location of the respective run number. It also updates the error codes (0: fine, 1: nsb=NaN, 2: NSB>3.0). 
Launched as `python nsb_to_h5.py` -- `lstchain_version`: this scripts loop over all the rows of the database, estract date and run number from the table and look for the data saved in the IT (i.e., which version of lstchain has been used to process a run). For each run, it sets to True the lstchain_0.9(0.10) cell if this run has been processed up to DL1 with lstchain 0.9(0.10). It sets error code '002' in case none of the two versions has been used to process the run. Launched as `python lstchain_version.py` diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 9c9553a5..11682aa7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -437,8 +437,7 @@ def main(): print( f"To check the jobs submitted to the cluster, type: squeue -n merging_{source_name}" ) - print("This process will take about 10 to 30 min to run.") - + if __name__ == "__main__": main() From 76c6641ffa4946240bd8b3b5e922e58219b1d1b1 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 4 Jun 2024 14:03:59 +0000 Subject: [PATCH 122/236] fix list --- .../semi_automatic_scripts/list_from_h5.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 3dadc304..bfcbdbe1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -52,10 +52,10 @@ def magic_date(df): The input dataframe with an added column. 
""" - date_lst = pd.to_datetime(df["Date (LST convention)"], format="%Y_%m_%d") + date_lst = pd.to_datetime(df["Date (LST convention)"], format="%Y%m%d") delta = pd.Timedelta("1 day") date_magic = date_lst + delta - date_magic = date_magic.dt.strftime("%Y-%m-%d") + date_magic = date_magic.dt.strftime("%Y%m%d") df["date_MAGIC"] = date_magic return df @@ -133,6 +133,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis source_list.append(source_out) for source_name in source_list: + file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", @@ -183,7 +184,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis continue with open(file_list[1], "a+") as f: - f.write(f"{MAGIC_date[k].replace('-','_')},{int(M2_run[k])}\n") + f.write(f"{MAGIC_date[k][0:4]}_{MAGIC_date[k][4:6]}_{MAGIC_date[k][6:8]},{int(M2_run[k])}\n") run_listed.append(int(M2_run[k])) @@ -264,18 +265,23 @@ def main(): df_LST = df_LST.reset_index() df_LST = df_LST.drop("index", axis=1) df_MAGIC1 = pd.read_hdf( - "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", + "/fefs/aswg/workspace/joanna.wojtowicz/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", key="MAGIC1/runs_M1", ) df_MAGIC2 = pd.read_hdf( - "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", + "/fefs/aswg/workspace/joanna.wojtowicz/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", key="MAGIC2/runs_M2", ) - list_date_LST = np.unique(df_LST["date_LST"]) - list_date_LST_low = [sub.replace("-", "_") for sub in list_date_LST] + df_MAGIC1['Source'] = df_MAGIC1['Source'].str.replace(' ','') + df_MAGIC2['Source'] = df_MAGIC2['Source'].str.replace(' ','') + list_date_LST = np.unique(df_LST["date_LST"]) + list_date_LST_low = [int(sub.replace("-", "")) for sub in list_date_LST] + df_MAGIC1 = df_MAGIC1[df_MAGIC1["Date (LST convention)"].isin(list_date_LST_low)] df_MAGIC2 = df_MAGIC2[df_MAGIC2["Date (LST convention)"].isin(list_date_LST_low)] + + clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2) list_run(source_in, source_out, df_LST, skip_LST, skip_MAGIC, True) From 8eced2de540ab20bdca4f740f41e0f45064f0297 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 11 Jun 2024 08:30:54 +0000 Subject: [PATCH 123/236] fix config --- .../coincident_events.py | 35 +++++-------------- .../database_production/LSTnsb.py | 5 --- .../database_production/nsb_level.py | 12 +++---- .../semi_automatic_scripts/list_from_h5.py | 13 +++---- .../semi_automatic_scripts/merging_runs.py | 16 +++------ .../setting_up_config_and_dir.py | 25 ++++--------- .../semi_automatic_scripts/stereo_events.py | 28 +++++---------- 7 files changed, 42 insertions(+), 92 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 9542e0cd..d698ec4a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -38,21 +38,17 @@ logger.setLevel(logging.INFO) -def configfile_coincidence(ids, target_dir, source_name, NSB_match): +def configfile_coincidence(target_dir, source_name): """ This function creates the configuration file needed for the event coincidence step Parameters ---------- - ids : list - List of telescope IDs target_dir : str Path to the working directory source_name : str Name of the target source - NSB_match : bool - If real data are matched 
to pre-processed MCs or not """ config_file = resource_file("config.yaml") @@ -60,27 +56,16 @@ def configfile_coincidence(ids, target_dir, source_name, NSB_match): config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) - coincidence = config_dict["event_coincidence"] - conf = {} - conf["event_coincidence"] = coincidence + conf = { + "mc_tel_ids": config_dict["mc_tel_ids"], + "event_coincidence": config_dict["event_coincidence"], + } + + file_name = f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" - if not NSB_match: - file_name = f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" - else: - file_name = f"{target_dir}/v{__version__}/{source_name}/config_coincidence.yaml" with open(file_name, "w") as f: - lines = [ - "mc_tel_ids:", - f"\n LST-1: {ids[0]}", - f"\n LST-2: {ids[1]}", - f"\n LST-3: {ids[2]}", - f"\n LST-4: {ids[3]}", - f"\n MAGIC-I: {ids[4]}", - f"\n MAGIC-II: {ids[5]}", - "\n", - ] - f.writelines(lines) + yaml.dump(conf, f, default_flow_style=False) @@ -210,8 +195,6 @@ def main(): args.config_file, "rb" ) as f: # "rb" mode opens the file in binary format for reading config = yaml.safe_load(f) - - telescope_ids = list(config["mc_tel_ids"].values()) target_dir = Path(config["directories"]["workspace_dir"]) NSB_match = config["general"]["NSB_matching"] @@ -231,7 +214,7 @@ def main(): for source_name in source_list: print("***** Generating file config_coincidence.yaml...") - configfile_coincidence(telescope_ids, target_dir, source_name, NSB_match) + configfile_coincidence(target_dir, source_name) LST_runs_and_dates = f"{source_name}_LST_runs.txt" LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",", ndmin=2) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index b97bcf71..d161337c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -4,14 +4,11 @@ import argparse import glob import logging -import os import numpy as np import yaml from lstchain.image.modifier import calculate_noise_parameters -from magicctapipe.io import resource_file - __all__ = ["nsb"] logger = logging.getLogger(__name__) @@ -135,8 +132,6 @@ def main(): width.append(0.25) nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] nsb_limit.insert(0, 0) - - LST_files = np.sort(glob.glob(f"nsb_LST_*_{run_number}.txt")) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 0daee37d..c3c45ce4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -6,15 +6,15 @@ import argparse import glob +import json import logging import os from datetime import datetime -import json + import numpy as np import pandas as pd import yaml - from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines from .lstchain_version import lstchain_versions @@ -114,11 +114,11 @@ def main(): str(conda_path) + "/lib/python3.11/site-packages/lstchain/data/lstchain_standard_config.json" ) - with open(lst_config_orig, 'r') as f_lst: - lst_dict=json.load(f_lst) 
+ with open(lst_config_orig, "r") as f_lst: + lst_dict = json.load(f_lst) if lstchain_modified: - lst_dict["source_config"]['LSTEventSource']['use_flatfield_heuristic'] = True - with open("lstchain.json", "w+") as outfile: + lst_dict["source_config"]["LSTEventSource"]["use_flatfield_heuristic"] = True + with open("lstchain.json", "w+") as outfile: json.dump(lst_dict, outfile) lst_config = "lstchain.json" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index bfcbdbe1..69fae60e 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -133,7 +133,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis source_list.append(source_out) for source_name in source_list: - + file_list = [ f"{source_name}_LST_runs.txt", f"{source_name}_MAGIC_runs.txt", @@ -184,7 +184,9 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis continue with open(file_list[1], "a+") as f: - f.write(f"{MAGIC_date[k][0:4]}_{MAGIC_date[k][4:6]}_{MAGIC_date[k][6:8]},{int(M2_run[k])}\n") + f.write( + f"{MAGIC_date[k][0:4]}_{MAGIC_date[k][4:6]}_{MAGIC_date[k][6:8]},{int(M2_run[k])}\n" + ) run_listed.append(int(M2_run[k])) @@ -272,15 +274,14 @@ def main(): "/fefs/aswg/workspace/joanna.wojtowicz/Common_MAGIC_LST1_data_MAGIC_RUNS.h5", key="MAGIC2/runs_M2", ) - df_MAGIC1['Source'] = df_MAGIC1['Source'].str.replace(' ','') - df_MAGIC2['Source'] = df_MAGIC2['Source'].str.replace(' ','') + df_MAGIC1["Source"] = df_MAGIC1["Source"].str.replace(" ", "") + df_MAGIC2["Source"] = df_MAGIC2["Source"].str.replace(" ", "") list_date_LST = np.unique(df_LST["date_LST"]) list_date_LST_low = [int(sub.replace("-", "")) for sub in list_date_LST] - + df_MAGIC1 = df_MAGIC1[df_MAGIC1["Date (LST convention)"].isin(list_date_LST_low)] df_MAGIC2 = df_MAGIC2[df_MAGIC2["Date (LST convention)"].isin(list_date_LST_low)] - clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 11682aa7..bdca7dff 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -96,10 +96,8 @@ def split_train_test(target_dir, train_fraction, source_name): for directory in tqdm( range(len(list_of_dir)) ): # tqdm allows us to print a progessbar in the terminal - node = list_of_dir[directory].split('/')[-2] - os.makedirs( - f"{proton_dir}/train/{node}", exist_ok=True - ) + node = list_of_dir[directory].split("/")[-2] + os.makedirs(f"{proton_dir}/train/{node}", exist_ok=True) os.makedirs( f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}', exist_ok=True, @@ -109,13 +107,9 @@ def split_train_test(target_dir, train_fraction, source_name): ) number_train_runs = int(len(list_of_runs) * train_fraction) for j in list_of_runs[0:number_train_runs]: - os.system( - f"mv {j} {proton_dir}/train/{node}" - ) + os.system(f"mv {j} {proton_dir}/train/{node}") - os.system( - f"cp {list_of_dir[directory]}*.txt {proton_dir}/train/{node}" - ) + os.system(f"cp {list_of_dir[directory]}*.txt {proton_dir}/train/{node}") os.system( f"mv {list_of_dir[directory]}*.txt {proton_dir}/../protons_test/{node}" ) @@ -437,7 +431,7 @@ def main(): print( f"To 
check the jobs submitted to the cluster, type: squeue -n merging_{source_name}" ) - + if __name__ == "__main__": main() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 70a9d686..983eadd4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -44,15 +44,13 @@ logger.setLevel(logging.INFO) -def config_file_gen(ids, target_dir, noise_value, NSB_match, source_name): +def config_file_gen(target_dir, noise_value, NSB_match, source_name): """ Here we create the configuration file needed for transforming DL0 into DL1 Parameters ---------- - ids : list - Telescope IDs target_dir : path Directory to store the results noise_value : list @@ -75,23 +73,14 @@ def config_file_gen(ids, target_dir, noise_value, NSB_match, source_name): LST_config["increase_nsb"]["extra_noise_in_dim_pixels"] = noise_value[0] LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2] LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1] - conf = {} - conf["LST"] = LST_config + conf = { + "mc_tel_ids": config_dict["mc_tel_ids"], + "LST": LST_config, + "MAGIC": MAGIC_config, + } - conf["MAGIC"] = MAGIC_config file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" with open(file_name, "w") as f: - lines = [ - "mc_tel_ids:", - f"\n LST-1: {ids[0]}", - f"\n LST-2: {ids[1]}", - f"\n LST-3: {ids[2]}", - f"\n LST-4: {ids[3]}", - f"\n MAGIC-I: {ids[4]}", - f"\n MAGIC-II: {ids[5]}", - "\n", - ] - f.writelines(lines) yaml.dump(conf, f, default_flow_style=False) @@ -454,7 +443,7 @@ def main(): str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - telescope_ids, target_dir, noise_value, NSB_match, source_name + target_dir, noise_value, NSB_match, source_name ) # TODO: fix here if not NSB_match: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 22559f1b..d8c8f329 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -40,21 +40,17 @@ logger.setLevel(logging.INFO) -def configfile_stereo(ids, target_dir, source_name, NSB_match): +def configfile_stereo(target_dir, source_name): """ This function creates the configuration file needed for the event stereo step Parameters ---------- - ids : list - List of telescope IDs target_dir : str Path to the working directory source_name : str Name of the target source - NSB_match : bool - If real data are matched to pre-processed MCs or not """ config_file = resource_file("config.yaml") @@ -62,21 +58,14 @@ def configfile_stereo(ids, target_dir, source_name, NSB_match): config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) - conf = {"stereo_reco": config_dict["stereo_reco"]} + conf = { + "mc_tel_ids": config_dict["mc_tel_ids"], + "stereo_reco": config_dict["stereo_reco"], + } file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" with open(file_name, "w") as f: - lines = [ - "mc_tel_ids:", - f"\n LST-1: {ids[0]}", - f"\n 
LST-2: {ids[1]}", - f"\n LST-3: {ids[2]}", - f"\n LST-4: {ids[3]}", - f"\n MAGIC-I: {ids[4]}", - f"\n MAGIC-II: {ids[5]}", - "\n", - ] - f.writelines(lines) + yaml.dump(conf, f, default_flow_style=False) @@ -113,7 +102,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): ) return for nightLST in listOfNightsLST: - night=nightLST.split('/')[-1] + night = nightLST.split("/")[-1] stereoDir = f"{coincidence_DL1_dir}/DL1Stereo/{night}" os.makedirs(f"{stereoDir}/logs", exist_ok=True) if not os.listdir(f"{nightLST}"): @@ -246,7 +235,6 @@ def main(): env_name = config["general"]["env_name"] NSB_match = config["general"]["NSB_matching"] - telescope_ids = list(config["mc_tel_ids"].values()) source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] @@ -259,7 +247,7 @@ def main(): for source_name in source_list: print("***** Generating file config_stereo.yaml...") - configfile_stereo(telescope_ids, target_dir, source_name, NSB_match) + configfile_stereo(target_dir, source_name) # Below we run the analysis on the MC data if ( From 26125b6439c2bb9d4c2762362064ae83f7008e52 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 11 Jun 2024 08:49:09 +0000 Subject: [PATCH 124/236] auto-deleting of files --- .../lst1_magic/semi_automatic_scripts/merging_runs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index bdca7dff..1dcb191e 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -66,7 +66,7 @@ def cleaning(list_of_nodes): cwd = os.getcwd() for i in tqdm(range(len(list_of_nodes)), desc="Cleaning failed runs"): os.chdir(list_of_nodes[i]) - os.system('find . -type f -name "*.h5" -size -1k -delete') + os.system('find . 
-type f -name "dl1_[gphe]*_zd*_az*.h5" -size -1k -delete') os.chdir(cwd) print("Cleaning done.") @@ -170,9 +170,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c if os.path.exists(f"{indir}"): outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" os.makedirs(f"{outdir}/logs", exist_ok=True) - os.system( - f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' - ) + #os.system( + # f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' + #) f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) From b32151bceb792bca532626d235ecf12091542530 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 11 Jun 2024 14:02:16 +0000 Subject: [PATCH 125/236] MC out of loop on sources (first step) --- .../semi_automatic_scripts/__init__.py | 6 +- .../setting_up_config_and_dir.py | 220 +++++++++++------- 2 files changed, 135 insertions(+), 91 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 56df1adc..f5c23b44 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -3,7 +3,8 @@ from .merging_runs import cleaning, merge, mergeMC, split_train_test from .setting_up_config_and_dir import ( config_file_gen, - directories_generator, + directories_generator_real, + directories_generator_MC, lists_and_bash_gen_MAGIC, lists_and_bash_generator, ) @@ -17,7 +18,8 @@ "config_file_gen", "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", - "directories_generator", + "directories_generator_real", + "directories_generator_MC", "configfile_coincidence", "linking_bash_lst", "configfile_stereo", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 983eadd4..a03903d1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -36,7 +36,8 @@ "config_file_gen", "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", - "directories_generator", + "directories_generator_real", + "directories_generator_MC", ] logger = logging.getLogger(__name__) @@ -78,8 +79,10 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name): "LST": LST_config, "MAGIC": MAGIC_config, } - - file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" + if source_name == 'MC': + file_name = f"{target_dir}/v{__version__}/MC/config_DL0_to_DL1.yaml" + else: + file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" with open(file_name, "w") as f: yaml.dump(conf, f, default_flow_style=False) @@ -91,7 +94,6 @@ def lists_and_bash_generator( SimTel_version, focal_length, env_name, - source_name, cluster, ): @@ -123,15 +125,15 @@ def lists_and_bash_generator( if MC_path == "": return print(f"running {particle_type} from {MC_path}") - process_name = source_name + process_name = 'MC' list_of_nodes = glob.glob(f"{MC_path}/node*") - dir1 = f"{target_dir}/v{__version__}/{source_name}" + dir1 = f"{target_dir}/v{__version__}/MC" with open( f"{dir1}/logs/list_nodes_{particle_type}_complete.txt", "w" ) as f: # creating list_nodes_gammas_complete.txt for i in 
list_of_nodes: - f.write(f"{i}/output{SimTel_version}\n") + f.write(f"{i}/output_{SimTel_version}\n") with open( f"{dir1}/logs/list_folder_{particle_type}.txt", "w" @@ -151,21 +153,21 @@ def lists_and_bash_generator( slurm = slurm_lines( queue="short", job_name=process_name, - out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-linkMC-%x.%j", + out_name=f"{dir1}/DL1/{particle_type}/logs/slurm-linkMC-%x.%j", ) lines_of_config_file = slurm + [ "while read -r -u 3 lineA && read -r -u 4 lineB\n", "do\n", - f" cd {dir1}/DL1/MC/{particle_type}\n", + f" cd {dir1}/DL1/{particle_type}\n", " mkdir $lineB\n", " cd $lineA\n", " ls -lR *.gz |wc -l\n", - f" mkdir -p {dir1}/DL1/MC/{particle_type}/$lineB/logs/\n", - f" ls *.gz > {dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n", + f" mkdir -p {dir1}/DL1/{particle_type}/$lineB/logs/\n", + f" ls *.gz > {dir1}/DL1/{particle_type}/$lineB/logs/list_dl0.txt\n", ' string=$lineA"/"\n', - f" export file={dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0.txt\n\n", + f" export file={dir1}/DL1/{particle_type}/$lineB/logs/list_dl0.txt\n\n", " cat $file | while read line; do echo $string${line}" - + f" >>{dir1}/DL1/MC/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", + + f" >>{dir1}/DL1/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", ' echo "folder $lineB and node $lineA"\n', f'done 3<"{dir1}/logs/list_nodes_{particle_type}_complete.txt" 4<"{dir1}/logs/list_folder_{particle_type}.txt"\n', "", @@ -184,19 +186,19 @@ def lists_and_bash_generator( job_name=process_name, array=number_of_nodes, mem="10g", - out_name=f"{dir1}/DL1/MC/{particle_type}/logs/slurm-%x.%A_%a", + out_name=f"{dir1}/DL1/{particle_type}/logs/slurm-%x.%A_%a", ) lines_of_config_file = slurm + [ - f"cd {dir1}/DL1/MC/{particle_type}\n\n", + f"cd {dir1}/DL1/{particle_type}\n\n", f"export INF={dir1}/logs\n", f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "cd $SAMPLE\n\n", - f"export LOG={dir1}/DL1/MC/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", + f"export LOG={dir1}/DL1/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", "cat logs/list_dl0_ok.txt | while read line\n", "do\n", f" cd {dir1}/../\n", - f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/MC/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", + f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", "done\n", "", ] @@ -292,10 +294,9 @@ def lists_and_bash_gen_MAGIC( f.writelines(lines) -def directories_generator( +def directories_generator_real( target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name ): - """ Here we create all subdirectories for a given workspace and target name. 
@@ -317,38 +318,21 @@ def directories_generator( os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") else: + dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1/Observations") - dir_list = [ - "Observations", - "MC/gammas", - "MC/gammadiffuse", - "MC/electrons", - "MC/protons", - "MC/helium", - ] if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): - os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/logs", exist_ok=True - ) - for dir in dir_list: - os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs", - exist_ok=True, - ) + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/Observations", exist_ok=True) + else: overwrite = input( - f'MC&data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? [only "y" or "n"]: ' + f'data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? [only "y" or "n"]: ' ) if overwrite == "y": os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/logs", exist_ok=True + f"{target_dir}/v{__version__}/{source_name}/DL1/Observations", exist_ok=True ) - for dir in dir_list: - os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1/{dir}/logs", - exist_ok=True, - ) + else: print("Directory not modified.") @@ -359,6 +343,57 @@ def directories_generator( for magic in [1, 2]: if telescope_ids[magic - 3] > 0: os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs", exist_ok=True) +def directories_generator_MC( + target_dir, telescope_ids, NSB_match +): + + """ + Here we create all subdirectories for a given workspace and target name. + + Parameters + ---------- + target_dir : str + Directory to store the results + telescope_ids : list + List of the telescope IDs (set by the user) + NSB_match : bool + If real data are matched to pre-processed MCs or not + """ + + + dl1_dir = str(f"{target_dir}/v{__version__}/MC/DL1") + + dir_list = [ + "gammas", + "gammadiffuse", + "electrons", + "protons", + "helium", + ] + if not os.path.exists(f'{target_dir}/v{__version__}/MC'): + os.makedirs(f"{target_dir}/v{__version__}/MC/logs", exist_ok=True) + os.makedirs(f"{target_dir}/v{__version__}/MC/DL1", exist_ok=True) + for dir in dir_list: + os.makedirs( + f"{target_dir}/v{__version__}/MC/DL1/{dir}/logs", + exist_ok=True, + ) + else: + overwrite = input( + f'MC directory already exists. Would you like to overwrite it? 
[only "y" or "n"]: ' + ) + if overwrite == "y": + os.system(f"rm -r {target_dir}/v{__version__}/MC") + for dir in dir_list: + os.makedirs( + f"{target_dir}/v{__version__}/MC/DL1/{dir}/logs", + exist_ok=True, + ) + else: + print("Directory not modified.") + + + def main(): @@ -408,29 +443,71 @@ def main(): source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] - + target_dir = Path(config["directories"]["workspace_dir"]) source_list = [] if source_in is None: source_list = joblib.load("list_sources.dat") else: source_list.append(source) + noise_value = [0, 0, 0] + if not NSB_match: + nsb = config["general"]["NSB_MC"] + + noisebright = 1.15 * pow(nsb, 1.115) + biasdim = 0.358 * pow(nsb, 0.805) + noise_value = [nsb, noisebright, biasdim] + directories_generator_MC( + str(target_dir), telescope_ids, NSB_match + ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target + if not NSB_match: + # Below we run the analysis on the MC data + if (args.analysis_type == "onlyMC") or ( + args.analysis_type == "doEverything" + ): + config_file_gen( + target_dir, noise_value, NSB_match, 'MC' + ) # TODO: fix here + to_process = { + "gammas": MC_gammas, + "electrons": MC_electrons, + "helium": MC_helium, + "protons": MC_protons, + "gammadiffuse": MC_gammadiff, + } + for particle in to_process.keys(): + lists_and_bash_generator( + particle, + target_dir, + to_process[particle], + SimTel_version, + focal_length, + env_name, + + cluster, + ) + + # Here we do the MC DL0 to DL1 conversion: + list_of_MC = glob.glob("linking_MC_*s.sh") + + # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") + + for n, run in enumerate(list_of_MC): + if n == 0: + launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + else: + launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + + os.system(launch_jobs_MC) for source_name in source_list: - target_dir = Path(config["directories"]["workspace_dir"]) + MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - noise_value = [0, 0, 0] - if not NSB_match: - nsb = config["general"]["NSB_MC"] - - noisebright = 1.15 * pow(nsb, 1.115) - biasdim = 0.358 * pow(nsb, 0.805) - noise_value = [nsb, noisebright, biasdim] - + # TODO: fix here above print("*** Converting Calibrated into DL1 data ***") print(f"Process name: {source_name}") @@ -439,49 +516,14 @@ def main(): ) print("This process will take about 10 min to run if the IT cluster is free.") - directories_generator( + directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( target_dir, noise_value, NSB_match, source_name ) # TODO: fix here - if not NSB_match: - # Below we run the analysis on the MC data - if (args.analysis_type == "onlyMC") or ( - args.analysis_type == "doEverything" - ): - to_process = { - "gammas": MC_gammas, - "electrons": MC_electrons, - "helium": MC_helium, - 
"protons": MC_protons, - "gammadiffuse": MC_gammadiff, - } - for particle in to_process.keys(): - lists_and_bash_generator( - particle, - target_dir, - to_process[particle], - SimTel_version, - focal_length, - env_name, - source_name, - cluster, - ) - - # Here we do the MC DL0 to DL1 conversion: - list_of_MC = glob.glob("linking_MC_*s.sh") - - # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") - - for n, run in enumerate(list_of_MC): - if n == 0: - launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - else: - launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - - os.system(launch_jobs_MC) + # Below we run the analysis on the MAGIC data if ( From 390daf9b7eca18b51a47ed9066501598993b9d71 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 12 Jun 2024 08:20:50 +0000 Subject: [PATCH 126/236] put config.yaml in config_general (+ default) --- .../semi_automatic_scripts/coincident_events.py | 12 +++++++----- .../semi_automatic_scripts/config_general.yaml | 1 + .../setting_up_config_and_dir.py | 13 +++++++------ .../semi_automatic_scripts/stereo_events.py | 11 +++++++---- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index d698ec4a..f8087e32 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -38,7 +38,7 @@ logger.setLevel(logging.INFO) -def configfile_coincidence(target_dir, source_name): +def configfile_coincidence(target_dir, source_name, config_gen): """ This function creates the configuration file needed for the event coincidence step @@ -50,15 +50,17 @@ def configfile_coincidence(target_dir, source_name): source_name : str Name of the target source """ - - config_file = resource_file("config.yaml") + config_file = config_gen['general']['base_config_file'] + if config_file=='': + config_file = resource_file("config.yaml") + with open( config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) conf = { - "mc_tel_ids": config_dict["mc_tel_ids"], + "mc_tel_ids": config_gen["mc_tel_ids"], "event_coincidence": config_dict["event_coincidence"], } @@ -214,7 +216,7 @@ def main(): for source_name in source_list: print("***** Generating file config_coincidence.yaml...") - configfile_coincidence(target_dir, source_name) + configfile_coincidence(target_dir, source_name, config) LST_runs_and_dates = f"{source_name}_LST_runs.txt" LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",", ndmin=2) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 51a52c12..7864778c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -28,6 +28,7 @@ data_selection: skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. general: + base_config_file: '' # path + name to a custom MCP config file. 
If not provided, the default config.yaml file will be used SimTel_version: "v1.4" LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index a03903d1..305681ab 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -45,7 +45,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, noise_value, NSB_match, source_name): +def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -61,8 +61,9 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name): source_name : str Name of the target source """ - - config_file = resource_file("config.yaml") + config_file = config_gen['general']['base_config_file'] + if config_file=='': + config_file = resource_file("config.yaml") with open( config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading @@ -75,7 +76,7 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name): LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2] LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1] conf = { - "mc_tel_ids": config_dict["mc_tel_ids"], + "mc_tel_ids": config_gen["mc_tel_ids"], "LST": LST_config, "MAGIC": MAGIC_config, } @@ -466,7 +467,7 @@ def main(): args.analysis_type == "doEverything" ): config_file_gen( - target_dir, noise_value, NSB_match, 'MC' + target_dir, noise_value, NSB_match, 'MC', config ) # TODO: fix here to_process = { "gammas": MC_gammas, @@ -520,7 +521,7 @@ def main(): str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - target_dir, noise_value, NSB_match, source_name + target_dir, noise_value, NSB_match, source_name, config ) # TODO: fix here diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index d8c8f329..7ee42cab 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -40,7 +40,7 @@ logger.setLevel(logging.INFO) -def configfile_stereo(target_dir, source_name): +def configfile_stereo(target_dir, source_name, config_gen): """ This function creates the configuration file needed for the event stereo step @@ -53,13 +53,16 @@ def configfile_stereo(target_dir, source_name): Name of the target source """ - config_file = resource_file("config.yaml") + config_file = config_gen['general']['base_config_file'] + if config_file=='': + config_file = resource_file("config.yaml") + with open( config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) conf = { - "mc_tel_ids": config_dict["mc_tel_ids"], + "mc_tel_ids": config_gen["mc_tel_ids"], "stereo_reco": config_dict["stereo_reco"], } @@ -247,7 +250,7 @@ def main(): for source_name in source_list: print("***** Generating file config_stereo.yaml...") - configfile_stereo(target_dir, source_name) + 
configfile_stereo(target_dir, source_name, config) # Below we run the analysis on the MC data if ( From 03aacf85f5df76392c5dee6d262ad1d485b2a2c0 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 12 Jun 2024 12:30:56 +0000 Subject: [PATCH 127/236] create dir. MC only if MC processed --- .../semi_automatic_scripts/setting_up_config_and_dir.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index 305681ab..f5ef9550 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -458,14 +458,15 @@ def main(): noisebright = 1.15 * pow(nsb, 1.115) biasdim = 0.358 * pow(nsb, 0.805) noise_value = [nsb, noisebright, biasdim] - directories_generator_MC( - str(target_dir), telescope_ids, NSB_match - ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target + if not NSB_match: # Below we run the analysis on the MC data if (args.analysis_type == "onlyMC") or ( args.analysis_type == "doEverything" ): + directories_generator_MC( + str(target_dir), telescope_ids, NSB_match + ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( target_dir, noise_value, NSB_match, 'MC', config ) # TODO: fix here From 832b6718b28950172e1e11eb5eed939b96eaa4ee Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 12 Jun 2024 12:46:34 +0000 Subject: [PATCH 128/236] Fix for different MC DL0 simtel output versions --- .../semi_automatic_scripts/config_general.yaml | 1 - .../setting_up_config_and_dir.py | 17 +++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 7864778c..9a4bafef 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -29,7 +29,6 @@ data_selection: general: base_config_file: '' # path + name to a custom MCP config file. If not provided, the default config.yaml file will be used - SimTel_version: "v1.4" LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! 
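    # note: when the step-specific configs are generated, `mc_tel_ids` is taken
    # from this file, while the LST, MAGIC, event_coincidence and stereo_reco
    # sections are read from `base_config_file` (or from the packaged
    # config.yaml if that entry is left empty)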
LST_tailcut : "tailcut84" focal_length : "effective" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index f5ef9550..d810453b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -92,7 +92,6 @@ def lists_and_bash_generator( particle_type, target_dir, MC_path, - SimTel_version, focal_length, env_name, cluster, @@ -111,8 +110,6 @@ def lists_and_bash_generator( Directory to store the results MC_path : str Path to the MCs DL0s - SimTel_version : str - Version of SimTel (used to produce MCs) focal_length : str Focal length to be used to process MCs (e.g., 'nominal') env_name : str @@ -134,7 +131,17 @@ def lists_and_bash_generator( f"{dir1}/logs/list_nodes_{particle_type}_complete.txt", "w" ) as f: # creating list_nodes_gammas_complete.txt for i in list_of_nodes: - f.write(f"{i}/output_{SimTel_version}\n") + out_list=glob.glob(f"{i}/output*\n") + if len(out_list)==0: + logging.error(f'No output file for node {i}, or the directory structure is not the usual one. Skipping...') + continue + elif len(out_list)==1: + f.write(f"{out_list[0]}\n") + else: + output_index = input(f"The available outputs are {out_list}, please provide the array index of the desired one:") + f.write(f"{out_list[output_index]}\n") + + with open( f"{dir1}/logs/list_folder_{particle_type}.txt", "w" @@ -430,7 +437,6 @@ def main(): config = yaml.safe_load(f) telescope_ids = list(config["mc_tel_ids"].values()) - SimTel_version = config["general"]["SimTel_version"] env_name = config["general"]["env_name"] NSB_match = config["general"]["NSB_matching"] @@ -482,7 +488,6 @@ def main(): particle, target_dir, to_process[particle], - SimTel_version, focal_length, env_name, From 11e79ea2452ebce41ca5fffe658fadf34c3cbaab Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 12 Jun 2024 12:53:01 +0000 Subject: [PATCH 129/236] fix typo --- .../semi_automatic_scripts/setting_up_config_and_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py index d810453b..8c87a4f1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py @@ -133,7 +133,7 @@ def lists_and_bash_generator( for i in list_of_nodes: out_list=glob.glob(f"{i}/output*\n") if len(out_list)==0: - logging.error(f'No output file for node {i}, or the directory structure is not the usual one. Skipping...') + logger.error(f'No output file for node {i}, or the directory structure is not the usual one. 
Skipping...') continue elif len(out_list)==1: f.write(f"{out_list[0]}\n") From 090cfbbfedf5fab4ed67561aaa6e90e1c7bd4dee Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Wed, 19 Jun 2024 12:58:09 +0000 Subject: [PATCH 130/236] changes in job_accounting - it catches now a case when RSS memory is not available and returned as '0' (instead of much more common empty string) - now from the config file we read also the source name and range of dates and check only directories that much those note that the other option with list of dates instead of range is still ignored --- .../semi_automatic_scripts/job_accounting.py | 42 +++++++++++++++---- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 9f779329..bba61134 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -1,11 +1,14 @@ """ This script does checks of status of jobs based on the log files generated during the execution. It also does accounting of memory and CPU usage +It loads the config_general file to figure out what files it should look for and processes source name and time range +For the moment it ignores date_list and skip_*_runs """ import argparse import glob import os -from datetime import timedelta +import re +from datetime import datetime, timedelta from subprocess import PIPE, run import numpy as np @@ -81,19 +84,27 @@ def main(): config = yaml.safe_load(f) # TODO: those variables will be needed when more features are implemented - # source_in = config["data_selection"]["source_name_database"] - # source_out = config["data_selection"]["source_name_output"] - # timerange = config["data_selection"]["time_range"] + source_out = config["data_selection"]["source_name_output"] + timerange = config["data_selection"]["time_range"] + # skip_LST = config["data_selection"]["skip_LST_runs"] # skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"] NSB_matching = config["general"]["NSB_matching"] work_dir = config["directories"]["workspace_dir"] print(f"Checking progress of jobs stored in {work_dir}") + if source_out is None: + source_out = "*" dirs = sorted( - glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/[0-9]*/[M0-9]*") - + glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/Merged_[0-9]*") - + glob.glob(f"{work_dir}/v{args.version}/*/{args.data_level}/" + "[0-9]" * 8) + glob.glob( + f"{work_dir}/v{args.version}/{source_out}/{args.data_level}/[0-9]*/[M0-9]*" + ) + + glob.glob( + f"{work_dir}/v{args.version}/{source_out}/{args.data_level}/Merged_[0-9]*" + ) + + glob.glob( + f"{work_dir}/v{args.version}/{source_out}/{args.data_level}/" + "[0-9]" * 8 + ) ) if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] @@ -106,6 +117,12 @@ def main(): ) exit(1) + if timerange: + timemin = str(config["data_selection"]["min"]) + timemax = str(config["data_selection"]["max"]) + timemin = datetime.strptime(timemin, "%Y_%m_%d") + timemax = datetime.strptime(timemax, "%Y_%m_%d") + all_todo = 0 all_return = 0 all_good = 0 @@ -114,6 +131,12 @@ def main(): total_time = 0 all_jobs = [] for dir in dirs: + this_date = re.sub(f".+/{args.data_level}/", "", dir) + this_date = re.sub(r"\D", "", this_date.split("/")[0]) + this_date = datetime.strptime(this_date, "%Y%m%d") + if timerange and (this_date < timemin or this_date > timemax): + 
continue + print(dir) list_dl0 = "" ins = ["list_dl0.txt", "list_LST.txt", "list_coin.txt", "list_cal.txt"] @@ -154,7 +177,6 @@ def main(): ): # MaxRSS sometimes is missing in the output cpu = out[1] mem = None - print("Memory usage information is missing") else: print("Unexpected sacct output: {out}") if cpu is not None: @@ -166,8 +188,10 @@ def main(): total_time += delta.total_seconds() / 3600 all_jobs += [slurm_id] this_cpu.append(delta) - if mem is not None: + if mem is not None and mem.endswith("M"): this_mem.append(float(mem[0:-1])) + else: + print("Memory usage information is missing") else: print(f"file {file_in} failed with error {rc}") if len(this_cpu) > 0: From be85c1730babefcccc449e4834444b93afd6cea8 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 20 Jun 2024 08:33:52 +0000 Subject: [PATCH 131/236] slight refactoring to avoid multiply-broken lines --- .../semi_automatic_scripts/job_accounting.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index bba61134..5fca0969 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -95,16 +95,12 @@ def main(): print(f"Checking progress of jobs stored in {work_dir}") if source_out is None: source_out = "*" + + indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" dirs = sorted( - glob.glob( - f"{work_dir}/v{args.version}/{source_out}/{args.data_level}/[0-9]*/[M0-9]*" - ) - + glob.glob( - f"{work_dir}/v{args.version}/{source_out}/{args.data_level}/Merged_[0-9]*" - ) - + glob.glob( - f"{work_dir}/v{args.version}/{source_out}/{args.data_level}/" + "[0-9]" * 8 - ) + glob.glob(f"{indir}/[0-9]*/[M0-9]*") + + glob.glob(f"{indir}/Merged_[0-9]*") + + glob.glob(f"{indir}/" + "[0-9]" * 8) ) if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] From ad33ed821ceddfcd27ae26071efbd019f9e38a41 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Thu, 20 Jun 2024 15:20:07 +0000 Subject: [PATCH 132/236] introduced updating of the database it takes text outputs with return codes of running in the cluster, and updates the h5 database with the runs for the moment it only works with DL1/M[12] step --- .../semi_automatic_scripts/job_accounting.py | 66 ++++++++++++++++++- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 5fca0969..7251da51 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -3,15 +3,19 @@ It also does accounting of memory and CPU usage It loads the config_general file to figure out what files it should look for and processes source name and time range For the moment it ignores date_list and skip_*_runs + +It can also update the h5 file with the list of runs to process """ import argparse import glob +import json import os import re from datetime import datetime, timedelta from subprocess import PIPE, run import numpy as np +import pandas as pd import yaml from magicctapipe import __version__ @@ -79,10 +83,42 @@ def main(): help="No CPU/Memory usage check (faster)", ) + parser.add_argument( + "--run-list-file", + "-r", + 
dest="run_list", + type=str, + default=None, + help="h5 file with run list", + ) + args = parser.parse_args() with open(args.config_file, "r") as f: config = yaml.safe_load(f) + if args.run_list is not None: + try: + h5key = "joint_obs" + for magic in [1, 2]: + if args.data_level[-2:] == f"M{magic}": + h5key = f"MAGIC{magic}/runs_M{magic}" + run_key = "Run ID" + h5runs = pd.read_hdf(args.run_list, key=h5key) + except (FileNotFoundError, KeyError): + print(f"Cannot open {h5key} in {args.run_list}") + exit(1) + + if h5key != "joint_obs": + rc_col = "DL1_rc" + + if rc_col not in h5runs.keys(): + h5runs[rc_col] = "{}" + h5runs[rc_col + "_all"] = None + + rc_dicts = {} + for rrun, dct in np.array(h5runs[[run_key, rc_col]]): + rc_dicts[rrun] = json.loads(dct) + # TODO: those variables will be needed when more features are implemented source_out = config["data_selection"]["source_name_output"] timerange = config["data_selection"]["time_range"] @@ -127,9 +163,10 @@ def main(): total_time = 0 all_jobs = [] for dir in dirs: - this_date = re.sub(f".+/{args.data_level}/", "", dir) - this_date = re.sub(r"\D", "", this_date.split("/")[0]) - this_date = datetime.strptime(this_date, "%Y%m%d") + this_date_str = re.sub(f".+/{args.data_level}/", "", dir) + this_date_str = re.sub(r"\D", "", this_date_str.split("/")[0]) + this_date = datetime.strptime(this_date_str, "%Y%m%d") + if timerange and (this_date < timemin or this_date > timemax): continue @@ -159,6 +196,15 @@ def main(): file_in = line[0] slurm_id = f"{line[1]}_{line[2]}" if len(line) == 4 else line[1] rc = line[-1] + + if args.run_list is not None: + if h5key != "joint_obs": # DL1/M[12] + run_subrun = file_in.split("/")[-1].split("_")[2] + this_run = int(run_subrun.split(".")[0]) + this_subrun = int(run_subrun.split(".")[1]) + + rc_dicts[this_run][str(this_subrun)] = rc + if rc == "0": this_good += 1 # now check accounting @@ -243,6 +289,20 @@ def main(): f"CPU: median={np.median(all_cpu)}, max={all_cpu.max()}, total={total_time:.2f} CPU hrs; memory [M]: median={np.median(all_mem)}, max={all_mem.max()}" ) + if args.run_list is not None: + print("Updating the database") + for rrun in rc_dicts.keys(): + idx = h5runs[run_key] == rrun + h5runs.loc[idx, rc_col] = json.dumps(rc_dicts[rrun]) + all_subruns = np.array(h5runs[idx]["number of subruns"])[0] + good_subruns = sum(np.array(list(rc_dicts[rrun].values())) == "0") + h5runs.loc[idx, rc_col + "_all"] = good_subruns == all_subruns + + # fixme: for DL1/M[12] files since htere are two dataframes in the file, we need to append it + # and this causes increase in the file size every time the file is updated + with pd.option_context("display.max_rows", None): + h5runs.to_hdf(args.run_list, key=h5key, mode="r+") + if __name__ == "__main__": main() From 0cf29d7e4cc7a6d5d631b3df7ed788c9f82a3cde Mon Sep 17 00:00:00 2001 From: joannaWojS <167758973+joannaWojS@users.noreply.github.com> Date: Fri, 21 Jun 2024 12:35:40 +0200 Subject: [PATCH 133/236] Update check_MAGIC_runs.py --- .../check_MAGIC_runs.py | 98 ++++++++++++++----- 1 file changed, 73 insertions(+), 25 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index 0e2cb261..081f3774 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -12,26 +12,32 @@ import os import re +def fix_lists_and_convert(cell): + # Remove 
brackets to avoid double lists and split on '][' + parts = cell.replace('][', ',').strip('[]').split(',') + return list(dict.fromkeys(int(item) for item in parts)) + def table_first_last_run(df): - df_selected_data = df.iloc[:, [2, 1, 5, 6]] - df_selected_data.columns = ['DATE','source', 'MAGIC_first_run', 'MAGIC_last_run'] + df_selected_data = df.iloc[:, [2, 1, 5, 6, 25]] + df_selected_data.columns = ['DATE','source', 'MAGIC_first_run', 'MAGIC_last_run', 'MAGIC_runs'] grouped_data = df_selected_data.groupby(['DATE', 'source']) - result_table = [] for (date, source), group in grouped_data: First_run = group['MAGIC_first_run'].min() Last_run = group['MAGIC_last_run'].max() + runs_combined = group['MAGIC_runs'].sum() result_table.append({ - 'Date (LST conv.)': date, - 'Source': source, + 'DATE': date, + 'source': source, 'First run': First_run, - 'Last run': Last_run + 'Last run': Last_run, + 'MAGIC runs': runs_combined }) result = pd.DataFrame(result_table) - + result['MAGIC runs'] = result['MAGIC runs'].apply(fix_lists_and_convert) return(result) def check_run_ID(path, filename, first_run, last_run, date, source, tel_id): @@ -50,8 +56,7 @@ def check_run_ID(path, filename, first_run, last_run, date, source, tel_id): for id in run_ids: if first_run <= id <= last_run: - magic_runs.append(f"{date} \t {source} \t {id}") - #print(f"{date} \t {source} \t {id}") + magic_runs.append(f"{date}\t{source}\t{id}") return magic_runs def check_directory(date, source, first_run, last_run, tel_id): @@ -72,41 +77,84 @@ def check_directory(date, source, first_run, last_run, tel_id): if os.path.exists(path): files = os.listdir(path) - count_with_source = 0 + for filename in files: if source in filename: - count_with_source += 1 results = check_run_ID(path, filename, first_run, last_run, date, source, tel_id) - #We will see many results because a file with a run ID has subruns. + #We will see many results becuse a file with a run ID has subruns. #We must count the same results to get information how many subruns we have. for result in results: if result in results_count: results_count[result] += 1 else: results_count[result] = 1 + + for result, count in results_count.items(): + print(f"{result}\t{count}") + +def missing_files( tel_id, date, source, magic_runs ): + + for runs in magic_runs: + run = str(runs) + + date_obj = datetime.strptime(date, '%Y%m%d') + date_obj += timedelta(days=1) + new_date = datetime.strftime(date_obj, '%Y%m%d') + YYYY = new_date[:4] + MM = new_date[4:6] + DD = new_date[6:8] + Y = f"_Y_" + + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" + + if os.path.exists(path): + files = os.listdir(path) + count_with_source = 0 + count_with_run_id = 0 + # Counter for files that include the source. We want to check if any file with the source was found. + # Counter for files that include the run_id. We want to check if any file with the run_id was found. + for filename in files: + if source in filename: + count_with_source += 1 + for runs in magic_runs: + # run = str(runs) + if run in filename: + count_with_run_id += 1 if count_with_source == 0: if(tel_id == 1): #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files. if(date<'20220904' or date>'20221214'): - print(f"No files found containing the source '{source}' on {date}, (M{tel_id})") + print(f"No files found containing the source '{source}' on {date}") else: print(f"M1 failure. 
No files found containing the source '{source}' on {date}.") if(tel_id == 2): - print(f"No files found containing the source '{source}' on {date}, (M{tel_id})") - + print(f"No files found containing the source '{source}' on {date}") + if count_with_source != 0 and count_with_run_id == 0: + if(date<'20220904' or date>'20221214'): + print(f"No run id: {run} found in the {source} on {date}.") else: print(f"No such file or directory: {date}") + +def main(): + + df = pd.read_hdf( '/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5', key='str/table') + + tel_id = [1, 2] + database = table_first_last_run(df) + + for tel in tel_id: + print(f"MAGIC {tel}") + print(f"DATE\tsource\tRun ID\t Subruns") + for index, row in database.iterrows(): + check_directory(row['DATE'], row['source'], row['First run'], row['Last run'], tel) + print() + for index, row in database.iterrows(): + missing_files(tel, row['DATE'], row['source'], row['MAGIC runs']) + print() + +if __name__ == "__main__": + main() - for result, count in results_count.items(): - print(f"M{tel_id} \t {result} \t {count}") -df = pd.read_hdf( '/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5', key='str/table') -database = table_first_last_run(df) -tel_id = [1, 2] -for tel in tel_id: - print() - print(f"Telescope ID \t Date (LST convention) \t Source \t Run ID \t Subruns") - for index, row in database.iterrows(): - check_directory(row['Date (LST conv.)'], row['Source'], row['First run'], row['Last run'], tel) From 7d6c4c2ce69d6eab3de0dac38846ac5198697d62 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Mon, 24 Jun 2024 11:06:33 +0000 Subject: [PATCH 134/236] adding updating of the file database with rc also for coincidence and stereo reconstruction --- .../semi_automatic_scripts/job_accounting.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 7251da51..d7194067 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -99,10 +99,12 @@ def main(): if args.run_list is not None: try: h5key = "joint_obs" + run_key = "LST1_run" for magic in [1, 2]: if args.data_level[-2:] == f"M{magic}": h5key = f"MAGIC{magic}/runs_M{magic}" run_key = "Run ID" + h5runs = pd.read_hdf(args.run_list, key=h5key) except (FileNotFoundError, KeyError): print(f"Cannot open {h5key} in {args.run_list}") @@ -110,6 +112,8 @@ def main(): if h5key != "joint_obs": rc_col = "DL1_rc" + else: + rc_col = args.data_level + "_rc" if rc_col not in h5runs.keys(): h5runs[rc_col] = "{}" @@ -202,6 +206,10 @@ def main(): run_subrun = file_in.split("/")[-1].split("_")[2] this_run = int(run_subrun.split(".")[0]) this_subrun = int(run_subrun.split(".")[1]) + else: + filename = file_in.split("/")[-1] + this_run = filename.split(".")[1].replace("Run", "") + this_subrun = int(filename.split(".")[2]) rc_dicts[this_run][str(this_subrun)] = rc @@ -294,14 +302,17 @@ def main(): for rrun in rc_dicts.keys(): idx = h5runs[run_key] == rrun h5runs.loc[idx, rc_col] = json.dumps(rc_dicts[rrun]) - all_subruns = np.array(h5runs[idx]["number of subruns"])[0] + if h5key == "joint_obs": + all_subruns = len(rc_dicts[rrun]) + else: + all_subruns = np.array(h5runs[idx]["number of subruns"])[0] good_subruns = sum(np.array(list(rc_dicts[rrun].values())) 
== "0") - h5runs.loc[idx, rc_col + "_all"] = good_subruns == all_subruns + isgood = np.logical_and(good_subruns == all_subruns, good_subruns > 0) + h5runs.loc[idx, rc_col + "_all"] = isgood # fixme: for DL1/M[12] files since htere are two dataframes in the file, we need to append it # and this causes increase in the file size every time the file is updated - with pd.option_context("display.max_rows", None): - h5runs.to_hdf(args.run_list, key=h5key, mode="r+") + h5runs.to_hdf(args.run_list, key=h5key, mode="r+") if __name__ == "__main__": From 5bf2ebf5d067426c3d3d1f782d22247f8d892ab0 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Mon, 24 Jun 2024 11:15:49 +0000 Subject: [PATCH 135/236] slight refactoring --- .../semi_automatic_scripts/job_accounting.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index d7194067..d654bf19 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -100,20 +100,19 @@ def main(): try: h5key = "joint_obs" run_key = "LST1_run" + ismagic = False for magic in [1, 2]: if args.data_level[-2:] == f"M{magic}": h5key = f"MAGIC{magic}/runs_M{magic}" run_key = "Run ID" + ismagic = True h5runs = pd.read_hdf(args.run_list, key=h5key) except (FileNotFoundError, KeyError): print(f"Cannot open {h5key} in {args.run_list}") exit(1) - if h5key != "joint_obs": - rc_col = "DL1_rc" - else: - rc_col = args.data_level + "_rc" + rc_col = "DL1_rc" if ismagic else args.data_level + "_rc" if rc_col not in h5runs.keys(): h5runs[rc_col] = "{}" @@ -202,7 +201,7 @@ def main(): rc = line[-1] if args.run_list is not None: - if h5key != "joint_obs": # DL1/M[12] + if ismagic: run_subrun = file_in.split("/")[-1].split("_")[2] this_run = int(run_subrun.split(".")[0]) this_subrun = int(run_subrun.split(".")[1]) @@ -302,10 +301,10 @@ def main(): for rrun in rc_dicts.keys(): idx = h5runs[run_key] == rrun h5runs.loc[idx, rc_col] = json.dumps(rc_dicts[rrun]) - if h5key == "joint_obs": - all_subruns = len(rc_dicts[rrun]) - else: + if ismagic: all_subruns = np.array(h5runs[idx]["number of subruns"])[0] + else: + all_subruns = len(rc_dicts[rrun]) good_subruns = sum(np.array(list(rc_dicts[rrun].values())) == "0") isgood = np.logical_and(good_subruns == all_subruns, good_subruns > 0) h5runs.loc[idx, rc_col + "_all"] = isgood From 02e0350c65e60bb89d542aa7b0812e0c5014b4e0 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 27 Jun 2024 07:00:57 +0000 Subject: [PATCH 136/236] rename first script --- magicctapipe/scripts/lst1_magic/README.md | 4 ++-- .../scripts/lst1_magic/semi_automatic_scripts/__init__.py | 2 +- .../{setting_up_config_and_dir.py => dl1_production.py} | 5 +++-- setup.cfg | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) rename magicctapipe/scripts/lst1_magic/semi_automatic_scripts/{setting_up_config_and_dir.py => dl1_production.py} (99%) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 2eee37e4..729432ee 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -99,7 +99,7 @@ Finding MAGIC runs... And it will save the files TARGET_LST_runs.txt, TARGET_MAGIC_runs.txt, and list_sources.dat in your current working directory. 
In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s). At this point, we can convert the MAGIC data into DL1 format with the following command: -> $ setting_up_config_and_dir -c config_general.yaml +> $ dl1_production -c config_general.yaml The output in the terminal will be something like this: ``` @@ -109,7 +109,7 @@ To check the jobs submitted to the cluster, type: squeue -n {source} This process will take about 10 min to run if the IT cluster is free. ``` -The command `setting_up_config_and_dir` does a series of things: +The command `dl1_production` does a series of things: - Creates a directory with the target name within the directory `yourprojectname/{MCP_version}` and several subdirectories inside it that are necessary for the rest of the data reduction. The main directories are: ``` diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index f5c23b44..9a2f3fde 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -1,7 +1,7 @@ from .clusters import slurm_lines from .coincident_events import configfile_coincidence, linking_bash_lst from .merging_runs import cleaning, merge, mergeMC, split_train_test -from .setting_up_config_and_dir import ( +from .dl1_production import ( config_file_gen, directories_generator_real, directories_generator_MC, diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py similarity index 99% rename from magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py rename to magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 8c87a4f1..8474a82b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/setting_up_config_and_dir.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -11,7 +11,7 @@ No LST data is used here. 
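
The --analysis-type option selects which branch is executed ("onlyMAGIC",
"onlyMC" or "doEverything"); when NSB_matching is enabled in the general
configuration file, only the MAGIC branch is run.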
Standard usage: -$ python setting_up_config_and_dir.py (-c config_file.yaml) +$ python dl1_production.py (-c config_file.yaml) """ import argparse import glob @@ -498,7 +498,8 @@ def main(): list_of_MC = glob.glob("linking_MC_*s.sh") # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") - + + for n, run in enumerate(list_of_MC): if n == 0: launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" diff --git a/setup.cfg b/setup.cfg index b0391b8f..fee19e15 100644 --- a/setup.cfg +++ b/setup.cfg @@ -101,7 +101,7 @@ console_scripts = merging_runs = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.merging_runs:main nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_level:main lstchain_version = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.lstchain_version:main - setting_up_config_and_dir = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.setting_up_config_and_dir:main + dl1_production = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.dl1_production:main stereo_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.stereo_events:main job_accounting = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.job_accounting:main From 4f0471d356b9a64b411d9a56e169291074ee8afc Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 27 Jun 2024 07:05:06 +0000 Subject: [PATCH 137/236] helium and electrons --- .../semi_automatic_scripts/merging_runs.py | 2 ++ .../semi_automatic_scripts/stereo_events.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 1dcb191e..d8ac118c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -359,6 +359,8 @@ def main(): target_dir, "gammas", env_name, source_name, cluster ) # generating the bash script to merge the files mergeMC(target_dir, "protons_test", env_name, source_name, cluster) + mergeMC(target_dir, "helium", env_name, source_name, cluster) + mergeMC(target_dir, "electrons", env_name, source_name, cluster) print("***** Running merge_hdf_files.py on the MC data files...") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 7ee42cab..a2155ccf 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -259,18 +259,19 @@ def main(): and not NSB_match ): print("***** Generating the bashscript for MCs...") - for part in ["gammadiffuse", "gammas", "protons", "protons_test"]: + for part in ["gammadiffuse", "gammas", "protons", "protons_test", "helium", "electrons"]: bash_stereoMC(target_dir, part, env_name, source_name, cluster) list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) - + launch_jobs = "" + + + os.system(launch_jobs) for n, run in enumerate(list_of_stereo_scripts): - if n == 0: - launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" - else: - launch_jobs = ( - f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" - ) + launch_jobs += ( + " && " if n > 0 else "" + ) + f"{launch_jobs} && 
stereo{n}=$(sbatch --parsable {run})" + os.system(launch_jobs) From b9e2c92c617909a40fbd5ce218b4da31b615cc07 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 27 Jun 2024 07:12:12 +0000 Subject: [PATCH 138/236] linting --- .../semi_automatic_scripts/__init__.py | 4 +- .../coincident_events.py | 8 +- .../semi_automatic_scripts/dl1_production.py | 74 +++++++++---------- .../semi_automatic_scripts/merging_runs.py | 4 +- .../semi_automatic_scripts/stereo_events.py | 21 ++++-- 5 files changed, 56 insertions(+), 55 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 9a2f3fde..5fa0cf9d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -1,13 +1,13 @@ from .clusters import slurm_lines from .coincident_events import configfile_coincidence, linking_bash_lst -from .merging_runs import cleaning, merge, mergeMC, split_train_test from .dl1_production import ( config_file_gen, - directories_generator_real, directories_generator_MC, + directories_generator_real, lists_and_bash_gen_MAGIC, lists_and_bash_generator, ) +from .merging_runs import cleaning, merge, mergeMC, split_train_test from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo __all__ = [ diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index f8087e32..353ce2b9 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -49,11 +49,13 @@ def configfile_coincidence(target_dir, source_name, config_gen): Path to the working directory source_name : str Name of the target source + config_gen : dict + Dictionary of the entries of the general configuration file """ - config_file = config_gen['general']['base_config_file'] - if config_file=='': + config_file = config_gen["general"]["base_config_file"] + if config_file == "": config_file = resource_file("config.yaml") - + with open( config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 8474a82b..d5a86982 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -60,9 +60,11 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen) If real data are matched to pre-processed MCs or not source_name : str Name of the target source + config_gen : dict + Dictionary of the entries of the general configuration file """ - config_file = config_gen['general']['base_config_file'] - if config_file=='': + config_file = config_gen["general"]["base_config_file"] + if config_file == "": config_file = resource_file("config.yaml") with open( config_file, "rb" @@ -80,7 +82,7 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen) "LST": LST_config, "MAGIC": MAGIC_config, } - if source_name == 'MC': + if source_name == "MC": file_name = f"{target_dir}/v{__version__}/MC/config_DL0_to_DL1.yaml" else: file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" @@ -114,8 +116,6 @@ def 
lists_and_bash_generator( Focal length to be used to process MCs (e.g., 'nominal') env_name : str Name of the environment - source_name : str - Name of the target source cluster : str Cluster system """ @@ -123,7 +123,7 @@ def lists_and_bash_generator( if MC_path == "": return print(f"running {particle_type} from {MC_path}") - process_name = 'MC' + process_name = "MC" list_of_nodes = glob.glob(f"{MC_path}/node*") dir1 = f"{target_dir}/v{__version__}/MC" @@ -131,17 +131,19 @@ def lists_and_bash_generator( f"{dir1}/logs/list_nodes_{particle_type}_complete.txt", "w" ) as f: # creating list_nodes_gammas_complete.txt for i in list_of_nodes: - out_list=glob.glob(f"{i}/output*\n") - if len(out_list)==0: - logger.error(f'No output file for node {i}, or the directory structure is not the usual one. Skipping...') + out_list = glob.glob(f"{i}/output*\n") + if len(out_list) == 0: + logger.error( + f"No output file for node {i}, or the directory structure is not the usual one. Skipping..." + ) continue - elif len(out_list)==1: + elif len(out_list) == 1: f.write(f"{out_list[0]}\n") else: - output_index = input(f"The available outputs are {out_list}, please provide the array index of the desired one:") + output_index = input( + f"The available outputs are {out_list}, please provide the array index of the desired one:" + ) f.write(f"{out_list[output_index]}\n") - - with open( f"{dir1}/logs/list_folder_{particle_type}.txt", "w" @@ -326,11 +328,14 @@ def directories_generator_real( os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") else: - + dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1/Observations") if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1/Observations", exist_ok=True) - + os.makedirs( + f"{target_dir}/v{__version__}/{source_name}/DL1/Observations", + exist_ok=True, + ) + else: overwrite = input( f'data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? [only "y" or "n"]: ' @@ -338,9 +343,10 @@ def directories_generator_real( if overwrite == "y": os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1/Observations", exist_ok=True + f"{target_dir}/v{__version__}/{source_name}/DL1/Observations", + exist_ok=True, ) - + else: print("Directory not modified.") @@ -351,9 +357,9 @@ def directories_generator_real( for magic in [1, 2]: if telescope_ids[magic - 3] > 0: os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs", exist_ok=True) -def directories_generator_MC( - target_dir, telescope_ids, NSB_match -): + + +def directories_generator_MC(target_dir, telescope_ids, NSB_match): """ Here we create all subdirectories for a given workspace and target name. @@ -368,9 +374,6 @@ def directories_generator_MC( If real data are matched to pre-processed MCs or not """ - - dl1_dir = str(f"{target_dir}/v{__version__}/MC/DL1") - dir_list = [ "gammas", "gammadiffuse", @@ -378,7 +381,7 @@ def directories_generator_MC( "protons", "helium", ] - if not os.path.exists(f'{target_dir}/v{__version__}/MC'): + if not os.path.exists(f"{target_dir}/v{__version__}/MC"): os.makedirs(f"{target_dir}/v{__version__}/MC/logs", exist_ok=True) os.makedirs(f"{target_dir}/v{__version__}/MC/DL1", exist_ok=True) for dir in dir_list: @@ -388,7 +391,7 @@ def directories_generator_MC( ) else: overwrite = input( - f'MC directory already exists. 
Would you like to overwrite it? [only "y" or "n"]: ' + 'MC directory already exists. Would you like to overwrite it? [only "y" or "n"]: ' ) if overwrite == "y": os.system(f"rm -r {target_dir}/v{__version__}/MC") @@ -400,9 +403,6 @@ def directories_generator_MC( else: print("Directory not modified.") - - - def main(): @@ -464,17 +464,15 @@ def main(): noisebright = 1.15 * pow(nsb, 1.115) biasdim = 0.358 * pow(nsb, 0.805) noise_value = [nsb, noisebright, biasdim] - + if not NSB_match: # Below we run the analysis on the MC data - if (args.analysis_type == "onlyMC") or ( - args.analysis_type == "doEverything" - ): + if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): directories_generator_MC( str(target_dir), telescope_ids, NSB_match ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - target_dir, noise_value, NSB_match, 'MC', config + target_dir, noise_value, NSB_match, "MC", config ) # TODO: fix here to_process = { "gammas": MC_gammas, @@ -490,7 +488,6 @@ def main(): to_process[particle], focal_length, env_name, - cluster, ) @@ -498,8 +495,7 @@ def main(): list_of_MC = glob.glob("linking_MC_*s.sh") # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") - - + for n, run in enumerate(list_of_MC): if n == 0: launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" @@ -508,14 +504,12 @@ def main(): os.system(launch_jobs_MC) for source_name in source_list: - MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - # TODO: fix here above print("*** Converting Calibrated into DL1 data ***") print(f"Process name: {source_name}") @@ -531,8 +525,6 @@ def main(): target_dir, noise_value, NSB_match, source_name, config ) # TODO: fix here - - # Below we run the analysis on the MAGIC data if ( (args.analysis_type == "onlyMAGIC") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index d8ac118c..7eaac72b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -170,9 +170,9 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c if os.path.exists(f"{indir}"): outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" os.makedirs(f"{outdir}/logs", exist_ok=True) - #os.system( + # os.system( # f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' - #) + # ) f.write( f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index a2155ccf..c0e09f7b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -51,12 +51,14 @@ def configfile_stereo(target_dir, source_name, config_gen): Path to the working directory source_name : str Name of the target source + config_gen : dict + 
Dictionary of the entries of the general configuration file """ - config_file = config_gen['general']['base_config_file'] - if config_file=='': + config_file = config_gen["general"]["base_config_file"] + if config_file == "": config_file = resource_file("config.yaml") - + with open( config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading @@ -259,19 +261,24 @@ def main(): and not NSB_match ): print("***** Generating the bashscript for MCs...") - for part in ["gammadiffuse", "gammas", "protons", "protons_test", "helium", "electrons"]: + for part in [ + "gammadiffuse", + "gammas", + "protons", + "protons_test", + "helium", + "electrons", + ]: bash_stereoMC(target_dir, part, env_name, source_name, cluster) list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) launch_jobs = "" - - + os.system(launch_jobs) for n, run in enumerate(list_of_stereo_scripts): launch_jobs += ( " && " if n > 0 else "" ) + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" - os.system(launch_jobs) From 6cacb1bc68d289c782f98cd53e443b41c980784d Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 27 Jun 2024 07:36:19 +0000 Subject: [PATCH 139/236] logging --- .../coincident_events.py | 3 +- .../semi_automatic_scripts/dl1_production.py | 23 +++++++------- .../semi_automatic_scripts/merging_runs.py | 30 +++++++++++-------- .../semi_automatic_scripts/stereo_events.py | 3 +- 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 353ce2b9..8a150097 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -249,6 +249,7 @@ def main(): glob.glob(f"{source_name}_LST_coincident*.sh") ) if len(list_of_coincidence_scripts) < 1: + logger.warning("No bash scripts") continue launch_jobs = "" for n, run in enumerate(list_of_coincidence_scripts): @@ -259,7 +260,7 @@ def main(): os.system(launch_jobs) except OSError as exc: - print(exc) + logger.error(exc) if __name__ == "__main__": diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index d5a86982..59edd8e0 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -493,16 +493,19 @@ def main(): # Here we do the MC DL0 to DL1 conversion: list_of_MC = glob.glob("linking_MC_*s.sh") - + if len(list_of_MC) < 1: + logger.warning( + "No bash script has been produced for MC" + ) # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") + else: + for n, run in enumerate(list_of_MC): + if n == 0: + launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + else: + launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - for n, run in enumerate(list_of_MC): - if n == 0: - launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" - else: - launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && 
running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)"
-
-            os.system(launch_jobs_MC)
+    os.system(launch_jobs_MC)

     for source_name in source_list:
         MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt"
@@ -543,8 +546,8 @@ def main():
         if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0):
             list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh")
             if len(list_of_MAGIC_runs) < 1:
-                print(
-                    "Warning: no bash script has been produced. Please check the provided MAGIC_runs.txt and the MAGIC calibrated data"
+                logger.warning(
+                    "No bash script has been produced. Please check the provided MAGIC_runs.txt and the MAGIC calibrated data"
                 )
                 continue

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
index 7eaac72b..b36c44bb 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
@@ -183,7 +183,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c
                 f.writelines(rc)
             os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt")
         else:
-            print(f"ERROR: {indir} does not exist")
+            logger.error(f"{indir} does not exist")

     elif identification == "1_M1M2":
         for i in MAGIC_runs:
@@ -202,8 +202,8 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c
                 f.writelines(rc)
                 os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt")
             else:
-                print(
-                    f"ERROR {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist"
+                logger.error(
+                    f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist"
                 )
     else:
         dates = np.unique(MAGIC_runs.T[0])
@@ -366,16 +366,22 @@ def main():

             # Below we run the bash scripts to merge the MC files
             list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh"))
+            if len(list_of_merging_scripts) < 1:
+                logger.warning(
+                    "No bash script has been produced for MC"
+                )
+            # TODO: check

-            for n, run in enumerate(list_of_merging_scripts):
-                if n == 0:
-                    launch_jobs = f"merging{n}=$(sbatch --parsable {run})"
-                else:
-                    launch_jobs = (
-                        f"{launch_jobs} && merging{n}=$(sbatch --parsable {run})"
-                    )
+            else:
+                for n, run in enumerate(list_of_merging_scripts):
+                    if n == 0:
+                        launch_jobs = f"merging{n}=$(sbatch --parsable {run})"
+                    else:
+                        launch_jobs = (
+                            f"{launch_jobs} && merging{n}=$(sbatch --parsable {run})"
+                        )

-            os.system(launch_jobs)
+                os.system(launch_jobs)

         # Below we run the analysis on the MAGIC data
         if (
@@ -419,7 +425,7 @@ def main():
                 glob.glob(f"{source_name}_Merge_MAGIC_*.sh")
             )
             if len(list_of_merging_scripts) < 1:
-                logger.warning("no bash scripts")
+                logger.warning("No bash scripts for real data")
                 continue
             for n, run in enumerate(list_of_merging_scripts):
                 if n == 0:
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
index c0e09f7b..f959ec9c 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
@@ -273,7 +273,7 @@ def main():
         list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh"))
         launch_jobs = ""
-
+        #TODO: check on N. bash scripts
         os.system(launch_jobs)
         for n, run in enumerate(list_of_stereo_scripts):
             launch_jobs += (
                 " && " if n > 0 else ""
@@ -296,6 +296,7 @@ def main():
         # Below we run the bash scripts to find the stereo events
         list_of_stereo_scripts = np.sort(glob.glob(f"{source_name}_StereoEvents*.sh"))
         if len(list_of_stereo_scripts) < 1:
+            logger.warning("No bash scripts for real data")
             continue
         for n, run in enumerate(list_of_stereo_scripts):
             if n == 0:

From 53bd30a000b754f4464136bdef3a4ee1d33c912d Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Thu, 27 Jun 2024 07:57:00 +0000
Subject: [PATCH 140/236] launch jobs

---
 .../semi_automatic_scripts/dl1_production.py | 25 ++++++++-----------
 .../semi_automatic_scripts/merging_runs.py   | 24 +++++++++---------
 .../semi_automatic_scripts/stereo_events.py  | 13 ++++++----
 3 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
index 59edd8e0..7b8c54ef 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
@@ -490,22 +490,17 @@ def main():
                 env_name,
                 cluster,
             )
-
+            list_of_MC = glob.glob(f"linking_MC_{particle}_*.sh")
+            if len(list_of_MC) < 2:
+                logger.warning(
+                    f"No bash script has been produced for processing {particle}"
+                )
+            else:
+                launch_jobs_MC = f"linking=$(sbatch --parsable linking_MC_{particle}_paths.sh) && running=$(sbatch --parsable --dependency=afterany:$linking linking_MC_{particle}_paths_r.sh)"
+                os.system(launch_jobs_MC)
         # Here we do the MC DL0 to DL1 conversion:
-        list_of_MC = glob.glob("linking_MC_*s.sh")
-        if len(list_of_MC) < 1:
-            logger.warning(
-                "No bash script has been produced for MC"
-            )
-            # os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh")
-        else:
-            for n, run in enumerate(list_of_MC):
-                if n == 0:
-                    launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)"
-                else:
-                    launch_jobs_MC = f"{launch_jobs_MC} && linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)"
-
-            os.system(launch_jobs_MC)
+
+
     for source_name in source_list:

         MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt"

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
index b36c44bb..119123ca 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
@@ -373,16 +373,14 @@ def main():
                 # TODO: check

             else:
+                launch_jobs = ""
                 for n, run in enumerate(list_of_merging_scripts):
-                    if n == 0:
-                        launch_jobs = f"merging{n}=$(sbatch --parsable {run})"
-                    else:
-                        launch_jobs = (
-                            f"{launch_jobs} && merging{n}=$(sbatch --parsable {run})"
-                        )
+                    launch_jobs += (
+                        " && " if n > 0 else ""
+                    ) + f"merging{n}=$(sbatch --parsable {run})"

                 os.system(launch_jobs)
-
+
         # Below we run the analysis on the MAGIC data
         if (
             (args.analysis_type == "onlyMAGIC")
@@ -427,13 +425,15 @@ def main():
             if len(list_of_merging_scripts) < 1:
                 logger.warning("No bash scripts for real data")
                 continue
+            launch_jobs = ""
             for n, run in enumerate(list_of_merging_scripts):
-                if n == 0:
-                    launch_jobs = f"merging{n}=$(sbatch --parsable {run})"
-                else:
-                    launch_jobs = f"{launch_jobs} && merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})"
+                launch_jobs += (
+                    " && " if n > 0 else ""
+                ) + f"merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})"

-            os.system(launch_jobs)
+            os.system(launch_jobs)
+
+
             print(f"Process name: merging_{source_name}")
             print(

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
index f959ec9c..7b1bff75 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
@@ -274,7 +274,7 @@ def main():
         list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh"))
         launch_jobs = ""
         #TODO: check on N. bash scripts
-        os.system(launch_jobs)
+
         for n, run in enumerate(list_of_stereo_scripts):
             launch_jobs += (
                 " && " if n > 0 else ""
@@ -298,13 +298,16 @@ def main():
             if len(list_of_stereo_scripts) < 1:
                 logger.warning("No bash scripts for real data")
                 continue
+            launch_jobs = ""
+
+
             for n, run in enumerate(list_of_stereo_scripts):
-                if n == 0:
-                    launch_jobs = f"stereo{n}=$(sbatch --parsable {run})"
-                else:
-                    launch_jobs = f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})"
+                launch_jobs += (
+                    " && " if n > 0 else ""
+                ) + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})"

             os.system(launch_jobs)
+

 if __name__ == "__main__":

From 4ba1fc4b81303dbb7a6628d37305eef0acd6ff84 Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Thu, 4 Jul 2024 10:12:13 +0000
Subject: [PATCH 141/236] minor fixes

---
 .../semi_automatic_scripts/dl1_production.py | 57 ++++++++++---------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
index 7b8c54ef..43d0af94 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
@@ -131,7 +131,7 @@ def lists_and_bash_generator(
             f"{dir1}/logs/list_nodes_{particle_type}_complete.txt", "w"
         ) as f:  # creating list_nodes_gammas_complete.txt
             for i in list_of_nodes:
-                out_list = glob.glob(f"{i}/output*\n")
+                out_list = glob.glob(f"{i}/output*")
                 if len(out_list) == 0:
                     logger.error(
                         f"No output file for node {i}, or the directory structure is not the usual one. Skipping..."
@@ -395,6 +395,7 @@ def directories_generator_MC(target_dir, telescope_ids, NSB_match):
             )
             if overwrite == "y":
                 os.system(f"rm -r {target_dir}/v{__version__}/MC")
+                os.makedirs(f"{target_dir}/v{__version__}/MC/logs", exist_ok=True)
                 for dir in dir_list:
                     os.makedirs(
                         f"{target_dir}/v{__version__}/MC/DL1/{dir}/logs",
@@ -499,36 +500,36 @@ def main():
                 launch_jobs_MC = f"linking=$(sbatch --parsable linking_MC_{particle}_paths.sh) && running=$(sbatch --parsable --dependency=afterany:$linking linking_MC_{particle}_paths_r.sh)"
                 os.system(launch_jobs_MC)
         # Here we do the MC DL0 to DL1 conversion:
-
-
-    for source_name in source_list:
-
-        MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt"
-        MAGIC_runs = np.genfromtxt(
-            MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2
-        )  # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174"
-
-        # TODO: fix here above
-        print("*** Converting Calibrated into DL1 data ***")
-        print(f"Process name: {source_name}")
-        print(
-            f"To check the jobs submitted to the cluster, type: squeue -n {source_name}"
-        )
-        print("This process will take about 10 min to run if the IT cluster is free.")
-
-        directories_generator_real(
-            str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name
-        )  # Here we create all the necessary directories in the given workspace and collect the main directory of the target
-        config_file_gen(
-            target_dir, noise_value, NSB_match, source_name, config
-        )  # TODO: fix here
-
-        # Below we run the analysis on the MAGIC data
-        if (
+    if (
         (args.analysis_type == "onlyMAGIC")
         or (args.analysis_type == "doEverything")
         or (NSB_match)
-        ):
+    ):
+
+        for source_name in source_list:
+
+            MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt"
+            MAGIC_runs = np.genfromtxt(
+                MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2
+            )  # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174"
+
+            # TODO: fix here above
+            print("*** Converting Calibrated into DL1 data ***")
+            print(f"Process name: {source_name}")
+            print(
+                f"To check the jobs submitted to the cluster, type: squeue -n {source_name}"
+            )
+            print(
+                "This process will take about 10 min to run if the IT cluster is free."
+            )
+
+            directories_generator_real(
+                str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name
+            )  # Here we create all the necessary directories in the given workspace and collect the main directory of the target
+            config_file_gen(
+                target_dir, noise_value, NSB_match, source_name, config
+            )  # TODO: fix here
+
+            # Below we run the analysis on the MAGIC data
+
             lists_and_bash_gen_MAGIC(
                 target_dir,
                 telescope_ids,

From 90ecc444723a02b3d2a1ba82370415a583be7716 Mon Sep 17 00:00:00 2001
From: joannaWojS <167758973+joannaWojS@users.noreply.github.com>
Date: Tue, 9 Jul 2024 10:37:52 +0200
Subject: [PATCH 142/236] Update check_MAGIC_runs.py

---
 .../check_MAGIC_runs.py | 104 +++++++-----------
 1 file changed, 39 insertions(+), 65 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
index 081f3774..773996f0 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
@@ -1,4 +1,4 @@
-#This script allows us to get information about every MAGIC run ID (and subruns) in files used for common data analysis (MAGIC1, MAGIC2, LST1).
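# An aside on the "launch_jobs_MC" string assembled in the dl1_production.py
# hunks above -- a minimal, self-contained sketch of the same SLURM chaining
# idea. The two script names below are placeholders for this example only:
# "--parsable" makes sbatch print just the job ID, which the second submission
# then waits on through "--dependency=afterany:$linking".
link_sh = "linking_MC_gammas_paths.sh"  # hypothetical generated script
run_sh = "linking_MC_gammas_paths_r.sh"  # hypothetical generated script
chain = (
    f"linking=$(sbatch --parsable {link_sh}) && "
    f"running=$(sbatch --parsable --dependency=afterany:$linking {run_sh})"
)
print(chain)  # the string that os.system() would hand to the shell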
+#This script allows us to get information about every MAGIC run ID (and subruns) in files (in a time interval) used for common data analysis (MAGIC1, MAGIC2, LST1).

 #The MAGIC files that can be used for analysis are located in the IT cluster in the following directory:
 #/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}

@@ -17,81 +17,53 @@ def fix_lists_and_convert(cell):
     parts = cell.replace('][', ',').strip('[]').split(',')
     return list(dict.fromkeys(int(item) for item in parts))

-def table_first_last_run(df):
-    df_selected_data = df.iloc[:, [2, 1, 5, 6, 25]]
-    df_selected_data.columns = ['DATE','source', 'MAGIC_first_run', 'MAGIC_last_run', 'MAGIC_runs']
+def table_magic_runs(df, date_min, date_max):
+    df_selected_data = df.iloc[:, [2, 1, 25]]
+    df_selected_data.columns = ['DATE','source', 'MAGIC_runs']
     grouped_data = df_selected_data.groupby(['DATE', 'source'])
     result_table = []

     for (date, source), group in grouped_data:
-        First_run = group['MAGIC_first_run'].min()
-        Last_run = group['MAGIC_last_run'].max()
-        runs_combined = group['MAGIC_runs'].sum()
-
-        result_table.append({
-            'DATE': date,
-            'source': source,
-            'First run': First_run,
-            'Last run': Last_run,
-            'MAGIC runs': runs_combined
-        })
+        if (date>=date_min and date<=date_max):
+            runs_combined = group['MAGIC_runs'].sum()
+            result_table.append({
+                'DATE': date,
+                'source': source,
+                'MAGIC runs': runs_combined
+            })
+
     result = pd.DataFrame(result_table)
     result['MAGIC runs'] = result['MAGIC runs'].apply(fix_lists_and_convert)
     return(result)

-def check_run_ID(path, filename, first_run, last_run, date, source, tel_id):
-
-    #We have to be sure that the function counts right filename.
-    date_obs = filename.split("_")[0]
-    run = filename.split("_")[2].split(".")[0]
-    subrun = filename.split("_")[2].split(".")[1]
-    Y = f'{date_obs}_M{tel_id}_{run}.{subrun}_Y_{source}'
-    r = f".root"
+def existing_files( tel_id, date, source, magic_runs ):

-    if Y and r in filename:
-        # Extract run_ids from filename and check range
-        run_ids = [int(filename.split("_")[2].split(".")[0])]
-        magic_runs = []
-
-        for id in run_ids:
-            if first_run <= id <= last_run:
-                magic_runs.append(f"{date}\t{source}\t{id}")
-        return magic_runs
-
-def check_directory(date, source, first_run, last_run, tel_id):
-    # In the table date are written as follows: YYYYMMDD, for example '20191123' We need a datetime object.
+    magic_runs = str(magic_runs)
     date_obj = datetime.strptime(date, '%Y%m%d')
-
-    # Date in MAGIC convention ( 'LST +1 day')
     date_obj += timedelta(days=1)
     new_date = datetime.strftime(date_obj, '%Y%m%d')
-
     YYYY = new_date[:4]
     MM = new_date[4:6]
     DD = new_date[6:8]
+    Y = f"_Y_"

-    results_count = {}
-
-    path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}"
+    path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}"

     if os.path.exists(path):
         files = os.listdir(path)
-
+        count_with_source = 0
+        count_with_run_id = 0
+        # Counter for files that include the source.
+        # Counter for files that include the run_id.
         for filename in files:
-            if source in filename:
-                results = check_run_ID(path, filename, first_run, last_run, date, source, tel_id)
-                #We will see many results becuse a file with a run ID has subruns.
-                #We must count the same results to get information how many subruns we have.
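# An aside on the calibrated-file naming convention handled above -- a sketch of
# how the fields decompose, using a made-up file name (fields: date, telescope,
# run.subrun, type, source):
filename = "20201120_M1_05093174.001_Y_CrabNebula.root"  # hypothetical example
date_obs = filename.split("_")[0]               # '20201120'
run_id = filename.split("_")[2].split(".")[0]   # '05093174'
subrun = filename.split("_")[2].split(".")[1]   # '001'
print(date_obs, run_id, subrun)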
-            for result in results:
-                if result in results_count:
-                    results_count[result] += 1
-                else:
-                    results_count[result] = 1
-
-    for result, count in results_count.items():
-        print(f"{result}\t{count}")
-
+            if date and source and Y in filename:
+                count_with_source += 1
+                if magic_runs in filename:
+                    count_with_run_id += 1
+        if count_with_source != 0 and count_with_run_id != 0:
+            print(f"{date}\t{source}\t{magic_runs}\t{count_with_run_id}")
+
 def missing_files( tel_id, date, source, magic_runs ):

     for runs in magic_runs:
         run = str(runs)
@@ -114,7 +86,7 @@ def missing_files( tel_id, date, source, magic_runs ):
     # Counter for files that include the source. We want to check if any file with the source was found.
     # Counter for files that include the run_id. We want to check if any file with the run_id was found.
         for filename in files:
-            if source in filename:
+            if date and source and Y in filename:
                 count_with_source += 1
                 for runs in magic_runs:
                     # run = str(runs)
                     if run in filename:
                         count_with_run_id += 1
         if count_with_source == 0:
             if(tel_id == 1):
                 #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files.
-                if(date<'20220904' or date>'20221214'):
+                if(date<='20220904' or date>='20221214'):
                     print(f"No files found containing the source '{source}' on {date}")
                 else:
                     print(f"M1 failure. No files found containing the source '{source}' on {date}.")
@@ -136,17 +108,23 @@ def missing_files( tel_id, date, source, magic_runs ):
             print(f"No such file or directory: {date}")

 def main():
-
+
+    #TO DO : set time interval- format YYYYMMDD
+    date_min = '20240601'
+    date_max = '20240630'
+
     df = pd.read_hdf( '/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5', key='str/table')
     tel_id = [1, 2]
-    database = table_first_last_run(df)
+    database = table_magic_runs(df, date_min, date_max)
+    database_exploded = database.explode('MAGIC runs')
+    database_exploded_reset = database_exploded.reset_index(drop=True)

     for tel in tel_id:
         print(f"MAGIC {tel}")
         print(f"DATE\tsource\tRun ID\t Subruns")
-        for index, row in database.iterrows():
-            check_directory(row['DATE'], row['source'], row['First run'], row['Last run'], tel)
+        for index, row in database_exploded_reset.iterrows():
+            existing_files(tel, row['DATE'], row['source'], row['MAGIC runs'])
         print()
         for index, row in database.iterrows():
             missing_files(tel, row['DATE'], row['source'], row['MAGIC runs'])
@@ -154,7 +132,3 @@ def main():

 if __name__ == "__main__":
     main()
-
-
-
-

From 6e2a1dc6373913ab5d9f0a8c4e0530beed7b5b48 Mon Sep 17 00:00:00 2001
From: joannaWojS <167758973+joannaWojS@users.noreply.github.com>
Date: Thu, 11 Jul 2024 23:35:29 +0200
Subject: [PATCH 143/236] Update check_MAGIC_runs.py

---
 .../check_MAGIC_runs.py | 117 +++++++++++++-----
 1 file changed, 84 insertions(+), 33 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
index 773996f0..c13fec10 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
@@ -1,23 +1,43 @@
-#This script allows us to get information about every MAGIC run ID (and subruns) in files (in a time interval) used for common data analysis (MAGIC1, MAGIC2, LST1).
+"""
+This script allows to get information about every MAGIC run ID (and subruns)
+in files (in a time interval) used for common data analysis (MAGIC1, MAGIC2, LST1).

-#The MAGIC files that can be used for analysis are located in the IT cluster in the following directory:
-#/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}
+The MAGIC files that can be used for analysis are located in the IT cluster
+in the following directory:
+/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}

-#In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2. 'YYYY', 'MM', and 'DD' specify the date.
-
-#In the first step, we have to load a dataframe that contains information about the date, the name of the source, and the range of MAGIC #runs. The file in file_path was generated using the spreadsheet "Common MAGIC LST1 data".
+In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2.
+'YYYY', 'MM', and 'DD' specify the date.
+"""

-import pandas as pd
-from datetime import datetime, timedelta
 import os
+from datetime import datetime, timedelta
+
+import pandas as pd

 def fix_lists_and_convert(cell):
+    """
+    An additional function necessary to organize lists in the function table_magic_runs.
+    The function remove brackets to avoid double lists and split on ']['
+    """
+
     parts = cell.replace('][', ',').strip('[]').split(',')
     return list(dict.fromkeys(int(item) for item in parts))

 def table_magic_runs(df, date_min, date_max):
+
+    """
+    Data selection from the general table with informations about MAGIC+LST1 observations.
+
+    Parameters:
+    -----------
+    df: table
+        Dataframe with general information about MAGIC+LST1 observations.
+    date_min: str
+        Start of the time interval (in LST convention).
+    date_max: str
+        End of the time interval (in LST convention).
+    """
+
     df_selected_data = df.iloc[:, [2, 1, 25]]
     df_selected_data.columns = ['DATE','source', 'MAGIC_runs']
     grouped_data = df_selected_data.groupby(['DATE', 'source'])
@@ -37,37 +57,64 @@ def table_magic_runs(df, date_min, date_max):
     result['MAGIC runs'] = result['MAGIC runs'].apply(fix_lists_and_convert)
     return(result)

-def existing_files( tel_id, date, source, magic_runs ):
+def existing_files( tel_id, date, source, magic_run ):
+
+    """
+    Checking existing files on the IT cluster.
+
+    Parameters:
+    -----------
+    tel_id: int
+        The telescope ID, which must be either 1 or 2.
+    date: str
+        Date (in LST convention).
+    source: str
+        Name of the source.
+    magic_run: int
+        The MAGIC run from the date and the source.
+    """

-    magic_runs = str(magic_runs)
+    magic_run = str(magic_run)
+
     date_obj = datetime.strptime(date, '%Y%m%d')
     date_obj += timedelta(days=1)
     new_date = datetime.strftime(date_obj, '%Y%m%d')
     YYYY = new_date[:4]
     MM = new_date[4:6]
     DD = new_date[6:8]
-    Y = f"_Y_"
+    Y = "_Y_"

     path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}"

     if os.path.exists(path):
-        files = os.listdir(path)
-        count_with_source = 0
-        count_with_run_id = 0
-        # Counter for files that include the source.
+        files = os.listdir(path)
+        count_with_run_id = 0
         # Counter for files that include the run_id.
         for filename in files:
-            if date and source and Y in filename:
-                count_with_source += 1
-                if magic_runs in filename:
-                    count_with_run_id += 1
-        if count_with_source != 0 and count_with_run_id != 0:
-            print(f"{date}\t{source}\t{magic_runs}\t{count_with_run_id}")
+            if Y in filename:
+                if new_date in filename:
+                    if source in filename:
+                        if magic_run in filename:
+                            count_with_run_id += 1
+        if count_with_run_id != 0:
+            print(f"{date}\t{source}\t{magic_run}\t{count_with_run_id}")

-def missing_files( tel_id, date, source, magic_runs ):
+def missing_files( tel_id, date, source, magic_runs ):
+
+    """
+    Checking missing files on the IT cluster.

-    for runs in magic_runs:
-        run = str(runs)
+    Parameters:
+    -----------
+    tel_id: int
+        The telescope ID, which must be either 1 or 2.
+    date: str
+        Date (in LST convention).
+    source: str
+        Name of the source.
+    magic_runs: list
+        List of MAGIC runs from the date and the source.
+    """

     date_obj = datetime.strptime(date, '%Y%m%d')
     date_obj += timedelta(days=1)
@@ -75,7 +122,7 @@ def missing_files( tel_id, date, source, magic_runs ):
     YYYY = new_date[:4]
     MM = new_date[4:6]
     DD = new_date[6:8]
-    Y = f"_Y_"
+    Y = "_Y_"

     path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}"

@@ -86,28 +133,32 @@ def missing_files( tel_id, date, source, magic_runs ):
     # Counter for files that include the source. We want to check if any file with the source was found.
     # Counter for files that include the run_id. We want to check if any file with the run_id was found.
         for filename in files:
-            if date and source and Y in filename:
-                count_with_source += 1
-                for runs in magic_runs:
-                    # run = str(runs)
-                    if run in filename:
-                        count_with_run_id += 1
+            if Y in filename:
+                if new_date in filename:
+                    if source in filename:
+                        count_with_source += 1
+                        for run in magic_runs:
+                            run=str(run)
+                            if run in filename:
+                                count_with_run_id += 1
         if count_with_source == 0:
-            if(tel_id == 1):
+            if tel_id == 1:
                 #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files.
                 if(date<='20220904' or date>='20221214'):
                     print(f"No files found containing the source '{source}' on {date}")
-                else:
-                    print(f"M1 failure. No files found containing the source '{source}' on {date}.")
-            if(tel_id == 2):
+            if tel_id == 2:
                 print(f"No files found containing the source '{source}' on {date}")
         if count_with_source != 0 and count_with_run_id == 0:
-            if(date<'20220904' or date>'20221214'):
+            if tel_id == 1 and (date<'20220904' or date>'20221214'):
                 print(f"No run id: {run} found in the {source} on {date}.")
+            if tel_id == 2:
+                print(f"No run id: {run} found in the {source} on {date}.")
         else:
             print(f"No such file or directory: {date}")

 def main():
+
+    """Main function."""

     #TO DO : set time interval- format YYYYMMDD
     date_min = '20240601'
     date_max = '20240630'

     df = pd.read_hdf( '/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5', key='str/table')
     tel_id = [1, 2]
     database = table_magic_runs(df, date_min, date_max)
     database_exploded = database.explode('MAGIC runs')
     database_exploded_reset = database_exploded.reset_index(drop=True)

     for tel in tel_id:
         print(f"MAGIC {tel}")
         print(f"DATE\tsource\tRun ID\t Subruns")
         for index, row in database_exploded_reset.iterrows():
             existing_files(tel, row['DATE'], row['source'], row['MAGIC runs'])
         print()
         for index, row in database.iterrows():
             missing_files(tel, row['DATE'], row['source'], row['MAGIC runs'])
         print()

 if __name__ == "__main__":
     main()

From c1f3c77d13b6450202072decc9c76f0475cb39b3 Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Fri, 12 Jul 2024 13:37:05 +0000
Subject: [PATCH 144/236] MC merging out of source loop

---
 .../semi_automatic_scripts/merging_runs.py | 99 ++++++++++---------
 1 file changed, 50 insertions(+), 49 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
index 119123ca..998d4d97 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
@@ -72,7 +72,7 @@ def cleaning(list_of_nodes):
     print("Cleaning done.")


-def split_train_test(target_dir, train_fraction, source_name):
+def split_train_test(target_dir, train_fraction):

     """
     This function splits the MC proton sample in 2, i.e. the "test" and the "train" subsamples, in case you want to make performance studies on MC. For regular analyses, you can/should use the whole MC sample for training.
@@ -89,7 +89,7 @@ def split_train_test(target_dir, train_fraction, source_name):
         Name of the target source
     """

-    proton_dir = f"{target_dir}/{source_name}/DL1/MC/protons"
+    proton_dir = f"{target_dir}/v{__version__}/MC/DL1/protons"

     list_of_dir = np.sort(glob.glob(f"{proton_dir}/node*{os.path.sep}"))

@@ -224,7 +224,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c
         os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt")


-def mergeMC(target_dir, identification, env_name, source_name, cluster):
+def mergeMC(target_dir, identification, env_name, cluster):

     """
     This function creates the bash scripts to run merge_hdf_files.py in all MC runs.
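As an aside, the "launch_jobs" strings these commits keep refactoring all follow the same accumulation pattern; a minimal sketch with placeholder script names (not the actually generated ones):

    scripts = ["Merge_MC_protons.sh", "Merge_MC_gammas.sh"]  # hypothetical
    launch_jobs = ""
    for n, run in enumerate(scripts):
        launch_jobs += (" && " if n > 0 else "") + f"merging{n}=$(sbatch --parsable {run})"
    print(launch_jobs)
    # merging0=$(sbatch --parsable Merge_MC_protons.sh) && merging1=$(sbatch --parsable Merge_MC_gammas.sh)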
@@ -243,9 +243,9 @@ def mergeMC(target_dir, identification, env_name, source_name, cluster):
         Cluster system
     """

-    process_name = f"merging_{source_name}"
+    process_name = f"merging_MC"

-    MC_DL1_dir = f"{target_dir}/{source_name}/DL1/MC"
+    MC_DL1_dir = f"{target_dir}/v{__version__}/MC/DL1"
     os.makedirs(f"{MC_DL1_dir}/{identification}/Merged", exist_ok=True)

     if identification == "protons":
@@ -331,56 +331,57 @@ def main():
     else:
         source_list.append(source)

+    if not NSB_match:
+        if (args.analysis_type == "onlyMC") or (
+            args.analysis_type == "doEverything"
+        ):
+            # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists):
+            if not os.path.exists(
+                f"{target_dir}/v{__version__}/MC/DL1/protons_test"
+            ):
+                print("***** Splitting protons into 'train' and 'test' datasets...")
+                split_train_test(target_dir, train_fraction)
+
+            print("***** Generating merge_MC bashscripts...")
+            mergeMC(
+                target_dir, "protons", env_name,cluster
+            )  # generating the bash script to merge the files
+            mergeMC(
+                target_dir, "gammadiffuse", env_name, cluster
+            )  # generating the bash script to merge the files
+            mergeMC(
+                target_dir, "gammas", env_name, cluster
+            )  # generating the bash script to merge the files
+            mergeMC(target_dir, "protons_test", env_name, cluster)
+            mergeMC(target_dir, "helium", env_name, cluster)
+            mergeMC(target_dir, "electrons", env_name, cluster)
+
+            print("***** Running merge_hdf_files.py on the MC data files...")
+
+            # Below we run the bash scripts to merge the MC files
+            list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh"))
+            if len(list_of_merging_scripts) < 1:
+                logger.warning(
+                    "No bash script has been produced for MC"
+                )
+            # TODO: check
+
+            else:
+                launch_jobs = ""
+                for n, run in enumerate(list_of_merging_scripts):
+                    launch_jobs += (
+                        " && " if n > 0 else ""
+                    ) + f"merging{n}=$(sbatch --parsable {run})"
+
+                os.system(launch_jobs)
+
     for source_name in source_list:
+        # Below we run the analysis on the MC data
         MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt"
         MAGIC_runs = np.genfromtxt(
             MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2
         )
-        if not NSB_match:
-            if (args.analysis_type == "onlyMC") or (
-                args.analysis_type == "doEverything"
-            ):
-                # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists):
-                if not os.path.exists(
-                    f"{target_dir}/{source_name}/DL1/MC/protons_test"
-                ):
-                    print("***** Splitting protons into 'train' and 'test' datasets...")
-                    split_train_test(target_dir, train_fraction, source_name)
-
-                print("***** Generating merge_MC bashscripts...")
-                mergeMC(
-                    target_dir, "protons", env_name, source_name, cluster
-                )  # generating the bash script to merge the files
-                mergeMC(
-                    target_dir, "gammadiffuse", env_name, source_name, cluster
-                )  # generating the bash script to merge the files
-                mergeMC(
-                    target_dir, "gammas", env_name, source_name, cluster
-                )  # generating the bash script to merge the files
-                mergeMC(target_dir, "protons_test", env_name, source_name, cluster)
-                mergeMC(target_dir, "helium", env_name, source_name, cluster)
-                mergeMC(target_dir, "electrons", env_name, source_name, cluster)
-
-                print("***** Running merge_hdf_files.py on the MC data files...")
-
-                # Below we run the bash scripts to merge the MC files
-                list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh"))
-                if len(list_of_merging_scripts) < 1:
-                    logger.warning(
-                        "No bash script has been produced for MC"
-                    )
-                # TODO: check
-
-                else:
-                    launch_jobs = ""
-                    for n, run in enumerate(list_of_merging_scripts):
-                        launch_jobs += (
-                            " && " if n > 0 else ""
-                        ) + f"merging{n}=$(sbatch --parsable {run})"
-
-                    os.system(launch_jobs)
-
+
         # Below we run the analysis on the MAGIC data
         if (
             (args.analysis_type == "onlyMAGIC")

From 4707aa649829944f06be2958c7e2cce6858bd63e Mon Sep 17 00:00:00 2001
From: joannaWojS <167758973+joannaWojS@users.noreply.github.com>
Date: Fri, 12 Jul 2024 19:45:16 +0200
Subject: [PATCH 145/236] Update check_MAGIC_runs.py

---
 .../check_MAGIC_runs.py | 77 ++++++++++---------
 1 file changed, 42 insertions(+), 35 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
index c13fec10..21fa917e 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py
@@ -10,17 +10,23 @@
 'YYYY', 'MM', and 'DD' specify the date.
 """

-import pandas as pd
-from datetime import datetime, timedelta
 import os
+from datetime import datetime, timedelta
+
+import pandas as pd

 def fix_lists_and_convert(cell):
     """
     An additional function necessary to organize lists in the function table_magic_runs.
-    The function remove brackets to avoid double lists and split on ']['
+    The function remove brackets to avoid double lists and split on "][".
+
+    Parameters:
+    ----------
+    cell: list
+        Lists of MAGIC runs from the date and the source.
     """

-    parts = cell.replace('][', ',').strip('[]').split(',')
+    parts = cell.replace("][", ",").strip("[]").split(",")
     return list(dict.fromkeys(int(item) for item in parts))

 def table_magic_runs(df, date_min, date_max):
@@ -39,25 +45,23 @@ def table_magic_runs(df, date_min, date_max):
     """

     df_selected_data = df.iloc[:, [2, 1, 25]]
-    df_selected_data.columns = ['DATE','source', 'MAGIC_runs']
-    grouped_data = df_selected_data.groupby(['DATE', 'source'])
+    df_selected_data.columns = ["DATE","source", "MAGIC_runs"]
+    grouped_data = df_selected_data.groupby(["DATE", "source"])
     result_table = []

     for (date, source), group in grouped_data:
-        if (date>=date_min and date<=date_max):
-            runs_combined = group['MAGIC_runs'].sum()
+        if date >= date_min and date <= date_max:
+            runs_combined = group["MAGIC_runs"].sum()

-            result_table.append({
-                'DATE': date,
-                'source': source,
-                'MAGIC runs': runs_combined
-            })
+            result_table.append(
+                {"DATE": date, "source": source, "MAGIC runs": runs_combined}
+            )

     result = pd.DataFrame(result_table)
-    result['MAGIC runs'] = result['MAGIC runs'].apply(fix_lists_and_convert)
-    return(result)
+    result["MAGIC runs"] = result["MAGIC runs"].apply(fix_lists_and_convert)
+    return result

-def existing_files( tel_id, date, source, magic_run ):
+def existing_files(tel_id, date, source, magic_run):

     """
     Checking existing files on the IT cluster.
@@ -76,9 +80,9 @@ def existing_files(tel_id, date, source, magic_run):

     magic_run = str(magic_run)

-    date_obj = datetime.strptime(date, '%Y%m%d')
+    date_obj = datetime.strptime(date, "%Y%m%d")
     date_obj += timedelta(days=1)
-    new_date = datetime.strftime(date_obj, '%Y%m%d')
+    new_date = datetime.strftime(date_obj, "%Y%m%d")
     YYYY = new_date[:4]
     MM = new_date[4:6]
     DD = new_date[6:8]
-    Y = f"_Y_"
+    Y = "_Y_"

     path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}"

     if os.path.exists(path):
         files = os.listdir(path)
         count_with_run_id = 0
-        # Counter for files that include the run_id.
+        # Counter for files that include the run_id.
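# An aside on fix_lists_and_convert above -- what the "][" splitting does to a
# merged runs cell (the run numbers below are invented for the example):
cell = "[5093174][5093175][5093174]"
parts = cell.replace("][", ",").strip("[]").split(",")
runs = list(dict.fromkeys(int(item) for item in parts))
print(runs)  # [5093174, 5093175] -- duplicates dropped, order kept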
         for filename in files:
             if Y in filename:
                 if new_date in filename:
                     if source in filename:
                         if magic_run in filename:
                             count_with_run_id += 1
         if count_with_run_id != 0:
             print(f"{date}\t{source}\t{magic_run}\t{count_with_run_id}")

-def missing_files( tel_id, date, source, magic_runs ):
+def missing_files(tel_id, date, source, magic_runs):

     """
     Checking missing files on the IT cluster.

@@ -116,9 +120,9 @@ def missing_files(tel_id, date, source, magic_runs):
         List of MAGIC runs from the date and the source.
     """

-    date_obj = datetime.strptime(date, '%Y%m%d')
+    date_obj = datetime.strptime(date, "%Y%m%d")
     date_obj += timedelta(days=1)
-    new_date = datetime.strftime(date_obj, '%Y%m%d')
+    new_date = datetime.strftime(date_obj, "%Y%m%d")
     YYYY = new_date[:4]
     MM = new_date[4:6]
     DD = new_date[6:8]
-    Y = f"_Y_"
+    Y = "_Y_"

     path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}"

@@ -138,21 +142,21 @@ def missing_files(tel_id, date, source, magic_runs):
                     if source in filename:
                         count_with_source += 1
                         for run in magic_runs:
-                            run=str(run)
+                            run = str(run)
                             if run in filename:
                                 count_with_run_id += 1
         if count_with_source == 0:
-            if(tel_id == 1):
+            if tel_id == 1:
                 #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files.
-                if(date<='20220904' or date>='20221214'):
+                if date <= "20220904" or date >= "20221214":
                     print(f"No files found containing the source '{source}' on {date}")
-            if(tel_id == 2):
+            if tel_id == 2:
                 print(f"No files found containing the source '{source}' on {date}")
         if count_with_source != 0 and count_with_run_id == 0:
-            if tel_id == 1 and (date<'20220904' or date>'20221214'):
+            if tel_id == 1 and (date < "20220904" or date > "20221214"):
                 print(f"No run id: {run} found in the {source} on {date}.")
             if tel_id == 2:
-                print(f"No run id: {run} found in the {source} on {date}.")
+                print(f"No run id: {run} found in the {source} on {date}.")
         else:
             print(f"No such file or directory: {date}")

 def main():

     """Main function."""

-    #TO DO : set time interval- format YYYYMMDD
-    date_min = '20240601'
-    date_max = '20240630'
+    # TO DO : set time interval - format YYYYMMDD
+    date_min = "20240601"
+    date_max = "20240630"

-    df = pd.read_hdf( '/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5', key='str/table')
+    df = pd.read_hdf(
+        "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5",
+        key="str/table"
+    )
     tel_id = [1, 2]
     database = table_magic_runs(df, date_min, date_max)
-    database_exploded = database.explode('MAGIC runs')
+    database_exploded = database.explode("MAGIC runs")
     database_exploded_reset = database_exploded.reset_index(drop=True)

     for tel in tel_id:
         print(f"MAGIC {tel}")
         print(f"DATE\tsource\tRun ID\t Subruns")
         for index, row in database_exploded_reset.iterrows():
-            existing_files(tel, row['DATE'], row['source'], row['MAGIC runs'])
+            existing_files(tel, row["DATE"], row["source"], row["MAGIC runs"])
         print()
         for index, row in database.iterrows():
-            missing_files(tel, row['DATE'], row['source'], row['MAGIC runs'])
+            missing_files(tel, row["DATE"], row["source"], row["MAGIC runs"])
         print()

 if __name__ == "__main__":

From 4c2383b55784f0534655975d3fb6683dacc598a9 Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Tue, 16 Jul 2024 09:07:32 +0000
Subject: [PATCH 146/236] config file for database paths

---
 magicctapipe/resources/database_config.yaml  | 11 ++++++++
 .../database_production/create_LST_table.py  | 26 ++++++++++-------
 .../database_production/lstchain_version.py  | 17 +++++++++---
 .../database_production/nsb_level.py         | 21 ++++++++++-----
 .../database_production/nsb_to_h5.py         | 19 ++++++++++----
 .../semi_automatic_scripts/list_from_h5.py   | 24 ++++++++++++-----
 6 files changed, 88 insertions(+), 30 deletions(-)
 create mode 100644 magicctapipe/resources/database_config.yaml

diff --git a/magicctapipe/resources/database_config.yaml b/magicctapipe/resources/database_config.yaml
new file mode 100644
index 00000000..4ec00138
--- /dev/null
+++ b/magicctapipe/resources/database_config.yaml
@@ -0,0 +1,11 @@
+database_paths:
+    input_1: "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5"
+    input_2: "/home/alessio.berti/MAGIC-LST_common/runfile/simultaneous_obs_summary.h5"
+    MAGIC: '/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5'
+    LST: "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5"
+database_keys:
+    input_1: '/str'
+    input_2: '/str'
+    MAGIC-I: "MAGIC1/runs_M1"
+    MAGIC-II: "MAGIC2/runs_M2"
+    LST: "joint_obs"
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
index 5e250e5a..6c0839c2 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
@@ -8,20 +8,28 @@
 import numpy as np
 import pandas as pd
-
+from magicctapipe.io import resource_file

 def main():

     """
     Main function
     """
+    config_file = resource_file("database_config.yaml")
+
+    with open(
+        config_file, "rb"
+    ) as fc:  # "rb" mode opens the file in binary format for reading
+        config_dict = yaml.safe_load(fc)
+    out_h5=config_dict['database_paths']['LST']
+    out_key=config_dict['database_keys']['LST']
+
     df = pd.read_hdf(
-        "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5", key="/str"
+        config_dict['database_paths']['input_1'], key=config_dict['database_keys']['input_1']
     )  # TODO: put this file in a shared folder
     df2 = pd.read_hdf(
-        "/home/alessio.berti/MAGIC-LST_common/runfile/simultaneous_obs_summary.h5",
-        key="/str",
+        config_dict['database_paths']['input_2'], key=config_dict['database_keys']['input_2']
     )  # TODO: put this file in a shared folder
     df = pd.concat([df, df2]).drop_duplicates(subset="LST1_run", keep="first")
     needed_cols = [
@@ -44,11 +52,11 @@ def main():
     df_cut = df_cut.assign(error_code_stereo=-1)

     if os.path.isfile(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5"
+        out_h5
     ):
         df_old = pd.read_hdf(
-            "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-            key="joint_obs",
+            out_h5,
+            key=out_key,
         )
         df_cut = pd.concat([df_old, df_cut]).drop_duplicates(
             subset="LST1_run", keep="first"
@@ -57,8 +65,8 @@ def main():
     # TODO check if fine with update and nsb
     df_cut = df_cut.reset_index(drop=True)
     df_cut.to_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        out_h5,
+        key=out_key,
         mode="w",
         min_itemsize={
             "lstchain_versions": 20,
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py
index b946ff39..4d170ceb 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py
@@ -7,6 +7,8 @@
 import os

 import pandas as pd
+from magicctapipe.io import resource_file

 lstchain_versions = ["v0.9", "v0.10"]
 __all__ = ["version_lstchain"]
@@ -61,17 +62,25 @@ def main():
     """
     Main function
     """
+    config_file = resource_file("database_config.yaml")

+    with open(
+        config_file, "rb"
+    ) as fc:  # "rb" mode opens the file in binary format for reading
+        config_dict = yaml.safe_load(fc)
+
+    LST_h5=config_dict['database_paths']['LST']
+    LST_key=config_dict['database_keys']['LST']
     df_LST = pd.read_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        LST_h5,
+        key=LST_key
     )

     version_lstchain(df_LST)

     df_LST.to_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        LST_h5,
+        key=LST_key,
         mode="w",
         min_itemsize={
             "lstchain_versions": 20,
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py
index c3c45ce4..e471ab45 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py
@@ -14,6 +14,7 @@
 import numpy as np
 import pandas as pd
 import yaml
+from magicctapipe.io import resource_file

 from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines

@@ -98,14 +99,22 @@ def main():
         args.config_file, "rb"
     ) as f:  # "rb" mode opens the file in binary format for reading
         config = yaml.safe_load(f)
+    config_db = resource_file("database_config.yaml")

+    with open(
+        config_db, "rb"
+    ) as fc:  # "rb" mode opens the file in binary format for reading
+        config_dict = yaml.safe_load(fc)
+
+    LST_h5=config_dict['database_paths']['LST']
+    LST_key=config_dict['database_keys']['LST']
     env_name = config["general"]["env_name"]

     cluster = config["general"]["cluster"]

     df_LST = pd.read_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        LST_h5,
+        key=LST_key,
     )
     lstchain_v = config["general"]["LST_version"]
     lstchain_modified = config["general"]["lstchain_modified_config"]
@@ -168,8 +177,8 @@ def main():
         return
     print("Update database and launch jobs")
     df_old = pd.read_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        LST_h5,
+        key=LST_key,
     )
     df_LST = pd.concat([df_LST, df_old]).drop_duplicates(
         subset="LST1_run", keep="first"
@@ -183,8 +192,8 @@ def main():
     os.system(launch_jobs)

     df_LST.to_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        LST_h5,
+        key=LST_key,
         mode="w",
         min_itemsize={
             "lstchain_versions": 20,
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py
index b5f87b6c..16772698 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py
@@ -9,6 +9,7 @@
 import numpy as np
 import pandas as pd
+from magicctapipe.io import resource_file

 __all__ = ["collect_nsb"]

@@ -43,7 +44,7 @@ def collect_nsb(df_LST):
             df_LST.loc[run, "nsb"] = float(nsb)
     df_LST = df_LST.reset_index()
-    return df_LST
+    return df_LSTreading/

 def main():

@@ -51,10 +52,18 @@ def main():
     """
     Main function
     """
+    config_file = resource_file("database_config.yaml")

+    with open(
+        config_file, "rb"
+    ) as fc:  # "rb" mode opens the file in binary format for reading
+        config_dict = yaml.safe_load(fc)
+
+    LST_h5=config_dict['database_paths']['LST']
+    LST_key=config_dict['database_keys']['LST']
     df_LST = pd.read_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        LST_h5,
+        key=LST_key,
     )

     df_new = collect_nsb(df_LST)
@@ -84,8 +93,8 @@ def main():
         ]
     ]
     df_new.to_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        LST_h5,
+        key=LST_key,
         mode="w",
     )

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
index 69fae60e..82e6c0fb 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
@@ -10,6 +10,7 @@
 import numpy as np
 import pandas as pd
 import yaml
+from magicctapipe.io import resource_file

 def split_lst_date(df):

@@ -213,7 +214,18 @@ def main():
         args.config_file, "rb"
     ) as f:  # "rb" mode opens the file in binary format for reading
         config = yaml.safe_load(f)
+    config_db = resource_file("database_config.yaml")

+    with open(
+        config_db, "rb"
+    ) as fc:  # "rb" mode opens the file in binary format for reading
+        config_dict = yaml.safe_load(fc)
+
+    LST_h5=config_dict['database_paths']['LST']
+    LST_key=config_dict['database_keys']['LST']
+    MAGIC_h5=config_dict['database_paths']['MAGIC']
+    MAGIC1_key=config_dict['database_keys']['MAGIC-I']
+    MAGIC2_key=config_dict['database_keys']['MAGIC-II']
     source_in = config["data_selection"]["source_name_database"]
     source_out = config["data_selection"]["source_name_output"]
     range = config["data_selection"]["time_range"]
     skip_LST = config["data_selection"]["skip_LST_runs"]
     skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"]

     df_LST = pd.read_hdf(
-        "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5",
-        key="joint_obs",
+        LST_h5,
+        key=LST_key,
     )  # TODO: put this file in a shared folder
     df_LST.dropna(subset=["LST1_run"], inplace=True)
     df_LST = split_lst_date(df_LST)
@@ -267,12 +279,12 @@ def main():
     df_LST = df_LST.reset_index()
     df_LST = df_LST.drop("index", axis=1)
     df_MAGIC1 = pd.read_hdf(
-        "/fefs/aswg/workspace/joanna.wojtowicz/Common_MAGIC_LST1_data_MAGIC_RUNS.h5",
-        key="MAGIC1/runs_M1",
+        MAGIC_h5,
+        key=MAGIC1_key,
     )
     df_MAGIC2 = pd.read_hdf(
-        "/fefs/aswg/workspace/joanna.wojtowicz/Common_MAGIC_LST1_data_MAGIC_RUNS.h5",
-        key="MAGIC2/runs_M2",
+        MAGIC_h5,
+        key=MAGIC2_key,
     )
     df_MAGIC1["Source"] = df_MAGIC1["Source"].str.replace(" ", "")
     df_MAGIC2["Source"] = df_MAGIC2["Source"].str.replace(" ", "")

From 685137a2ded056152aca909377c82d0e84b2c595 Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Wed, 17 Jul 2024 08:53:10 +0000
Subject: [PATCH 147/236] add dates option for update

---
 .../database_production/create_LST_table.py | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
index 6c0839c2..85f78875 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
@@ -15,6 +15,27 @@ def main():

     """
     Main function
     """
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--begin-date",
+        "-b",
+        dest="begin",
+        type=int,
+        default=0,
+        help="First date to update database (YYYYMMDD)",
+    )
+    parser.add_argument(
+        "--end-date",
+        "-e",
+        dest="end",
+        type=int,
+        default=0,
+        help="End date to update database (YYYYMMDD)",
+    )
+
+    args = parser.parse_args()
     config_file = resource_file("database_config.yaml")

     with open(
@@ -32,6 +53,11 @@ def main():
         config_dict['database_paths']['input_2'], key=config_dict['database_keys']['input_2']
     )  # TODO: put this file in a shared folder
     df = pd.concat([df, df2]).drop_duplicates(subset="LST1_run", keep="first")
+    if args.begin != 0:
+        df = df[df["DATE"] >= args.begin]
+    if args.end != 0:
+        df = df[df["DATE"] <= args.end]
+
     needed_cols = [
         "source",
         "DATE",

From e51cd417b210a48ebab126d78283c03f16793c27 Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Wed, 17 Jul 2024 09:02:12 +0000
Subject: [PATCH 148/236] fix list from h5 to new database formats

---
 magicctapipe/resources/database_config.yaml |  2 +-
 .../semi_automatic_scripts/list_from_h5.py  | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/magicctapipe/resources/database_config.yaml b/magicctapipe/resources/database_config.yaml
index 4ec00138..d7aac09e 100644
--- a/magicctapipe/resources/database_config.yaml
+++ b/magicctapipe/resources/database_config.yaml
@@ -1,5 +1,5 @@
 database_paths:
-    input_1: "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary.h5"
+    input_1: "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary-apr2024.h5"
     input_2: "/home/alessio.berti/MAGIC-LST_common/runfile/simultaneous_obs_summary.h5"
     MAGIC: '/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5'
     LST: "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5"
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
index 82e6c0fb..2a44c68b 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
@@ -53,7 +53,7 @@ def magic_date(df):
         The input dataframe with an added column.
""" - date_lst = pd.to_datetime(df["Date (LST convention)"], format="%Y%m%d") + date_lst = pd.to_datetime(df["DATE"], format="%Y%m%d") delta = pd.Timedelta("1 day") date_magic = date_lst + delta date_magic = date_magic.dt.strftime("%Y%m%d") @@ -83,8 +83,8 @@ def clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2): source_list = [] if source_in is None: source_list = np.intersect1d( - np.intersect1d(np.unique(df_LST["source"]), np.unique(df_MAGIC1["Source"])), - np.unique(df_MAGIC2["Source"]), + np.intersect1d(np.unique(df_LST["source"]), np.unique(df_MAGIC1["source"])), + np.unique(df_MAGIC2["source"]), ) else: source_list.append(source_out) @@ -286,14 +286,14 @@ def main(): MAGIC_h5, key=MAGIC2_key, ) - df_MAGIC1["Source"] = df_MAGIC1["Source"].str.replace(" ", "") - df_MAGIC2["Source"] = df_MAGIC2["Source"].str.replace(" ", "") + #df_MAGIC1["Source"] = df_MAGIC1["Source"].str.replace(" ", "") + #df_MAGIC2["Source"] = df_MAGIC2["Source"].str.replace(" ", "") list_date_LST = np.unique(df_LST["date_LST"]) list_date_LST_low = [int(sub.replace("-", "")) for sub in list_date_LST] - df_MAGIC1 = df_MAGIC1[df_MAGIC1["Date (LST convention)"].isin(list_date_LST_low)] - df_MAGIC2 = df_MAGIC2[df_MAGIC2["Date (LST convention)"].isin(list_date_LST_low)] + df_MAGIC1 = df_MAGIC1[df_MAGIC1["DATE"].isin(list_date_LST_low)] + df_MAGIC2 = df_MAGIC2[df_MAGIC2["DATE"].isin(list_date_LST_low)] clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2) @@ -301,7 +301,7 @@ def main(): df_MAGIC2 = magic_date(df_MAGIC2) df_MAGIC1 = magic_date(df_MAGIC1) - df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"}) + #df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"}) M1_runs = df_MAGIC1["Run ID"].tolist() if (len(M1_runs) == 0) or (len(df_MAGIC2) == 0): From ad61e3a75a4b4933ad30472b281b357db6bb4648 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Wed, 17 Jul 2024 11:21:07 +0200 Subject: [PATCH 149/236] Update merge_stereo.py --- .../semi_automatic_scripts/merge_stereo.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index de8ceadd..c4c2f32e 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -19,7 +19,7 @@ def MergeStereo(target_dir, env_name, source, NSB_match, cluster): """ - This function creates the bash scripts to run merge_hdf_files.py in all DL2 subruns. + This function creates the bash scripts to run merge_hdf_files.py in all DL1Stereo subruns. 
     Parameters
     ----------
@@ -36,13 +36,14 @@ def MergeStereo(target_dir, env_name, source, NSB_match, cluster):
     listOfNightsLST = np.sort(glob.glob(f"{stereo_DL1_dir}/DL1Stereo/*"))
     if cluster == 'SLURM':
         for nightLST in listOfNightsLST:
+            night = nightLST.split('/')[-1]
             stereoMergeDir = (
-                f"{stereo_DL1_dir}/DL1Stereo/{nightLST.split('/')[-1]}/Merged"
+                f"{stereo_DL1_dir}/DL1Stereo/{night}/Merged"
             )
             os.makedirs(f"{stereoMergeDir}/logs", exist_ok=True)
             if not os.listdir(f"{nightLST}"):
                 continue
-            if len(os.listdir(nightLST)) < 3:
+            if len(glob.glob("f{nightLST}/dl1_stereo*.h5") < 1:
                 continue

@@ -58,12 +59,12 @@ def MergeStereo(target_dir, env_name, source, NSB_match, cluster):
             lines = (
                 slurm
                 + [
-                    f"conda run -n {env_name} merge_hdf_files --input-dir {nightLST} --output-dir {stereoMergeDir} --run-wise >{stereoMergeDir}/logs/merge_{nightLST.split('/')[-1]}_${{SLURM_JOB_ID}}.log\n"
+                    f"conda run -n {env_name} merge_hdf_files --input-dir {nightLST} --output-dir {stereoMergeDir} --run-wise >{stereoMergeDir}/logs/merge_{night}_${{SLURM_JOB_ID}}.log\n"
                 ]
                 + rc
             )

-            with open(f"{source}_StereoMerge_{nightLST.split('/')[-1]}.sh", "w") as f:
+            with open(f"{source}_StereoMerge_{night}.sh", "w") as f:
                 f.writelines(lines)
     else:
         logger.warning('Automatic processing not implemented for the cluster indicated in the config file')
@@ -111,7 +112,7 @@ def main():
         source_list.append(source)

     for source_name in source_list:
-        print("***** Merging DL2 files run-wise...")
+        print("***** Merging DL1Stereo files run-wise...")
         MergeStereo(target_dir, env_name, source, NSB_match, cluster)

         list_of_merge = glob.glob(f"{source_name}_StereoMerge_*.sh")

From ec0ab01ab0935b4841328af138786843d647dd75 Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Wed, 17 Jul 2024 09:34:15 +0000
Subject: [PATCH 150/236] bugs, imports, pre-commit

---
 .../database_production/create_LST_table.py | 20 ++++++-----
 .../database_production/lstchain_version.py | 11 +++----
 .../database_production/nsb_level.py        |  6 ++--
 .../database_production/nsb_to_h5.py        |  8 +++--
 .../semi_automatic_scripts/dl1_production.py | 16 +++++----
 .../semi_automatic_scripts/list_from_h5.py  | 17 +++++-----
 .../semi_automatic_scripts/merging_runs.py  | 33 +++++++--------------
 .../semi_automatic_scripts/stereo_events.py |  8 ++---
 8 files changed, 57 insertions(+), 62 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
index 85f78875..b305d52d 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py
@@ -4,12 +4,16 @@
 Only the columns needed to produce the lists of LST runs to be processed are preserved, and two columns are added to store NSB level and error codes
 """

+import argparse
 import os

 import numpy as np
 import pandas as pd
+import yaml
+
 from magicctapipe.io import resource_file

+
 def main():

     """
@@ -43,14 +47,16 @@ def main():
     ) as fc:  # "rb" mode opens the file in binary format for reading
         config_dict = yaml.safe_load(fc)

-    out_h5=config_dict['database_paths']['LST']
-    out_key=config_dict['database_keys']['LST']
-
+    out_h5 = config_dict["database_paths"]["LST"]
+    out_key = config_dict["database_keys"]["LST"]
+
     df = pd.read_hdf(
-        config_dict['database_paths']['input_1'], key=config_dict['database_keys']['input_1']
+        config_dict["database_paths"]["input_1"],
+        key=config_dict["database_keys"]["input_1"],
     )  # TODO: put this file in a shared folder
     df2 = pd.read_hdf(
-        config_dict['database_paths']['input_2'], key=config_dict['database_keys']['input_2']
+        config_dict["database_paths"]["input_2"],
+        key=config_dict["database_keys"]["input_2"],
     )  # TODO: put this file in a shared folder
     df = pd.concat([df, df2]).drop_duplicates(subset="LST1_run", keep="first")
     if args.begin != 0:
@@ -77,9 +83,7 @@ def main():
     df_cut = df_cut.assign(error_code_coincidence=-1)
     df_cut = df_cut.assign(error_code_stereo=-1)

-    if os.path.isfile(
-        out_h5
-    ):
+    if os.path.isfile(out_h5):
         df_old = pd.read_hdf(
             out_h5,
             key=out_key,
         )
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py
index 4d170ceb..677ba00a 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py
@@ -7,6 +7,8 @@
 import os

 import pandas as pd
+import yaml
+
 from magicctapipe.io import resource_file

 lstchain_versions = ["v0.9", "v0.10"]
@@ -69,12 +71,9 @@ def main():
     ) as fc:  # "rb" mode opens the file in binary format for reading
         config_dict = yaml.safe_load(fc)

-    LST_h5=config_dict['database_paths']['LST']
-    LST_key=config_dict['database_keys']['LST']
-    df_LST = pd.read_hdf(
-        LST_h5,
-        key=LST_key
-    )
+    LST_h5 = config_dict["database_paths"]["LST"]
+    LST_key = config_dict["database_keys"]["LST"]
+    df_LST = pd.read_hdf(LST_h5, key=LST_key)

     version_lstchain(df_LST)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py
index e471ab45..be292433 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py
@@ -14,8 +14,8 @@
 import numpy as np
 import pandas as pd
 import yaml
-from magicctapipe.io import resource_file

+from magicctapipe.io import resource_file
 from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import slurm_lines

 from .lstchain_version import lstchain_versions
@@ -106,8 +106,8 @@ def main():
     ) as fc:  # "rb" mode opens the file in binary format for reading
         config_dict = yaml.safe_load(fc)

-    LST_h5=config_dict['database_paths']['LST']
-    LST_key=config_dict['database_keys']['LST']
+    LST_h5 = config_dict["database_paths"]["LST"]
+    LST_key = config_dict["database_keys"]["LST"]
     env_name = config["general"]["env_name"]

     cluster = config["general"]["cluster"]
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py
index 16772698..df788cd6 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py
@@ -9,6 +9,8 @@
 import numpy as np
 import pandas as pd
+import yaml
+
 from magicctapipe.io import resource_file

 __all__ = ["collect_nsb"]

@@ -44,7 +46,7 @@ def collect_nsb(df_LST):
             df_LST.loc[run, "nsb"] = float(nsb)
     df_LST = df_LST.reset_index()
-    return df_LSTreading/
+    return df_LST

 def main():

@@ -59,8 +61,8 @@ def main():
     ) as fc:  # "rb" mode opens the file in binary format for reading
         config_dict = yaml.safe_load(fc)

-    LST_h5=config_dict['database_paths']['LST']
-    LST_key=config_dict['database_keys']['LST']
+    LST_h5 = config_dict["database_paths"]["LST"]
+    LST_key = config_dict["database_keys"]["LST"]
     df_LST = pd.read_hdf(
         LST_h5,
         key=LST_key,
     )
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
index 43d0af94..163e1aec 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
@@ -501,11 +501,11 @@ def main():
                 os.system(launch_jobs_MC)
         # Here we do the MC DL0 to DL1 conversion:
     if (
-            (args.analysis_type == "onlyMAGIC")
-            or (args.analysis_type == "doEverything")
-            or (NSB_match)
-        ):
-
+        (args.analysis_type == "onlyMAGIC")
+        or (args.analysis_type == "doEverything")
+        or (NSB_match)
+    ):
+
         for source_name in source_list:

             MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt"
@@ -519,7 +519,9 @@ def main():
             print(
                 f"To check the jobs submitted to the cluster, type: squeue -n {source_name}"
             )
-            print("This process will take about 10 min to run if the IT cluster is free.")
+            print(
+                "This process will take about 10 min to run if the IT cluster is free."
+            )

             directories_generator_real(
                 str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name
             )  # Here we create all the necessary directories in the given workspace and collect the main directory of the target
@@ -529,7 +531,7 @@ def main():
             )  # TODO: fix here

             # Below we run the analysis on the MAGIC data
-
+
             lists_and_bash_gen_MAGIC(
                 target_dir,
                 telescope_ids,
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
index 2a44c68b..e38a45e0 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py
@@ -10,6 +10,7 @@
 import numpy as np
 import pandas as pd
 import yaml
+
 from magicctapipe.io import resource_file

@@ -221,11 +222,11 @@ def main():
     ) as fc:  # "rb" mode opens the file in binary format for reading
         config_dict = yaml.safe_load(fc)

-    LST_h5=config_dict['database_paths']['LST']
-    LST_key=config_dict['database_keys']['LST']
-    MAGIC_h5=config_dict['database_paths']['MAGIC']
-    MAGIC1_key=config_dict['database_keys']['MAGIC-I']
-    MAGIC2_key=config_dict['database_keys']['MAGIC-II']
+    LST_h5 = config_dict["database_paths"]["LST"]
+    LST_key = config_dict["database_keys"]["LST"]
+    MAGIC_h5 = config_dict["database_paths"]["MAGIC"]
+    MAGIC1_key = config_dict["database_keys"]["MAGIC-I"]
+    MAGIC2_key = config_dict["database_keys"]["MAGIC-II"]
     source_in = config["data_selection"]["source_name_database"]
     source_out = config["data_selection"]["source_name_output"]
     range = config["data_selection"]["time_range"]
@@ -286,8 +287,8 @@ def main():
         MAGIC_h5,
         key=MAGIC2_key,
     )
-    #df_MAGIC1["Source"] = df_MAGIC1["Source"].str.replace(" ", "")
-    #df_MAGIC2["Source"] = df_MAGIC2["Source"].str.replace(" ", "")
+    # df_MAGIC1["Source"] = df_MAGIC1["Source"].str.replace(" ", "")
+    # df_MAGIC2["Source"] = df_MAGIC2["Source"].str.replace(" ", "")

     list_date_LST = np.unique(df_LST["date_LST"])
     list_date_LST_low = [int(sub.replace("-", "")) for sub in list_date_LST]
@@ -301,7 +302,7 @@ def main():

     df_MAGIC2 = magic_date(df_MAGIC2)
     df_MAGIC1 = magic_date(df_MAGIC1)
-    #df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"})
+    # df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"})

     M1_runs = df_MAGIC1["Run ID"].tolist()
     if (len(M1_runs) == 0) or (len(df_MAGIC2) == 0):
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
index 998d4d97..fdb4a960 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py
@@ -85,8 +85,6 @@ def split_train_test(target_dir, train_fraction):
         Path to the working directory
     train_fraction : float
         Fraction of proton MC files to be used in the training RF dataset
-    source_name : str
-        Name of the target source
     """

     proton_dir = f"{target_dir}/v{__version__}/MC/DL1/protons"
@@ -237,13 +235,11 @@ def mergeMC(target_dir, identification, env_name, cluster):
         Tells which batch to create. Options: protons, gammadiffuse
     env_name : str
         Name of the environment
-    source_name : str
-        Name of the target source
     cluster : str
         Cluster system
     """

-    process_name = f"merging_MC"
+    process_name = "merging_MC"

     MC_DL1_dir = f"{target_dir}/v{__version__}/MC/DL1"
     os.makedirs(f"{MC_DL1_dir}/{identification}/Merged", exist_ok=True)
@@ -332,19 +328,15 @@ def main():
     else:
         source_list.append(source)
     if not NSB_match:
-        if (args.analysis_type == "onlyMC") or (
-            args.analysis_type == "doEverything"
-        ):
+        if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"):
             # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists):
-            if not os.path.exists(
-                f"{target_dir}/v{__version__}/MC/DL1/protons_test"
-            ):
+            if not os.path.exists(f"{target_dir}/v{__version__}/MC/DL1/protons_test"):
                 print("***** Splitting protons into 'train' and 'test' datasets...")
                 split_train_test(target_dir, train_fraction)

             print("***** Generating merge_MC bashscripts...")
             mergeMC(
-                target_dir, "protons", env_name,cluster
+                target_dir, "protons", env_name, cluster
             )  # generating the bash script to merge the files
             mergeMC(
                 target_dir, "gammadiffuse", env_name, cluster
             )  # generating the bash script to merge the files
@@ -361,9 +353,7 @@ def main():
             # Below we run the bash scripts to merge the MC files
             list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh"))
             if len(list_of_merging_scripts) < 1:
-                logger.warning(
-                    "No bash script has been produced for MC"
-                )
+                logger.warning("No bash script has been produced for MC")
             # TODO: check

             else:
@@ -374,14 +364,14 @@ def main():
                     ) + f"merging{n}=$(sbatch --parsable {run})"

                 os.system(launch_jobs)
-
+
     for source_name in source_list:
         # Below we run the analysis on the MC data
         MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt"
         MAGIC_runs = np.genfromtxt(
             MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2
         )
-
+
         # Below we run the analysis on the MAGIC data
         if (
             (args.analysis_type == "onlyMAGIC")
@@ -429,12 +419,11 @@ def main():
             launch_jobs = ""
             for n, run in enumerate(list_of_merging_scripts):
                 launch_jobs += (
-                    " && " if n > 0 else ""
-                ) + f"merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})"
+                    (" && " if n > 0 else "")
+                    + f"merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})"
+                )

-            os.system(launch_jobs)
-
-
+            os.system(launch_jobs)

             print(f"Process name: merging_{source_name}")
             print(
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
index 7b1bff75..d982dcae 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
@@ -273,8 +273,8 @@ def main():

         list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh"))
         launch_jobs = ""
-        #TODO: check on N. bash scripts
-
+        # TODO: check on N. bash scripts
+
         for n, run in enumerate(list_of_stereo_scripts):
             launch_jobs += (
                 " && " if n > 0 else ""
@@ -299,15 +299,13 @@ def main():
                 logger.warning("No bash scripts for real data")
                 continue
             launch_jobs = ""
-
-
+
             for n, run in enumerate(list_of_stereo_scripts):
                 launch_jobs += (
                     " && " if n > 0 else ""
                 ) + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})"

             os.system(launch_jobs)
-

 if __name__ == "__main__":

From 75859eb4d5eac162916a0273263978dbeb6585c6 Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Wed, 17 Jul 2024 13:35:08 +0000
Subject: [PATCH 151/236] MC out of source

---
 .../semi_automatic_scripts/dl1_production.py | 14 ++--
 .../semi_automatic_scripts/stereo_events.py  | 75 ++++++++++---------
 2 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
index 163e1aec..f1b08a76 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
@@ -500,13 +500,13 @@ def main():
                 launch_jobs_MC = f"linking=$(sbatch --parsable linking_MC_{particle}_paths.sh) && running=$(sbatch --parsable --dependency=afterany:$linking linking_MC_{particle}_paths_r.sh)"
                 os.system(launch_jobs_MC)
         # Here we do the MC DL0 to DL1 conversion:
-    if (
-        (args.analysis_type == "onlyMAGIC")
-        or (args.analysis_type == "doEverything")
-        or (NSB_match)
-    ):
-
-        for source_name in source_list:
+
+    for source_name in source_list:
+        if (
+            (args.analysis_type == "onlyMAGIC")
+            or (args.analysis_type == "doEverything")
+            or (NSB_match)
+        ):

             MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt"
             MAGIC_runs = np.genfromtxt(
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
index d982dcae..885924ce 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py
@@ -67,8 +67,10 @@ def configfile_stereo(target_dir, source_name, config_gen):
         "mc_tel_ids": config_gen["mc_tel_ids"],
         "stereo_reco": config_dict["stereo_reco"],
     }
-
-    file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml"
+    if source_name == "MC":
+        file_name = f"{target_dir}/v{__version__}/MC/config_stereo.yaml"
+    else:
+        file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml"
     with open(file_name, "w") as f:
         yaml.dump(conf, f, default_flow_style=False)

@@ -150,7 +152,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster):
             f.writelines(lines)

-def bash_stereoMC(target_dir, identification, env_name, source, cluster):
+def bash_stereoMC(target_dir, identification, env_name, cluster):

     """
     This function generates the bashscript for running the stereo analysis.

@@ -162,16 +164,14 @@ def bash_stereoMC(target_dir, identification, env_name, cluster):
     identification : str
         Particle name.
Options: protons, gammadiffuse, gammas, protons_test env_name : str - Name of the environment - source : str - Name of the target source + Name of the environment cluster : str Cluster system """ process_name = source - inputdir = f"{target_dir}/{source}/DL1/MC/{identification}/Merged" + inputdir = f"{target_dir}/v{__version__}/MC/DL1/{identification}/Merged" os.makedirs(f"{inputdir}/StereoMerged", exist_ok=True) os.system( @@ -198,7 +198,7 @@ def bash_stereoMC(target_dir, identification, env_name, source, cluster): "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/{source}/config_stereo.yaml >$LOG 2>&1", + f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/MC/config_stereo.yaml >$LOG 2>&1", ] f.writelines(lines) @@ -249,17 +249,13 @@ def main(): source_list = joblib.load("list_sources.dat") else: source_list = [source] - for source_name in source_list: - - print("***** Generating file config_stereo.yaml...") - configfile_stereo(target_dir, source_name, config) - - # Below we run the analysis on the MC data + if not NSB_match: if ( (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything") and not NSB_match ): + configfile_stereo(target_dir, 'MC', config) print("***** Generating the bashscript for MCs...") for part in [ "gammadiffuse", @@ -269,7 +265,7 @@ def main(): "helium", "electrons", ]: - bash_stereoMC(target_dir, part, env_name, source_name, cluster) + bash_stereoMC(target_dir, part, env_name, cluster) list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) launch_jobs = "" @@ -281,31 +277,42 @@ def main(): ) + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" os.system(launch_jobs) + for source_name in source_list: + if ( + (args.analysis_type == "onlyMAGIC") + or (args.analysis_type == "doEverything") + or (NSB_match) + ): + print("***** Generating file config_stereo.yaml...") + configfile_stereo(target_dir, source_name, config) - # Below we run the analysis on the real data + + - print("***** Generating the bashscript...") - bash_stereo(target_dir, source_name, env_name, NSB_match, cluster) + # Below we run the analysis on the real data - print("***** Submitting processess to the cluster...") - print(f"Process name: {source_name}_stereo") - print( - f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo" - ) + print("***** Generating the bashscript...") + bash_stereo(target_dir, source_name, env_name, NSB_match, cluster) - # Below we run the bash scripts to find the stereo events - list_of_stereo_scripts = np.sort(glob.glob(f"{source_name}_StereoEvents*.sh")) - if len(list_of_stereo_scripts) < 1: - logger.warning("No bash scripts for real data") - continue - launch_jobs = "" + print("***** Submitting processess to the cluster...") + print(f"Process name: {source_name}_stereo") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo" + ) + + # Below we run the bash scripts to find the stereo events + list_of_stereo_scripts = np.sort(glob.glob(f"{source_name}_StereoEvents*.sh")) + if len(list_of_stereo_scripts) < 1: + logger.warning("No bash scripts for real data") + continue + launch_jobs = "" - for n, run in enumerate(list_of_stereo_scripts): - 
launch_jobs += ( - " && " if n > 0 else "" - ) + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" + for n, run in enumerate(list_of_stereo_scripts): + launch_jobs += ( + " && " if n > 0 else "" + ) + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" - os.system(launch_jobs) + os.system(launch_jobs) if __name__ == "__main__": From 28c061cced54ba0714f1b0e5ad10bce6c8a00891 Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Wed, 17 Jul 2024 22:29:52 +0200 Subject: [PATCH 152/236] New version of the script with full documentation. --- .../check_MAGIC_runs.py | 136 ++++++++++-------- 1 file changed, 74 insertions(+), 62 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index 21fa917e..b28e740b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -1,12 +1,12 @@ """ This script allows to get information about every MAGIC run ID (and subruns) -in files (in a time interval) used for common data analysis (MAGIC1, MAGIC2, LST1). +in files (in a time interval) used for common data analysis (MAGIC1, MAGIC2, LST1). -The MAGIC files that can be used for analysis are located in the IT cluster +The MAGIC files that can be used for analysis are located in the IT cluster in the following directory: /fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD} -In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2. +In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2. 'YYYY', 'MM', and 'DD' specify the date. """ @@ -15,71 +15,82 @@ import pandas as pd + def fix_lists_and_convert(cell): """ An additional function necessary to organize lists in the function table_magic_runs. - The function remove brackets to avoid double lists and split on "][". + The function removes brackets to avoid double lists and splits on "][". - Parameters: + Parameters ---------- - cell: list - Lists of MAGIC runs from the date and the source. + cell : str + A string representing lists of MAGIC runs from the date and the source. + + Returns + ------- + list + A list of unique integers representing the MAGIC runs. """ - + parts = cell.replace("][", ",").strip("[]").split(",") return list(dict.fromkeys(int(item) for item in parts)) -def table_magic_runs(df, date_min, date_max): +def table_magic_runs(df, date_min, date_max): """ - Data selection from the general table with informations about MAGIC+LST1 observations. - - Parameters: - ----------- - df: table - Dataframe with general information about MAGIC+LST1 observations. - date_min: str + Generate a table with data filtered by the specified date range. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame with general information about MAGIC+LST1 observations. + date_min : str Start of the time interval (in LST convention). - date_max: str + date_max : str End of the time interval (in LST convention). + + Returns + ------- + pandas.DataFrame + A DataFrame filtered by the specified date range. 
""" - + df_selected_data = df.iloc[:, [2, 1, 25]] - df_selected_data.columns = ["DATE","source", "MAGIC_runs"] + df_selected_data.columns = ["DATE", "source", "MAGIC_runs"] grouped_data = df_selected_data.groupby(["DATE", "source"]) result_table = [] for (date, source), group in grouped_data: - if date >= date_min and date <= date_max: + if date >= date_min and date <= date_max: runs_combined = group["MAGIC_runs"].sum() - + result_table.append( {"DATE": date, "source": source, "MAGIC runs": runs_combined} ) - + result = pd.DataFrame(result_table) result["MAGIC runs"] = result["MAGIC runs"].apply(fix_lists_and_convert) return result -def existing_files(tel_id, date, source, magic_run): +def existing_files(tel_id, date, source, magic_run): """ Checking existing files on the IT cluster. - - Parameters: - ----------- - tel_id: int + + Parameters + ---------- + tel_id : int The telescope ID, which must be either 1 or 2. - date: str + date : str Date (in LST convention). - source: str + source : str Name of the source. - magic_run: int + magic_run : int The MAGIC run from the date and the source. """ - magic_run = str(magic_run) - + magic_run = str(magic_run) + date_obj = datetime.strptime(date, "%Y%m%d") date_obj += timedelta(days=1) new_date = datetime.strftime(date_obj, "%Y%m%d") @@ -87,12 +98,12 @@ def existing_files(tel_id, date, source, magic_run): MM = new_date[4:6] DD = new_date[6:8] Y = "_Y_" - - path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" - + + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" + if os.path.exists(path): - files = os.listdir(path) - count_with_run_id = 0 + files = os.listdir(path) + count_with_run_id = 0 # Counter for files that include the run_id. for filename in files: if Y in filename: @@ -102,24 +113,23 @@ def existing_files(tel_id, date, source, magic_run): count_with_run_id += 1 if count_with_run_id != 0: print(f"{date}\t{source}\t{magic_run}\t{count_with_run_id}") - -def missing_files(tel_id, date, source, magic_runs): + +def missing_files(tel_id, date, source, magic_runs): """ Checking missing files on the IT cluster. - - Parameters: - ----------- - tel_id: int + + Parameters + ---------- + tel_id : int The telescope ID, which must be either 1 or 2. - date: str + date : str Date (in LST convention). - source: str + source : str Name of the source. - magic_runs: list + magic_runs : list List of MAGIC runs from the date and the source. """ - date_obj = datetime.strptime(date, "%Y%m%d") date_obj += timedelta(days=1) new_date = datetime.strftime(date_obj, "%Y%m%d") @@ -127,15 +137,15 @@ def missing_files(tel_id, date, source, magic_runs): MM = new_date[4:6] DD = new_date[6:8] Y = "_Y_" - - path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" - + + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" + if os.path.exists(path): files = os.listdir(path) - count_with_source = 0 + count_with_source = 0 count_with_run_id = 0 - # Counter for files that include the source. We want to check if any file with the source was found. - # Counter for files that include the run_id. We want to check if any file with the run_id was found. + # Counter for files that include the source. We want to check if any file with the source was found. + # Counter for files that include the run_id. We want to check if any file with the run_id was found. 
for filename in files: if Y in filename: if new_date in filename: @@ -145,9 +155,9 @@ def missing_files(tel_id, date, source, magic_runs): run = str(run) if run in filename: count_with_run_id += 1 - if count_with_source == 0: + if count_with_source == 0: if tel_id == 1: - #Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files. + # Between 2022/09/04 - 2022/12/14 MAGIC 1 had a failure. Therefore we have to skip the range when we want to get information about missing files. if date <= "20220904" or date >= "20221214": print(f"No files found containing the source '{source}' on {date}") if tel_id == 2: @@ -159,34 +169,36 @@ def missing_files(tel_id, date, source, magic_runs): print(f"No run id: {run} found in the {source} on {date}.") else: print(f"No such file or directory: {date}") - + + def main(): """Main function.""" - + # TO DO : set time interval - format YYYYMMDD date_min = "20240601" date_max = "20240630" - + df = pd.read_hdf( "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5", - key="str/table" + key="str/table", ) tel_id = [1, 2] database = table_magic_runs(df, date_min, date_max) - database_exploded = database.explode("MAGIC runs") + database_exploded = database.explode("MAGIC runs") database_exploded_reset = database_exploded.reset_index(drop=True) for tel in tel_id: print(f"MAGIC {tel}") - print(f"DATE\tsource\tRun ID\t Subruns") + print("DATE\tsource\tRun ID\t Subruns") for index, row in database_exploded_reset.iterrows(): existing_files(tel, row["DATE"], row["source"], row["MAGIC runs"]) print() for index, row in database.iterrows(): missing_files(tel, row["DATE"], row["source"], row["MAGIC runs"]) print() - + + if __name__ == "__main__": main() From 6ff221aff691de902eae7705d6f13345f9d4e973 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 18 Jul 2024 08:20:56 +0000 Subject: [PATCH 153/236] minor fixes --- .../semi_automatic_scripts/dl1_production.py | 2 +- .../semi_automatic_scripts/stereo_events.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index f1b08a76..5beef168 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -500,7 +500,7 @@ def main(): launch_jobs_MC = f"linking=$(sbatch --parsable linking_MC_{particle}_paths.sh) && running=$(sbatch --parsable --dependency=afterany:$linking linking_MC_{particle}_paths_r.sh)" os.system(launch_jobs_MC) # Here we do the MC DL0 to DL1 conversion: - + for source_name in source_list: if ( (args.analysis_type == "onlyMAGIC") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 885924ce..fd8c8d41 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -130,6 +130,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): queue="short", job_name=f"{process_name}_stereo", array=process_size, + mem="2g", out_name=f"{stereoDir}/logs/slurm-%x.%A_%a", ) rc = rc_lines( @@ -164,12 +165,12 @@ def bash_stereoMC(target_dir, identification, env_name, cluster): identification : 
str Particle name. Options: protons, gammadiffuse, gammas, protons_test env_name : str - Name of the environment + Name of the environment cluster : str Cluster system """ - process_name = source + process_name = "stereo_MC" inputdir = f"{target_dir}/v{__version__}/MC/DL1/{identification}/Merged" os.makedirs(f"{inputdir}/StereoMerged", exist_ok=True) @@ -189,7 +190,7 @@ def bash_stereoMC(target_dir, identification, env_name, cluster): queue="xxl", job_name=f"{process_name}_stereo", array=f"{process_size}%100", - mem="8g", + mem="7g", out_name=f"{inputdir}/StereoMerged/logs/slurm-%x.%A_%a", ) lines = slurm + [ @@ -255,7 +256,7 @@ def main(): or (args.analysis_type == "doEverything") and not NSB_match ): - configfile_stereo(target_dir, 'MC', config) + configfile_stereo(target_dir, "MC", config) print("***** Generating the bashscript for MCs...") for part in [ "gammadiffuse", @@ -286,9 +287,6 @@ def main(): print("***** Generating file config_stereo.yaml...") configfile_stereo(target_dir, source_name, config) - - - # Below we run the analysis on the real data print("***** Generating the bashscript...") @@ -301,7 +299,9 @@ def main(): ) # Below we run the bash scripts to find the stereo events - list_of_stereo_scripts = np.sort(glob.glob(f"{source_name}_StereoEvents*.sh")) + list_of_stereo_scripts = np.sort( + glob.glob(f"{source_name}_StereoEvents*.sh") + ) if len(list_of_stereo_scripts) < 1: logger.warning("No bash scripts for real data") continue From c1d11c205be64e0807a32a7e1eac73457b907619 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 18 Jul 2024 09:44:13 +0000 Subject: [PATCH 154/236] easier directory structure --- .../semi_automatic_scripts/coincident_events.py | 13 ++++--------- .../semi_automatic_scripts/dl1_production.py | 14 +++++++------- .../semi_automatic_scripts/job_accounting.py | 4 ++-- .../semi_automatic_scripts/merging_runs.py | 8 +++----- .../semi_automatic_scripts/stereo_events.py | 7 +++---- 5 files changed, 19 insertions(+), 27 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 8a150097..a2df81fc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -98,16 +98,11 @@ def linking_bash_lst( Cluster system """ - if NSB_match: - coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" - - MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" - else: - coincidence_DL1_dir = ( - f"{target_dir}/v{__version__}/{source_name}/DL1/Observations" - ) - MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1/Observations/" + + coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" + MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" + dates = [os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*")] if cluster != "SLURM": logger.warning( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 5beef168..4cf84ca0 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -251,7 +251,7 @@ def lists_and_bash_gen_MAGIC( out_name=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", ) - obs_tag = "" if NSB_match else 
"Observations" + with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: f.writelines(lines) for i in MAGIC_runs: @@ -260,7 +260,7 @@ def lists_and_bash_gen_MAGIC( if telescope_ids[magic - 3] > 0: lines = [ f'export IN1=/fefs/onsite/common/MAGIC/data/M{magic}/event/Calibrated/{i[0].replace("_","/")}\n', - f"export OUT1={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs \n", + f"export OUT1={target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}/logs \n", f"ls $IN1/*{i[1][-2:]}.*_Y_*.root > $OUT1/list_cal.txt\n\n", ] f.writelines(lines) @@ -280,7 +280,7 @@ def lists_and_bash_gen_MAGIC( job_name=process_name, array=number_of_nodes, mem="2g", - out_name=f"{target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match + out_name=f"{target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match ) rc = rc_lines( store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", @@ -289,7 +289,7 @@ def lists_and_bash_gen_MAGIC( lines = ( slurm + [ # without version for no NSB_match - f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/{obs_tag}/M{magic}/{i[0]}/{i[1]}\n", + f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_cal.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", "export LOG=$OUTPUTDIR/logs/real_0_1_task_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", @@ -329,10 +329,10 @@ def directories_generator_real( dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") else: - dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1/Observations") + dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1/Observations", + f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True, ) @@ -343,7 +343,7 @@ def directories_generator_real( if overwrite == "y": os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1/Observations", + f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True, ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 5fca0969..91abebe5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -107,9 +107,9 @@ def main(): print("Error, no directories found") print(f"for path {work_dir} found in {args.config_file} this is available") print(f"Versions {versions}") - tag = "" if NSB_matching else "/Observations" + print( - f"Supported data types: DL1{tag}/M1, DL1{tag}/M2, DL1{tag}/Merged, DL1Coincident, DL1Stereo" + f"Supported data types: DL1/M1, DL1/M2, DL1/Merged, DL1Coincident, DL1Stereo" ) exit(1) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index fdb4a960..036aee4b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -9,11 +9,10 @@ 1) Merge the subruns into runs for M1 and M2 individually. 2) Merge the runs of M1 and M2 into M1-M2 runs. 
3) Merge all the M1-M2 runs for a given night. -Workingdir/DL1/Observations/Merged + MC: -1) Merges all MC runs in a node and save them at -Workingdir/DL1/MC/PARTICLE/Merged +1) Merges all MC runs in a node Usage: $ merging_runs (-c config.yaml) @@ -143,8 +142,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c process_name = f"merging_{source}" MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" - if not NSB_match: - MAGIC_DL1_dir += "Observations/" + if cluster != "SLURM": logger.warning( "Automatic processing not implemented for the cluster indicated in the config file" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index fd8c8d41..118e081a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -97,10 +97,9 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): process_name = source - if NSB_match: - coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}" - else: - coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/Observations" + + coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}" + listOfNightsLST = np.sort(glob.glob(f"{coincidence_DL1_dir}/DL1Coincident/*")) if cluster != "SLURM": From c06684a89af4bec34042cce0dcf8ae8df878ef32 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 18 Jul 2024 09:49:21 +0000 Subject: [PATCH 155/236] CI fixes --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 3 +-- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 1 - .../lst1_magic/semi_automatic_scripts/job_accounting.py | 5 ++--- .../lst1_magic/semi_automatic_scripts/merging_runs.py | 5 ++--- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 2 -- 5 files changed, 5 insertions(+), 11 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index a2df81fc..7a1f24ad 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -98,11 +98,10 @@ def linking_bash_lst( Cluster system """ - coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source_name}" MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" - + dates = [os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*")] if cluster != "SLURM": logger.warning( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 4cf84ca0..6505ed33 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -251,7 +251,6 @@ def lists_and_bash_gen_MAGIC( out_name=f"{target_dir}/v{__version__}/{source}/DL1/slurm-linkMAGIC-%x.%j", ) - with open(f"{source}_linking_MAGIC_data_paths.sh", "w") as f: f.writelines(lines) for i in MAGIC_runs: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 91abebe5..e4bc892b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -89,7 +89,6 @@ def main(): # skip_LST = config["data_selection"]["skip_LST_runs"] # skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"] - NSB_matching = config["general"]["NSB_matching"] work_dir = config["directories"]["workspace_dir"] print(f"Checking progress of jobs stored in {work_dir}") @@ -107,9 +106,9 @@ def main(): print("Error, no directories found") print(f"for path {work_dir} found in {args.config_file} this is available") print(f"Versions {versions}") - + print( - f"Supported data types: DL1/M1, DL1/M2, DL1/Merged, DL1Coincident, DL1Stereo" + "Supported data types: DL1/M1, DL1/M2, DL1/Merged, DL1Coincident, DL1Stereo" ) exit(1) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 036aee4b..03143c96 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -10,9 +10,8 @@ 2) Merge the runs of M1 and M2 into M1-M2 runs. 3) Merge all the M1-M2 runs for a given night. - MC: -1) Merges all MC runs in a node +1) Merges all MC runs in a node Usage: $ merging_runs (-c config.yaml) @@ -142,7 +141,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c process_name = f"merging_{source}" MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/" - + if cluster != "SLURM": logger.warning( "Automatic processing not implemented for the cluster indicated in the config file" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 118e081a..c2b31946 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -97,9 +97,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): process_name = source - coincidence_DL1_dir = f"{target_dir}/v{__version__}/{source}" - listOfNightsLST = np.sort(glob.glob(f"{coincidence_DL1_dir}/DL1Coincident/*")) if cluster != "SLURM": From 872f29df6cbbac9b990797cf4a9315da917e9df5 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Thu, 18 Jul 2024 14:33:58 +0200 Subject: [PATCH 156/236] Update environment.yml --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 55a3fd16..6f6354f1 100644 --- a/environment.yml +++ b/environment.yml @@ -38,7 +38,7 @@ dependencies: - pyyaml - scipy~=1.11.4 - scikit-learn=1.2 - - setuptools + - setuptools<=71 - sphinx - sphinx-automodapi - sphinx-design From 8c801c00e1f2ed5dfafbfed2352a44b0d4400967 Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Fri, 19 Jul 2024 20:29:45 +0200 Subject: [PATCH 157/236] Added new script for automatically updating the MAGIC database. 
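
The new script below detects runs that appear in the common MAGIC+LST1 summary table but not yet in the MAGIC database by doing a left merge with an indicator column and keeping the "left_only" rows. A minimal sketch of that anti-join pattern on toy data:

    import pandas as pd

    new = pd.DataFrame({"DATE": ["20240601", "20240602"], "Run ID": [1, 2]})
    old = pd.DataFrame({"DATE": ["20240601"], "Run ID": [1]})

    # indicator=True adds a "_merge" column; "left_only" marks rows absent from old.
    merged = new.merge(old, on=["DATE", "Run ID"], how="left", indicator=True)
    missing = merged[merged["_merge"] == "left_only"].drop(columns="_merge")
    print(missing)  # only the 20240602 run survives
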
---
 .../update_MAGIC_database.py | 300 ++++++++++++++++++
 1 file changed, 300 insertions(+)
 create mode 100644 magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py
new file mode 100644
index 00000000..eb4453cc
--- /dev/null
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py
@@ -0,0 +1,300 @@
+"""
+The script updates the common MAGIC database from a given time range.
+At the moment, to avoid accidentally destroying the previous database,
+we save the updated database as a new file. If the path to the database is not found,
+the script creates a new one. The start of the time interval
+is the date of the beginning of the common MAGIC+LST1 observations.
+The end of the time interval is the current date.
+
+The MAGIC files that can be used for analysis are located in the IT cluster
+in the following directory:
+/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}
+
+In this path, 'tel_id' refers to the telescope ID, which must be either 1 or 2.
+'YYYY', 'MM', and 'DD' specify the date.
+"""
+
+import os
+
+import numpy as np
+import pandas as pd
+from datetime import datetime, timedelta
+
+def fix_lists_and_convert(cell):
+    """
+    An additional function necessary to organize lists in the function table_magic_runs.
+    The function remove brackets to avoid double lists and split on "][".
+
+    Parameters
+    ----------
+    cell: list
+        List of MAGIC runs from the date and the source.
+
+    Returns
+    -------
+    list
+        A list of unique integers representing the MAGIC runs.
+    """
+
+    parts = cell.replace("][", ",").strip("[]").split(",")
+    return list(dict.fromkeys(int(item) for item in parts))
+
+def table_magic_runs(df, date_min, date_max):
+
+    """
+    Generate a table with data filtered by the specified date range.
+
+    Parameters
+    -----------
+    df: pandas.DataFrame
+        Dataframe with general information about MAGIC+LST1 observations.
+    date_min: str
+        Start of the time interval (in LST convention).
+    date_max: str
+        End of the time interval (in LST convention).
+    """
+
+    df_selected_data = df.iloc[:, [2, 1, 25]]
+    df_selected_data.columns = ["DATE","source", "MAGIC_runs"]
+    grouped_data = df_selected_data.groupby(["DATE", "source"])
+    result_table = []
+
+    for (date, source), group in grouped_data:
+        if date >= date_min and date <= date_max:
+            runs_combined = group["MAGIC_runs"].sum()
+
+            result_table.append(
+                {"DATE": date, "source": source, "Run ID": runs_combined}
+            )
+
+    result = pd.DataFrame(result_table)
+    result["Run ID"] = result["Run ID"].apply(fix_lists_and_convert)
+    result_exploded = result.explode("Run ID")
+    result_exploded_reset = result_exploded.reset_index(drop=True)
+    return result_exploded_reset
+
+def update_tables(database, DF, tel_id):
+    """
+    Updating the MAGIC database by comparison of data that are only in
+    common MAGIC+LST1 database and not in the MAGIC database.
+    Then, the function checks existing files and counts number of subruns.
+    Data are added chronologically.
+
+    The updated table DF may include new rows that contain NaN values in some cells.
+    The function automatically filling NaN values withpredefined default values
+    based on the column's data type.
+
+    Parameters
+    -----------
+    database : pandas.DataFrame
+        Table with informations about MAGIC runs from the date and the source from given time interval.
+    DF : pandas.DataFrame
+        The previous MAGIC database which we want to update.
+    tel_id : int
+        The telescope ID, which must be either 1 or 2.
+    """
+
+    database["DATE"] = database["DATE"].astype(str)
+    DF["DATE"] = DF["DATE"].astype(str)
+    columns_to_compare = ["DATE", "source", "Run ID"]
+    merged_df = pd.merge(database, DF[columns_to_compare], on=columns_to_compare, how='left', indicator=True)
+    non_matching_rows = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"])
+
+    if non_matching_rows.empty:
+        raise Exception("There is no un-updated data for a given time interval. ")
+    else:
+
+        non_matching_rows_reset = non_matching_rows.reset_index(drop=True)
+        new_rows = []
+
+        for index, row in non_matching_rows_reset.iterrows():
+            date = row["DATE"]
+            source = row["source"]
+            run_id = row["Run ID"]
+            run_id = str(run_id)
+
+            date_obj = datetime.strptime(date, "%Y%m%d")
+            date_obj += timedelta(days=1)
+            new_date = datetime.strftime(date_obj, "%Y%m%d")
+            YYYY = new_date[:4]
+            MM = new_date[4:6]
+            DD = new_date[6:8]
+            Y = "_Y_"
+
+            path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}"
+
+            if os.path.exists(path):
+                files = os.listdir(path)
+                count_with_run_id = 0
+                # Counter for files that include the run_id.
+                for filename in files:
+                    if Y in filename:
+                        if new_date in filename:
+                            if source in filename:
+                                if run_id in filename:
+                                    count_with_run_id += 1
+                if count_with_run_id != 0:
+                    new_rows.append(
+                        {"DATE": date, "source": source, "Run ID": run_id, "number of subruns": count_with_run_id}
+                    )
+
+        new_rows = pd.DataFrame(new_rows)
+        new_rows['DATE'] = pd.to_datetime(new_rows['DATE'])
+        combined_df = pd.concat([DF, new_rows], ignore_index=True)
+        combined_df['DATE'] = pd.to_datetime(combined_df['DATE'], errors='coerce')
+        combined_df = combined_df.sort_values('DATE')
+
+        combined_df['DATE'] = combined_df['DATE'].dt.strftime('%Y%m%d')
+        combined_df['Run ID'] = combined_df['Run ID'].astype(int)
+        combined_df.reset_index(drop=True, inplace=True)
+
+        for column in combined_df.columns[4:]:
+            not_null_data = combined_df[column].dropna()
+            if not_null_data.empty:
+                continue # Skip if all values are NaN
+
+            inferred_type = pd.api.types.infer_dtype(not_null_data, skipna=True)
+
+            if inferred_type == 'boolean':
+                default_value = False
+            elif inferred_type == 'integer':
+                default_value = 0
+            elif inferred_type == 'floating':
+                default_value = 0.0
+            elif inferred_type == 'string':
+                default_value = 'NaN'
+            else:
+                continue
+
+            combined_df[column] = combined_df[column].fillna(default_value).astype(type(not_null_data.iloc[0]))
+
+        combined_df = combined_df.infer_objects()
+
+        return combined_df
+
+def create_new_database(df, date_min, date_max, tel_id):
+    """
+    Creating a new MAGIC database.
+
+    Parameters
+    -----------
+    df: pandas.DataFrame
+        Dataframe with general information about MAGIC+LST1 observations.
+    date_min: str
+        Start of the time interval (in LST convention).
+    date_max: str
+        End of the time interval (in LST convention).
+    tel_id : int
+        The telescope ID, which must be either 1 or 2.
+    """
+
+    database = table_magic_runs(df, date_min, date_max)
+    new_rows = []
+
+    for index, row in database.iterrows():
+        date = row["DATE"]
+        source = row["source"]
+        run_id = row["Run ID"]
+        run_id = str(run_id)
+
+        date_obj = datetime.strptime(date, "%Y%m%d")
+        date_obj += timedelta(days=1)
+        new_date = datetime.strftime(date_obj, "%Y%m%d")
+        YYYY = new_date[:4]
+        MM = new_date[4:6]
+        DD = new_date[6:8]
+        Y = "_Y_"
+
+        path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}"
+
+        if os.path.exists(path):
+            files = os.listdir(path)
+            count_with_run_id = 0
+            # Counter for files that include the run_id.
+            for filename in files:
+                if Y in filename:
+                    if new_date in filename:
+                        if source in filename:
+                            if run_id in filename:
+                                count_with_run_id += 1
+            if count_with_run_id != 0:
+                new_rows.append(
+                    {"DATE": date, "source": source, "Run ID": run_id, "number of subruns": count_with_run_id}
+                )
+
+    new_rows = pd.DataFrame(new_rows)
+
+    return new_rows
+
+def main():
+
+    """Main function."""
+
+    tel_id = [1,2]
+
+    df = pd.read_hdf(
+        "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5",
+        key="str/table",
+    )
+
+    # Set "" to generate a new database.
+    previous_database_path = "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5"
+    file_exists = os.path.exists(previous_database_path)
+
+    if file_exists:
+
+        # TO DO : set time interval - format YYYYMMDD
+        date_min = "20240601"
+        date_max = "20240718"
+
+        database = table_magic_runs(df, date_min, date_max)
+
+        for tel in tel_id :
+
+            DF = pd.read_hdf(
+                previous_database_path,
+                key=f"MAGIC{tel}/runs_M{tel}",
+            )
+
+            if tel == 1:
+                updated_df_1 = update_tables(database, DF, tel)
+                print(updated_df_1)
+            if tel == 2:
+                updated_df_2 = update_tables(database, DF, tel)
+                print(updated_df_2)
+        # TO DO : set a path to save new database
+        new_h5_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/output/update_database.h5"
+
+        try:
+            updated_df_1.to_hdf(new_h5_file_path, key='MAGIC1/runs_M1', mode='w', format='table')
+            updated_df_2.to_hdf(new_h5_file_path, key='MAGIC2/runs_M2', mode='a', format='table')
+            print(f"File saved successfully at {new_h5_file_path}")
+
+        except Exception as e:
+            print(f"An error occurred: {e}")
+
+    else:
+        print("Database does not exist. Creating a new database...")
+
+        date_min = "20191101"
+        current_datetime = datetime.now()
+        date_max = current_datetime.strftime("%Y%m%d")
+
+        tel_id_M1 = 1
+        tel_id_M2 = 2
+        database_M1 = create_new_database(df, date_min, date_max, tel_id_M1)
+        database_M2 = create_new_database(df, date_min, date_max, tel_id_M2)
+
+        # TO DO : set a path to save a new database
+        new_database_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/output/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5"
+
+        try:
+            database_M1.to_hdf(new_database_file_path, key='MAGIC1/runs_M1', mode='w', format='table')
+            database_M2.to_hdf(new_database_file_path, key='MAGIC2/runs_M2', mode='a', format='table')
+            print(f"File saved successfully at {new_database_file_path}")
+
+        except Exception as e:
+            print(f"An error occurred: {e}")
+
+if __name__ == "__main__":
+    main()

From 8aae607f82b846795ddb477b179150e161c5eccb Mon Sep 17 00:00:00 2001
From: Joanna Wojtowicz
Date: Sat, 20 Jul 2024 14:11:52 +0200
Subject: [PATCH 158/236] The updated script script update_MAGIC_runs.py (lint fixes).

--- .../update_MAGIC_database.py | 226 +++++++++++------- 1 file changed, 134 insertions(+), 92 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index eb4453cc..464c0bf2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -6,7 +6,7 @@ is the date of the beginning of the common MAGIC+LST1 observations. The end of the time interval is the current date. -The MAGIC files that can be used for analysis are located in the IT cluster +The MAGIC files that can be used for analysis are located in the IT cluster in the following directory: /fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD} @@ -15,10 +15,9 @@ """ import os +from datetime import datetime, timedelta -import numpy as np import pandas as pd -from datetime import datetime, timedelta def fix_lists_and_convert(cell): """ @@ -26,8 +25,8 @@ def fix_lists_and_convert(cell): The function remove brackets to avoid double lists and split on "][". Parameters - ---------- - cell: list + ----------- + cell : str List of MAGIC runs from the date and the source. Returns @@ -43,19 +42,24 @@ def table_magic_runs(df, date_min, date_max): """ Generate a table with data filtered by the specified date range. - + Parameters - ----------- - df: pandas.DataFrame - Dataframe with general information about MAGIC+LST1 observations. - date_min: str + ---------- + df : pandas.DataFrame + DataFrame with general information about MAGIC+LST1 observations. + date_min : str Start of the time interval (in LST convention). - date_max: str + date_max : str End of the time interval (in LST convention). + + Returns + ------- + pandas.DataFrame + A DataFrame filtered by the specified date range. """ df_selected_data = df.iloc[:, [2, 1, 25]] - df_selected_data.columns = ["DATE","source", "MAGIC_runs"] + df_selected_data.columns = ["DATE", "source", "MAGIC_runs"] grouped_data = df_selected_data.groupby(["DATE", "source"]) result_table = [] @@ -69,11 +73,11 @@ def table_magic_runs(df, date_min, date_max): result = pd.DataFrame(result_table) result["Run ID"] = result["Run ID"].apply(fix_lists_and_convert) - result_exploded = result.explode("Run ID") + result_exploded = result.explode("Run ID") result_exploded_reset = result_exploded.reset_index(drop=True) return result_exploded_reset -def update_tables(database, DF, tel_id): +def update_tables(database, DF, tel_id): """ Updating the MAGIC database by comparison of data that are only in common MAGIC+LST1 database and not in the MAGIC database. @@ -92,13 +96,26 @@ def update_tables(database, DF, tel_id): The previous MAGIC database which we want to update. tel_id : int The telescope ID, which must be either 1 or 2. + + Returns + ------- + pandas.DataFrame + A DataFrame with updated MAGIC database. 
""" database["DATE"] = database["DATE"].astype(str) DF["DATE"] = DF["DATE"].astype(str) columns_to_compare = ["DATE", "source", "Run ID"] - merged_df = pd.merge(database, DF[columns_to_compare], on=columns_to_compare, how='left', indicator=True) - non_matching_rows = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"]) + merged_df = pd.merge( + database, + DF[columns_to_compare], + on=columns_to_compare, + how='left', + indicator=True, + ) + non_matching_rows = merged_df[merged_df["_merge"] == "left_only"].drop( + columns=["_merge"] + ) if non_matching_rows.empty: raise Exception("There is no un-updated data for a given time interval. ") @@ -108,44 +125,49 @@ def update_tables(database, DF, tel_id): new_rows = [] for index, row in non_matching_rows_reset.iterrows(): - date = row["DATE"] - source = row["source"] - run_id = row["Run ID"] - run_id = str(run_id) + date = row["DATE"] + source = row["source"] + run_id = row["Run ID"] + run_id = str(run_id) - date_obj = datetime.strptime(date, "%Y%m%d") - date_obj += timedelta(days=1) - new_date = datetime.strftime(date_obj, "%Y%m%d") - YYYY = new_date[:4] - MM = new_date[4:6] - DD = new_date[6:8] - Y = "_Y_" + date_obj = datetime.strptime(date, "%Y%m%d") + date_obj += timedelta(days=1) + new_date = datetime.strftime(date_obj, "%Y%m%d") + YYYY = new_date[:4] + MM = new_date[4:6] + DD = new_date[6:8] + Y = "_Y_" - path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" - if os.path.exists(path): - files = os.listdir(path) - count_with_run_id = 0 - # Counter for files that include the run_id. - for filename in files: - if Y in filename: - if new_date in filename: - if source in filename: - if run_id in filename: - count_with_run_id += 1 - if count_with_run_id != 0: - new_rows.append( - {"DATE": date, "source": source, "Run ID": run_id, "number of subruns": count_with_run_id} - ) + if os.path.exists(path): + files = os.listdir(path) + count_with_run_id = 0 + # Counter for files that include the run_id. 
+ for filename in files: + if Y in filename: + if new_date in filename: + if source in filename: + if run_id in filename: + count_with_run_id += 1 + if count_with_run_id != 0: + new_rows.append( + { + "DATE": date, + "source": source, + "Run ID": run_id, + "number of subruns": count_with_run_id + } + ) new_rows = pd.DataFrame(new_rows) - new_rows['DATE'] = pd.to_datetime(new_rows['DATE']) + new_rows["DATE"] = pd.to_datetime(new_rows["DATE"]) combined_df = pd.concat([DF, new_rows], ignore_index=True) - combined_df['DATE'] = pd.to_datetime(combined_df['DATE'], errors='coerce') - combined_df = combined_df.sort_values('DATE') + combined_df["DATE"] = pd.to_datetime(combined_df["DATE"], errors="coerce") + combined_df = combined_df.sort_values("DATE") - combined_df['DATE'] = combined_df['DATE'].dt.strftime('%Y%m%d') - combined_df['Run ID'] = combined_df['Run ID'].astype(int) + combined_df["DATE"] = combined_df["DATE"].dt.strftime("%Y%m%d") + combined_df["Run ID"] = combined_df["Run ID"].astype(int) combined_df.reset_index(drop=True, inplace=True) for column in combined_df.columns[4:]: @@ -155,18 +177,22 @@ def update_tables(database, DF, tel_id): inferred_type = pd.api.types.infer_dtype(not_null_data, skipna=True) - if inferred_type == 'boolean': + if inferred_type == "boolean": default_value = False - elif inferred_type == 'integer': + elif inferred_type == "integer": default_value = 0 - elif inferred_type == 'floating': + elif inferred_type == "floating": default_value = 0.0 - elif inferred_type == 'string': - default_value = 'NaN' + elif inferred_type == "string": + default_value = "NaN" else: continue - combined_df[column] = combined_df[column].fillna(default_value).astype(type(not_null_data.iloc[0])) + combined_df[column] = ( + combined_df[column] + .fillna(default_value) + .astype(type(not_null_data.iloc[0])) + ) combined_df = combined_df.infer_objects() @@ -178,11 +204,11 @@ def create_new_database(df, date_min, date_max, tel_id): Parameters ----------- - df: pandas.DataFrame + df : pandas.DataFrame Dataframe with general information about MAGIC+LST1 observations. - date_min: str + date_min : str Start of the time interval (in LST convention). - date_max: str + date_max : str End of the time interval (in LST convention). tel_id : int The telescope ID, which must be either 1 or 2. @@ -192,35 +218,40 @@ def create_new_database(df, date_min, date_max, tel_id): new_rows = [] for index, row in database.iterrows(): - date = row["DATE"] - source = row["source"] - run_id = row["Run ID"] - run_id = str(run_id) + date = row["DATE"] + source = row["source"] + run_id = row["Run ID"] + run_id = str(run_id) - date_obj = datetime.strptime(date, "%Y%m%d") - date_obj += timedelta(days=1) - new_date = datetime.strftime(date_obj, "%Y%m%d") - YYYY = new_date[:4] - MM = new_date[4:6] - DD = new_date[6:8] - Y = "_Y_" + date_obj = datetime.strptime(date, "%Y%m%d") + date_obj += timedelta(days=1) + new_date = datetime.strftime(date_obj, "%Y%m%d") + YYYY = new_date[:4] + MM = new_date[4:6] + DD = new_date[6:8] + Y = "_Y_" - path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" - if os.path.exists(path): - files = os.listdir(path) - count_with_run_id = 0 - # Counter for files that include the run_id. 
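
The dtype-driven defaults introduced in update_tables can be exercised in isolation; a toy illustration of how pandas dtype inference selects the fill value (same mapping as above):

    import pandas as pd

    s = pd.Series([True, None, False])
    kind = pd.api.types.infer_dtype(s.dropna(), skipna=True)  # "boolean" here
    defaults = {"boolean": False, "integer": 0, "floating": 0.0, "string": "NaN"}
    filled = s.fillna(defaults[kind]) if kind in defaults else s
    print(filled.tolist())  # [True, False, False]
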
- for filename in files: - if Y in filename: - if new_date in filename: - if source in filename: - if run_id in filename: - count_with_run_id += 1 - if count_with_run_id != 0: - new_rows.append( - {"DATE": date, "source": source, "Run ID": run_id, "number of subruns": count_with_run_id} - ) + if os.path.exists(path): + files = os.listdir(path) + count_with_run_id = 0 + # Counter for files that include the run_id. + for filename in files: + if Y in filename: + if new_date in filename: + if source in filename: + if run_id in filename: + count_with_run_id += 1 + if count_with_run_id != 0: + new_rows.append( + { + "DATE": date, + "source": source, + "Run ID": run_id, + "number of subruns": count_with_run_id + } + ) new_rows = pd.DataFrame(new_rows) @@ -230,7 +261,7 @@ def main(): """Main function.""" - tel_id = [1,2] + tel_id = [1, 2] df = pd.read_hdf( "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5", @@ -247,14 +278,16 @@ def main(): date_min = "20240601" date_max = "20240718" + print("Updating database...") + database = table_magic_runs(df, date_min, date_max) - for tel in tel_id : + for tel in tel_id: DF = pd.read_hdf( - previous_database_path, - key=f"MAGIC{tel}/runs_M{tel}", - ) + previous_database_path, + key=f"MAGIC{tel}/runs_M{tel}", + ) if tel == 1: updated_df_1 = update_tables(database, DF, tel) @@ -263,11 +296,17 @@ def main(): updated_df_2 = update_tables(database, DF, tel) print(updated_df_2) # TO DO : set a path to save new database - new_h5_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/output/update_database.h5" + new_h5_file_path = ( + "/fefs/aswg/workspace/joanna.wojtowicz/output/update_database.h5" + ) try: - updated_df_1.to_hdf(new_h5_file_path, key='MAGIC1/runs_M1', mode='w', format='table') - updated_df_2.to_hdf(new_h5_file_path, key='MAGIC2/runs_M2', mode='a', format='table') + updated_df_1.to_hdf( + new_h5_file_path, key="MAGIC1/runs_M1", mode="w", format="table" + ) + updated_df_2.to_hdf( + new_h5_file_path, key="MAGIC2/runs_M2", mode="a", format="table" + ) print(f"File saved successfully at {new_h5_file_path}") except Exception as e: @@ -289,8 +328,11 @@ def main(): new_database_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/output/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5" try: - database_M1.to_hdf(new_database_file_path, key='MAGIC1/runs_M1', mode='w', format='table') - database_M2.to_hdf(new_database_file_path, key='MAGIC2/runs_M2', mode='a', format='table') + database_M1.to_hdf( + new_database_file_path, key="MAGIC1/runs_M1", mode="w", format="table" + ) + database_M2.to_hdf(new_database_file_path, key="MAGIC2/runs_M2", mode="a", format="table" + ) print(f"File saved successfully at {new_database_file_path}") except Exception as e: From 61489f057eb7aea2625ca3b91d6fd81d00f7f397 Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Sat, 20 Jul 2024 15:15:40 +0200 Subject: [PATCH 159/236] The updated script script update_MAGIC_runs.py (lint fixes). 
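
Both branches of main() above store the two telescopes in a single HDF5 file under separate keys: the first to_hdf call uses mode="w" to create or truncate the file, the second uses mode="a" to append another key to it. A minimal sketch of the pattern (needs PyTables installed; the file name is illustrative):

    import pandas as pd

    df1 = pd.DataFrame({"Run ID": [1]})
    df2 = pd.DataFrame({"Run ID": [2]})

    df1.to_hdf("runs.h5", key="MAGIC1/runs_M1", mode="w", format="table")  # create/truncate
    df2.to_hdf("runs.h5", key="MAGIC2/runs_M2", mode="a", format="table")  # append new key

    print(pd.read_hdf("runs.h5", key="MAGIC2/runs_M2"))
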
--- .../update_MAGIC_database.py | 122 ++++++++++-------- 1 file changed, 65 insertions(+), 57 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index 464c0bf2..401f1b06 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -19,6 +19,7 @@ import pandas as pd + def fix_lists_and_convert(cell): """ An additional function necessary to organize lists in the function table_magic_runs. @@ -28,13 +29,13 @@ def fix_lists_and_convert(cell): ----------- cell : str List of MAGIC runs from the date and the source. - + Returns ------- list A list of unique integers representing the MAGIC runs. """ - + parts = cell.replace("][", ",").strip("[]").split(",") return list(dict.fromkeys(int(item) for item in parts)) @@ -64,30 +65,31 @@ def table_magic_runs(df, date_min, date_max): result_table = [] for (date, source), group in grouped_data: - if date >= date_min and date <= date_max: + if date >= date_min and date <= date_max: runs_combined = group["MAGIC_runs"].sum() - + result_table.append( {"DATE": date, "source": source, "Run ID": runs_combined} ) - + result = pd.DataFrame(result_table) result["Run ID"] = result["Run ID"].apply(fix_lists_and_convert) result_exploded = result.explode("Run ID") result_exploded_reset = result_exploded.reset_index(drop=True) return result_exploded_reset -def update_tables(database, DF, tel_id): + +def update_tables(database, DF, tel_id): """ - Updating the MAGIC database by comparison of data that are only in + Updating the MAGIC database by comparison of data that are only in common MAGIC+LST1 database and not in the MAGIC database. Then, the function checks existing files and counts number of subruns. - Data are added chronologically. - + Data are added chronologically. + The updated table DF may include new rows that contain NaN values in some cells. The function automatically filling NaN values withpredefined default values based on the column's data type. - + Parameters ----------- database : pandas.DataFrame @@ -96,13 +98,13 @@ def update_tables(database, DF, tel_id): The previous MAGIC database which we want to update. tel_id : int The telescope ID, which must be either 1 or 2. - + Returns ------- pandas.DataFrame A DataFrame with updated MAGIC database. """ - + database["DATE"] = database["DATE"].astype(str) DF["DATE"] = DF["DATE"].astype(str) columns_to_compare = ["DATE", "source", "Run ID"] @@ -110,26 +112,26 @@ def update_tables(database, DF, tel_id): database, DF[columns_to_compare], on=columns_to_compare, - how='left', + how="left", indicator=True, ) non_matching_rows = merged_df[merged_df["_merge"] == "left_only"].drop( columns=["_merge"] ) - + if non_matching_rows.empty: raise Exception("There is no un-updated data for a given time interval. 
") else: - + non_matching_rows_reset = non_matching_rows.reset_index(drop=True) new_rows = [] - + for index, row in non_matching_rows_reset.iterrows(): date = row["DATE"] source = row["source"] run_id = row["Run ID"] run_id = str(run_id) - + date_obj = datetime.strptime(date, "%Y%m%d") date_obj += timedelta(days=1) new_date = datetime.strftime(date_obj, "%Y%m%d") @@ -137,9 +139,9 @@ def update_tables(database, DF, tel_id): MM = new_date[4:6] DD = new_date[6:8] Y = "_Y_" - + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" - + if os.path.exists(path): files = os.listdir(path) count_with_run_id = 0 @@ -156,29 +158,29 @@ def update_tables(database, DF, tel_id): "DATE": date, "source": source, "Run ID": run_id, - "number of subruns": count_with_run_id + "number of subruns": count_with_run_id, } ) - + new_rows = pd.DataFrame(new_rows) new_rows["DATE"] = pd.to_datetime(new_rows["DATE"]) combined_df = pd.concat([DF, new_rows], ignore_index=True) combined_df["DATE"] = pd.to_datetime(combined_df["DATE"], errors="coerce") combined_df = combined_df.sort_values("DATE") - + combined_df["DATE"] = combined_df["DATE"].dt.strftime("%Y%m%d") combined_df["Run ID"] = combined_df["Run ID"].astype(int) combined_df.reset_index(drop=True, inplace=True) - + for column in combined_df.columns[4:]: not_null_data = combined_df[column].dropna() if not_null_data.empty: continue # Skip if all values are NaN - + inferred_type = pd.api.types.infer_dtype(not_null_data, skipna=True) - + if inferred_type == "boolean": - default_value = False + default_value = False elif inferred_type == "integer": default_value = 0 elif inferred_type == "floating": @@ -187,7 +189,7 @@ def update_tables(database, DF, tel_id): default_value = "NaN" else: continue - + combined_df[column] = ( combined_df[column] .fillna(default_value) @@ -195,13 +197,13 @@ def update_tables(database, DF, tel_id): ) combined_df = combined_df.infer_objects() - + return combined_df def create_new_database(df, date_min, date_max, tel_id): """ Creating a new MAGIC database. - + Parameters ----------- df : pandas.DataFrame @@ -212,8 +214,13 @@ def create_new_database(df, date_min, date_max, tel_id): End of the time interval (in LST convention). tel_id : int The telescope ID, which must be either 1 or 2. + + Returns + ------- + pandas.DataFrame + A DataFrame with a new MAGIC database for all common MAGIC+LST1 observations. """ - + database = table_magic_runs(df, date_min, date_max) new_rows = [] @@ -222,7 +229,7 @@ def create_new_database(df, date_min, date_max, tel_id): source = row["source"] run_id = row["Run ID"] run_id = str(run_id) - + date_obj = datetime.strptime(date, "%Y%m%d") date_obj += timedelta(days=1) new_date = datetime.strftime(date_obj, "%Y%m%d") @@ -230,12 +237,12 @@ def create_new_database(df, date_min, date_max, tel_id): MM = new_date[4:6] DD = new_date[6:8] Y = "_Y_" - + path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" - + if os.path.exists(path): files = os.listdir(path) - count_with_run_id = 0 + count_with_run_id = 0 # Counter for files that include the run_id. 
for filename in files: if Y in filename: @@ -249,46 +256,46 @@ def create_new_database(df, date_min, date_max, tel_id): "DATE": date, "source": source, "Run ID": run_id, - "number of subruns": count_with_run_id + "number of subruns": count_with_run_id, } ) - + new_rows = pd.DataFrame(new_rows) - + return new_rows def main(): """Main function.""" - + tel_id = [1, 2] - + df = pd.read_hdf( "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5", key="str/table", ) - + # Set "" to generate a new database. previous_database_path = "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5" file_exists = os.path.exists(previous_database_path) - + if file_exists: - + # TO DO : set time interval - format YYYYMMDD date_min = "20240601" date_max = "20240718" - + print("Updating database...") - + database = table_magic_runs(df, date_min, date_max) - + for tel in tel_id: - + DF = pd.read_hdf( previous_database_path, key=f"MAGIC{tel}/runs_M{tel}", ) - + if tel == 1: updated_df_1 = update_tables(database, DF, tel) print(updated_df_1) @@ -299,7 +306,7 @@ def main(): new_h5_file_path = ( "/fefs/aswg/workspace/joanna.wojtowicz/output/update_database.h5" ) - + try: updated_df_1.to_hdf( new_h5_file_path, key="MAGIC1/runs_M1", mode="w", format="table" @@ -308,35 +315,36 @@ def main(): new_h5_file_path, key="MAGIC2/runs_M2", mode="a", format="table" ) print(f"File saved successfully at {new_h5_file_path}") - + except Exception as e: print(f"An error occurred: {e}") - + else: print("Database does not exist. Creating a new database...") - + date_min = "20191101" current_datetime = datetime.now() date_max = current_datetime.strftime("%Y%m%d") - + tel_id_M1 = 1 tel_id_M2 = 2 database_M1 = create_new_database(df, date_min, date_max, tel_id_M1) database_M2 = create_new_database(df, date_min, date_max, tel_id_M2) - + # TO DO : set a path to save a new database new_database_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/output/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5" - + try: database_M1.to_hdf( new_database_file_path, key="MAGIC1/runs_M1", mode="w", format="table" ) - database_M2.to_hdf(new_database_file_path, key="MAGIC2/runs_M2", mode="a", format="table" + database_M2.to_hdf( + new_database_file_path, key="MAGIC2/runs_M2", mode="a", format="table" ) print(f"File saved successfully at {new_database_file_path}") - + except Exception as e: print(f"An error occurred: {e}") - + if __name__ == "__main__": main() From 8aae607f82b846795ddb477b179150e161c5eccb Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Sat, 20 Jul 2024 13:22:50 +0000 Subject: [PATCH 160/236] fixed typo --- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 03143c96..1ea39928 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -105,9 +105,9 @@ def split_train_test(target_dir, train_fraction): for j in list_of_runs[0:number_train_runs]: os.system(f"mv {j} {proton_dir}/train/{node}") - os.system(f"cp {list_of_dir[directory]}*.txt {proton_dir}/train/{node}") + os.system(f"cp {list_of_dir[directory]}logs/*.txt {proton_dir}/train/{node}") os.system( - f"mv {list_of_dir[directory]}*.txt {proton_dir}/../protons_test/{node}" + f"mv 
{list_of_dir[directory]}logs/*.txt {proton_dir}/../protons_test/{node}" ) os.system( f"mv {list_of_dir[directory]}*.h5 {proton_dir}/../protons_test/{node}" From 6578cd634f7da741ae5f01ca47dd8d8722717882 Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Sat, 20 Jul 2024 15:27:18 +0200 Subject: [PATCH 161/236] The updated script script update_MAGIC_runs.py (lint fixes). --- .../database_production/update_MAGIC_database.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index 401f1b06..0317e207 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -39,6 +39,7 @@ def fix_lists_and_convert(cell): parts = cell.replace("][", ",").strip("[]").split(",") return list(dict.fromkeys(int(item) for item in parts)) + def table_magic_runs(df, date_min, date_max): """ @@ -58,14 +59,14 @@ def table_magic_runs(df, date_min, date_max): pandas.DataFrame A DataFrame filtered by the specified date range. """ - + df_selected_data = df.iloc[:, [2, 1, 25]] df_selected_data.columns = ["DATE", "source", "MAGIC_runs"] grouped_data = df_selected_data.groupby(["DATE", "source"]) result_table = [] for (date, source), group in grouped_data: - if date >= date_min and date <= date_max: + if date >= date_min and date <= date_max: runs_combined = group["MAGIC_runs"].sum() result_table.append( @@ -336,7 +337,7 @@ def main(): try: database_M1.to_hdf( - new_database_file_path, key="MAGIC1/runs_M1", mode="w", format="table" + new_database_file_path, key="MAGIC1/runs_M1", mode="w", format="table" ) database_M2.to_hdf( new_database_file_path, key="MAGIC2/runs_M2", mode="a", format="table" From 0c7f5cb4a9a33a4ee2464965bcd2d2b23d515544 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 22 Jul 2024 07:28:49 +0000 Subject: [PATCH 162/236] minor fixes to setup --- .../semi_automatic_scripts/__init__.py | 2 + .../database_production/__init__.py | 9 +- .../semi_automatic_scripts/merge_stereo.py | 98 ++++++++++--------- setup.cfg | 7 +- 4 files changed, 69 insertions(+), 47 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 5fa0cf9d..fc95b25d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -7,6 +7,7 @@ lists_and_bash_gen_MAGIC, lists_and_bash_generator, ) +from .merge_stereo import MergeStereo from .merging_runs import cleaning, merge, mergeMC, split_train_test from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo @@ -26,4 +27,5 @@ "bash_stereo", "bash_stereoMC", "slurm_lines", + "MergeStereo", ] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py index f37b4346..23129c35 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py @@ -1,5 +1,12 @@ from .lstchain_version import lstchain_versions, version_lstchain from .LSTnsb import nsb 
from .nsb_level import bash_scripts +from .nsb_to_h5 import collect_nsb -__all__ = ["nsb", "bash_scripts", "version_lstchain", "lstchain_versions"] +__all__ = [ + "nsb", + "bash_scripts", + "version_lstchain", + "lstchain_versions", + "collect_nsb", +] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index c4c2f32e..821721e9 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -1,17 +1,27 @@ +""" +This scripts merges LST DL1Stereo subruns into runs + +Usage: +$ python merge_stereo.py (-c config_file.yaml) +""" import argparse import glob import logging import os from pathlib import Path + import joblib import numpy as np import yaml + from magicctapipe import __version__ from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( rc_lines, slurm_lines, ) +__all__ = ["MergeStereo"] + logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) @@ -23,8 +33,16 @@ def MergeStereo(target_dir, env_name, source, NSB_match, cluster): Parameters ---------- - target_dir: str + target_dir : str Path to the working directory + env_name : str + Name of the environment + source : str + Name of the target + NSB_match : bool + If real data are matched to pre-processed MCs or not + cluster : str + Cluster system """ process_name = source @@ -34,44 +52,41 @@ def MergeStereo(target_dir, env_name, source, NSB_match, cluster): stereo_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/Observations" listOfNightsLST = np.sort(glob.glob(f"{stereo_DL1_dir}/DL1Stereo/*")) - if cluster == 'SLURM': - for nightLST in listOfNightsLST: - night = nightLST.split('/')[-1] - stereoMergeDir = ( - f"{stereo_DL1_dir}/DL1Stereo/{night}/Merged" - ) - os.makedirs(f"{stereoMergeDir}/logs", exist_ok=True) - if not os.listdir(f"{nightLST}"): - continue - if len(glob.glob("f{nightLST}/dl1_stereo*.h5") < 1: - continue - - - slurm = slurm_lines( - queue="short", - job_name=f"{process_name}_stereo_merge", - out_name=f"{stereoMergeDir}/logs/slurm-%x.%A_%a", - ) - rc = rc_lines( - store=f"{nightLST} ${{SLURM_JOB_ID}}", out=f"{stereoMergeDir}/logs/list" - ) - os.system(f"echo {nightLST} >> {stereoMergeDir}/logs/list_dl0.txt") - lines = ( - slurm - + [ - f"conda run -n {env_name} merge_hdf_files --input-dir {nightLST} --output-dir {stereoMergeDir} --run-wise >{stereoMergeDir}/logs/merge_{night}_${{SLURM_JOB_ID}}.log\n" - ] - + rc - ) - - with open(f"{source}_StereoMerge_{night}.sh", "w") as f: - f.writelines(lines) - else: - logger.warning('Automatic processing not implemented for the cluster indicated in the config file') + if cluster != "SLURM": + logger.warning( + "Automatic processing not implemented for the cluster indicated in the config file" + ) return + for nightLST in listOfNightsLST: + night = nightLST.split("/")[-1] + stereoMergeDir = f"{stereo_DL1_dir}/DL1Stereo/{night}/Merged" + os.makedirs(f"{stereoMergeDir}/logs", exist_ok=True) + if not os.listdir(f"{nightLST}"): + continue + if len(glob.glob("f{nightLST}/dl1_stereo*.h5")) < 1: + continue + + slurm = slurm_lines( + queue="short", + job_name=f"{process_name}_stereo_merge", + out_name=f"{stereoMergeDir}/logs/slurm-%x.%A_%a", + ) + rc = rc_lines( + store=f"{nightLST} ${{SLURM_JOB_ID}}", out=f"{stereoMergeDir}/logs/list" + ) + os.system(f"echo {nightLST} >> {stereoMergeDir}/logs/list_dl0.txt") + 
lines = ( + slurm + + [ + f"conda run -n {env_name} merge_hdf_files --input-dir {nightLST} --output-dir {stereoMergeDir} --run-wise >{stereoMergeDir}/logs/merge_{night}_${{SLURM_JOB_ID}}.log\n" + ] + + rc + ) + + with open(f"{source}_StereoMerge_{night}.sh", "w") as f: + f.writelines(lines) + - - def main(): """ Here we read the config_general.yaml file and call the functions defined above. @@ -98,12 +113,10 @@ def main(): NSB_match = config["general"]["NSB_matching"] env_name = config["general"]["env_name"] - source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] - source_list = [] if source_in is None: source_list = joblib.load("list_sources.dat") @@ -111,15 +124,13 @@ def main(): else: source_list.append(source) for source_name in source_list: - + print("***** Merging DL1Stereo files run-wise...") MergeStereo(target_dir, env_name, source, NSB_match, cluster) list_of_merge = glob.glob(f"{source_name}_StereoMerge_*.sh") if len(list_of_merge) < 1: - print( - "Warning: no bash script has been produced" - ) + print("Warning: no bash script has been produced") continue launch_jobs = "" @@ -129,6 +140,5 @@ def main(): os.system(launch_jobs) - if __name__ == "__main__": main() diff --git a/setup.cfg b/setup.cfg index f2569cf4..edebaff4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -97,13 +97,16 @@ console_scripts = merge_hdf_files = magicctapipe.scripts.lst1_magic.merge_hdf_files:main tune_magic_nsb = magicctapipe.scripts.lst1_magic.tune_magic_nsb:main coincident_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.coincident_events:main + create_LST_table = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.create_LST_table:main dl1_production = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.dl1_production:main job_accounting = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.job_accounting:main list_from_h5 = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.list_from_h5:main lstchain_version = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.lstchain_version:main - LSTnsb = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.LSTnsb:main + LSTnsb = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.LSTnsb:main + merge_stereo = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.merge_stereo:main merging_runs = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.merging_runs:main - nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_level:main + nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_level:main + nsb_to_h5 = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_to_h5:main stereo_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.stereo_events:main From a14b64fa235e6dde76cdea16915ec04b8d6aa77d Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 22 Jul 2024 12:42:13 +0000 Subject: [PATCH 163/236] bug --- .../semi_automatic_scripts/stereo_events.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index c2b31946..6ace26dc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -266,15 +266,18 @@ def main(): bash_stereoMC(target_dir, part, env_name, cluster) list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) - launch_jobs = "" - # TODO: check on N. bash scripts + if len(list_of_stereo_scripts) < 1: + logger.warning("No bash script has been produced for processing MCs") + else: + launch_jobs = "" + # TODO: check on N. bash scripts - for n, run in enumerate(list_of_stereo_scripts): - launch_jobs += ( - " && " if n > 0 else "" - ) + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" + for n, run in enumerate(list_of_stereo_scripts): + launch_jobs += ( + " && " if n > 0 else "" + ) + f"stereo{n}=$(sbatch --parsable {run})" - os.system(launch_jobs) + os.system(launch_jobs) for source_name in source_list: if ( (args.analysis_type == "onlyMAGIC") @@ -303,11 +306,10 @@ def main(): logger.warning("No bash scripts for real data") continue launch_jobs = "" - for n, run in enumerate(list_of_stereo_scripts): launch_jobs += ( " && " if n > 0 else "" - ) + f"{launch_jobs} && stereo{n}=$(sbatch --parsable {run})" + ) + f"stereo{n}=$(sbatch --parsable {run})" os.system(launch_jobs) From 184a9d052aeba21d4edadf742d939acc5e54624a Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 23 Jul 2024 13:26:35 +0000 Subject: [PATCH 164/236] bug + minor fixes --- .../lst1_magic/semi_automatic_scripts/merging_runs.py | 6 +++--- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 1ea39928..8658ef97 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -239,7 +239,7 @@ def mergeMC(target_dir, identification, env_name, cluster): process_name = "merging_MC" MC_DL1_dir = f"{target_dir}/v{__version__}/MC/DL1" - os.makedirs(f"{MC_DL1_dir}/{identification}/Merged", exist_ok=True) + os.makedirs(f"{MC_DL1_dir}/{identification}/Merged/logs", exist_ok=True) if identification == "protons": list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/train/node*")) @@ -264,12 +264,12 @@ def mergeMC(target_dir, identification, env_name, cluster): array=process_size, mem="7g", job_name=process_name, - out_name=f"{MC_DL1_dir}/{identification}/Merged/slurm-%x.%A_%a", + out_name=f"{MC_DL1_dir}/{identification}/Merged/logs/slurm-%x.%A_%a", ) lines_bash_file = slurm + [ f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - f"export LOG={MC_DL1_dir}/{identification}/Merged" + f"export LOG={MC_DL1_dir}/{identification}/Merged/logs" + "/merged_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", ] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 6ace26dc..8b71e67e 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -170,7 +170,7 @@ def bash_stereoMC(target_dir, identification, env_name, cluster): process_name = "stereo_MC" inputdir = 
f"{target_dir}/v{__version__}/MC/DL1/{identification}/Merged" - os.makedirs(f"{inputdir}/StereoMerged", exist_ok=True) + os.makedirs(f"{inputdir}/StereoMerged/logs", exist_ok=True) os.system( f"ls {inputdir}/dl1*.h5 > {inputdir}/list_coin.txt" @@ -186,7 +186,7 @@ def bash_stereoMC(target_dir, identification, env_name, cluster): slurm = slurm_lines( queue="xxl", job_name=f"{process_name}_stereo", - array=f"{process_size}%100", + array=process_size, mem="7g", out_name=f"{inputdir}/StereoMerged/logs/slurm-%x.%A_%a", ) @@ -195,7 +195,7 @@ def bash_stereoMC(target_dir, identification, env_name, cluster): f"export OUTPUTDIR={inputdir}/StereoMerged\n", "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", + "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/MC/config_stereo.yaml >$LOG 2>&1", ] f.writelines(lines) @@ -278,6 +278,7 @@ def main(): ) + f"stereo{n}=$(sbatch --parsable {run})" os.system(launch_jobs) + for source_name in source_list: if ( (args.analysis_type == "onlyMAGIC") From 67ab8049489a2e7f03a209f55d66a1b11dc8c113 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 25 Jul 2024 11:49:27 +0000 Subject: [PATCH 165/236] bug fixes --- .../semi_automatic_scripts/merge_stereo.py | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index 821721e9..8814a85e 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -27,7 +27,7 @@ logger.setLevel(logging.INFO) -def MergeStereo(target_dir, env_name, source, NSB_match, cluster): +def MergeStereo(target_dir, env_name, source, cluster): """ This function creates the bash scripts to run merge_hdf_files.py in all DL1Stereo subruns. 
@@ -39,18 +39,12 @@ def MergeStereo(target_dir, env_name, source, NSB_match, cluster): Name of the environment source : str Name of the target - NSB_match : bool - If real data are matched to pre-processed MCs or not cluster : str Cluster system """ process_name = source - if NSB_match: - stereo_DL1_dir = f"{target_dir}/v{__version__}/{source}" - else: - stereo_DL1_dir = f"{target_dir}/v{__version__}/{source}/DL1/Observations" - + stereo_DL1_dir = f"{target_dir}/v{__version__}/{source}" listOfNightsLST = np.sort(glob.glob(f"{stereo_DL1_dir}/DL1Stereo/*")) if cluster != "SLURM": logger.warning( @@ -61,9 +55,8 @@ def MergeStereo(target_dir, env_name, source, NSB_match, cluster): night = nightLST.split("/")[-1] stereoMergeDir = f"{stereo_DL1_dir}/DL1Stereo/{night}/Merged" os.makedirs(f"{stereoMergeDir}/logs", exist_ok=True) - if not os.listdir(f"{nightLST}"): - continue - if len(glob.glob("f{nightLST}/dl1_stereo*.h5")) < 1: + + if len(glob.glob(f"{nightLST}/dl1_stereo*.h5")) < 1: continue slurm = slurm_lines( @@ -110,7 +103,6 @@ def main(): target_dir = Path(config["directories"]["workspace_dir"]) - NSB_match = config["general"]["NSB_matching"] env_name = config["general"]["env_name"] source_in = config["data_selection"]["source_name_database"] @@ -118,15 +110,17 @@ def main(): cluster = config["general"]["cluster"] source_list = [] + if source_in is None: source_list = joblib.load("list_sources.dat") else: source_list.append(source) + for source_name in source_list: print("***** Merging DL1Stereo files run-wise...") - MergeStereo(target_dir, env_name, source, NSB_match, cluster) + MergeStereo(target_dir, env_name, source_name, cluster) list_of_merge = glob.glob(f"{source_name}_StereoMerge_*.sh") if len(list_of_merge) < 1: @@ -134,8 +128,11 @@ def main(): continue launch_jobs = "" + for n, run in enumerate(list_of_merge): - launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable {run})" + launch_jobs += ( + " && " if n > 0 else "" + ) + f"merge{n}=$(sbatch --parsable {run})" os.system(launch_jobs) From a508c7fca93675d6956a7f2ff83bd1790094bf0b Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 26 Jul 2024 07:19:34 +0000 Subject: [PATCH 166/236] bug & minor fixes db --- .../database_production/create_LST_table.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index b305d52d..1c442184 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -60,13 +60,13 @@ def main(): ) # TODO: put this file in a shared folder df = pd.concat([df, df2]).drop_duplicates(subset="LST1_run", keep="first") if args.begin != 0: - df = df[df["DATE"] >= args.begin] + df = df[df["DATE"].astype(int) >= args.begin] if args.end != 0: - df = df[df["DATE"] <= args.end] + df = df[df["DATE"].astype(int) <= args.end] - needed_cols = [ - "source", + needed_cols = [ "DATE", + "source", "LST1_run", "MAGIC_stereo", "MAGIC_trigger", @@ -80,8 +80,7 @@ def main(): df_cut = df_cut.assign(processed_lstchain_file="") df_cut = df_cut.assign(error_code_nsb=-1) - df_cut = df_cut.assign(error_code_coincidence=-1) - df_cut = df_cut.assign(error_code_stereo=-1) + if os.path.isfile(out_h5): df_old = pd.read_hdf( From 3eb3172d070b0c5ecb146507bfa6d0e9aa3ae148 Mon Sep 
17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 26 Jul 2024 12:33:25 +0000 Subject: [PATCH 167/236] job accounting + minor bug fixes --- .../semi_automatic_scripts/job_accounting.py | 26 +++++++++++++------ .../semi_automatic_scripts/merge_stereo.py | 2 +- .../semi_automatic_scripts/merging_runs.py | 6 ++--- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index e4bc892b..b5dd6bdb 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -96,11 +96,17 @@ def main(): source_out = "*" indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" - dirs = sorted( - glob.glob(f"{indir}/[0-9]*/[M0-9]*") - + glob.glob(f"{indir}/Merged_[0-9]*") - + glob.glob(f"{indir}/" + "[0-9]" * 8) - ) + + if args.data_level == 'MergedStereo': + dirs = sorted(glob.glob(f'{work_dir}/v{args.version}/{source_out}/DL1Stereo/[0-9]*/Merged')) + + else: + dirs = sorted( + glob.glob(f"{indir}/[0-9]*/[M0-9]*") + + glob.glob(f"{indir}/Merged_[0-9]*") + + glob.glob(f"{indir}/" + "[0-9]" * 8) + ) + if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] print("Error, no directories found") @@ -108,7 +114,7 @@ def main(): print(f"Versions {versions}") print( - "Supported data types: DL1/M1, DL1/M2, DL1/Merged, DL1Coincident, DL1Stereo" + "Supported data types: DL1/M1, DL1/M2, DL1/Merged, DL1Coincident, DL1Stereo, MergedStereo" ) exit(1) @@ -126,8 +132,11 @@ def main(): total_time = 0 all_jobs = [] for dir in dirs: - this_date = re.sub(f".+/{args.data_level}/", "", dir) - this_date = re.sub(r"\D", "", this_date.split("/")[0]) + if args.data_level == 'MergedStereo': + this_date=dir.split('/')[-2] + else: + this_date = re.sub(f".+/{args.data_level}/", "", dir) + this_date = re.sub(r"\D", "", this_date.split("/")[0]) this_date = datetime.strptime(this_date, "%Y%m%d") if timerange and (this_date < timemin or this_date > timemax): continue @@ -135,6 +144,7 @@ def main(): print(dir) list_dl0 = "" ins = ["list_dl0.txt", "list_LST.txt", "list_coin.txt", "list_cal.txt"] + for file in ins: if os.path.exists(f"{dir}/logs/{file}"): list_dl0 = f"{dir}/logs/{file}" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index 8814a85e..0487678b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -71,7 +71,7 @@ def MergeStereo(target_dir, env_name, source, cluster): lines = ( slurm + [ - f"conda run -n {env_name} merge_hdf_files --input-dir {nightLST} --output-dir {stereoMergeDir} --run-wise >{stereoMergeDir}/logs/merge_{night}_${{SLURM_JOB_ID}}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {nightLST} --output-dir {stereoMergeDir} --run-wise >{stereoMergeDir}/logs/merge_{night}_${{SLURM_JOB_ID}}.log 2>&1\n" ] + rc ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 8658ef97..52c4ccc9 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -169,7 +169,7 @@ def merge(target_dir, identification, 
MAGIC_runs, env_name, source, NSB_match, c # f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' # ) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log 2>&1\n" ) rc = rc_lines( store=f"{indir} ${{SLURM_JOB_ID}}", @@ -189,7 +189,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged" os.makedirs(f"{outdir}/logs", exist_ok=True) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log 2>&1\n" ) rc = rc_lines( store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" @@ -210,7 +210,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c outdir = f"{MAGIC_DL1_dir}/Merged/Merged_{i}" os.makedirs(f"{outdir}/logs", exist_ok=True) f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log 2>&1\n" ) rc = rc_lines( store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" From b7fd41402572452722cba35f982eebe9b79cf8f9 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 29 Jul 2024 08:09:15 +0000 Subject: [PATCH 168/236] minor fixes --- .../database_production/LSTnsb.py | 4 +++- .../database_production/create_LST_table.py | 4 +--- .../database_production/nsb_to_h5.py | 17 -------------- .../semi_automatic_scripts/job_accounting.py | 22 +++++++++++-------- 4 files changed, 17 insertions(+), 30 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index d161337c..559e478f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -52,6 +52,7 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): else: mod = len(run_list) // denominator failed = 0 + logger.info("NSB levels (sub-runs): \n") for ii in range(0, len(run_list)): subrun = run_list[ii].split(".")[-2] if mod == 0: @@ -149,7 +150,8 @@ def main(): ) return median_NSB = np.median(noise) - logger.info(f"Run n. {run_number}, nsb median {median_NSB}") + logger.info("\n\n") + logger.info(f"Run n. 
{run_number}, NSB median {median_NSB}") for j in range(0, len(nsb_list)): if (median_NSB < nsb_limit[j + 1]) & (median_NSB > nsb_limit[j]): diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index 1c442184..7ce63cd2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -64,7 +64,7 @@ def main(): if args.end != 0: df = df[df["DATE"].astype(int) <= args.end] - needed_cols = [ + needed_cols = [ "DATE", "source", "LST1_run", @@ -80,8 +80,6 @@ def main(): df_cut = df_cut.assign(processed_lstchain_file="") df_cut = df_cut.assign(error_code_nsb=-1) - - if os.path.isfile(out_h5): df_old = pd.read_hdf( out_h5, diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index df788cd6..cba3366d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -77,23 +77,6 @@ def main(): df_new.loc[df_new["nsb"].notna(), "error_code_nsb"] = "0" df_new.loc[df_new["nsb"] > 3.0, "error_code_nsb"] = "2" - df_new = df_new[ - [ - "source", - "DATE", - "LST1_run", - "MAGIC_stereo", - "MAGIC_trigger", - "MAGIC_HV", - "nsb", - "lstchain_versions", - "last_lstchain_file", - "processed_lstchain_file", - "error_code_nsb", - "error_code_coincidence", - "error_code_stereo", - ] - ] df_new.to_hdf( LST_h5, key=LST_key, diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index b5dd6bdb..e59f322a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -96,17 +96,21 @@ def main(): source_out = "*" indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" - - if args.data_level == 'MergedStereo': - dirs = sorted(glob.glob(f'{work_dir}/v{args.version}/{source_out}/DL1Stereo/[0-9]*/Merged')) - - else: + + if args.data_level == "MergedStereo": + dirs = sorted( + glob.glob( + f"{work_dir}/v{args.version}/{source_out}/DL1Stereo/[0-9]*/Merged" + ) + ) + + else: dirs = sorted( glob.glob(f"{indir}/[0-9]*/[M0-9]*") + glob.glob(f"{indir}/Merged_[0-9]*") + glob.glob(f"{indir}/" + "[0-9]" * 8) ) - + if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] print("Error, no directories found") @@ -132,8 +136,8 @@ def main(): total_time = 0 all_jobs = [] for dir in dirs: - if args.data_level == 'MergedStereo': - this_date=dir.split('/')[-2] + if args.data_level == "MergedStereo": + this_date = dir.split("/")[-2] else: this_date = re.sub(f".+/{args.data_level}/", "", dir) this_date = re.sub(r"\D", "", this_date.split("/")[0]) @@ -144,7 +148,7 @@ def main(): print(dir) list_dl0 = "" ins = ["list_dl0.txt", "list_LST.txt", "list_coin.txt", "list_cal.txt"] - + for file in ins: if os.path.exists(f"{dir}/logs/{file}"): list_dl0 = f"{dir}/logs/{file}" From b295c9e4f5fb8e9bc18786f3f77747162a2e0281 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 29 Jul 2024 12:26:35 +0000 Subject: [PATCH 169/236] doc --- README.rst | 4 
++-- magicctapipe/scripts/lst1_magic/README.md | 9 +++++---- .../semi_automatic_scripts/config_general.yaml | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 75057f0e..8f22cfe2 100644 --- a/README.rst +++ b/README.rst @@ -48,8 +48,8 @@ The following command will set up a conda virtual environment, add the necessary git clone https://github.com/cta-observatory/magic-cta-pipe.git cd magic-cta-pipe - conda env create -n magic-lst1 -f environment.yml - conda activate magic-lst1 + conda env create -n magic-lst -f environment.yml + conda activate magic-lst pip install . In general, *magic-cta-pipe* is still in heavy development phase, so expect large changes between different releases. diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 729432ee..d4a97831 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -65,8 +65,7 @@ data_selection: skip_LST_runs: [3216,3217] # LST runs to ignore. skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. -general: - SimTel_version: "v1.4" +general: LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" focal_length : "effective" @@ -84,6 +83,8 @@ general: WARNING: Only the runs for which the `LST_version` parameter matches the `processed_lstchain_file` version in the LST database (i.e., the version used to evaluate the NSB level; generally the last available and processable version of a run) will be processed. +WARNING: `env_name` must be the same as the name of the environment in which you installed this version of the pipeline + Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the time window (or list of nights) defined on the config_general.yaml file: > $ list_from_h5 -c config_general.yaml @@ -96,7 +97,7 @@ Finding LST runs... Source: XXX Finding MAGIC runs... ``` -And it will save the files TARGET_LST_runs.txt, TARGET_MAGIC_runs.txt, and list_sources.dat in your current working directory. In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s). +And it will save the files {TARGET}_LST_runs.txt, {TARGET}_MAGIC_runs.txt, and list_sources.dat (i.e., the list of all the sources found in the database according to user and default options) in your current working directory. In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s). At this point, we can convert the MAGIC data into DL1 format with the following command: > $ dl1_production -c config_general.yaml @@ -186,7 +187,7 @@ The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/ma To create and update the MAGIC and LST databases (from the one produced by AB and FDP) you should use the scripts in `database_production` -- `create_lst_table`: creates the LST database (1 row per LST run) by dropping some columns from the parent one (AB, FDP) and adding columns for NSB value (-1 by default), lstchain available versions, most recent lstchain version, processed file and error codes (-1 by default). 
Launched as `python create_lst_table.py`
+- `create_lst_table`: creates the LST database (1 row per LST run) by dropping some columns from the parent one (AB, FDP) and adding columns for NSB value (-1 by default), lstchain available versions, most recent lstchain version, processed file and NSB error codes (-1 by default). It could also be used to update the given database, possibly selecting a given time range from the parent databases (by the -b and -e parameters, which stand for begin and end date of the range). Launched as `python create_lst_table.py (-b YYYYMMDD -e YYYYMMDD)`
 
 - `lstchain_version`: this script loops over all the rows of the database, extracts date and run number from the table and looks for the data saved in the IT (i.e., which version of lstchain has been used to process a run). It evaluates all the versions used to process a run and the most recent MCP-compatible one according to a hard-coded, ordered list. Launched as `python lstchain_version.py`
 
diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml
index 9a4bafef..c5acd9d7 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml
@@ -17,7 +17,7 @@ directories:
 
 data_selection:
     source_name_database: "CrabNebula"  # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range.
-    source_name_output: 'Crabtest'  # Name tag of your target. Used only if source_name_database != null.
+    source_name_output: 'Crabtest'  # Name tag of your target. Used only if source_name_database != null. Set it to null if source_name_database = null
     target_RA_deg : 83.629 # RA in degrees; Set to null if source_name_database=null.
     target_Dec_deg: 22.015 # Dec in degrees; set to null if source_name_database=null.
     time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01).

From f7b394ee80cf2d5121e07be1602de0164d5797ed Mon Sep 17 00:00:00 2001
From: joannaWojS <167758973+joannaWojS@users.noreply.github.com>
Date: Mon, 29 Jul 2024 17:44:10 +0200
Subject: [PATCH 170/236] Update update_MAGIC_database.py

In the function 'create_new_database', data types were added for each column.
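
A minimal sketch of the equivalent casting with pandas astype, using
hypothetical example data (only the column names match the ones used in
create_new_database):

    # hypothetical sketch, not part of this patch
    import pandas as pd

    rows = pd.DataFrame(
        [{"DATE": "20240601", "source": "Crab", "Run ID": "12345", "number of subruns": "10"}]
    )
    # "source" is already a string, so only the numeric columns are cast
    rows = rows.astype({"DATE": int, "Run ID": int, "number of subruns": int})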
--- .../database_production/update_MAGIC_database.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index 0317e207..0f7aadc9 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -254,10 +254,10 @@ def create_new_database(df, date_min, date_max, tel_id): if count_with_run_id != 0: new_rows.append( { - "DATE": date, - "source": source, - "Run ID": run_id, - "number of subruns": count_with_run_id, + "DATE": int(date), + "source": str(source), + "Run ID": int(run_id), + "number of subruns": int(count_with_run_id), } ) From b13deeccff32b01095d020ba5b871608cbe945b8 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 30 Jul 2024 08:44:13 +0000 Subject: [PATCH 171/236] fixed wrong indent --- .../lst1_magic/semi_automatic_scripts/job_accounting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index d654bf19..7586a3ce 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -309,9 +309,9 @@ def main(): isgood = np.logical_and(good_subruns == all_subruns, good_subruns > 0) h5runs.loc[idx, rc_col + "_all"] = isgood - # fixme: for DL1/M[12] files since htere are two dataframes in the file, we need to append it - # and this causes increase in the file size every time the file is updated - h5runs.to_hdf(args.run_list, key=h5key, mode="r+") + # fixme: for DL1/M[12] files since htere are two dataframes in the file, we need to append it + # and this causes increase in the file size every time the file is updated + h5runs.to_hdf(args.run_list, key=h5key, mode="r+") if __name__ == "__main__": From 6c4664dac92777f95f1ffe9382df990039483337 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Jul 2024 14:35:10 +0000 Subject: [PATCH 172/236] removed unused lines (ra/dec) --- .../lst1_magic/semi_automatic_scripts/config_general.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml index 9a4bafef..00380cfc 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml @@ -18,8 +18,6 @@ directories: data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. source_name_output: 'Crabtest' # Name tag of your target. Used only if source_name_database != null. - target_RA_deg : 83.629 # RA in degrees; Set to null if source_name_database=null. - target_Dec_deg: 22.015 # Dec in degrees; set to null if source_name_database=null. time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). 
min : "2023_11_17" max : "2024_03_03" From ea6c73ecf1ee677cf6dff4f814c33195fa08e822 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Jul 2024 14:35:28 +0000 Subject: [PATCH 173/236] removed comment --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 6505ed33..d3ff40ef 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -518,9 +518,7 @@ def main(): print( f"To check the jobs submitted to the cluster, type: squeue -n {source_name}" ) - print( - "This process will take about 10 min to run if the IT cluster is free." - ) + directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name From 846c2bbf64d735fcf4b21327d66233830c4c1492 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Jul 2024 14:36:18 +0000 Subject: [PATCH 174/236] stereo merging changes --- .../semi_automatic_scripts/job_accounting.py | 28 ++++++------------- .../semi_automatic_scripts/merge_stereo.py | 4 +-- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index e59f322a..b2a68cf8 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -97,19 +97,11 @@ def main(): indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" - if args.data_level == "MergedStereo": - dirs = sorted( - glob.glob( - f"{work_dir}/v{args.version}/{source_out}/DL1Stereo/[0-9]*/Merged" - ) - ) - - else: - dirs = sorted( - glob.glob(f"{indir}/[0-9]*/[M0-9]*") - + glob.glob(f"{indir}/Merged_[0-9]*") - + glob.glob(f"{indir}/" + "[0-9]" * 8) - ) + + dirs = sorted( + glob.glob(f"{indir}/[0-9]*/[M0-9]*") + + glob.glob(f"{indir}/" + "[0-9]" * 8) + ) if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] @@ -118,7 +110,7 @@ def main(): print(f"Versions {versions}") print( - "Supported data types: DL1/M1, DL1/M2, DL1/Merged, DL1Coincident, DL1Stereo, MergedStereo" + "Supported data types: DL1/M1, DL1/M2, DL1/Merged, DL1Coincident, DL1Stereo, DL1Stereo/Merged" ) exit(1) @@ -136,11 +128,9 @@ def main(): total_time = 0 all_jobs = [] for dir in dirs: - if args.data_level == "MergedStereo": - this_date = dir.split("/")[-2] - else: - this_date = re.sub(f".+/{args.data_level}/", "", dir) - this_date = re.sub(r"\D", "", this_date.split("/")[0]) + + this_date = re.sub(f".+/{args.data_level}/", "", dir) + this_date = re.sub(r"\D", "", this_date.split("/")[0]) this_date = datetime.strptime(this_date, "%Y%m%d") if timerange and (this_date < timemin or this_date > timemax): continue diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index 0487678b..dcc7579d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -53,7 +53,7 @@ def MergeStereo(target_dir, env_name, source, cluster): return for nightLST in listOfNightsLST: night = nightLST.split("/")[-1] - stereoMergeDir 
= f"{stereo_DL1_dir}/DL1Stereo/{night}/Merged" + stereoMergeDir = f"{stereo_DL1_dir}/DL1Stereo/Merged/{night}" os.makedirs(f"{stereoMergeDir}/logs", exist_ok=True) if len(glob.glob(f"{nightLST}/dl1_stereo*.h5")) < 1: @@ -132,7 +132,7 @@ def main(): for n, run in enumerate(list_of_merge): launch_jobs += ( " && " if n > 0 else "" - ) + f"merge{n}=$(sbatch --parsable {run})" + ) + f"sbatch {run}" os.system(launch_jobs) From 7edf5a44141cb45d5d06744812aa365350dd183e Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Jul 2024 14:37:05 +0000 Subject: [PATCH 175/236] removed run-> night MAGIC merge --- .../semi_automatic_scripts/merging_runs.py | 33 +++---------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 52c4ccc9..bec0cfdb 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -180,7 +180,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c else: logger.error(f"{indir} does not exist") - elif identification == "1_M1M2": + else: for i in MAGIC_runs: if os.path.exists(f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}") & os.path.exists( f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}" @@ -200,23 +200,7 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c logger.error( f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" ) - else: - dates = np.unique(MAGIC_runs.T[0]) - for i in dates: - if not os.path.exists(f"{MAGIC_DL1_dir}/Merged/{i}/Merged"): - continue - - indir = f"{MAGIC_DL1_dir}/Merged/{i}/Merged" - outdir = f"{MAGIC_DL1_dir}/Merged/Merged_{i}" - os.makedirs(f"{outdir}/logs", exist_ok=True) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_night_{i}_${{SLURM_JOB_ID}}.log 2>&1\n" - ) - rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" - ) - f.writelines(rc) - os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") + def mergeMC(target_dir, identification, env_name, cluster): @@ -358,7 +342,7 @@ def main(): for n, run in enumerate(list_of_merging_scripts): launch_jobs += ( " && " if n > 0 else "" - ) + f"merging{n}=$(sbatch --parsable {run})" + ) + f"sbatch {run}" os.system(launch_jobs) @@ -394,16 +378,7 @@ def main(): NSB_match, cluster, ) # generating the bash script to merge the M1 and M2 runs - merge( - target_dir, - "2_nights", - MAGIC_runs, - env_name, - source_name, - NSB_match, - cluster, - ) # generating the bash script to merge all runs per night - + print("***** Running merge_hdf_files.py on the MAGIC data files...") # Below we run the bash scripts to merge the MAGIC files From 759fe9805bce43d271ac8184818ca22210409ec7 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Jul 2024 14:37:28 +0000 Subject: [PATCH 176/236] new magic merging directories --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 7a1f24ad..3fb0a5ae 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ 
-102,7 +102,7 @@ def linking_bash_lst( MAGIC_DL1_dir = f"{target_dir}/v{__version__}/{source_name}/DL1" - dates = [os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/Merged_*")] + dates = [os.path.basename(x) for x in glob.glob(f"{MAGIC_DL1_dir}/Merged/[0-9]*")] if cluster != "SLURM": logger.warning( "Automatic processing not implemented for the cluster indicated in the config file" @@ -249,7 +249,7 @@ def main(): for n, run in enumerate(list_of_coincidence_scripts): launch_jobs += ( " && " if n > 0 else "" - ) + f"coincidence{n}=$(sbatch --parsable {run})" + ) + f"sbatch {run}" os.system(launch_jobs) From fd11d796bdff4d5882b81c6d367d63b6b081a77a Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 30 Jul 2024 14:38:09 +0000 Subject: [PATCH 177/236] removed unused vars in sbatch --- .../semi_automatic_scripts/database_production/nsb_level.py | 2 +- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index be292433..b4418dca 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -187,7 +187,7 @@ def main(): launch_jobs = "" for n, run in enumerate(list_of_bash_scripts): - launch_jobs += (" && " if n > 0 else "") + f"nsb{n}=$(sbatch --parsable {run})" + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" os.system(launch_jobs) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 8b71e67e..316ae159 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -275,7 +275,7 @@ def main(): for n, run in enumerate(list_of_stereo_scripts): launch_jobs += ( " && " if n > 0 else "" - ) + f"stereo{n}=$(sbatch --parsable {run})" + ) + f"sbatch {run}" os.system(launch_jobs) @@ -310,7 +310,7 @@ def main(): for n, run in enumerate(list_of_stereo_scripts): launch_jobs += ( " && " if n > 0 else "" - ) + f"stereo{n}=$(sbatch --parsable {run})" + ) + f"sbatch {run}" os.system(launch_jobs) From 3a2c01189198538d299436e55a76d221c5294051 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 30 Jul 2024 19:48:06 +0200 Subject: [PATCH 178/236] fixing formating --- .../semi_automatic_scripts/coincident_events.py | 4 +--- .../lst1_magic/semi_automatic_scripts/job_accounting.py | 4 +--- .../lst1_magic/semi_automatic_scripts/merge_stereo.py | 4 +--- .../lst1_magic/semi_automatic_scripts/merging_runs.py | 7 ++----- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 8 ++------ 5 files changed, 7 insertions(+), 20 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 3fb0a5ae..1513d8c1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -247,9 +247,7 @@ def main(): continue launch_jobs = "" for n, run in enumerate(list_of_coincidence_scripts): - launch_jobs += ( - " && " if n > 0 else "" - ) + f"sbatch {run}" + launch_jobs += (" && " if n 
> 0 else "") + f"sbatch {run}" os.system(launch_jobs) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 41008cbf..a6ecd7f2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -136,10 +136,8 @@ def main(): indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" - dirs = sorted( - glob.glob(f"{indir}/[0-9]*/[M0-9]*") - + glob.glob(f"{indir}/" + "[0-9]" * 8) + glob.glob(f"{indir}/[0-9]*/[M0-9]*") + glob.glob(f"{indir}/" + "[0-9]" * 8) ) if dirs == []: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index dcc7579d..82ac0bed 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -130,9 +130,7 @@ def main(): launch_jobs = "" for n, run in enumerate(list_of_merge): - launch_jobs += ( - " && " if n > 0 else "" - ) + f"sbatch {run}" + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" os.system(launch_jobs) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index bec0cfdb..b49dc85e 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -200,7 +200,6 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c logger.error( f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" ) - def mergeMC(target_dir, identification, env_name, cluster): @@ -340,9 +339,7 @@ def main(): else: launch_jobs = "" for n, run in enumerate(list_of_merging_scripts): - launch_jobs += ( - " && " if n > 0 else "" - ) + f"sbatch {run}" + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" os.system(launch_jobs) @@ -378,7 +375,7 @@ def main(): NSB_match, cluster, ) # generating the bash script to merge the M1 and M2 runs - + print("***** Running merge_hdf_files.py on the MAGIC data files...") # Below we run the bash scripts to merge the MAGIC files diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 316ae159..f7379f91 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -273,9 +273,7 @@ def main(): # TODO: check on N. 
bash scripts for n, run in enumerate(list_of_stereo_scripts): - launch_jobs += ( - " && " if n > 0 else "" - ) + f"sbatch {run}" + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" os.system(launch_jobs) @@ -308,9 +306,7 @@ def main(): continue launch_jobs = "" for n, run in enumerate(list_of_stereo_scripts): - launch_jobs += ( - " && " if n > 0 else "" - ) + f"sbatch {run}" + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" os.system(launch_jobs) From db40f9d41966003c9e6d0391bee0f7ddce2d1ec9 Mon Sep 17 00:00:00 2001 From: Julian Sitarek Date: Tue, 30 Jul 2024 19:49:57 +0200 Subject: [PATCH 179/236] fixing formating --- .../scripts/lst1_magic/semi_automatic_scripts/dl1_production.py | 1 - 1 file changed, 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index d3ff40ef..8f3938be 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -518,7 +518,6 @@ def main(): print( f"To check the jobs submitted to the cluster, type: squeue -n {source_name}" ) - directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name From d2e2b81324743aa45934ebfc0a9f23ef14e3d290 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 31 Jul 2024 09:40:45 +0000 Subject: [PATCH 180/236] fix bug --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 1513d8c1..45e55cf5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -109,7 +109,7 @@ def linking_bash_lst( ) return for d in dates: - Y_M, M_M, D_M = [int(x) for x in d.split("_")[1:]] + Y_M, M_M, D_M = [int(x) for x in d.split("_")[:]] day_MAGIC = dtdt(Y_M, M_M, D_M) @@ -156,7 +156,7 @@ def linking_bash_lst( lines = ( slurm + [ - f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", + f"export INM={MAGIC_DL1_dir}/Merged/{d}/Merged\n", f"export OUTPUTDIR={outputdir}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", From 79233816f52dd81a47116bb22219b64311a166bb Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:04:08 +0200 Subject: [PATCH 181/236] Update merge_stereo.py --- .../scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index 82ac0bed..ed726b53 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -62,6 +62,7 @@ def MergeStereo(target_dir, env_name, source, cluster): slurm = slurm_lines( queue="short", job_name=f"{process_name}_stereo_merge", + mem="2g", out_name=f"{stereoMergeDir}/logs/slurm-%x.%A_%a", ) rc = rc_lines( From 93eb6a0ad5b9a3a17e7cde839e1bbe5692eb3506 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 1 Aug 2024 13:26:15 +0000 Subject: [PATCH 182/236] upgrade LST db w/ keeping error 
codes --- .../database_production/create_LST_table.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index 7ce63cd2..84ef6178 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -90,7 +90,15 @@ def main(): ) df_cut = df_cut.sort_values(by=["DATE", "source"]) # TODO check if fine with update and nsb + df_cut = df_cut.reset_index(drop=True) + df_cols=df_cut.columns.tolist() + for col in df_cols: + if '_rc_all' in col: + df_cut[col]=df_cut[col].fillna(False) + elif '_rc' in col: + df_cut[col]=df_cut[col].fillna('{}') + df_cut.to_hdf( out_h5, key=out_key, From 5fbf2847d89cac72c293866214f7d4f59c0c5060 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 1 Aug 2024 13:43:47 +0000 Subject: [PATCH 183/236] remove merging MAGIC steps --- .../coincident_events.py | 2 +- .../database_production/create_LST_table.py | 12 ++-- .../semi_automatic_scripts/job_accounting.py | 4 +- .../semi_automatic_scripts/merging_runs.py | 67 ++++--------------- 4 files changed, 22 insertions(+), 63 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 45e55cf5..58b44f46 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -156,7 +156,7 @@ def linking_bash_lst( lines = ( slurm + [ - f"export INM={MAGIC_DL1_dir}/Merged/{d}/Merged\n", + f"export INM={MAGIC_DL1_dir}/Merged/{d}\n", f"export OUTPUTDIR={outputdir}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_LST.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index 84ef6178..8393c11c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -92,13 +92,13 @@ def main(): # TODO check if fine with update and nsb df_cut = df_cut.reset_index(drop=True) - df_cols=df_cut.columns.tolist() + df_cols = df_cut.columns.tolist() for col in df_cols: - if '_rc_all' in col: - df_cut[col]=df_cut[col].fillna(False) - elif '_rc' in col: - df_cut[col]=df_cut[col].fillna('{}') - + if "_rc_all" in col: + df_cut[col] = df_cut[col].fillna(False) + elif "_rc" in col: + df_cut[col] = df_cut[col].fillna("{}") + df_cut.to_hdf( out_h5, key=out_key, diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index a6ecd7f2..a782c2bb 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -136,9 +136,7 @@ def main(): indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" - dirs = sorted( - glob.glob(f"{indir}/[0-9]*/[M0-9]*") + glob.glob(f"{indir}/" + "[0-9]" * 8) - ) + dirs = sorted(glob.glob(f"{indir}/[0-9]*")) 
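An aside on the glob patterns used here: they are shell-style wildcards, not regular expressions. A minimal sketch, with invented directory names, of what each variant selects:

    import fnmatch

    names = ["20240731", "20240731_M1", "Merged", "logs"]

    # "[0-9]*" keeps any name starting with a digit (the nightly directories)
    print(fnmatch.filter(names, "[0-9]*"))     # ['20240731', '20240731_M1']

    # "[0-9]" * 8 expands to eight digit classes, i.e. an exact YYYYMMDD name
    print(fnmatch.filter(names, "[0-9]" * 8))  # ['20240731']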
if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index b49dc85e..bc68ce16 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -115,7 +115,7 @@ def split_train_test(target_dir, train_fraction): os.system(f"rm -r {list_of_dir[directory]}") -def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, cluster): +def merge(target_dir, MAGIC_runs, env_name, source, NSB_match, cluster): """ This function creates the bash scripts to run merge_hdf_files.py in all MAGIC subruns. @@ -124,8 +124,6 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c ---------- target_dir : str Path to the working directory - identification : str - Tells which batch to create. Options: subruns, M1M2, nights MAGIC_runs : matrix of strings This matrix is imported from config_general.yaml and tells the function where to find the data and where to put the merged files env_name : str @@ -155,51 +153,27 @@ def merge(target_dir, identification, MAGIC_runs, env_name, source, NSB_match, c ) os.makedirs(f"{MAGIC_DL1_dir}/Merged/logs", exist_ok=True) - with open(f"{source}_Merge_MAGIC_{identification}.sh", "w") as f: + with open(f"{source}_Merge_MAGIC.sh", "w") as f: f.writelines(lines) - if identification == "0_subruns": - for magic in [1, 2]: - for i in MAGIC_runs: - # Here is a difference w.r.t. original code. If only one telescope data are available they will be merged now for this telescope - indir = f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}" - if os.path.exists(f"{indir}"): - outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" - os.makedirs(f"{outdir}/logs", exist_ok=True) - # os.system( - # f'find {indir} -type f -name "dl1_M{magic}.Run*.h5" -size -3k -delete' - # ) - f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log 2>&1\n" - ) - rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", - out=f"{outdir}/logs/list", - ) - f.writelines(rc) - os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") - else: - logger.error(f"{indir} does not exist") - - else: + for magic in [1, 2]: for i in MAGIC_runs: - if os.path.exists(f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]}") & os.path.exists( - f"{MAGIC_DL1_dir}/M2/{i[0]}/{i[1]}" - ): - indir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}" - outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged" + # Here is a difference w.r.t. original code. 
If only one telescope data are available they will be merged now for this telescope + indir = f"{MAGIC_DL1_dir}/M{magic}/{i[0]}/{i[1]}" + if os.path.exists(f"{indir}"): + outdir = f"{MAGIC_DL1_dir}/Merged/{i[0]}" os.makedirs(f"{outdir}/logs", exist_ok=True) + f.write( - f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} --run-wise >{outdir}/logs/merge_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log 2>&1\n" + f"conda run -n {env_name} merge_hdf_files --input-dir {indir} --output-dir {outdir} >{outdir}/logs/merge_M{magic}_{i[0]}_{i[1]}_${{SLURM_JOB_ID}}.log 2>&1\n" ) rc = rc_lines( - store=f"{indir} ${{SLURM_JOB_ID}}", out=f"{outdir}/logs/list" + store=f"{indir} ${{SLURM_JOB_ID}}", + out=f"{outdir}/logs/list", ) f.writelines(rc) os.system(f"echo {indir} >> {outdir}/logs/list_dl0.txt") else: - logger.error( - f"{MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} or {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} does not exist" - ) + logger.error(f"{indir} does not exist") def mergeMC(target_dir, identification, env_name, cluster): @@ -359,38 +333,25 @@ def main(): print("***** Generating merge_MAGIC bashscripts...") merge( target_dir, - "0_subruns", MAGIC_runs, env_name, source_name, NSB_match, cluster, ) # generating the bash script to merge the subruns - merge( - target_dir, - "1_M1M2", - MAGIC_runs, - env_name, - source_name, - NSB_match, - cluster, - ) # generating the bash script to merge the M1 and M2 runs print("***** Running merge_hdf_files.py on the MAGIC data files...") # Below we run the bash scripts to merge the MAGIC files list_of_merging_scripts = np.sort( - glob.glob(f"{source_name}_Merge_MAGIC_*.sh") + glob.glob(f"{source_name}_Merge_MAGIC*.sh") ) if len(list_of_merging_scripts) < 1: logger.warning("No bash scripts for real data") continue launch_jobs = "" for n, run in enumerate(list_of_merging_scripts): - launch_jobs += ( - (" && " if n > 0 else "") - + f"merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})" - ) + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" os.system(launch_jobs) From 9c8bc931be925d949c25dbd5569fb935444afd3b Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 2 Aug 2024 08:06:35 +0000 Subject: [PATCH 184/236] fix bug --- .../lst1_magic/semi_automatic_scripts/job_accounting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index a782c2bb..9e7fc694 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -136,7 +136,9 @@ def main(): indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" - dirs = sorted(glob.glob(f"{indir}/[0-9]*")) + dirs = sorted( + glob.glob(f"{indir}/[0-9]*/[0-9]*") + glob.glob(f"{indir}/[0-9]*") + ) if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] From f2cf8e74994e79143312d51d1f0f13d42b955bb3 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 2 Aug 2024 09:15:57 +0000 Subject: [PATCH 185/236] fix glob --- .../lst1_magic/semi_automatic_scripts/job_accounting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 9e7fc694..79064221 100644 --- 
a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -136,9 +136,9 @@ def main(): indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" - dirs = sorted( - glob.glob(f"{indir}/[0-9]*/[0-9]*") + glob.glob(f"{indir}/[0-9]*") - ) + dirs = [x.replace('/logs', '') for x in (sorted( + glob.glob(f"{indir}/[0-9]*/[0-9]*/logs") + glob.glob(f"{indir}/[0-9]*/logs")) + )] if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] From cbfe1e1d096e837b52013be7247ea0715f865979 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 2 Aug 2024 09:37:05 +0000 Subject: [PATCH 186/236] doc --- .../database_production/LSTnsb.py | 11 ++++++++--- .../database_production/create_LST_table.py | 8 +++++--- .../database_production/lstchain_version.py | 9 +++++++-- .../database_production/nsb_level.py | 7 +++++-- .../database_production/nsb_to_h5.py | 9 ++++++--- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 559e478f..70431e32 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -1,5 +1,10 @@ """ -Evaluates NSB level for a LST run +Evaluates NSB level for a LST run (as a median over the NSB values for a subset of subruns) + +One txt file per run is created here: its content is a (date,run,NSB) n-tuple and its title contain an information about the NSB-bin to which the run belongs (according to the list of NSB values provided in the config file) + +Usage: +$ LSTnsb (-c MCP_config) -i run -d date -l lstchain_config (-s N_subruns) """ import argparse import glob @@ -19,7 +24,7 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): """ - Here we compute the NSB value for a run based on a subset of subruns. + Here we compute the NSB value for a run based on a subset of its subruns Parameters ---------- @@ -140,7 +145,7 @@ def main(): logger.info(f"Run {run_number} already processed") return - # date_lst = date.split("_")[0] + date.split("_")[1] + date.split("_")[2] + inputdir = f"/fefs/aswg/data/real/DL1/{date}/{lst_version}/{lst_tailcut}" run_list = np.sort(glob.glob(f"{inputdir}/dl1*Run*{run_number}.*.h5")) noise = nsb(run_list, simtel, lst_config, run_number, denominator) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index 8393c11c..6636c086 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -1,7 +1,10 @@ """ -Create a new h5 table from the one of joint observations. +Create a new h5 table (or upgrades an existing database by adding data collected in the time range defined by the provided begin and end dates) from the one of joint observations. 
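A few patches above, the error-code columns of this table were given suffix-based, type-appropriate defaults so the upgraded database carries no bare NaNs. A toy sketch of that pattern, with invented column names; note that the longer suffix must be checked first, since "_rc_all" also contains "_rc":

    import pandas as pd

    df = pd.DataFrame(
        {
            "LST1_run": ["01234", "01235"],
            "dl1_rc_all": [True, None],   # hypothetical per-run success flag
            "dl1_rc": ['{"0": "0"}', None],  # hypothetical return-code dict
        }
    )
    for col in df.columns.tolist():
        if "_rc_all" in col:  # boolean success flags default to False
            df[col] = df[col].fillna(False)
        elif "_rc" in col:  # stringified return-code dicts default to "{}"
            df[col] = df[col].fillna("{}")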
-Only the columns needed to produce the lists of LST runs to be processed are preserved, and two columns are added to store NSB level and error codes +Only the columns needed to produce the lists of LST runs to be processed are preserved, and columns are added to store NSB level (and related error code) and lstchain versions (available, last and processed) + +Usage: +$ create_LST_table (-b YYYYMMDD -e YYYYMMDD) """ import argparse @@ -89,7 +92,6 @@ def main(): subset="LST1_run", keep="first" ) df_cut = df_cut.sort_values(by=["DATE", "source"]) - # TODO check if fine with update and nsb df_cut = df_cut.reset_index(drop=True) df_cols = df_cut.columns.tolist() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py index 677ba00a..d3cb1935 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/lstchain_version.py @@ -1,5 +1,10 @@ """ -Fill the lstchain_0.9 and lstchain_0.10 columns of the LST database (i.e., which version of data is on the IT cluster) +Fills the lstchain_versions column of the LST database with a list of the versions of LST data which are stored on the IT cluster + +Moreover, it fills the last_lstchain_files column of the LST database with the path to the LST DL1 file processed with the last available lstchain version (the name and order of the versions to be considered is stored in the lstchain_versions variable here defined) + +Usage: +$ lstchain_version """ @@ -17,7 +22,7 @@ def version_lstchain(df_LST): """ - Evaluates all the versions used to process a given file and the last version of a file + Evaluates (and store in the database) all the versions used to process a given file and the last version of a file Parameters ---------- diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index b4418dca..391aa3a3 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -1,7 +1,10 @@ """ -Bash scripts to run LSTnsb.py on all the LST runs by using parallel jobs +Creates bash scripts to run LSTnsb.py on all the LST runs, in the provided time range (-b, -e), by using parallel jobs. It sets error_code_nsb = NaN for these runs -Usage: python nsb_level.py (-c config.yaml) +Moreover, it can modify the lstchain standard configuration file (used to evaluate NSB) by adding "use_flatfield_heuristic" = True + +Usage: +$ nsb_level (-c config.yaml -b begin -e end) """ import argparse diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index cba3366d..f961129c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -1,7 +1,10 @@ """ -Script to fill the 'nsb' column of the LST database. 
To be called after nsb_level.py +Script to fill the 'nsb' column of the LST database by using the txt files produced by nsb_level -Usage: python nsb_to_h5.py +It also fills the error_code_nsb column by 0 if the NSB could be evaluated and is < 3.0, by 2 if the NSB is > 3.0 and by 1 if the NSB could not be evaluated (NSB = NaN) + +Usage: +$ nsb_to_h5 """ import glob @@ -22,7 +25,7 @@ def collect_nsb(df_LST): """ - Here we collect NSB values from .txt files and store them into the dataframe + Here we collect NSB values from txt files and store them into the dataframe Parameters ---------- From 058cbe2b81de36337d2ff1d4d73d4f4aa7143930 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 2 Aug 2024 10:54:57 +0000 Subject: [PATCH 187/236] doc and cleaning unused vars --- .../coincident_events.py | 11 ++---- .../semi_automatic_scripts/dl1_production.py | 30 +++++++-------- .../semi_automatic_scripts/list_from_h5.py | 15 +++----- .../semi_automatic_scripts/merge_stereo.py | 4 +- .../semi_automatic_scripts/merging_runs.py | 38 +++++++++---------- .../semi_automatic_scripts/stereo_events.py | 24 ++++++------ 6 files changed, 53 insertions(+), 69 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 58b44f46..a82f66c7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -10,7 +10,7 @@ event files. Usage: -$ python coincident_events.py (-c config_file.yaml) +$ coincident_events (-c config) """ import argparse import glob @@ -74,7 +74,7 @@ def configfile_coincidence(target_dir, source_name, config_gen): def linking_bash_lst( - target_dir, LST_runs, source_name, LST_version, env_name, NSB_match, cluster + target_dir, LST_runs, source_name, LST_version, env_name, cluster ): """ @@ -92,8 +92,6 @@ def linking_bash_lst( The lstchain version used to process the LST data env_name : str Name of the conda environment - NSB_match : bool - If real data are matched to pre-processed MCs or not cluster : str Cluster system """ @@ -175,7 +173,7 @@ def linking_bash_lst( def main(): """ - Here we read the config file and call the functions defined above. + Main function """ parser = argparse.ArgumentParser() @@ -195,7 +193,7 @@ def main(): config = yaml.safe_load(f) target_dir = Path(config["directories"]["workspace_dir"]) - NSB_match = config["general"]["NSB_matching"] + env_name = config["general"]["env_name"] LST_version = config["general"]["LST_version"] @@ -228,7 +226,6 @@ def main(): source_name, LST_version, env_name, - NSB_match, cluster, ) # linking the data paths to current working directory diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 8f3938be..6c033305 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -11,14 +11,13 @@ No LST data is used here. 
Standard usage: -$ python dl1_production.py (-c config_file.yaml) +$ dl1_production (-t analysis_type) (-c config_file.yaml) """ import argparse import glob import logging import os -# import time from pathlib import Path import joblib @@ -100,8 +99,8 @@ def lists_and_bash_generator( ): """ - This function creates the lists list_nodes_gamma_complete.txt and list_folder_gamma.txt with the MC file paths. - After that, it generates a few bash scripts to link the MC paths to each subdirectory. + This function creates the lists list_nodes_*_complete.txt and list_folder_*.txt with the MC file paths. + After that, it generates a few bash scripts to link the MC paths to each subdirectory and to process them from DL0 to DL1. These bash scripts will be called later in the main() function below. This step will be skipped in case the MC path has not been provided (MC_path='') Parameters @@ -216,11 +215,11 @@ def lists_and_bash_generator( def lists_and_bash_gen_MAGIC( - target_dir, telescope_ids, MAGIC_runs, source, env_name, NSB_match, cluster + target_dir, telescope_ids, MAGIC_runs, source, env_name, cluster ): """ - Below we create a bash script that links the MAGIC data paths to each subdirectory. + Below we create bash scripts that link the MAGIC data paths to each subdirectory and process them from Calibrated to Dl1 Parameters ---------- @@ -234,8 +233,6 @@ def lists_and_bash_gen_MAGIC( Name of the target env_name : str Name of the environment - NSB_match : bool - If real data are matched to pre-processed MCs or not cluster : str Cluster system """ @@ -275,11 +272,11 @@ def lists_and_bash_gen_MAGIC( if number_of_nodes < 0: continue slurm = slurm_lines( - queue="short", # was long for no NSB_match + queue="short", job_name=process_name, array=number_of_nodes, mem="2g", - out_name=f"{target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", # without version for no NSB_match + out_name=f"{target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", ) rc = rc_lines( store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", @@ -287,7 +284,7 @@ def lists_and_bash_gen_MAGIC( ) lines = ( slurm - + [ # without version for no NSB_match + + [ f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_cal.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", @@ -358,7 +355,7 @@ def directories_generator_real( os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs", exist_ok=True) -def directories_generator_MC(target_dir, telescope_ids, NSB_match): +def directories_generator_MC(target_dir, telescope_ids): """ Here we create all subdirectories for a given workspace and target name. 
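For orientation, the generator functions above reduce to building one nested tree per telescope, with exist_ok=True so that reruns are harmless. A sketch with placeholder names:

    import os

    target_dir, source, version = "/tmp/workspace", "CrabNebula", "0.0.0"  # placeholders
    for magic in [1, 2]:
        for date, run in [("2024_07_31", "05106880")]:  # invented (date, run) pair
            os.makedirs(
                f"{target_dir}/v{version}/{source}/DL1/M{magic}/{date}/{run}/logs",
                exist_ok=True,
            )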
@@ -369,8 +366,6 @@ def directories_generator_MC(target_dir, telescope_ids, NSB_match): Directory to store the results telescope_ids : list List of the telescope IDs (set by the user) - NSB_match : bool - If real data are matched to pre-processed MCs or not """ dir_list = [ @@ -406,7 +401,9 @@ def directories_generator_MC(target_dir, telescope_ids, NSB_match): def main(): - """Here we read the config file and call the functions to generate the necessary directories, bash scripts and launching the jobs.""" + """ + Main function + """ # Here we are simply collecting the parameters from the command line, as input file, output directory, and configuration file @@ -469,7 +466,7 @@ def main(): # Below we run the analysis on the MC data if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): directories_generator_MC( - str(target_dir), telescope_ids, NSB_match + str(target_dir), telescope_ids ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( target_dir, noise_value, NSB_match, "MC", config @@ -534,7 +531,6 @@ def main(): MAGIC_runs, source_name, env_name, - NSB_match, cluster, ) # MAGIC real data if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index e38a45e0..cc2cb217 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -1,5 +1,5 @@ """ -This script creates the lists of MAGIC and LST runs (date and run number) from a dataframe in the .h5 format for a specific time range. +This script creates the lists of MAGIC and LST runs (date and run number) from a dataframe in the .h5 format for a specific time range (or specific dates). """ import argparse @@ -17,7 +17,7 @@ def split_lst_date(df): """ - This function appends to the provided dataframe, which contains the LST date as YYYYMMDD in one of the columns, four new columns: the LST year, month and day and the date as YYYY_MM_DD + This function appends to the provided dataframe, which contains the LST date as YYYYMMDD in one of the columns, four new columns: the LST year, month and day and the date as YYYY-MM-DD Parameters ---------- @@ -41,7 +41,7 @@ def split_lst_date(df): def magic_date(df): """ - This function appends to the provided dataframe (which contains the LST date, year, month and day) a column with the MAGIC dates (in the YYYY_MM_DD format). + This function appends to the provided dataframe (which contains the LST date, year, month and day) a column with the MAGIC dates (in the YYYYMMDD format). Parameters ---------- @@ -106,7 +106,7 @@ def clear_files(source_in, source_out, df_LST, df_MAGIC1, df_MAGIC2): def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_list=None): """ - This function creates the MAGIC_runs.txt and LST_runs.txt files, which contain the list of runs (with corresponding dates) to be processed. + This function creates the *_MAGIC_runs.txt and *_LST_runs.txt files, which contain the list of runs (with corresponding dates) to be processed for a given source. Parameters ---------- @@ -195,8 +195,7 @@ def list_run(source_in, source_out, df, skip_LST, skip_MAGIC, is_LST, M1_run_lis def main(): """ - This function is automatically called when the script is launched. 
- It calls the functions above to create the files XXXXXX_LST_runs.txt and XXXXX_MAGIC_runs.txt for the desired targets. + Main function """ parser = argparse.ArgumentParser() @@ -287,9 +286,7 @@ def main(): MAGIC_h5, key=MAGIC2_key, ) - # df_MAGIC1["Source"] = df_MAGIC1["Source"].str.replace(" ", "") - # df_MAGIC2["Source"] = df_MAGIC2["Source"].str.replace(" ", "") - + list_date_LST = np.unique(df_LST["date_LST"]) list_date_LST_low = [int(sub.replace("-", "")) for sub in list_date_LST] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index ed726b53..f4b08799 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -2,7 +2,7 @@ This scripts merges LST DL1Stereo subruns into runs Usage: -$ python merge_stereo.py (-c config_file.yaml) +$ merge_stereo (-c config_file.yaml) """ import argparse import glob @@ -83,7 +83,7 @@ def MergeStereo(target_dir, env_name, source, cluster): def main(): """ - Here we read the config_general.yaml file and call the functions defined above. + Main function """ parser = argparse.ArgumentParser() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index bc68ce16..21485953 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -1,29 +1,29 @@ """ This script splits the proton MC data sample into "train" -and "test", deletes possible failed runs (only those files +and "test", deletes possible MC failed runs (only those files that end up with a size < 1 kB), and generates the bash -scripts to merge the data files calling the script "merge_hdf_files.py" +scripts to merge MC and real data files by calling the script "merge_hdf_files.py" in the following order: MAGIC: -1) Merge the subruns into runs for M1 and M2 individually. -2) Merge the runs of M1 and M2 into M1-M2 runs. -3) Merge all the M1-M2 runs for a given night. + +Merge the subruns into runs for M1 and M2 individually. MC: -1) Merges all MC runs in a node + +Merges all MC runs in a node Usage: -$ merging_runs (-c config.yaml) +$ merging_runs (-c config.yaml) (-t analysis_type) If you want to merge only the MAGIC or only the MC data, you can do as follows: Only MAGIC: -$ merging_runs --analysis-type onlyMAGIC (-c config.yaml) +$ merging_runs -t onlyMAGIC (-c config.yaml) Only MC: -$ merging_runs --analysis-type onlyMC (-c config.yaml) +$ merging_runs -t onlyMC (-c config.yaml) """ import argparse @@ -53,7 +53,7 @@ def cleaning(list_of_nodes): """ - This function looks for failed runs in each node and remove them. + This function looks for failed runs in each MC node and remove them. 
Parameters ---------- @@ -82,7 +82,7 @@ def split_train_test(target_dir, train_fraction): target_dir : str Path to the working directory train_fraction : float - Fraction of proton MC files to be used in the training RF dataset + Fraction of proton MC files to be used in the training of RFs """ proton_dir = f"{target_dir}/v{__version__}/MC/DL1/protons" @@ -115,23 +115,20 @@ def split_train_test(target_dir, train_fraction): os.system(f"rm -r {list_of_dir[directory]}") -def merge(target_dir, MAGIC_runs, env_name, source, NSB_match, cluster): +def merge(target_dir, MAGIC_runs, env_name, source, cluster): """ - This function creates the bash scripts to run merge_hdf_files.py in all MAGIC subruns. - + This function creates the bash scripts to run merge_hdf_files.py for real data Parameters ---------- target_dir : str Path to the working directory MAGIC_runs : matrix of strings - This matrix is imported from config_general.yaml and tells the function where to find the data and where to put the merged files + Matrix of [(date,run)] n-tuples env_name : str Name of the environment source : str Target name - NSB_match : bool - If real data are matched to pre-processed MCs or not cluster : str Cluster system """ @@ -179,14 +176,14 @@ def merge(target_dir, MAGIC_runs, env_name, source, NSB_match, cluster): def mergeMC(target_dir, identification, env_name, cluster): """ - This function creates the bash scripts to run merge_hdf_files.py in all MC runs. + This function creates the bash scripts to run merge_hdf_files.py in all MC nodes. Parameters ---------- target_dir : str Path to the working directory identification : str - Tells which batch to create. Options: protons, gammadiffuse + Tells which sample to process env_name : str Name of the environment cluster : str @@ -236,7 +233,7 @@ def mergeMC(target_dir, identification, env_name, cluster): def main(): """ - Here we read the config_general.yaml file, split the proton sample into "test" and "train", and merge the MAGIC files. + Main function """ parser = argparse.ArgumentParser() @@ -336,7 +333,6 @@ def main(): MAGIC_runs, env_name, source_name, - NSB_match, cluster, ) # generating the bash script to merge the subruns diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index f7379f91..ac7a973f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -4,16 +4,16 @@ Coincident MAGIC+LST data files. 
Usage: -$ python stereo_events.py (-c config.yaml) +$ stereo_events (-c config.yaml) (-t analysis_type) If you want to compute the stereo parameters only the real data or only the MC data, you can do as follows: Only real data: -$ python stereo_events.py --analysis-type onlyReal (-c config.yaml) +$ stereo_events -t onlyMAGIC (-c config.yaml) Only MC: -$ python stereo_events.py --analysis-type onlyMC (-c config.yaml) +$ stereo_events -t onlyMC (-c config.yaml) """ import argparse @@ -43,7 +43,7 @@ def configfile_stereo(target_dir, source_name, config_gen): """ - This function creates the configuration file needed for the event stereo step + This function creates the configuration file needed for the stereo reconstruction step Parameters ---------- @@ -76,10 +76,10 @@ def configfile_stereo(target_dir, source_name, config_gen): yaml.dump(conf, f, default_flow_style=False) -def bash_stereo(target_dir, source, env_name, NSB_match, cluster): +def bash_stereo(target_dir, source, env_name, cluster): """ - This function generates the bashscript for running the stereo analysis. + This function generates the bashscripts for running the stereo analysis. Parameters ---------- @@ -89,8 +89,6 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): Target name env_name : str Name of the environment - NSB_match : bool - If real data are matched to pre-processed MCs or not cluster : str Cluster system """ @@ -153,7 +151,7 @@ def bash_stereo(target_dir, source, env_name, NSB_match, cluster): def bash_stereoMC(target_dir, identification, env_name, cluster): """ - This function generates the bashscript for running the stereo analysis. + This function generates the bashscripts for running the stereo analysis. Parameters ---------- @@ -204,7 +202,7 @@ def bash_stereoMC(target_dir, identification, env_name, cluster): def main(): """ - Here we read the config_general.yaml file and call the functions defined above. 
+ Main function """ parser = argparse.ArgumentParser() @@ -220,11 +218,11 @@ def main(): parser.add_argument( "--analysis-type", "-t", - choices=["onlyReal", "onlyMC"], + choices=["onlyMAGIC", "onlyMC"], dest="analysis_type", type=str, default="doEverything", - help="You can type 'onlyReal' or 'onlyMC' to run this script only on real or MC data, respectively.", + help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on real or MC data, respectively.", ) args = parser.parse_args() @@ -289,7 +287,7 @@ def main(): # Below we run the analysis on the real data print("***** Generating the bashscript...") - bash_stereo(target_dir, source_name, env_name, NSB_match, cluster) + bash_stereo(target_dir, source_name, env_name, cluster) print("***** Submitting processess to the cluster...") print(f"Process name: {source_name}_stereo") From f2945527c7c87c2c4c6ce98673c5a82d11f96334 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 2 Aug 2024 12:11:47 +0000 Subject: [PATCH 188/236] fix linter --- .../semi_automatic_scripts/coincident_events.py | 5 +---- .../database_production/LSTnsb.py | 1 - .../database_production/nsb_level.py | 2 +- .../database_production/nsb_to_h5.py | 4 ++-- .../semi_automatic_scripts/dl1_production.py | 7 +++---- .../semi_automatic_scripts/job_accounting.py | 12 +++++++++--- .../semi_automatic_scripts/list_from_h5.py | 2 +- .../semi_automatic_scripts/merging_runs.py | 1 + 8 files changed, 18 insertions(+), 16 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index a82f66c7..7dc40ce4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -73,9 +73,7 @@ def configfile_coincidence(target_dir, source_name, config_gen): yaml.dump(conf, f, default_flow_style=False) -def linking_bash_lst( - target_dir, LST_runs, source_name, LST_version, env_name, cluster -): +def linking_bash_lst(target_dir, LST_runs, source_name, LST_version, env_name, cluster): """ This function links the LST data paths to the working directory and creates bash scripts. 
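The submission one-liners reformatted in this patch all follow the same idiom: nothing consumed the job IDs that "sbatch --parsable" used to return, so each script is now submitted plainly, and the "&&" chaining makes one failed submission abort the rest. A self-contained sketch, with an invented script name:

    import os

    list_of_scripts = ["CrabNebula_LST_coincident_2024_07_31.sh"]  # hypothetical
    launch_jobs = ""
    for n, run in enumerate(list_of_scripts):
        launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}"
    os.system(launch_jobs)  # runs e.g. "sbatch a.sh && sbatch b.sh && ..."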
@@ -193,7 +191,6 @@ def main(): config = yaml.safe_load(f) target_dir = Path(config["directories"]["workspace_dir"]) - env_name = config["general"]["env_name"] LST_version = config["general"]["LST_version"] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 70431e32..b20866b8 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -145,7 +145,6 @@ def main(): logger.info(f"Run {run_number} already processed") return - inputdir = f"/fefs/aswg/data/real/DL1/{date}/{lst_version}/{lst_tailcut}" run_list = np.sort(glob.glob(f"{inputdir}/dl1*Run*{run_number}.*.h5")) noise = nsb(run_list, simtel, lst_config, run_number, denominator) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 391aa3a3..f2f9d341 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -3,7 +3,7 @@ Moreover, it can modify the lstchain standard configuration file (used to evaluate NSB) by adding "use_flatfield_heuristic" = True -Usage: +Usage: $ nsb_level (-c config.yaml -b begin -e end) """ diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index f961129c..b881d8be 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -3,8 +3,8 @@ It also fills the error_code_nsb column by 0 if the NSB could be evaluated and is < 3.0, by 2 if the NSB is > 3.0 and by 1 if the NSB could not be evaluated (NSB = NaN) -Usage: -$ nsb_to_h5 +Usage: +$ nsb_to_h5 """ import glob diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 6c033305..232c38f6 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -17,7 +17,6 @@ import glob import logging import os - from pathlib import Path import joblib @@ -272,11 +271,11 @@ def lists_and_bash_gen_MAGIC( if number_of_nodes < 0: continue slurm = slurm_lines( - queue="short", + queue="short", job_name=process_name, array=number_of_nodes, mem="2g", - out_name=f"{target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", + out_name=f"{target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}/logs/slurm-%x.%A_%a", ) rc = rc_lines( store="$SAMPLE ${SLURM_ARRAY_JOB_ID} ${SLURM_ARRAY_TASK_ID}", @@ -284,7 +283,7 @@ def lists_and_bash_gen_MAGIC( ) lines = ( slurm - + [ + + [ f"export OUTPUTDIR={target_dir}/v{__version__}/{source}/DL1/M{magic}/{i[0]}/{i[1]}\n", "SAMPLE_LIST=($(<$OUTPUTDIR/logs/list_cal.txt))\n", "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 
79064221..c9a1b2b8 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -136,9 +136,15 @@ def main(): indir = f"{work_dir}/v{args.version}/{source_out}/{args.data_level}" - dirs = [x.replace('/logs', '') for x in (sorted( - glob.glob(f"{indir}/[0-9]*/[0-9]*/logs") + glob.glob(f"{indir}/[0-9]*/logs")) - )] + dirs = [ + x.replace("/logs", "") + for x in ( + sorted( + glob.glob(f"{indir}/[0-9]*/[0-9]*/logs") + + glob.glob(f"{indir}/[0-9]*/logs") + ) + ) + ] if dirs == []: versions = [x.split("/v")[-1] for x in glob.glob(f"{work_dir}/v*")] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index cc2cb217..53674e31 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -286,7 +286,7 @@ def main(): MAGIC_h5, key=MAGIC2_key, ) - + list_date_LST = np.unique(df_LST["date_LST"]) list_date_LST_low = [int(sub.replace("-", "")) for sub in list_date_LST] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 21485953..bec6c8eb 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -119,6 +119,7 @@ def merge(target_dir, MAGIC_runs, env_name, source, cluster): """ This function creates the bash scripts to run merge_hdf_files.py for real data + Parameters ---------- target_dir : str From a6bfe6524a43c9590b23540b508123955a8cf9f6 Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Fri, 2 Aug 2024 14:58:19 +0200 Subject: [PATCH 189/236] In database_config.yaml, added the path to the common MAGIC+LST1 database. --- magicctapipe/resources/database_config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/magicctapipe/resources/database_config.yaml b/magicctapipe/resources/database_config.yaml index d7aac09e..fb9c2976 100644 --- a/magicctapipe/resources/database_config.yaml +++ b/magicctapipe/resources/database_config.yaml @@ -3,9 +3,11 @@ database_paths: input_2: "/home/alessio.berti/MAGIC-LST_common/runfile/simultaneous_obs_summary.h5" MAGIC: '/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5' LST: "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5" + MAGIC+LST1: "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5" database_keys: input_1: '/str' input_2: '/str' MAGIC-I: "MAGIC1/runs_M1" MAGIC-II: "MAGIC2/runs_M2" LST: "joint_obs" + MAGIC+LST1: "str/table" From 2321e041a5989892be2e10175433401eb9cf2486 Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Fri, 2 Aug 2024 14:59:33 +0200 Subject: [PATCH 190/236] Changes made to update_MAGIC_database.py: The script can now be used by the parser. The create_new_database function has been removed; new databases and updates are now handled within a single function. 
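A minimal sketch of the parser interface this commit introduces: both bounds are plain YYYYMMDD strings, defaulting to the start of joint MAGIC+LST1 observations and to the current date.

    import argparse
    from datetime import datetime

    parser = argparse.ArgumentParser()
    parser.add_argument("--date-min", "-m", dest="date_min", type=str,
                        default="20191101",
                        help="Start of the time interval (LST convention, YYYYMMDD).")
    parser.add_argument("--date-max", "-M", dest="date_max", type=str,
                        default=datetime.now().strftime("%Y%m%d"),
                        help="End of the time interval (LST convention, YYYYMMDD).")
    args = parser.parse_args([])  # empty list: demo only, keep the defaults
    print(args.date_min, args.date_max)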
--- .../update_MAGIC_database.py | 211 ++++++++---------- 1 file changed, 91 insertions(+), 120 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index 0f7aadc9..e865dae6 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -1,7 +1,9 @@ """ The script updates the common MAGIC database from a given time range. At the moment, to avoid accidentally destroying the previous database, -we save the updated database as a new file. If the path to the database is not found, +we save the updated database as a new file (see in main function new_h5_file_path +and new_database_file_path). +If the path to the previous database is not found, the script creates a new one. The start of the time interval is the date of the beginning of the common MAGIC+LST1 observations. The end of the time interval is the current date. @@ -14,10 +16,16 @@ 'YYYY', 'MM', and 'DD' specify the date. """ +import argparse +import sys import os from datetime import datetime, timedelta import pandas as pd +import yaml + +from magicctapipe.io import resource_file +import numpy as np def fix_lists_and_convert(cell): @@ -50,9 +58,9 @@ def table_magic_runs(df, date_min, date_max): df : pandas.DataFrame DataFrame with general information about MAGIC+LST1 observations. date_min : str - Start of the time interval (in LST convention). + Start of the time interval (in LST convention, format YYYYMMDD). date_max : str - End of the time interval (in LST convention). + End of the time interval (in LST convention, format YYYYMMDD). Returns ------- @@ -60,8 +68,7 @@ def table_magic_runs(df, date_min, date_max): A DataFrame filtered by the specified date range. """ - df_selected_data = df.iloc[:, [2, 1, 25]] - df_selected_data.columns = ["DATE", "source", "MAGIC_runs"] + df_selected_data = df[["DATE", "source", "MAGIC_runs"]] grouped_data = df_selected_data.groupby(["DATE", "source"]) result_table = [] @@ -88,7 +95,7 @@ def update_tables(database, DF, tel_id): Data are added chronologically. The updated table DF may include new rows that contain NaN values in some cells. - The function automatically filling NaN values withpredefined default values + The function automatically filling NaN values with predefined default values based on the column's data type. Parameters @@ -121,18 +128,15 @@ def update_tables(database, DF, tel_id): ) if non_matching_rows.empty: - raise Exception("There is no un-updated data for a given time interval. 
") + raise Exception("There is no un-updated data for a given time interval.") else: non_matching_rows_reset = non_matching_rows.reset_index(drop=True) new_rows = [] - for index, row in non_matching_rows_reset.iterrows(): - date = row["DATE"] - source = row["source"] - run_id = row["Run ID"] - run_id = str(run_id) + for _, (date, source, run_id) in non_matching_rows_reset[["DATE", "source", "Run ID"]].iterrows(): + run_id = str(run_id) date_obj = datetime.strptime(date, "%Y%m%d") date_obj += timedelta(days=1) new_date = datetime.strftime(date_obj, "%Y%m%d") @@ -170,10 +174,13 @@ def update_tables(database, DF, tel_id): combined_df = combined_df.sort_values("DATE") combined_df["DATE"] = combined_df["DATE"].dt.strftime("%Y%m%d") + combined_df["DATE"] = combined_df["DATE"].astype(int) + combined_df["number of subruns"] = combined_df["number of subruns"].astype(int) combined_df["Run ID"] = combined_df["Run ID"].astype(int) combined_df.reset_index(drop=True, inplace=True) for column in combined_df.columns[4:]: + combined_df[column] = combined_df[column].replace(r'^\s*$', np.nan, regex=True) not_null_data = combined_df[column].dropna() if not_null_data.empty: continue # Skip if all values are NaN @@ -201,90 +208,66 @@ def update_tables(database, DF, tel_id): return combined_df -def create_new_database(df, date_min, date_max, tel_id): - """ - Creating a new MAGIC database. +def main(): - Parameters - ----------- - df : pandas.DataFrame - Dataframe with general information about MAGIC+LST1 observations. - date_min : str - Start of the time interval (in LST convention). - date_max : str - End of the time interval (in LST convention). - tel_id : int - The telescope ID, which must be either 1 or 2. + """Main function.""" + + parser = argparse.ArgumentParser() + + date_min_default = "20191101" + current_datetime = datetime.now() + date_max_default = current_datetime.strftime("%Y%m%d") + + parser.add_argument( + "--date-min", + "-m", + dest="date_min", + type=str, + default=date_min_default, + help="Start of the time interval (in LST convention, format YYYYMMDD)." + ) - Returns - ------- - pandas.DataFrame - A DataFrame with a new MAGIC database for all common MAGIC+LST1 observations. - """ + parser.add_argument( + "--date-max", + "-M", + dest="date_max", + type=str, + default=date_max_default, + help="End of the time interval (in LST convention, format YYYYMMDD)." + ) - database = table_magic_runs(df, date_min, date_max) - new_rows = [] - - for index, row in database.iterrows(): - date = row["DATE"] - source = row["source"] - run_id = row["Run ID"] - run_id = str(run_id) - - date_obj = datetime.strptime(date, "%Y%m%d") - date_obj += timedelta(days=1) - new_date = datetime.strftime(date_obj, "%Y%m%d") - YYYY = new_date[:4] - MM = new_date[4:6] - DD = new_date[6:8] - Y = "_Y_" - - path = f"/fefs/onsite/common/MAGIC/data/M{tel_id}/event/Calibrated/{YYYY}/{MM}/{DD}" - - if os.path.exists(path): - files = os.listdir(path) - count_with_run_id = 0 - # Counter for files that include the run_id. 
- for filename in files: - if Y in filename: - if new_date in filename: - if source in filename: - if run_id in filename: - count_with_run_id += 1 - if count_with_run_id != 0: - new_rows.append( - { - "DATE": int(date), - "source": str(source), - "Run ID": int(run_id), - "number of subruns": int(count_with_run_id), - } - ) - - new_rows = pd.DataFrame(new_rows) - - return new_rows + args = parser.parse_args() -def main(): + config = resource_file("database_config.yaml") - """Main function.""" + with open( + config, "rb" + ) as bf: + config_dict = yaml.safe_load(bf) - tel_id = [1, 2] + df_path = config_dict["database_paths"]["MAGIC+LST1"] + df_key = config_dict["database_keys"]["MAGIC+LST1"] + df = pd.read_hdf(df_path, key=df_key) - df = pd.read_hdf( - "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5", - key="str/table", - ) + #Set "" to generate a new database. + previous_database_path = config_dict["database_paths"]["MAGIC"] + + tel_id = [1, 2] + roman_numerals = { + 1: "I", + 2: "II" + } - # Set "" to generate a new database. - previous_database_path = "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5" file_exists = os.path.exists(previous_database_path) if file_exists: - # TO DO : set time interval - format YYYYMMDD - date_min = "20240601" - date_max = "20240718" + new_h5_file_path = ( + "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns_UPDATED.h5" + ) + + date_min = args.date_min + date_max = args.date_max print("Updating database...") @@ -292,29 +275,19 @@ def main(): for tel in tel_id: + tel_roman = roman_numerals[tel] + key = config_dict["database_keys"][f"MAGIC-{tel_roman}"] + DF = pd.read_hdf( previous_database_path, - key=f"MAGIC{tel}/runs_M{tel}", + key=key, ) - if tel == 1: - updated_df_1 = update_tables(database, DF, tel) - print(updated_df_1) - if tel == 2: - updated_df_2 = update_tables(database, DF, tel) - print(updated_df_2) - # TO DO : set a path to save new database - new_h5_file_path = ( - "/fefs/aswg/workspace/joanna.wojtowicz/output/update_database.h5" - ) + updated_df = update_tables(database, DF, tel) + print(updated_df) try: - updated_df_1.to_hdf( - new_h5_file_path, key="MAGIC1/runs_M1", mode="w", format="table" - ) - updated_df_2.to_hdf( - new_h5_file_path, key="MAGIC2/runs_M2", mode="a", format="table" - ) + updated_df.to_hdf(new_h5_file_path, key=key, mode=("w" if tel == 1 else "a"), format = "table") print(f"File saved successfully at {new_h5_file_path}") except Exception as e: @@ -322,30 +295,28 @@ def main(): else: print("Database does not exist. 
Creating a new database...") + + new_database_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns_NEW.h5" - date_min = "20191101" - current_datetime = datetime.now() - date_max = current_datetime.strftime("%Y%m%d") + database_default = table_magic_runs(df, date_min_default, date_max_default) - tel_id_M1 = 1 - tel_id_M2 = 2 - database_M1 = create_new_database(df, date_min, date_max, tel_id_M1) - database_M2 = create_new_database(df, date_min, date_max, tel_id_M2) + for tel in tel_id: + + tel_roman = roman_numerals[tel] + key = config_dict["database_keys"][f"MAGIC-{tel_roman}"] - # TO DO : set a path to save a new database - new_database_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/output/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5" + #an empty table filled by NaN + DF_empty = pd.DataFrame(columns=["DATE", "source", "Run ID"]) - try: - database_M1.to_hdf( - new_database_file_path, key="MAGIC1/runs_M1", mode="w", format="table" - ) - database_M2.to_hdf( - new_database_file_path, key="MAGIC2/runs_M2", mode="a", format="table" - ) - print(f"File saved successfully at {new_database_file_path}") + new_database = update_tables(database_default, DF_empty, tel) + print(new_database) - except Exception as e: - print(f"An error occurred: {e}") + try: + new_database.to_hdf(new_database_file_path, key=key, mode=("w" if tel == 1 else "a"), format = "table") + print(f"File saved successfully at {new_database_file_path}") + + except Exception as e: + print(f"An error occurred: {e}") if __name__ == "__main__": main() From 9117f86ec241f25c04b6db2f94b21178d6a7f0fb Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Fri, 2 Aug 2024 16:26:51 +0200 Subject: [PATCH 191/236] A few corrections in update_MAGIC_database.py --- .../update_MAGIC_database.py | 73 ++++++++++--------- 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index e865dae6..7d8d1e6c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -1,8 +1,7 @@ """ The script updates the common MAGIC database from a given time range. At the moment, to avoid accidentally destroying the previous database, -we save the updated database as a new file (see in main function new_h5_file_path -and new_database_file_path). +we save the updated database as a new file (see in main function new_h5_file_path). If the path to the previous database is not found, the script creates a new one. The start of the time interval is the date of the beginning of the common MAGIC+LST1 observations. 
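Both this commit and the previous one touch the NaN bookkeeping in update_tables: blank strings are first normalised to NaN, and the remaining gaps are filled with per-dtype defaults. A toy sketch, where the concrete fallback values are illustrative assumptions rather than the script's own choices:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"comment": ["ok", "  "], "zenith": [23.4, np.nan]})
    for column in df.columns:
        df[column] = df[column].replace(r"^\s*$", np.nan, regex=True)
        not_null = df[column].dropna()
        if not_null.empty:
            continue  # columns that are entirely NaN are left untouched
        default = "" if not_null.dtype == object else -1  # assumed defaults
        df[column] = df[column].fillna(default)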
@@ -17,15 +16,14 @@ """ import argparse -import sys import os from datetime import datetime, timedelta +import numpy as np import pandas as pd import yaml from magicctapipe.io import resource_file -import numpy as np def fix_lists_and_convert(cell): @@ -73,7 +71,7 @@ def table_magic_runs(df, date_min, date_max): result_table = [] for (date, source), group in grouped_data: - if date >= date_min and date <= date_max: + if date >= date_min and date <= date_max: runs_combined = group["MAGIC_runs"].sum() result_table.append( @@ -134,7 +132,9 @@ def update_tables(database, DF, tel_id): non_matching_rows_reset = non_matching_rows.reset_index(drop=True) new_rows = [] - for _, (date, source, run_id) in non_matching_rows_reset[["DATE", "source", "Run ID"]].iterrows(): + for _, (date, source, run_id) in non_matching_rows_reset[ + ["DATE", "source", "Run ID"] + ].iterrows(): run_id = str(run_id) date_obj = datetime.strptime(date, "%Y%m%d") @@ -180,7 +180,9 @@ def update_tables(database, DF, tel_id): combined_df.reset_index(drop=True, inplace=True) for column in combined_df.columns[4:]: - combined_df[column] = combined_df[column].replace(r'^\s*$', np.nan, regex=True) + combined_df[column] = combined_df[column].replace( + r"^\s*$", np.nan, regex=True + ) not_null_data = combined_df[column].dropna() if not_null_data.empty: continue # Skip if all values are NaN @@ -208,12 +210,13 @@ def update_tables(database, DF, tel_id): return combined_df + def main(): """Main function.""" - + parser = argparse.ArgumentParser() - + date_min_default = "20191101" current_datetime = datetime.now() date_max_default = current_datetime.strftime("%Y%m%d") @@ -224,7 +227,7 @@ def main(): dest="date_min", type=str, default=date_min_default, - help="Start of the time interval (in LST convention, format YYYYMMDD)." + help="Start of the time interval (in LST convention, format YYYYMMDD).", ) parser.add_argument( @@ -233,38 +236,30 @@ def main(): dest="date_max", type=str, default=date_max_default, - help="End of the time interval (in LST convention, format YYYYMMDD)." + help="End of the time interval (in LST convention, format YYYYMMDD).", ) args = parser.parse_args() config = resource_file("database_config.yaml") - with open( - config, "rb" - ) as bf: + with open(config, "rb") as bf: config_dict = yaml.safe_load(bf) df_path = config_dict["database_paths"]["MAGIC+LST1"] df_key = config_dict["database_keys"]["MAGIC+LST1"] df = pd.read_hdf(df_path, key=df_key) - #Set "" to generate a new database. - previous_database_path = config_dict["database_paths"]["MAGIC"] + # Set "" to generate a new database. 
+ previous_database_path = "" #config_dict["database_paths"]["MAGIC"] tel_id = [1, 2] - roman_numerals = { - 1: "I", - 2: "II" - } file_exists = os.path.exists(previous_database_path) - if file_exists: + new_h5_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns_UPDATED.h5" - new_h5_file_path = ( - "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns_UPDATED.h5" - ) + if file_exists: date_min = args.date_min date_max = args.date_max @@ -274,9 +269,8 @@ def main(): database = table_magic_runs(df, date_min, date_max) for tel in tel_id: - - tel_roman = roman_numerals[tel] - key = config_dict["database_keys"][f"MAGIC-{tel_roman}"] + dat_key = "MAGIC-" + "I" * tel + key = config_dict["database_keys"][dat_key] DF = pd.read_hdf( previous_database_path, @@ -287,7 +281,12 @@ def main(): print(updated_df) try: - updated_df.to_hdf(new_h5_file_path, key=key, mode=("w" if tel == 1 else "a"), format = "table") + updated_df.to_hdf( + new_h5_file_path, + key=key, + mode=("w" if tel == 1 else "a"), + format = "table", + ) print(f"File saved successfully at {new_h5_file_path}") except Exception as e: @@ -295,25 +294,29 @@ def main(): else: print("Database does not exist. Creating a new database...") - - new_database_file_path = "/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns_NEW.h5" database_default = table_magic_runs(df, date_min_default, date_max_default) for tel in tel_id: - tel_roman = roman_numerals[tel] - key = config_dict["database_keys"][f"MAGIC-{tel_roman}"] + dat_key = "MAGIC-" + "I" * tel + key = config_dict["database_keys"][dat_key] - #an empty table filled by NaN + # an empty table filled by NaN DF_empty = pd.DataFrame(columns=["DATE", "source", "Run ID"]) new_database = update_tables(database_default, DF_empty, tel) print(new_database) try: - new_database.to_hdf(new_database_file_path, key=key, mode=("w" if tel == 1 else "a"), format = "table") - print(f"File saved successfully at {new_database_file_path}") + new_database.to_hdf( + new_h5_file_path, + key=key, + mode=("w" if tel == 1 else "a"), + format = "table" + ) + + print(f"File saved successfully at {new_h5_file_path}") except Exception as e: print(f"An error occurred: {e}") From 203ba68bf137ed9e2771c752d54ee363089308c2 Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Fri, 2 Aug 2024 16:38:45 +0200 Subject: [PATCH 192/236] A few corrections in update_MAGIC_database.py --- .../database_production/update_MAGIC_database.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index 7d8d1e6c..3cd5a0ec 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -251,7 +251,7 @@ def main(): df = pd.read_hdf(df_path, key=df_key) # Set "" to generate a new database. 
- previous_database_path = "" #config_dict["database_paths"]["MAGIC"] + previous_database_path = config_dict["database_paths"]["MAGIC"] tel_id = [1, 2] @@ -285,7 +285,7 @@ def main(): new_h5_file_path, key=key, mode=("w" if tel == 1 else "a"), - format = "table", + format="table", ) print(f"File saved successfully at {new_h5_file_path}") @@ -313,7 +313,7 @@ def main(): new_h5_file_path, key=key, mode=("w" if tel == 1 else "a"), - format = "table" + format="table", ) print(f"File saved successfully at {new_h5_file_path}") From 1124882bf6f12b34f80ddb10d52ae2231361aa42 Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Fri, 2 Aug 2024 16:47:42 +0200 Subject: [PATCH 193/236] A few corrections in update_MAGIC_database.py --- .../database_production/update_MAGIC_database.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index 3cd5a0ec..5c5e7e42 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -182,7 +182,7 @@ def update_tables(database, DF, tel_id): for column in combined_df.columns[4:]: combined_df[column] = combined_df[column].replace( r"^\s*$", np.nan, regex=True - ) + ) not_null_data = combined_df[column].dropna() if not_null_data.empty: continue # Skip if all values are NaN @@ -269,6 +269,7 @@ def main(): database = table_magic_runs(df, date_min, date_max) for tel in tel_id: + dat_key = "MAGIC-" + "I" * tel key = config_dict["database_keys"][dat_key] @@ -298,7 +299,7 @@ def main(): database_default = table_magic_runs(df, date_min_default, date_max_default) for tel in tel_id: - + dat_key = "MAGIC-" + "I" * tel key = config_dict["database_keys"][dat_key] @@ -321,5 +322,6 @@ def main(): except Exception as e: print(f"An error occurred: {e}") + if __name__ == "__main__": main() From e5119cbf899e47fda719f4b496e1c8a786989740 Mon Sep 17 00:00:00 2001 From: Julian Sitarek <33022433+jsitarek@users.noreply.github.com> Date: Fri, 2 Aug 2024 17:43:12 +0200 Subject: [PATCH 194/236] Update README.md added short description of job_accounting script --- magicctapipe/scripts/lst1_magic/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index d4a97831..0acdb991 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -197,4 +197,4 @@ To create and update the MAGIC and LST databases (from the one produced by AB an - `nsb_to_h5`: this script reads the txt files created by `nsb_level` to know the NSB value for each run. This value is used to fill the `nsb` column of the database at the location of the respective run number. It also updates the error codes (0: fine, 1: nsb=NaN, 2: NSB>3.0). Launched as `python nsb_to_h5.py` - +- `job_accounting`: this script (in semi_automatic_scripts directory) allows to track progress of the submitted jobs, in particular listing errors. It also provides basic resource statistics (CPU and memory) of the completed jobs. 
Finally, it can be also used to update the database files with the progress of data processing From cb9d18674a252292f3663b415ff74718099ccb0e Mon Sep 17 00:00:00 2001 From: Joanna Wojtowicz Date: Fri, 2 Aug 2024 18:12:45 +0200 Subject: [PATCH 195/236] A few corrections in update_MAGIC_database.py --- .../update_MAGIC_database.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index 5c5e7e42..628de890 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -281,17 +281,17 @@ def main(): updated_df = update_tables(database, DF, tel) print(updated_df) - try: - updated_df.to_hdf( - new_h5_file_path, - key=key, - mode=("w" if tel == 1 else "a"), - format="table", - ) - print(f"File saved successfully at {new_h5_file_path}") + try: + updated_df.to_hdf( + new_h5_file_path, + key=key, + mode=("w" if tel == 1 else "a"), + format="table", + ) + print(f"File saved successfully at {new_h5_file_path}") - except Exception as e: - print(f"An error occurred: {e}") + except Exception as e: + print(f"An error occurred: {e}") else: print("Database does not exist. Creating a new database...") From 3c475469d08281fefdf573d3831c23c6c49a9caa Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 5 Aug 2024 06:53:31 +0000 Subject: [PATCH 196/236] remove auto rm MC --- .../semi_automatic_scripts/__init__.py | 3 +-- .../semi_automatic_scripts/merging_runs.py | 21 ++----------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index fc95b25d..2cc82cf9 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -8,11 +8,10 @@ lists_and_bash_generator, ) from .merge_stereo import MergeStereo -from .merging_runs import cleaning, merge, mergeMC, split_train_test +from .merging_runs import merge, mergeMC, split_train_test from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo __all__ = [ - "cleaning", "split_train_test", "merge", "mergeMC", diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index bec6c8eb..678cffd0 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -43,31 +43,14 @@ slurm_lines, ) -__all__ = ["cleaning", "split_train_test", "merge", "mergeMC"] +__all__ = ["split_train_test", "merge", "mergeMC"] logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) -def cleaning(list_of_nodes): - """ - This function looks for failed runs in each MC node and remove them. - - Parameters - ---------- - list_of_nodes : array of str - List of nodes where the function will look for failed runs. - """ - - cwd = os.getcwd() - for i in tqdm(range(len(list_of_nodes)), desc="Cleaning failed runs"): - os.chdir(list_of_nodes[i]) - os.system('find . 
-type f -name "dl1_[gphe]*_zd*_az*.h5" -size -1k -delete') - - os.chdir(cwd) - print("Cleaning done.") def split_train_test(target_dir, train_fraction): @@ -207,7 +190,7 @@ def mergeMC(target_dir, identification, env_name, cluster): process_size = len(list_of_nodes) - 1 - cleaning(list_of_nodes) # This will delete the (possibly) failed runs. + if cluster != "SLURM": logger.warning( "Automatic processing not implemented for the cluster indicated in the config file" From 8c58b94fb49b1ebb1a5502bfcb35cb2d177019f0 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 5 Aug 2024 06:54:18 +0000 Subject: [PATCH 197/236] memory coinc. 6 GB --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 7dc40ce4..ad792e1f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -141,7 +141,7 @@ def linking_bash_lst(target_dir, LST_runs, source_name, LST_version, env_name, c queue="short", job_name=f"{source_name}_coincidence", array=process_size, - mem="8g", + mem="6g", out_name=f"{outputdir}/logs/slurm-%x.%A_%a", ) rc = rc_lines( From 83512fe2493a7f3c8d33dced59350fc212fe1f8b Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 5 Aug 2024 07:00:06 +0000 Subject: [PATCH 198/236] rename config file --- magicctapipe/conftest.py | 4 ++-- ..._general.yaml => test_config_auto_MCP.yaml} | 0 ...LST.yaml => test_config_auto_MCP_4LST.yaml} | 0 magicctapipe/scripts/lst1_magic/README.md | 18 +++++++++--------- .../coincident_events.py | 2 +- ...onfig_general.yaml => config_auto_MCP.yaml} | 0 .../database_production/LSTnsb.py | 2 +- .../database_production/nsb_level.py | 2 +- .../semi_automatic_scripts/dl1_production.py | 2 +- .../semi_automatic_scripts/job_accounting.py | 6 +++--- .../semi_automatic_scripts/list_from_h5.py | 4 ++-- .../semi_automatic_scripts/merge_stereo.py | 2 +- .../semi_automatic_scripts/merging_runs.py | 2 +- .../semi_automatic_scripts/stereo_events.py | 2 +- 14 files changed, 23 insertions(+), 23 deletions(-) rename magicctapipe/resources/{test_config_general.yaml => test_config_auto_MCP.yaml} (100%) rename magicctapipe/resources/{test_config_general_4LST.yaml => test_config_auto_MCP_4LST.yaml} (100%) rename magicctapipe/scripts/lst1_magic/semi_automatic_scripts/{config_general.yaml => config_auto_MCP.yaml} (100%) diff --git a/magicctapipe/conftest.py b/magicctapipe/conftest.py index 6158d760..aba518d7 100644 --- a/magicctapipe/conftest.py +++ b/magicctapipe/conftest.py @@ -411,7 +411,7 @@ def config_monly(): @pytest.fixture(scope="session") def config_gen(): - config_path = resource_file("test_config_general.yaml") + config_path = resource_file("test_config_auto_MCP.yaml") with open(config_path, "rb") as f: config = yaml.safe_load(f) return config @@ -419,7 +419,7 @@ def config_gen(): @pytest.fixture(scope="session") def config_gen_4lst(): - config_path = resource_file("test_config_general_4LST.yaml") + config_path = resource_file("test_config_auto_MCP_4LST.yaml") with open(config_path, "rb") as f: config = yaml.safe_load(f) return config diff --git a/magicctapipe/resources/test_config_general.yaml b/magicctapipe/resources/test_config_auto_MCP.yaml similarity index 100% rename from magicctapipe/resources/test_config_general.yaml rename to 
magicctapipe/resources/test_config_auto_MCP.yaml diff --git a/magicctapipe/resources/test_config_general_4LST.yaml b/magicctapipe/resources/test_config_auto_MCP_4LST.yaml similarity index 100% rename from magicctapipe/resources/test_config_general_4LST.yaml rename to magicctapipe/resources/test_config_auto_MCP_4LST.yaml diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 0acdb991..7b03be3b 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -31,9 +31,9 @@ During the analysis, some files (i.e., bash scripts, lists of sources and runs) In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3) and MC DL0 to DL1. -In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_general.yaml` according to your analysis. +In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. -The file `config_general.yaml` must contain the telescope IDs, the directories with the MC data (ignored if you set NSB_matching = true), the data selection, and some information on the night sky background (NSB) level and software versions: +The file `config_auto_MCP.yaml` must contain the telescope IDs, the directories with the MC data (ignored if you set NSB_matching = true), the data selection, and some information on the night sky background (NSB) level and software versions: ``` mc_tel_ids: @@ -85,9 +85,9 @@ WARNING: Only the runs for which the `LST_version` parameter matches the `proces WARNING: `env_name` must be the same as the name of the environment in which you installed this version of the pipeline -Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the time window (or list of nights) defined on the config_general.yaml file: +Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the time window (or list of nights) defined on the config_auto_MCP.yaml file: -> $ list_from_h5 -c config_general.yaml +> $ list_from_h5 -c config_auto_MCP.yaml The output in the terminal should look like this: ``` @@ -100,7 +100,7 @@ Finding MAGIC runs... And it will save the files {TARGET}_LST_runs.txt, {TARGET}_MAGIC_runs.txt, and list_sources.dat (i.e., the list of all the sources found in the database according to user and default options) in your current working directory. In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s). At this point, we can convert the MAGIC data into DL1 format with the following command: -> $ dl1_production -c config_general.yaml +> $ dl1_production -c config_auto_MCP.yaml The output in the terminal will be something like this: ``` @@ -134,7 +134,7 @@ or Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. The next step of the conversion from calibrated to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. 
To do so, we run the following command (always in the directory `yourprojectname`): -> $ merging_runs (-c config_general.yaml) +> $ merging_runs (-c config_auto_MCP.yaml) **The command inside parenthesis is not mandatory if you are running the command in the working directory**. By the way, it is better if you don't use it unless you know what you are doing. The output in the terminal will be something like this: @@ -154,7 +154,7 @@ This script will merge the MAGIC data files in the following order: To find coincident events between MAGIC and LST, starting from DL1 data, we run the following command in the working directory: -> $ coincident_events (-c config_general.yaml) +> $ coincident_events (-c config_auto_MCP.yaml) This script creates the file config_coincidence.yaml containing the telescope IDs and the coincidence parameters listed in the general config.yaml file (the one in magicctapipe/resources). @@ -162,7 +162,7 @@ Then, matches LST and MAGIC dates and links the LST data files to the output dir Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 files by running: -> $ stereo_events (-c config_general.yaml) +> $ stereo_events (-c config_auto_MCP.yaml) This script creates the file config_stereo.yaml ontaining the telescope IDs and the stereo parameters listed in the general config.yaml file (the one in magicctapipe/resources). @@ -191,7 +191,7 @@ To create and update the MAGIC and LST databases (from the one produced by AB an - `lstchain_version`: this scripts loop over all the rows of the database, estract date and run number from the table and look for the data saved in the IT (i.e., which version of lstchain has been used to process a run). It evaluates all the versions used to process a run and the most recent MCP-compatible one according to a hard-coded, ordered list. Launched as `python lstchain_version.py` -- `nsb_level`: evaluates, for the last (MCP compatible) version of every LST run, the respective NSB value (i.e., the median over the NSB estimated by lstchain over approx. 25 sub-runs per run). This scripts launch a set of jobs (one per run; each job calls the `LSTnsb` script) and each jobs produces an output txt file containing a string like `date,run,NSB`; in the title of these files, both the run number and the NSB range are indicated (0.5=(0,0.75), 1.0=(0.75, 1.25),...., 2.5=(2.25,2.75), 3.0=(2.75,3.25), `high`=(3.25,Infinity) ). To limit the number of simultaneous jobs running on SLURM, you should always provide a begin and a end date (format YYYY_MM_DD) in the options. Launched as `python nsb_level.py -c config_general.yaml -b begin_date -e end_date` +- `nsb_level`: evaluates, for the last (MCP compatible) version of every LST run, the respective NSB value (i.e., the median over the NSB estimated by lstchain over approx. 25 sub-runs per run). This scripts launch a set of jobs (one per run; each job calls the `LSTnsb` script) and each jobs produces an output txt file containing a string like `date,run,NSB`; in the title of these files, both the run number and the NSB range are indicated (0.5=(0,0.75), 1.0=(0.75, 1.25),...., 2.5=(2.25,2.75), 3.0=(2.75,3.25), `high`=(3.25,Infinity) ). To limit the number of simultaneous jobs running on SLURM, you should always provide a begin and a end date (format YYYY_MM_DD) in the options. Launched as `python nsb_level.py -c config_auto_MCP.yaml -b begin_date -e end_date` - `LSTnsb`: called by `nsb_level`, it gathers all the subruns for a run, evaluates the NSB for approx. 
25 of them (using the lstchain `calculate_noise_parameters` function), evaluates the median over these values and the approximate NSB level (0.5, 1.0, 1.5, ...., 2.5, 3.0, `high`) and then creates one txt file per run. These files contain the value of the NSB (i.e., the median over subruns) and are needed to fill the database `nsb` column diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index ad792e1f..7b8893c5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -180,7 +180,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", + default="./config_auto_MCP.yaml", help="Path to a configuration file", ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml similarity index 100% rename from magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_general.yaml rename to magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index b20866b8..126225ff 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -89,7 +89,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", + default="./config_auto_MCP.yaml", help="Path to a configuration file", ) parser.add_argument( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index f2f9d341..50f957a3 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -80,7 +80,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", + default="./config_auto_MCP.yaml", help="Path to a configuration file", ) parser.add_argument( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 232c38f6..38edec0b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -422,7 +422,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", + default="./config_auto_MCP.yaml", help="Path to a configuration file", ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index c9a1b2b8..32fa9591 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -1,7 +1,7 @@ """ This script does checks of status of jobs based on the log files generated during the execution. 
It also does accounting of memory and CPU usage -It loads the config_general file to figure out what files it should look for and processes source name and time range +It loads the config_auto_MCP file to figure out what files it should look for and processes source name and time range For the moment it ignores date_list and skip_*_runs It can also update the h5 file with the list of runs to process @@ -55,8 +55,8 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", - help="Path to a configuration file config_general.yaml", + default="./config_auto_MCP.yaml", + help="Path to a configuration file config_auto_MCP.yaml", ) parser.add_argument( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index 53674e31..c1eca310 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -205,8 +205,8 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", - help="Path to a configuration file config_general.yaml", + default="./config_auto_MCP.yaml", + help="Path to a configuration file config_auto_MCP.yaml", ) args = parser.parse_args() diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index f4b08799..5a18caec 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -92,7 +92,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", + default="./config_auto_MCP.yaml", help="Path to a configuration file", ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 678cffd0..9c6dd651 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -226,7 +226,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", + default="./config_auto_MCP.yaml", help="Path to a configuration file", ) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index ac7a973f..4ffd2530 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -211,7 +211,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_general.yaml", + default="./config_auto_MCP.yaml", help="Path to a configuration file", ) From 1f6cbbc323d29e6897a9142a491e62c1cb9b4e04 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 5 Aug 2024 07:02:30 +0000 Subject: [PATCH 199/236] lint --- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 9c6dd651..e4426453 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -50,9 +50,6 @@ logger.setLevel(logging.INFO) - - - def 
split_train_test(target_dir, train_fraction): """ @@ -190,7 +187,6 @@ def mergeMC(target_dir, identification, env_name, cluster): process_size = len(list_of_nodes) - 1 - if cluster != "SLURM": logger.warning( "Automatic processing not implemented for the cluster indicated in the config file" From b5b84defb86e39053941175162dab4ceab514e39 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 5 Aug 2024 13:35:54 +0000 Subject: [PATCH 200/236] updated readme --- magicctapipe/scripts/lst1_magic/README.md | 65 +++++++++++++---------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 7b03be3b..9da8b49e 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -31,12 +31,13 @@ During the analysis, some files (i.e., bash scripts, lists of sources and runs) In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3) and MC DL0 to DL1. -In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. +In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. If you need non-standard parameters (e.g., for the cleaning), take care that the `resources/config.yaml` file gets installed when you install the pipeline, so you will have to copy it, e.g. in your workspace, modify it and put the path to this new file in the `config_auto_MCP.yaml` (this way you don't need to install again the pipeline). The file `config_auto_MCP.yaml` must contain the telescope IDs, the directories with the MC data (ignored if you set NSB_matching = true), the data selection, and some information on the night sky background (NSB) level and software versions: ``` -mc_tel_ids: + + mc_tel_ids: LST-1: 1 LST-2: 0 LST-3: 0 @@ -56,8 +57,6 @@ directories: data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. source_name_output: 'Crabtest' # Name tag of your target. Used only if source_name_database != null. - target_RA_deg : 83.629 # RA in degrees; Set to null if source_name_database=null. - target_Dec_deg: 22.015 # Dec in degrees; set to null if source_name_database=null. time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). min : "2023_11_17" max : "2024_03_03" @@ -65,7 +64,8 @@ data_selection: skip_LST_runs: [3216,3217] # LST runs to ignore. skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. -general: +general: + base_config_file: '' # path + name to a custom MCP config file. If not provided, the default config.yaml file will be used LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" focal_length : "effective" @@ -77,7 +77,7 @@ general: cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). NSB_matching : true # Set to false to process also the MCs. Set to true if adequate MC productions (DLx) are already available on the IT Container. 
NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true.
-
+
 ```
 
@@ -85,7 +85,7 @@ WARNING: Only the runs for which the `LST_version` parameter matches the `proces
 
 WARNING: `env_name` must be the same as the name of the environment in which you installed this version of the pipeline
 
-Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the time window (or list of nights) defined on the config_auto_MCP.yaml file:
+Now that the configuration file is ready, let's create a list with all the MAGIC+LST1 runs for the time window (or list of nights) defined on the `config_auto_MCP.yaml` file:
 
 > $ list_from_h5 -c config_auto_MCP.yaml
 
@@ -97,17 +97,16 @@ Finding LST runs...
 Source: XXX
 Finding MAGIC runs...
 ```
-And it will save the files {TARGET}_LST_runs.txt, {TARGET}_MAGIC_runs.txt, and list_sources.dat (i.e., the list of all the sources found in the database according to user and default options) in your current working directory. In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s).
+And it will save the files `{TARGET}_LST_runs.txt`, `{TARGET}_MAGIC_runs.txt`, and `list_sources.dat` (i.e., the list of all the sources found in the database according to both custom and default settings) in your current working directory. In case no runs are found for MAGIC and/or LST (for a source and a given time range/list of dates), a warning will be printed and no output text file will be produced for the given source and telescope(s).
 At this point, we can convert the MAGIC data into DL1 format with the following command:
 
 > $ dl1_production -c config_auto_MCP.yaml
 
 The output in the terminal will be something like this:
 ```
-*** Converting DL0 into DL1 data ***
+*** Converting Calibrated into DL1 data ***
 Process name: {source}
 To check the jobs submitted to the cluster, type: squeue -n {source}
-This process will take about 10 min to run if the IT cluster is free.
 ```
 
 The command `dl1_production` does a series of things:
@@ -119,7 +118,7 @@ The command `dl1_production` does a series of things:
 /fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1/[subdirectories]
 ```
 where [subdirectories] stands for several subdirectories containing the MAGIC subruns in the DL1 format.
-- Generates a configuration file called `config_DL0_to_DL1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/VERSION/Crab/` created in the previous step.
+- Generates a configuration file called `config_DL0_to_DL1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/VERSION/{source}/` created in the previous step.
 - Links the MAGIC data addresses to their respective subdirectories defined in the previous steps.
 - Runs the script `magic_calib_to_dl1.py` for each one of the linked data files.
 
@@ -132,11 +131,14 @@ or
 
 > $ squeue -u your_user_name
 
-Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. The next step of the conversion from calibrated to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`):
+Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun.
+
+WARNING: some of these jobs could fail due to 'broken' input files: before moving to the next step, check for failed jobs (through `job_accounting` and/or the log files) and remove the output files produced by these failed jobs (such output files generally have a very small size, smaller than a few kB, and cannot be read in the following steps); a minimal cleanup sketch is shown below.
+
+The next step of the conversion from calibrated to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night.
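As a complement to the warning above, the truncated outputs can be removed by hand or with a few lines of Python. A minimal sketch follows; the workspace path and the 1 kB threshold are assumptions to adapt, not pipeline defaults:

```python
from pathlib import Path

# Hypothetical DL1 tree; adapt to your /fefs/.../yourprojectname/VERSION/{source}/DL1
dl1_dir = Path("/fefs/aswg/workspace/yourname/yourprojectname/VERSION/Crabtest/DL1")

min_size = 1024  # bytes; outputs below ~1 kB are almost certainly truncated

for h5_file in dl1_dir.rglob("dl1_M*.h5"):
    if h5_file.stat().st_size < min_size:
        print(f"removing broken output {h5_file}")
        h5_file.unlink()
```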
To do so, we run the following command (always in the directory `yourprojectname`): +Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. + +WARNING: some of these jobs could fail due to 'broken' input files: before moving to the next step, check for failed jobs (through `job_accounting` and/or log files) and remove the output files produced by these failed jobs (these output files will generally have a very small size, lower than few kB, and cannot be read in the following steps) + +The next step of the conversion from calibrated to DL1 is to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following command (always in the directory `yourprojectname`): > $ merging_runs (-c config_auto_MCP.yaml) -**The command inside parenthesis is not mandatory if you are running the command in the working directory**. By the way, it is better if you don't use it unless you know what you are doing. The output in the terminal will be something like this: ``` ***** Generating merge_MAGIC bashscripts... @@ -145,10 +147,7 @@ Process name: merging_{source} To check the jobs submitted to the cluster, type: squeue -n merging_{source} ``` -This script will merge the MAGIC data files in the following order: -- MAGIC subruns are merged into single runs. -- MAGIC I and II runs are merged (only if both telescopes are available, of course). -- All runs in specific nights are merged, such that in the end we have only one datafile per night. +This script will merge MAGIC-I (and MAGIC-II) subruns into runs. ### Coincident events and stereo parameters on DL1 @@ -156,17 +155,21 @@ To find coincident events between MAGIC and LST, starting from DL1 data, we run > $ coincident_events (-c config_auto_MCP.yaml) -This script creates the file config_coincidence.yaml containing the telescope IDs and the coincidence parameters listed in the general config.yaml file (the one in magicctapipe/resources). +This script creates the file `config_coincidence.yaml` containing both the telescope IDs and the coincidence parameters listed in the general `config.yaml` file (the one in `magicctapipe/resources`). -Then, matches LST and MAGIC dates and links the LST data files to the output directory [...]DL1Coincident; eventually, it runs the script lst1_magic_event_coincidence.py in all of them. +Then, matches LST and MAGIC dates and links the LST data files to the output directory `[...]/DL1Coincident`; eventually, it runs the script `lst1_magic_event_coincidence.py` in all of them. Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 files by running: > $ stereo_events (-c config_auto_MCP.yaml) -This script creates the file config_stereo.yaml ontaining the telescope IDs and the stereo parameters listed in the general config.yaml file (the one in magicctapipe/resources). +This script creates the file `config_stereo.yaml` containing both the telescope IDs and the stereo parameters listed in the general `config.yaml` file (the one in `magicctapipe/resources`). + +It then creates the output directories for the DL1 with stereo parameters `[...]/DL1Stereo`, and then runs the script `lst1_magic_stereo_reco.py` in all of the coincident DL1 files. The stereo DL1 files are then saved in these directories. 
-It then creates the output directories for the DL1 with stereo parameters [...]DL1Stereo, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files are then saved in these directories.
+Eventually, to merge DL1 stereo (LST) subruns into runs, we run the `merge_stereo.py` script, whose output will be saved in `[...]/DL1Stereo/Merged`:
+
+> $ merge_stereo (-c config_auto_MCP.yaml)
 
 ### Random forest and DL1 to DL2
 
@@ -183,18 +186,24 @@ Since the DL3 may have only a few MBs, it is typically convenient to download it
 
 The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs.
 
-## For mainteiners (creation of MAGIC and LST databases)
+## For maintainers (MAGIC and LST databases)
 
 To create and update the MAGIC and LST databases (from the one produced by AB and FDP) you should use the scripts in `database_production`
 
-- `create_lst_table`: creates the LST database (1 row per LST run) by dropping some columns from the parent one (AB, FDP) and adding columns for NSB value (-1 by default), lstchain available versions, most recent lstchain version, processed file and NSB error codes (-1 by default). It could also be used to update the given database, possibly selecting a given time range from the parent databases (by the -b and -e parameters, which stand for begin and end date of the range). Launched as `python create_lst_table.py (-b YYYYMMDD -e YYYYMMDD)`
+- `create_lst_table`: creates the LST database (1 row per LST run) by dropping some columns from the parent one (AB, FDP) and adding columns for the NSB value (default: NaN), the available lstchain versions, the most recent lstchain version, the processed file and the NSB error codes (default: -1). It can also be used to update the given database, possibly selecting a given time range from the parent databases (via the -b and -e parameters, which stand for the begin and end dates of the range). Launched as `create_lst_table (-b YYYYMMDD -e YYYYMMDD)`
+
+- `lstchain_version`: this script loops over all the rows of the database, extracts the date and run number from the table and looks for the data stored on the IT (i.e., which version of lstchain has been used to process a run). It evaluates all the versions used to process a run and the most recent MCP-compatible one according to a hard-coded, ordered list. Launched as `lstchain_version`
+
+- `nsb_level`: evaluates, for the last (MCP-compatible) version of every LST run, the respective NSB value (i.e., the median over the NSB estimated by lstchain over a sub-set of sub-runs per run). This script launches a set of jobs (one per run; each job calls the `LSTnsb.py` script) and each job produces an output txt file containing a string like `date,run,NSB`; in the title of these files, both the run number and the NSB range are indicated (0.5=(0,0.75), 1.0=(0.75,1.25), ..., 2.5=(2.25,2.75), 3.0=(2.75,3.25), `high`=(3.25,Infinity)). To limit the number of simultaneous jobs running on SLURM, the script requires that you provide a begin and an end date (through the -b and -e options). Launched as `nsb_level -c config_auto_MCP.yaml -b YYYY_MM_DD -e YYYY_MM_DD`
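The NSB ranges quoted in the `nsb_level` bullet are simply intervals centred on the nominal levels, with edges halfway between consecutive values. As an editorial illustration only (the `nsb` list mirrors the values quoted above; `nsb_label` is a hypothetical helper, not a function of the pipeline), the labelling can be reproduced like this:

```python
import numpy as np

nsb = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]  # nominal NSB levels, as quoted above


def nsb_label(median_nsb, nsb_list=nsb):
    """Map a per-run median NSB onto the bin label used in the txt file names."""
    # Bin edges sit halfway between consecutive nominal values:
    # 0.5 -> (0, 0.75), 1.0 -> (0.75, 1.25), ..., `high` -> (3.25, Infinity)
    edges = [0.0]
    edges += [(a + b) / 2 for a, b in zip(nsb_list[:-1], nsb_list[1:])]
    edges.append(nsb_list[-1] + 0.25)  # upper edge of the last regular bin
    idx = int(np.digitize(median_nsb, edges)) - 1
    return "high" if idx >= len(nsb_list) else f"{nsb_list[idx]:.1f}"


print(nsb_label(0.6), nsb_label(1.3), nsb_label(3.4))  # -> 0.5 1.5 high
```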
+
+- `LSTnsb`: called by `nsb_level`, it gathers all the subruns for a run, evaluates the NSB for a subset of them (using the lstchain `calculate_noise_parameters` function), evaluates the median over these values and the approximate NSB level according to the list provided in `config_auto_MCP.yaml` (e.g., 0.5, 1.0, 1.5, ..., 2.5, 3.0, `high`) and then creates one txt file per run.
These files contain the value of the NSB (i.e., the median over subruns) and are needed to fill the database `nsb` column +- `check_MAGIC_runs`: this script checks the MAGIC data stored on the IT (i.e., missing and existing data) in a given time range (-m and -M parameters, which stand for minimum and maximum date). Launched as `check_MAGIC_runs -m YYYYMMDD -M YYYYMMDD` -- `nsb_to_h5`: this script reads the txt files created by `nsb_level` to know the NSB value for each run. This value is used to fill the `nsb` column of the database at the location of the respective run number. It also updates the error codes (0: fine, 1: nsb=NaN, 2: NSB>3.0). Launched as `python nsb_to_h5.py` -- `job_accounting`: this script (in semi_automatic_scripts directory) allows to track progress of the submitted jobs, in particular listing errors. It also provides basic resource statistics (CPU and memory) of the completed jobs. Finally, it can be also used to update the database files with the progress of data processing From 19204ae79cd383da57f37c32e7bfda4dafec0433 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 5 Aug 2024 13:36:12 +0000 Subject: [PATCH 201/236] fix docstring --- .../semi_automatic_scripts/database_production/nsb_level.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 50f957a3..58c4caaf 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -4,7 +4,7 @@ Moreover, it can modify the lstchain standard configuration file (used to evaluate NSB) by adding "use_flatfield_heuristic" = True Usage: -$ nsb_level (-c config.yaml -b begin -e end) +$ nsb_level (-c config.yaml -b YYYY_MM_DD -e YYYY_MM_DD) """ import argparse From d89f0d36bbe5496d512a06d04b2760f4cb8207ac Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 5 Aug 2024 13:38:43 +0000 Subject: [PATCH 202/236] allow user input dates and use database config --- .../check_MAGIC_runs.py | 42 ++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index b28e740b..52bca7b1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -11,10 +11,13 @@ """ import os +import argparse from datetime import datetime, timedelta import pandas as pd +from magicctapipe.io import resource_file + def fix_lists_and_convert(cell): """ @@ -175,16 +178,45 @@ def main(): """Main function.""" - # TO DO : set time interval - format YYYYMMDD - date_min = "20240601" - date_max = "20240630" + parser = argparse.ArgumentParser() + + date_min_default = "20191101" + current_datetime = datetime.now() + date_max_default = current_datetime.strftime("%Y%m%d") + parser.add_argument( + "--date-min", + "-m", + dest="date_min", + type=str, + default=date_min_default, + help="Start of the time interval (in LST convention, format YYYYMMDD).", + ) + + parser.add_argument( + "--date-max", + "-M", + dest="date_max", + type=str, + default=date_max_default, + help="End of the time interval (in LST convention, format YYYYMMDD).", + ) + + args = 
parser.parse_args() + + config = resource_file("database_config.yaml") + + with open(config, "rb") as bf: + config_dict = yaml.safe_load(bf) + df_path = config_dict["database_paths"]["MAGIC+LST1"] + df_key = config_dict["database_keys"]["MAGIC+LST1"] df = pd.read_hdf( - "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5", - key="str/table", + df_path, + key=df_key, ) tel_id = [1, 2] + database = table_magic_runs(df, date_min, date_max) database_exploded = database.explode("MAGIC runs") database_exploded_reset = database_exploded.reset_index(drop=True) From 5fd2a4ad80c8ea2238b924a7f0cd080288224e70 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Mon, 5 Aug 2024 13:45:49 +0000 Subject: [PATCH 203/236] lint --- .../lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index 52bca7b1..fee74fc8 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -10,11 +10,12 @@ 'YYYY', 'MM', and 'DD' specify the date. """ -import os import argparse +import os from datetime import datetime, timedelta import pandas as pd +import yaml from magicctapipe.io import resource_file @@ -217,7 +218,7 @@ def main(): tel_id = [1, 2] - database = table_magic_runs(df, date_min, date_max) + database = table_magic_runs(df, args.date_min, args.date_max) database_exploded = database.explode("MAGIC runs") database_exploded_reset = database_exploded.reset_index(drop=True) From d61d158e43e355fab9e4ee376c6de3b13d066e26 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 6 Aug 2024 08:19:35 +0000 Subject: [PATCH 204/236] started removal MC --- .../semi_automatic_scripts/dl1_production.py | 368 +++--------------- .../semi_automatic_scripts/merging_runs.py | 199 ++-------- .../semi_automatic_scripts/stereo_events.py | 156 ++------ 3 files changed, 114 insertions(+), 609 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 38edec0b..8e39e38b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -32,10 +32,8 @@ __all__ = [ "config_file_gen", - "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", "directories_generator_real", - "directories_generator_MC", ] logger = logging.getLogger(__name__) @@ -43,7 +41,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen): +def config_file_gen(target_dir, noise_value, source_name, config_gen): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -54,8 +52,6 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen) Directory to store the results noise_value : list List of the noise correction values for LST - NSB_match : bool - If real data are matched to pre-processed MCs or not source_name : str Name of the target source config_gen : dict @@ -71,147 +67,18 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen) LST_config = config_dict["LST"] MAGIC_config = config_dict["MAGIC"] - if not NSB_match: - 
LST_config["increase_nsb"]["extra_noise_in_dim_pixels"] = noise_value[0] - LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2] - LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1] + conf = { "mc_tel_ids": config_gen["mc_tel_ids"], "LST": LST_config, "MAGIC": MAGIC_config, } - if source_name == "MC": - file_name = f"{target_dir}/v{__version__}/MC/config_DL0_to_DL1.yaml" - else: - file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" + + file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" with open(file_name, "w") as f: yaml.dump(conf, f, default_flow_style=False) -def lists_and_bash_generator( - particle_type, - target_dir, - MC_path, - focal_length, - env_name, - cluster, -): - - """ - This function creates the lists list_nodes_*_complete.txt and list_folder_*.txt with the MC file paths. - After that, it generates a few bash scripts to link the MC paths to each subdirectory and to process them from DL0 to DL1. - These bash scripts will be called later in the main() function below. This step will be skipped in case the MC path has not been provided (MC_path='') - - Parameters - ---------- - particle_type : str - Particle type (e.g., protons) - target_dir : str - Directory to store the results - MC_path : str - Path to the MCs DL0s - focal_length : str - Focal length to be used to process MCs (e.g., 'nominal') - env_name : str - Name of the environment - cluster : str - Cluster system - """ - - if MC_path == "": - return - print(f"running {particle_type} from {MC_path}") - process_name = "MC" - - list_of_nodes = glob.glob(f"{MC_path}/node*") - dir1 = f"{target_dir}/v{__version__}/MC" - with open( - f"{dir1}/logs/list_nodes_{particle_type}_complete.txt", "w" - ) as f: # creating list_nodes_gammas_complete.txt - for i in list_of_nodes: - out_list = glob.glob(f"{i}/output*") - if len(out_list) == 0: - logger.error( - f"No output file for node {i}, or the directory structure is not the usual one. Skipping..." - ) - continue - elif len(out_list) == 1: - f.write(f"{out_list[0]}\n") - else: - output_index = input( - f"The available outputs are {out_list}, please provide the array index of the desired one:" - ) - f.write(f"{out_list[output_index]}\n") - - with open( - f"{dir1}/logs/list_folder_{particle_type}.txt", "w" - ) as f: # creating list_folder_gammas.txt - for i in list_of_nodes: - f.write(f'{i.split("/")[-1]}\n') - - #################################################################################### - # bash scripts that link the MC paths to each subdirectory. 
- #################################################################################### - if cluster != "SLURM": - logger.warning( - "Automatic processing not implemented for the cluster indicated in the config file" - ) - return - with open(f"linking_MC_{particle_type}_paths.sh", "w") as f: - slurm = slurm_lines( - queue="short", - job_name=process_name, - out_name=f"{dir1}/DL1/{particle_type}/logs/slurm-linkMC-%x.%j", - ) - lines_of_config_file = slurm + [ - "while read -r -u 3 lineA && read -r -u 4 lineB\n", - "do\n", - f" cd {dir1}/DL1/{particle_type}\n", - " mkdir $lineB\n", - " cd $lineA\n", - " ls -lR *.gz |wc -l\n", - f" mkdir -p {dir1}/DL1/{particle_type}/$lineB/logs/\n", - f" ls *.gz > {dir1}/DL1/{particle_type}/$lineB/logs/list_dl0.txt\n", - ' string=$lineA"/"\n', - f" export file={dir1}/DL1/{particle_type}/$lineB/logs/list_dl0.txt\n\n", - " cat $file | while read line; do echo $string${line}" - + f" >>{dir1}/DL1/{particle_type}/$lineB/logs/list_dl0_ok.txt; done\n\n", - ' echo "folder $lineB and node $lineA"\n', - f'done 3<"{dir1}/logs/list_nodes_{particle_type}_complete.txt" 4<"{dir1}/logs/list_folder_{particle_type}.txt"\n', - "", - ] - f.writelines(lines_of_config_file) - - ################################################################################################################ - # bash script that applies lst1_magic_mc_dl0_to_dl1.py to all MC data files. - ################################################################################################################ - - number_of_nodes = glob.glob(f"{MC_path}/node*") - number_of_nodes = len(number_of_nodes) - 1 - with open(f"linking_MC_{particle_type}_paths_r.sh", "w") as f: - slurm = slurm_lines( - queue="xxl", - job_name=process_name, - array=number_of_nodes, - mem="10g", - out_name=f"{dir1}/DL1/{particle_type}/logs/slurm-%x.%A_%a", - ) - lines_of_config_file = slurm + [ - f"cd {dir1}/DL1/{particle_type}\n\n", - f"export INF={dir1}/logs\n", - f"SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "cd $SAMPLE\n\n", - f"export LOG={dir1}/DL1/{particle_type}/logs/simtel_{{$SAMPLE}}_${{SLURM_ARRAY_JOB_ID}}_${{SLURM_ARRAY_TASK_ID}}_all.log\n", - "cat logs/list_dl0_ok.txt | while read line\n", - "do\n", - f" cd {dir1}/../\n", - f" conda run -n {env_name} lst1_magic_mc_dl0_to_dl1 --input-file $line --output-dir {dir1}/DL1/{particle_type}/$SAMPLE --config-file {dir1}/config_DL0_to_DL1.yaml --focal_length_choice {focal_length}>>$LOG 2>&1\n\n", - "done\n", - "", - ] - f.writelines(lines_of_config_file) - def lists_and_bash_gen_MAGIC( target_dir, telescope_ids, MAGIC_runs, source, env_name, cluster @@ -300,7 +167,7 @@ def lists_and_bash_gen_MAGIC( def directories_generator_real( - target_dir, telescope_ids, MAGIC_runs, NSB_match, source_name + target_dir, telescope_ids, MAGIC_runs, source_name ): """ Here we create all subdirectories for a given workspace and target name. 
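With the MC branches gone, the real-data layout is the only one `directories_generator_real` still has to build. A minimal stand-alone sketch of the tree it produces (the function name `make_dl1_tree` and its arguments are illustrative, not part of the package):

    from pathlib import Path

    def make_dl1_tree(target_dir, version, source_name, magic_runs, tel_ids=(1, 2)):
        # magic_runs: iterable of (date, run) pairs such as ("2020_11_19", "5093174"),
        # i.e. the rows read from {source}_MAGIC_runs.txt
        base = Path(target_dir) / f"v{version}" / source_name / "DL1"
        for tel in tel_ids:
            for date, run in magic_runs:
                # one logs/ directory per telescope, night and run, mirroring
                # the os.makedirs calls in directories_generator_real
                (base / f"M{tel}" / date / run / "logs").mkdir(parents=True, exist_ok=True)
        return base

The sketch assumes MAGIC-I and MAGIC-II only (tel_ids defaults to (1, 2)), matching the M{magic} loop kept in the function after this patch.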
@@ -313,38 +180,14 @@ def directories_generator_real( List of the telescope IDs (set by the user) MAGIC_runs : array MAGIC dates and runs to be processed - NSB_match : bool - If real data are matched to pre-processed MCs or not source_name : str Name of the target source """ - if NSB_match: - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) - dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") - else: - - dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") - if not os.path.exists(f"{target_dir}/v{__version__}/{source_name}"): - os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1", - exist_ok=True, - ) - - else: - overwrite = input( - f'data directory for {target_dir.split("/")[-1]} already exists. Would you like to overwrite it? [only "y" or "n"]: ' - ) - if overwrite == "y": - os.system(f"rm -r {target_dir}/v{__version__}/{source_name}") - os.makedirs( - f"{target_dir}/v{__version__}/{source_name}/DL1", - exist_ok=True, - ) - - else: - print("Directory not modified.") - + + os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) + dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") + ########################################### # MAGIC ########################################### @@ -354,48 +197,6 @@ def directories_generator_real( os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs", exist_ok=True) -def directories_generator_MC(target_dir, telescope_ids): - - """ - Here we create all subdirectories for a given workspace and target name. - - Parameters - ---------- - target_dir : str - Directory to store the results - telescope_ids : list - List of the telescope IDs (set by the user) - """ - - dir_list = [ - "gammas", - "gammadiffuse", - "electrons", - "protons", - "helium", - ] - if not os.path.exists(f"{target_dir}/v{__version__}/MC"): - os.makedirs(f"{target_dir}/v{__version__}/MC/logs", exist_ok=True) - os.makedirs(f"{target_dir}/v{__version__}/MC/DL1", exist_ok=True) - for dir in dir_list: - os.makedirs( - f"{target_dir}/v{__version__}/MC/DL1/{dir}/logs", - exist_ok=True, - ) - else: - overwrite = input( - 'MC directory already exists. Would you like to overwrite it? 
[only "y" or "n"]: ' - ) - if overwrite == "y": - os.system(f"rm -r {target_dir}/v{__version__}/MC") - os.makedirs(f"{target_dir}/v{__version__}/MC/logs", exist_ok=True) - for dir in dir_list: - os.makedirs( - f"{target_dir}/v{__version__}/MC/DL1/{dir}/logs", - exist_ok=True, - ) - else: - print("Directory not modified.") def main(): @@ -407,15 +208,7 @@ def main(): # Here we are simply collecting the parameters from the command line, as input file, output directory, and configuration file parser = argparse.ArgumentParser() - parser.add_argument( - "--analysis-type", - "-t", - choices=["onlyMAGIC", "onlyMC"], - dest="analysis_type", - type=str, - default="doEverything", - help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on MAGIC or MC data, respectively.", - ) + parser.add_argument( "--config-file", @@ -434,14 +227,8 @@ def main(): telescope_ids = list(config["mc_tel_ids"].values()) env_name = config["general"]["env_name"] - NSB_match = config["general"]["NSB_matching"] - - # LST_runs_and_dates = config["general"]["LST_runs"] - MC_gammas = config["directories"]["MC_gammas"] - MC_electrons = config["directories"]["MC_electrons"] - MC_helium = config["directories"]["MC_helium"] - MC_protons = config["directories"]["MC_protons"] - MC_gammadiff = config["directories"]["MC_gammadiff"] + + focal_length = config["general"]["focal_length"] source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] @@ -454,97 +241,54 @@ def main(): else: source_list.append(source) noise_value = [0, 0, 0] - if not NSB_match: - nsb = config["general"]["NSB_MC"] - - noisebright = 1.15 * pow(nsb, 1.115) - biasdim = 0.358 * pow(nsb, 0.805) - noise_value = [nsb, noisebright, biasdim] - - if not NSB_match: - # Below we run the analysis on the MC data - if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): - directories_generator_MC( - str(target_dir), telescope_ids - ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen( - target_dir, noise_value, NSB_match, "MC", config - ) # TODO: fix here - to_process = { - "gammas": MC_gammas, - "electrons": MC_electrons, - "helium": MC_helium, - "protons": MC_protons, - "gammadiffuse": MC_gammadiff, - } - for particle in to_process.keys(): - lists_and_bash_generator( - particle, - target_dir, - to_process[particle], - focal_length, - env_name, - cluster, - ) - list_of_MC = glob.glob(f"linking_MC_{particle}_*.sh") - if len(list_of_MC) < 2: - logger.warning( - f"No bash script has been produced for processing {particle}" - ) - else: - launch_jobs_MC = f"linking=$(sbatch --parsable linking_MC_{particle}_paths.sh) && running=$(sbatch --parsable --dependency=afterany:$linking linking_MC_{particle}_paths_r.sh)" - os.system(launch_jobs_MC) - # Here we do the MC DL0 to DL1 conversion: + + for source_name in source_list: - if ( - (args.analysis_type == "onlyMAGIC") - or (args.analysis_type == "doEverything") - or (NSB_match) - ): - - MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" - MAGIC_runs = np.genfromtxt( - MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 - ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - - # TODO: fix here above - print("*** Converting Calibrated into DL1 data ***") - print(f"Process name: {source_name}") - print( - f"To check the jobs submitted to the cluster, type: squeue -n {source_name}" - ) - - directories_generator_real( - 
str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name - ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen( - target_dir, noise_value, NSB_match, source_name, config - ) # TODO: fix here - - # Below we run the analysis on the MAGIC data - - lists_and_bash_gen_MAGIC( - target_dir, - telescope_ids, - MAGIC_runs, - source_name, - env_name, - cluster, - ) # MAGIC real data - if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): - list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh") - if len(list_of_MAGIC_runs) < 1: - logger.warning( - "No bash script has been produced. Please check the provided MAGIC_runs.txt and the MAGIC calibrated data" - ) - continue + + + MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" + MAGIC_runs = np.genfromtxt( + MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 + ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" + + # TODO: fix here above + print("*** Converting Calibrated into DL1 data ***") + print(f"Process name: {source_name}") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}" + ) + + directories_generator_real( + str(target_dir), telescope_ids, MAGIC_runs, source_name + ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target + config_file_gen( + target_dir, noise_value, source_name, config + ) # TODO: fix here + + # Below we run the analysis on the MAGIC data + + lists_and_bash_gen_MAGIC( + target_dir, + telescope_ids, + MAGIC_runs, + source_name, + env_name, + cluster, + ) # MAGIC real data + if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + list_of_MAGIC_runs = glob.glob(f"{source_name}_MAGIC-*.sh") + if len(list_of_MAGIC_runs) < 1: + logger.warning( + "No bash script has been produced. Please check the provided MAGIC_runs.txt and the MAGIC calibrated data" + ) + continue - launch_jobs = f"linking=$(sbatch --parsable {source_name}_linking_MAGIC_data_paths.sh)" - for n, run in enumerate(list_of_MAGIC_runs): - launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" + launch_jobs = f"linking=$(sbatch --parsable {source_name}_linking_MAGIC_data_paths.sh)" + for n, run in enumerate(list_of_MAGIC_runs): + launch_jobs = f"{launch_jobs} && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" - os.system(launch_jobs) + os.system(launch_jobs) if __name__ == "__main__": diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index e4426453..c499e488 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -43,56 +43,13 @@ slurm_lines, ) -__all__ = ["split_train_test", "merge", "mergeMC"] +__all__ = ["merge"] logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) -def split_train_test(target_dir, train_fraction): - - """ - This function splits the MC proton sample in 2, i.e. the "test" and the "train" subsamples, in case you want to make performance studies on MC. For regular analyses, you can/should use the whole MC sample for training. - It generates 2 subdirectories in the directory .../DL1/MC/protons named "test" and "train" and creates sub-sub-directories with the names of all nodes. 
- For each node sub-sub-directory we move `train_fraction` of the .h5 files to the "train" subdirectory and `1-train_fraction` of the .h5 files to the "test" subdirectory. - - Parameters - ---------- - target_dir : str - Path to the working directory - train_fraction : float - Fraction of proton MC files to be used in the training of RFs - """ - - proton_dir = f"{target_dir}/v{__version__}/MC/DL1/protons" - - list_of_dir = np.sort(glob.glob(f"{proton_dir}/node*{os.path.sep}")) - - for directory in tqdm( - range(len(list_of_dir)) - ): # tqdm allows us to print a progessbar in the terminal - node = list_of_dir[directory].split("/")[-2] - os.makedirs(f"{proton_dir}/train/{node}", exist_ok=True) - os.makedirs( - f'{proton_dir}/../protons_test/{list_of_dir[directory].split("/")[-2]}', - exist_ok=True, - ) - list_of_runs = np.sort( - glob.glob(f'{proton_dir}/{list_of_dir[directory].split("/")[-2]}/*.h5') - ) - number_train_runs = int(len(list_of_runs) * train_fraction) - for j in list_of_runs[0:number_train_runs]: - os.system(f"mv {j} {proton_dir}/train/{node}") - - os.system(f"cp {list_of_dir[directory]}logs/*.txt {proton_dir}/train/{node}") - os.system( - f"mv {list_of_dir[directory]}logs/*.txt {proton_dir}/../protons_test/{node}" - ) - os.system( - f"mv {list_of_dir[directory]}*.h5 {proton_dir}/../protons_test/{node}" - ) - os.system(f"rm -r {list_of_dir[directory]}") def merge(target_dir, MAGIC_runs, env_name, source, cluster): @@ -154,60 +111,6 @@ def merge(target_dir, MAGIC_runs, env_name, source, cluster): logger.error(f"{indir} does not exist") -def mergeMC(target_dir, identification, env_name, cluster): - - """ - This function creates the bash scripts to run merge_hdf_files.py in all MC nodes. - - Parameters - ---------- - target_dir : str - Path to the working directory - identification : str - Tells which sample to process - env_name : str - Name of the environment - cluster : str - Cluster system - """ - - process_name = "merging_MC" - - MC_DL1_dir = f"{target_dir}/v{__version__}/MC/DL1" - os.makedirs(f"{MC_DL1_dir}/{identification}/Merged/logs", exist_ok=True) - - if identification == "protons": - list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/train/node*")) - else: - list_of_nodes = np.sort(glob.glob(f"{MC_DL1_dir}/{identification}/node*")) - - np.savetxt( - f"{MC_DL1_dir}/{identification}/list_of_nodes.txt", list_of_nodes, fmt="%s" - ) - - process_size = len(list_of_nodes) - 1 - - if cluster != "SLURM": - logger.warning( - "Automatic processing not implemented for the cluster indicated in the config file" - ) - return - with open(f"Merge_MC_{identification}.sh", "w") as f: - slurm = slurm_lines( - queue="short", - array=process_size, - mem="7g", - job_name=process_name, - out_name=f"{MC_DL1_dir}/{identification}/Merged/logs/slurm-%x.%A_%a", - ) - lines_bash_file = slurm + [ - f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - f"export LOG={MC_DL1_dir}/{identification}/Merged/logs" - + "/merged_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} merge_hdf_files --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n", - ] - f.writelines(lines_bash_file) def main(): @@ -226,16 +129,7 @@ def main(): help="Path to a configuration file", ) - parser.add_argument( - "--analysis-type", - "-t", - choices=["onlyMAGIC", "onlyMC"], - dest="analysis_type", - type=str, - default="doEverything", - help="You can type 'onlyMAGIC' or 'onlyMC' to run 
this script only on MAGIC or MC data, respectively.", - ) - + args = parser.parse_args() with open( args.config_file, "rb" @@ -258,42 +152,7 @@ def main(): else: source_list.append(source) - if not NSB_match: - if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): - # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): - if not os.path.exists(f"{target_dir}/v{__version__}/MC/DL1/protons_test"): - print("***** Splitting protons into 'train' and 'test' datasets...") - split_train_test(target_dir, train_fraction) - - print("***** Generating merge_MC bashscripts...") - mergeMC( - target_dir, "protons", env_name, cluster - ) # generating the bash script to merge the files - mergeMC( - target_dir, "gammadiffuse", env_name, cluster - ) # generating the bash script to merge the files - mergeMC( - target_dir, "gammas", env_name, cluster - ) # generating the bash script to merge the files - mergeMC(target_dir, "protons_test", env_name, cluster) - mergeMC(target_dir, "helium", env_name, cluster) - mergeMC(target_dir, "electrons", env_name, cluster) - - print("***** Running merge_hdf_files.py on the MC data files...") - - # Below we run the bash scripts to merge the MC files - list_of_merging_scripts = np.sort(glob.glob("Merge_MC_*.sh")) - if len(list_of_merging_scripts) < 1: - logger.warning("No bash script has been produced for MC") - # TODO: check - - else: - launch_jobs = "" - for n, run in enumerate(list_of_merging_scripts): - launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" - - os.system(launch_jobs) - + for source_name in source_list: # Below we run the analysis on the MC data MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" @@ -302,34 +161,30 @@ def main(): ) # Below we run the analysis on the MAGIC data - if ( - (args.analysis_type == "onlyMAGIC") - or (args.analysis_type == "doEverything") - or (NSB_match) - ): - print("***** Generating merge_MAGIC bashscripts...") - merge( - target_dir, - MAGIC_runs, - env_name, - source_name, - cluster, - ) # generating the bash script to merge the subruns - - print("***** Running merge_hdf_files.py on the MAGIC data files...") - - # Below we run the bash scripts to merge the MAGIC files - list_of_merging_scripts = np.sort( - glob.glob(f"{source_name}_Merge_MAGIC*.sh") - ) - if len(list_of_merging_scripts) < 1: - logger.warning("No bash scripts for real data") - continue - launch_jobs = "" - for n, run in enumerate(list_of_merging_scripts): - launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" - - os.system(launch_jobs) + + print("***** Generating merge_MAGIC bashscripts...") + merge( + target_dir, + MAGIC_runs, + env_name, + source_name, + cluster, + ) # generating the bash script to merge the subruns + + print("***** Running merge_hdf_files.py on the MAGIC data files...") + + # Below we run the bash scripts to merge the MAGIC files + list_of_merging_scripts = np.sort( + glob.glob(f"{source_name}_Merge_MAGIC*.sh") + ) + if len(list_of_merging_scripts) < 1: + logger.warning("No bash scripts for real data") + continue + launch_jobs = "" + for n, run in enumerate(list_of_merging_scripts): + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" + + os.system(launch_jobs) print(f"Process name: merging_{source_name}") print( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 4ffd2530..752b20a2 100644 --- 
a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -33,7 +33,7 @@ slurm_lines, ) -__all__ = ["configfile_stereo", "bash_stereo", "bash_stereoMC"] +__all__ = ["configfile_stereo", "bash_stereo"] logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) @@ -67,10 +67,7 @@ def configfile_stereo(target_dir, source_name, config_gen): "mc_tel_ids": config_gen["mc_tel_ids"], "stereo_reco": config_dict["stereo_reco"], } - if source_name == "MC": - file_name = f"{target_dir}/v{__version__}/MC/config_stereo.yaml" - else: - file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" + file_name = f"{target_dir}/v{__version__}/{source_name}/config_stereo.yaml" with open(file_name, "w") as f: yaml.dump(conf, f, default_flow_style=False) @@ -148,55 +145,6 @@ def bash_stereo(target_dir, source, env_name, cluster): f.writelines(lines) -def bash_stereoMC(target_dir, identification, env_name, cluster): - - """ - This function generates the bashscripts for running the stereo analysis. - - Parameters - ---------- - target_dir : str - Path to the working directory - identification : str - Particle name. Options: protons, gammadiffuse, gammas, protons_test - env_name : str - Name of the environment - cluster : str - Cluster system - """ - - process_name = "stereo_MC" - - inputdir = f"{target_dir}/v{__version__}/MC/DL1/{identification}/Merged" - os.makedirs(f"{inputdir}/StereoMerged/logs", exist_ok=True) - - os.system( - f"ls {inputdir}/dl1*.h5 > {inputdir}/list_coin.txt" - ) # generating a list with the DL1 coincident data files. - with open(f"{inputdir}/list_coin.txt", "r") as f: - process_size = len(f.readlines()) - 1 - if cluster != "SLURM": - logger.warning( - "Automatic processing not implemented for the cluster indicated in the config file" - ) - return - with open(f"StereoEvents_MC_{identification}.sh", "w") as f: - slurm = slurm_lines( - queue="xxl", - job_name=f"{process_name}_stereo", - array=process_size, - mem="7g", - out_name=f"{inputdir}/StereoMerged/logs/slurm-%x.%A_%a", - ) - lines = slurm + [ - f"export INPUTDIR={inputdir}\n", - f"export OUTPUTDIR={inputdir}/StereoMerged\n", - "SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n", - "SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n", - "export LOG=$OUTPUTDIR/logs/stereo_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}.log\n", - f"conda run -n {env_name} lst1_magic_stereo_reco --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/v{__version__}/MC/config_stereo.yaml >$LOG 2>&1", - ] - f.writelines(lines) def main(): @@ -215,16 +163,7 @@ def main(): help="Path to a configuration file", ) - parser.add_argument( - "--analysis-type", - "-t", - choices=["onlyMAGIC", "onlyMC"], - dest="analysis_type", - type=str, - default="doEverything", - help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on real or MC data, respectively.", - ) - + args = parser.parse_args() with open( args.config_file, "rb" @@ -245,68 +184,35 @@ def main(): source_list = joblib.load("list_sources.dat") else: source_list = [source] - if not NSB_match: - if ( - (args.analysis_type == "onlyMC") - or (args.analysis_type == "doEverything") - and not NSB_match - ): - configfile_stereo(target_dir, "MC", config) - print("***** Generating the bashscript for MCs...") - for part in [ - "gammadiffuse", - "gammas", - "protons", - "protons_test", - "helium", - "electrons", - ]: - bash_stereoMC(target_dir, part, env_name, cluster) - - 
list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_MC_*.sh")) - if len(list_of_stereo_scripts) < 1: - logger.warning("No bash script has been produced for processing MCs") - else: - launch_jobs = "" - # TODO: check on N. bash scripts - - for n, run in enumerate(list_of_stereo_scripts): - launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" - - os.system(launch_jobs) - + for source_name in source_list: - if ( - (args.analysis_type == "onlyMAGIC") - or (args.analysis_type == "doEverything") - or (NSB_match) - ): - print("***** Generating file config_stereo.yaml...") - configfile_stereo(target_dir, source_name, config) - - # Below we run the analysis on the real data - - print("***** Generating the bashscript...") - bash_stereo(target_dir, source_name, env_name, cluster) - - print("***** Submitting processess to the cluster...") - print(f"Process name: {source_name}_stereo") - print( - f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo" - ) - - # Below we run the bash scripts to find the stereo events - list_of_stereo_scripts = np.sort( - glob.glob(f"{source_name}_StereoEvents*.sh") - ) - if len(list_of_stereo_scripts) < 1: - logger.warning("No bash scripts for real data") - continue - launch_jobs = "" - for n, run in enumerate(list_of_stereo_scripts): - launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" - - os.system(launch_jobs) + + print("***** Generating file config_stereo.yaml...") + configfile_stereo(target_dir, source_name, config) + + # Below we run the analysis on the real data + + print("***** Generating the bashscript...") + bash_stereo(target_dir, source_name, env_name, cluster) + + print("***** Submitting processess to the cluster...") + print(f"Process name: {source_name}_stereo") + print( + f"To check the jobs submitted to the cluster, type: squeue -n {source_name}_stereo" + ) + + # Below we run the bash scripts to find the stereo events + list_of_stereo_scripts = np.sort( + glob.glob(f"{source_name}_StereoEvents*.sh") + ) + if len(list_of_stereo_scripts) < 1: + logger.warning("No bash scripts for real data") + continue + launch_jobs = "" + for n, run in enumerate(list_of_stereo_scripts): + launch_jobs += (" && " if n > 0 else "") + f"sbatch {run}" + + os.system(launch_jobs) if __name__ == "__main__": From 7cc74d53fa3bdf3254bda3bcd3aa1e6338cfb6de Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 6 Aug 2024 10:44:48 +0200 Subject: [PATCH 205/236] Fix doc string --- .../lst1_magic/semi_automatic_scripts/merging_runs.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index e4426453..2ee2b40f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -1,9 +1,7 @@ """ This script splits the proton MC data sample into "train" -and "test", deletes possible MC failed runs (only those files -that end up with a size < 1 kB), and generates the bash -scripts to merge MC and real data files by calling the script "merge_hdf_files.py" -in the following order: +and "test" and generates the bash scripts to merge MC and +real data files by calling the script "merge_hdf_files.py": MAGIC: From 5762f4bf1199941dfdbd211ef297b930cb4d956f Mon Sep 17 00:00:00 2001 From: Elisa-Visentin 
<121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 6 Aug 2024 10:46:17 +0200 Subject: [PATCH 206/236] Remove wrong comment --- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 2ee2b40f..623f16bd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -293,7 +293,6 @@ def main(): os.system(launch_jobs) for source_name in source_list: - # Below we run the analysis on the MC data MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 From 02c4ca13e96122808bb33a85b05706a270f261f7 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 6 Aug 2024 08:52:43 +0000 Subject: [PATCH 207/236] no MC --- .../config_auto_MCP.yaml | 14 +++--------- .../semi_automatic_scripts/dl1_production.py | 4 ++-- .../semi_automatic_scripts/merging_runs.py | 22 ++++--------------- .../semi_automatic_scripts/stereo_events.py | 14 +++--------- 4 files changed, 12 insertions(+), 42 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml index 00380cfc..df381828 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml @@ -8,12 +8,7 @@ mc_tel_ids: directories: workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. - # MC paths below are ignored if you set NSB_matching = true. - MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" # set to "" if you don't want to process these Monte Carlo simulations. - MC_electrons : "" - MC_helium : "" - MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" - MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. @@ -29,12 +24,9 @@ general: base_config_file: '' # path + name to a custom MCP config file. If not provided, the default config.yaml file will be used LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" - focal_length : "effective" simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB - lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB - proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. + lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] env_name : magic-lst # name of the conda environment to be used to process data. cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). - NSB_matching : true # Set to false to process also the MCs. 
Set to true if adequate MC productions (DLx) are already available on the IT Container. - NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true. + diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 8e39e38b..48faf96b 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -11,7 +11,7 @@ No LST data is used here. Standard usage: -$ dl1_production (-t analysis_type) (-c config_file.yaml) +$ dl1_production (-c config_file.yaml) """ import argparse import glob @@ -229,7 +229,7 @@ def main(): env_name = config["general"]["env_name"] - focal_length = config["general"]["focal_length"] + source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index c499e488..5afe02c1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -1,29 +1,16 @@ """ -This script splits the proton MC data sample into "train" -and "test", deletes possible MC failed runs (only those files -that end up with a size < 1 kB), and generates the bash -scripts to merge MC and real data files by calling the script "merge_hdf_files.py" -in the following order: +This script generates the bash +scripts to merge real data files by calling the script "merge_hdf_files.py": MAGIC: Merge the subruns into runs for M1 and M2 individually. -MC: - -Merges all MC runs in a node Usage: -$ merging_runs (-c config.yaml) (-t analysis_type) - -If you want to merge only the MAGIC or only the MC data, -you can do as follows: +$ merging_runs (-c config.yaml) -Only MAGIC: -$ merging_runs -t onlyMAGIC (-c config.yaml) -Only MC: -$ merging_runs -t onlyMC (-c config.yaml) """ import argparse @@ -138,7 +125,7 @@ def main(): target_dir = Path(config["directories"]["workspace_dir"]) - NSB_match = config["general"]["NSB_matching"] + train_fraction = float(config["general"]["proton_train_fraction"]) env_name = config["general"]["env_name"] @@ -154,7 +141,6 @@ def main(): source_list.append(source) for source_name in source_list: - # Below we run the analysis on the MC data MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 752b20a2..bb7c5dff 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -1,19 +1,11 @@ """ This scripts generates and runs the bashscripts -to compute the stereo parameters of DL1 MC and +to compute the stereo parameters of DL1 Coincident MAGIC+LST data files. 
Usage: -$ stereo_events (-c config.yaml) (-t analysis_type) +$ stereo_events (-c config.yaml) -If you want to compute the stereo parameters only the real data or only the MC data, -you can do as follows: - -Only real data: -$ stereo_events -t onlyMAGIC (-c config.yaml) - -Only MC: -$ stereo_events -t onlyMC (-c config.yaml) """ import argparse @@ -174,7 +166,7 @@ def main(): env_name = config["general"]["env_name"] - NSB_match = config["general"]["NSB_matching"] + source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] From 0cd7b7a342ce9e22a025367c3d511f9a4f9616e8 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Wed, 7 Aug 2024 06:40:44 +0000 Subject: [PATCH 208/236] remove MC --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 48faf96b..58aaf1eb 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -41,7 +41,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, noise_value, source_name, config_gen): +def config_file_gen(target_dir, source_name, config_gen): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -50,8 +50,6 @@ def config_file_gen(target_dir, noise_value, source_name, config_gen): ---------- target_dir : path Directory to store the results - noise_value : list - List of the noise correction values for LST source_name : str Name of the target source config_gen : dict @@ -240,7 +238,6 @@ def main(): else: source_list.append(source) - noise_value = [0, 0, 0] @@ -263,7 +260,7 @@ def main(): str(target_dir), telescope_ids, MAGIC_runs, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - target_dir, noise_value, source_name, config + target_dir, source_name, config ) # TODO: fix here # Below we run the analysis on the MAGIC data From e922a3a80b015713a4c5c807c791015ee989a3ad Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Thu, 8 Aug 2024 11:57:28 +0200 Subject: [PATCH 209/236] fix order columns --- .../database_production/nsb_to_h5.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index b881d8be..269b1541 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -49,6 +49,19 @@ def collect_nsb(df_LST): df_LST.loc[run, "nsb"] = float(nsb) df_LST = df_LST.reset_index() + df_LST = df_LST[[ + "DATE", + "source", + "LST1_run", + "MAGIC_stereo", + "MAGIC_trigger", + "MAGIC_HV", + "nsb", + "lstchain_versions", + "last_lstchain_file", + "processed_lstchain_file", + "error_code_nsb", + ]] return df_LST From 61ce4b9fc0a00796fb76a5110f318bc5f6e6eb1c Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Thu, 8 Aug 2024 11:59:20 +0200 Subject: [PATCH 210/236] Update nsb_to_h5.py --- .../database_production/nsb_to_h5.py | 
28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py index 269b1541..27a9bc0c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_to_h5.py @@ -49,19 +49,21 @@ def collect_nsb(df_LST): df_LST.loc[run, "nsb"] = float(nsb) df_LST = df_LST.reset_index() - df_LST = df_LST[[ - "DATE", - "source", - "LST1_run", - "MAGIC_stereo", - "MAGIC_trigger", - "MAGIC_HV", - "nsb", - "lstchain_versions", - "last_lstchain_file", - "processed_lstchain_file", - "error_code_nsb", - ]] + df_LST = df_LST[ + [ + "DATE", + "source", + "LST1_run", + "MAGIC_stereo", + "MAGIC_trigger", + "MAGIC_HV", + "nsb", + "lstchain_versions", + "last_lstchain_file", + "processed_lstchain_file", + "error_code_nsb", + ] + ] return df_LST From 7368b27b29e7362b3635fc5f68746096966261fa Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Thu, 8 Aug 2024 13:43:37 +0000 Subject: [PATCH 211/236] skip broken subruns --- .../database_production/LSTnsb.py | 41 ++++++++++++++++--- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 126225ff..c19cabc8 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -20,6 +20,26 @@ logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) +def update_mod(mod, n_sub, denominator, index, n_noise) + """ + Function to update the step used to extract the subruns for the NSB evaluation + + Parameters + ---------- + mod : int + Sampling step + n_sub : int + Number of subruns in the run + denominator : int + Number of subruns to be used to evaluate NSB for a run + index : int + Index of the currently used subrun + n_noise : int + Number of NSB values already computed + """ + if n_sub > denominator: + mod = (n_sub - index) // (denominator - n_noise) + return mod def nsb(run_list, simtel, lst_config, run_number, denominator): @@ -56,7 +76,7 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): mod = 1 else: mod = len(run_list) // denominator - failed = 0 + logger.info("NSB levels (sub-runs): \n") for ii in range(0, len(run_list)): subrun = run_list[ii].split(".")[-2] @@ -65,12 +85,21 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): if ii % mod == 0: try: a, _, _ = calculate_noise_parameters(simtel, run_list[ii], lst_config) - noise.append(a) - logger.info(a) + if a is not None: + if a>0.0: + noise.append(a) + logger.info(a) + else: + mod = update_mod(mod, len(run_list), denominator, ii, len(noise)) + logger.warning(f'NSB level could not be adequately evaluated for subrun {subrun} (negative value or missing pedestal events): skipping this subrun...') + else: + mod = update_mod(mod, len(run_list), denominator, ii, len(noise)) + logger.warning(f'NSB level is None for subrun {subrun} (missing interleaved FF): skipping this subrun...') + + except IndexError: - failed = failed + 1 - if len(run_list) > denominator: - mod = (len(run_list) - ii) // (denominator - len(noise)) + + mod = update_mod(mod, len(run_list), 
denominator, ii, len(noise))
                logger.warning(
                    f"Subrun {subrun} caused an error in the NSB level evaluation for run {run_number}. Check reports before using it"
                )

From 5472c2b29a144003e14bce430ad919968e020dff Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Fri, 9 Aug 2024 06:40:03 +0000
Subject: [PATCH 212/236] minor fixes

---
 .../database_production/LSTnsb.py             | 38 ++++++++++++-------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py
index c19cabc8..b230808f 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py
@@ -20,14 +20,15 @@
 logger.addHandler(logging.StreamHandler())
 logger.setLevel(logging.INFO)
 
-def update_mod(mod, n_sub, denominator, index, n_noise)
+
+def update_mod(mod, n_sub, denominator, index, n_noise):
     """
     Function to update the step used to extract the subruns for the NSB evaluation
 
     Parameters
     ----------
     mod : int
-        Sampling step 
+        Sampling step
     n_sub : int
         Number of subruns in the run
     denominator : int
         Number of subruns to be used to evaluate NSB for a run
     index : int
         Index of the currently used subrun
     n_noise : int
         Number of NSB values already computed
+
+    Returns
+    -------
+    int
+        Sampling step
     """
     if n_sub > denominator:
         mod = (n_sub - index) // (denominator - n_noise)
-    return mod
+    return mod
+
 
 def nsb(run_list, simtel, lst_config, run_number, denominator):
 
@@ -72,11 +79,11 @@ def nsb(run_list, simtel, lst_config, run_number, denominator):
             "There is no subrun matching the provided run number. Check the list of the LST runs (LST_runs.txt)"
         )
         return
-    if len(run_list) < denominator:
+    if len(run_list) <= denominator:
         mod = 1
     else:
         mod = len(run_list) // denominator
-
+
     logger.info("NSB levels (sub-runs): \n")
     for ii in range(0, len(run_list)):
         subrun = run_list[ii].split(".")[-2]
@@ -86,19 +93,24 @@ def nsb(run_list, simtel, lst_config, run_number, denominator):
             try:
                 a, _, _ = calculate_noise_parameters(simtel, run_list[ii], lst_config)
                 if a is not None:
-                    if a>0.0:
+                    if a > 0.0:
                         noise.append(a)
                         logger.info(a)
                     else:
-                        mod = update_mod(mod, len(run_list), denominator, ii, len(noise))
-                        logger.warning(f'NSB level could not be adequately evaluated for subrun {subrun} (negative value or missing pedestal events): skipping this subrun...')
-                else:
+                        mod = update_mod(
+                            mod, len(run_list), denominator, ii, len(noise)
+                        )
+                        logger.warning(
+                            f"NSB level could not be adequately evaluated for subrun {subrun} (negative value or missing pedestal events): skipping this subrun..."
+                        )
+                else:
                     mod = update_mod(mod, len(run_list), denominator, ii, len(noise))
-                    logger.warning(f'NSB level is None for subrun {subrun} (missing interleaved FF): skipping this subrun...')
-
+                    logger.warning(
+                        f"NSB level is None for subrun {subrun} (missing interleaved FF): skipping this subrun..."
+                    )
 
             except IndexError:
-
+
                 mod = update_mod(mod, len(run_list), denominator, ii, len(noise))
                 logger.warning(
                     f"Subrun {subrun} caused an error in the NSB level evaluation for run {run_number}. Check reports before using it"
                 )
@@ -187,7 +199,7 @@ def main():
 
     logger.info(f"Run n. {run_number}, NSB median {median_NSB}")
 
     for j in range(0, len(nsb_list)):
-        if (median_NSB < nsb_limit[j + 1]) & (median_NSB > nsb_limit[j]):
+        if (median_NSB <= nsb_limit[j + 1]) & (median_NSB > nsb_limit[j]):
             with open(f"nsb_LST_{nsb_list[j]}_{run_number}.txt", "a+") as f:
                 f.write(f"{date},{run_number},{median_NSB}\n")
     if median_NSB > nsb_limit[-1]:

From 26a63ef9456fefcbc77ad1e71b334c723e00a127 Mon Sep 17 00:00:00 2001
From: Elisa-Visentin
Date: Fri, 9 Aug 2024 07:10:28 +0000
Subject: [PATCH 213/236] minor fixes

---
 .../semi_automatic_scripts/dl1_production.py  | 23 ++++-----------
 .../semi_automatic_scripts/merging_runs.py    | 17 +++-------
 .../semi_automatic_scripts/stereo_events.py   | 17 ++++-------
 3 files changed, 12 insertions(+), 45 deletions(-)

diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
index 58aaf1eb..49a07fd4 100644
--- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
+++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py
@@ -65,19 +65,17 @@ def config_file_gen(target_dir, source_name, config_gen):
     LST_config = config_dict["LST"]
     MAGIC_config = config_dict["MAGIC"]
 
-
     conf = {
         "mc_tel_ids": config_gen["mc_tel_ids"],
         "LST": LST_config,
         "MAGIC": MAGIC_config,
     }
-
+
     file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml"
 
     with open(file_name, "w") as f:
         yaml.dump(conf, f, default_flow_style=False)
 
 
-
 def lists_and_bash_gen_MAGIC(
     target_dir, telescope_ids, MAGIC_runs, source, env_name, cluster
 ):
@@ -164,9 +162,7 @@ def lists_and_bash_gen_MAGIC(
         f.writelines(lines)
 
 
-def directories_generator_real(
-    target_dir, telescope_ids, MAGIC_runs, source_name
-):
+def directories_generator_real(target_dir, telescope_ids, MAGIC_runs, source_name):
     """
     Here we create all subdirectories for a given workspace and target name.
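Taken together, the two LSTnsb patches above define how a run's NSB estimate is sampled and binned. A self-contained sketch with illustrative numbers (update_mod is quoted from the patch; the bin-edge lines mirror main(); the example values are not from any real run):

    def update_mod(mod, n_sub, denominator, index, n_noise):
        # After skipping a bad subrun, re-derive the sampling step so the
        # remaining (n_sub - index) subruns can still supply the missing
        # (denominator - n_noise) NSB estimates.
        if n_sub > denominator:
            mod = (n_sub - index) // (denominator - n_noise)
        return mod

    # e.g. 100 subruns, 25 estimates wanted: the initial step is 100 // 25 = 4;
    # if subrun 40 fails with 10 estimates collected so far, the step becomes
    # (100 - 40) // (25 - 10) = 4, so the target count stays reachable.

    nsb_list = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]  # bin centres, cf. config_auto_MCP.yaml
    width = [a / 2 - b / 2 for a, b in zip(nsb_list[1:], nsb_list[:-1])]
    width.append(0.25)
    nsb_limit = [a + b for a, b in zip(nsb_list, width)]
    nsb_limit.insert(0, -0.01)  # sentinel below zero, so that 0.0 > nsb_limit[0]
    # nsb_limit == [-0.01, 0.75, 1.25, 1.75, 2.25, 2.75, 3.25]; a run whose median
    # NSB is m falls into bin j when nsb_limit[j] < m <= nsb_limit[j + 1]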
@@ -182,10 +178,9 @@ def directories_generator_real( Name of the target source """ - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") - + ########################################### # MAGIC ########################################### @@ -195,8 +190,6 @@ def directories_generator_real( os.makedirs(f"{dl1_dir}/M{magic}/{i[0]}/{i[1]}/logs", exist_ok=True) - - def main(): """ @@ -206,7 +199,6 @@ def main(): # Here we are simply collecting the parameters from the command line, as input file, output directory, and configuration file parser = argparse.ArgumentParser() - parser.add_argument( "--config-file", @@ -226,8 +218,6 @@ def main(): telescope_ids = list(config["mc_tel_ids"].values()) env_name = config["general"]["env_name"] - - source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] @@ -238,11 +228,8 @@ def main(): else: source_list.append(source) - - for source_name in source_list: - MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( @@ -259,9 +246,7 @@ def main(): directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen( - target_dir, source_name, config - ) # TODO: fix here + config_file_gen(target_dir, source_name, config) # TODO: fix here # Below we run the analysis on the MAGIC data diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 2627ee30..83e1a22c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -19,7 +19,6 @@ import joblib import numpy as np import yaml -from tqdm import tqdm from magicctapipe import __version__ from magicctapipe.scripts.lst1_magic.semi_automatic_scripts.clusters import ( @@ -34,8 +33,6 @@ logger.setLevel(logging.INFO) - - def merge(target_dir, MAGIC_runs, env_name, source, cluster): """ @@ -95,8 +92,6 @@ def merge(target_dir, MAGIC_runs, env_name, source, cluster): logger.error(f"{indir} does not exist") - - def main(): """ @@ -113,7 +108,6 @@ def main(): help="Path to a configuration file", ) - args = parser.parse_args() with open( args.config_file, "rb" @@ -122,9 +116,6 @@ def main(): target_dir = Path(config["directories"]["workspace_dir"]) - - train_fraction = float(config["general"]["proton_train_fraction"]) - env_name = config["general"]["env_name"] source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] @@ -136,7 +127,7 @@ def main(): else: source_list.append(source) - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( @@ -144,7 +135,7 @@ def main(): ) # Below we run the analysis on the MAGIC data - + print("***** Generating merge_MAGIC bashscripts...") merge( target_dir, @@ -157,9 +148,7 @@ def main(): print("***** Running merge_hdf_files.py on the MAGIC data files...") # Below we run the bash scripts to merge the MAGIC files - list_of_merging_scripts = np.sort( - glob.glob(f"{source_name}_Merge_MAGIC*.sh") - ) + list_of_merging_scripts = np.sort(glob.glob(f"{source_name}_Merge_MAGIC*.sh")) if len(list_of_merging_scripts) < 1: 
logger.warning("No bash scripts for real data") continue diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index bb7c5dff..2547316d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -1,11 +1,10 @@ """ This scripts generates and runs the bashscripts -to compute the stereo parameters of DL1 +to compute the stereo parameters of DL1 Coincident MAGIC+LST data files. Usage: -$ stereo_events (-c config.yaml) - +$ stereo_events (-c config.yaml) """ import argparse @@ -137,8 +136,6 @@ def bash_stereo(target_dir, source, env_name, cluster): f.writelines(lines) - - def main(): """ @@ -155,7 +152,6 @@ def main(): help="Path to a configuration file", ) - args = parser.parse_args() with open( args.config_file, "rb" @@ -166,7 +162,6 @@ def main(): env_name = config["general"]["env_name"] - source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] @@ -176,9 +171,9 @@ def main(): source_list = joblib.load("list_sources.dat") else: source_list = [source] - + for source_name in source_list: - + print("***** Generating file config_stereo.yaml...") configfile_stereo(target_dir, source_name, config) @@ -194,9 +189,7 @@ def main(): ) # Below we run the bash scripts to find the stereo events - list_of_stereo_scripts = np.sort( - glob.glob(f"{source_name}_StereoEvents*.sh") - ) + list_of_stereo_scripts = np.sort(glob.glob(f"{source_name}_StereoEvents*.sh")) if len(list_of_stereo_scripts) < 1: logger.warning("No bash scripts for real data") continue From 98795eabab5f1bd2d4cff67e4b9dca280efecc43 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 07:35:55 +0000 Subject: [PATCH 214/236] init and setup fixes --- .../semi_automatic_scripts/__init__.py | 40 ++++++++++++++----- .../check_MAGIC_runs.py | 7 ++++ .../database_production/__init__.py | 14 +++++-- .../update_MAGIC_database.py | 2 + .../semi_automatic_scripts/job_accounting.py | 2 + .../semi_automatic_scripts/list_from_h5.py | 2 + setup.cfg | 3 +- 7 files changed, 55 insertions(+), 15 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 2cc82cf9..22dc6ca2 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -1,4 +1,10 @@ -from .clusters import slurm_lines +from .check_MAGIC_runs import ( + existing_files, + fix_lists_and_convert, + missing_files, + table_magic_runs, +) +from .clusters import rc_lines, slurm_lines from .coincident_events import configfile_coincidence, linking_bash_lst from .dl1_production import ( config_file_gen, @@ -7,24 +13,36 @@ lists_and_bash_gen_MAGIC, lists_and_bash_generator, ) +from .job_accounting import run_shell +from .list_from_h5 import clear_files, list_run, magic_date, split_lst_date from .merge_stereo import MergeStereo from .merging_runs import merge, mergeMC, split_train_test from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo __all__ = [ - "split_train_test", - "merge", - "mergeMC", + "bash_stereo", + "bash_stereoMC", + "clear_files", + "configfile_coincidence", + "configfile_stereo", "config_file_gen", - "lists_and_bash_generator", - "lists_and_bash_gen_MAGIC", "directories_generator_real", 
"directories_generator_MC", - "configfile_coincidence", + "existing_files", + "fix_lists_and_convert", "linking_bash_lst", - "configfile_stereo", - "bash_stereo", - "bash_stereoMC", - "slurm_lines", + "lists_and_bash_generator", + "lists_and_bash_gen_MAGIC", + "list_run", + "magic_date", + "merge", + "mergeMC", "MergeStereo", + "missing_files", + "rc_lines", + "run_shell", + "slurm_lines", + "split_lst_date", + "split_train_test", + "table_magic_runs", ] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py index fee74fc8..aac5900a 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/check_MAGIC_runs.py @@ -19,6 +19,13 @@ from magicctapipe.io import resource_file +__all__ = [ + "fix_lists_and_convert", + "table_magic_runs", + "existing_files", + "missing_files", +] + def fix_lists_and_convert(cell): """ diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py index 23129c35..9957c9db 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/__init__.py @@ -2,11 +2,19 @@ from .LSTnsb import nsb from .nsb_level import bash_scripts from .nsb_to_h5 import collect_nsb +from .update_MAGIC_database import ( + fix_lists_and_convert, + table_magic_runs, + update_tables, +) __all__ = [ - "nsb", "bash_scripts", - "version_lstchain", - "lstchain_versions", "collect_nsb", + "fix_lists_and_convert", + "lstchain_versions", + "nsb", + "table_magic_runs", + "update_tables", + "version_lstchain", ] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py index 628de890..1c0e297f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/update_MAGIC_database.py @@ -25,6 +25,8 @@ from magicctapipe.io import resource_file +__all__ = ["fix_lists_and_convert", "table_magic_runs", "update_tables"] + def fix_lists_and_convert(cell): """ diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py index 32fa9591..70ecf212 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/job_accounting.py @@ -25,6 +25,8 @@ RED = "\033[31m" ENDC = "\033[0m" +__all__ = ["run_shell"] + def run_shell(command): """ diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index c1eca310..c2a4e94c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -13,6 +13,8 @@ from magicctapipe.io import resource_file +__all__ = ["split_lst_date", "magic_date", "clear_files", "list_run"] + def split_lst_date(df): diff --git a/setup.cfg b/setup.cfg index edebaff4..ecd18a55 100644 --- a/setup.cfg +++ b/setup.cfg 
@@ -96,6 +96,7 @@ console_scripts = magic_calib_to_dl1 = magicctapipe.scripts.lst1_magic.magic_calib_to_dl1:main merge_hdf_files = magicctapipe.scripts.lst1_magic.merge_hdf_files:main tune_magic_nsb = magicctapipe.scripts.lst1_magic.tune_magic_nsb:main + check_MAGIC_runs = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.check_MAGIC_runs:main coincident_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.coincident_events:main create_LST_table = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.create_LST_table:main dl1_production = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.dl1_production:main @@ -108,7 +109,7 @@ console_scripts = nsb_level = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_level:main nsb_to_h5 = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.nsb_to_h5:main stereo_events = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.stereo_events:main - + update_MAGIC_database = magicctapipe.scripts.lst1_magic.semi_automatic_scripts.database_production.update_MAGIC_database:main [tool:pytest] minversion=3.0 From e37f16280065a9ec0894bc79e5f52d8ff9afb7e3 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 08:21:31 +0000 Subject: [PATCH 215/236] fix db paths --- magicctapipe/resources/database_config.yaml | 11 +++++------ .../database_production/create_LST_table.py | 8 ++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/magicctapipe/resources/database_config.yaml b/magicctapipe/resources/database_config.yaml index fb9c2976..19369883 100644 --- a/magicctapipe/resources/database_config.yaml +++ b/magicctapipe/resources/database_config.yaml @@ -1,13 +1,12 @@ database_paths: - input_1: "/fefs/aswg/workspace/federico.dipierro/simultaneous_obs_summary-apr2024.h5" - input_2: "/home/alessio.berti/MAGIC-LST_common/runfile/simultaneous_obs_summary.h5" + MAGIC+LST1: "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5" + MAGIC+LST1_bis: "/home/alessio.berti/MAGIC-LST_common/runfile/simultaneous_obs_summary.h5" MAGIC: '/fefs/aswg/workspace/joanna.wojtowicz/data/Common_MAGIC_LST1_data_MAGIC_runs_subruns.h5' LST: "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/observations_LST.h5" - MAGIC+LST1: "/fefs/aswg/workspace/federico.dipierro/MAGIC_LST1_simultaneous_runs_info/simultaneous_obs_summary.h5" + database_keys: - input_1: '/str' - input_2: '/str' + MAGIC+LST1: '/str/table' + MAGIC+LST1_bis: '/str' MAGIC-I: "MAGIC1/runs_M1" MAGIC-II: "MAGIC2/runs_M2" LST: "joint_obs" - MAGIC+LST1: "str/table" diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py index 6636c086..9fd1c069 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/create_LST_table.py @@ -54,12 +54,12 @@ def main(): out_key = config_dict["database_keys"]["LST"] df = pd.read_hdf( - config_dict["database_paths"]["input_1"], - key=config_dict["database_keys"]["input_1"], + config_dict["database_paths"]["MAGIC+LST1"], + key=config_dict["database_keys"]["MAGIC+LST1"], ) # TODO: put this file in a shared folder df2 = pd.read_hdf( - config_dict["database_paths"]["input_2"], - key=config_dict["database_keys"]["input_2"], + 
config_dict["database_paths"]["MAGIC+LST1_bis"], + key=config_dict["database_keys"]["MAGIC+LST1_bis"], ) # TODO: put this file in a shared folder df = pd.concat([df, df2]).drop_duplicates(subset="LST1_run", keep="first") if args.begin != 0: From 04e6d6f9553380ecde303f52408c7658bb48de73 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 13:26:16 +0000 Subject: [PATCH 216/236] remove MC --- .../lst1_magic/semi_automatic_scripts/__init__.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py index 2cc82cf9..27768690 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/__init__.py @@ -2,29 +2,22 @@ from .coincident_events import configfile_coincidence, linking_bash_lst from .dl1_production import ( config_file_gen, - directories_generator_MC, directories_generator_real, lists_and_bash_gen_MAGIC, - lists_and_bash_generator, ) from .merge_stereo import MergeStereo -from .merging_runs import merge, mergeMC, split_train_test -from .stereo_events import bash_stereo, bash_stereoMC, configfile_stereo +from .merging_runs import merge +from .stereo_events import bash_stereo, configfile_stereo __all__ = [ - "split_train_test", "merge", - "mergeMC", "config_file_gen", - "lists_and_bash_generator", "lists_and_bash_gen_MAGIC", "directories_generator_real", - "directories_generator_MC", "configfile_coincidence", "linking_bash_lst", "configfile_stereo", "bash_stereo", - "bash_stereoMC", "slurm_lines", "MergeStereo", ] From 51633b8c3adec417e217a2f87f6d7c9ae2fc1599 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 13:38:03 +0000 Subject: [PATCH 217/236] check number pedestal events --- .../database_production/LSTnsb.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index b230808f..fccc4455 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -9,8 +9,8 @@ import argparse import glob import logging - import numpy as np +import pandas as pd import yaml from lstchain.image.modifier import calculate_noise_parameters @@ -97,12 +97,18 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): noise.append(a) logger.info(a) else: - mod = update_mod( - mod, len(run_list), denominator, ii, len(noise) - ) - logger.warning( - f"NSB level could not be adequately evaluated for subrun {subrun} (negative value or missing pedestal events): skipping this subrun..." - ) + df_subrun=pd.read_hdf(run_list[ii], key="dl1/event/telescope/parameters/LST_LSTCam") + n_ped=len(df_subrun[df_subrun['event_type']==2]) + if n_ped > 0: + noise.append(a) + logger.info(a) + else: + mod = update_mod( + mod, len(run_list), denominator, ii, len(noise) + ) + logger.warning( + f"NSB level could not be adequately evaluated for subrun {subrun} (missing pedestal events): skipping this subrun..." 
+ ) else: mod = update_mod(mod, len(run_list), denominator, ii, len(noise)) logger.warning( @@ -178,7 +184,7 @@ def main(): width = [a / 2 - b / 2 for a, b in zip(nsb_list[1:], nsb_list[:-1])] width.append(0.25) nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] - nsb_limit.insert(0, 0) + nsb_limit.insert(0,-0.01) #arbitrary small negative number so that 0.0 > nsb_limit[0] LST_files = np.sort(glob.glob(f"nsb_LST_*_{run_number}.txt")) From 66c0046ce12a85999da8bfcd166660e6119f6b97 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 13:47:15 +0000 Subject: [PATCH 218/236] lint --- .../database_production/LSTnsb.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index fccc4455..0e2d7a38 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -9,6 +9,7 @@ import argparse import glob import logging + import numpy as np import pandas as pd import yaml @@ -97,8 +98,11 @@ def nsb(run_list, simtel, lst_config, run_number, denominator): noise.append(a) logger.info(a) else: - df_subrun=pd.read_hdf(run_list[ii], key="dl1/event/telescope/parameters/LST_LSTCam") - n_ped=len(df_subrun[df_subrun['event_type']==2]) + df_subrun = pd.read_hdf( + run_list[ii], + key="dl1/event/telescope/parameters/LST_LSTCam", + ) + n_ped = len(df_subrun[df_subrun["event_type"] == 2]) if n_ped > 0: noise.append(a) logger.info(a) @@ -184,7 +188,9 @@ def main(): width = [a / 2 - b / 2 for a, b in zip(nsb_list[1:], nsb_list[:-1])] width.append(0.25) nsb_limit = [a + b for a, b in zip(nsb_list[:], width[:])] - nsb_limit.insert(0,-0.01) #arbitrary small negative number so that 0.0 > nsb_limit[0] + nsb_limit.insert( + 0, -0.01 + ) # arbitrary small negative number so that 0.0 > nsb_limit[0] LST_files = np.sort(glob.glob(f"nsb_LST_*_{run_number}.txt")) From db5f63d13eb1d5c620ef5339a5a639a1b2278411 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 14:22:51 +0000 Subject: [PATCH 219/236] check on source out name --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 6 ++++-- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 6 ++++-- .../lst1_magic/semi_automatic_scripts/list_from_h5.py | 3 ++- .../lst1_magic/semi_automatic_scripts/merge_stereo.py | 6 +++--- .../lst1_magic/semi_automatic_scripts/merging_runs.py | 6 ++++-- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 2 ++ 6 files changed, 19 insertions(+), 10 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 7b8893c5..aac4c3b7 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -196,14 +196,16 @@ def main(): source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] + cluster = config["general"]["cluster"] - source_list = [] if source_in is None: source_list = joblib.load("list_sources.dat") else: - source_list.append(source) + if source is None: + source=source_in + source_list = [source] for source_name in source_list: print("***** Generating file 
config_coincidence.yaml...") diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 38edec0b..9c6d6032 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -447,12 +447,14 @@ def main(): source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] target_dir = Path(config["directories"]["workspace_dir"]) - source_list = [] + if source_in is None: source_list = joblib.load("list_sources.dat") else: - source_list.append(source) + if source is None: + source=source_in + source_list = [source] noise_value = [0, 0, 0] if not NSB_match: nsb = config["general"]["NSB_MC"] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index c2a4e94c..baa60796 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -230,6 +230,8 @@ def main(): MAGIC2_key = config_dict["database_keys"]["MAGIC-II"] source_in = config["data_selection"]["source_name_database"] source_out = config["data_selection"]["source_name_output"] + if (source_out is None) and (source_in is not None): + source_out=source_in range = config["data_selection"]["time_range"] skip_LST = config["data_selection"]["skip_LST_runs"] skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"] @@ -301,7 +303,6 @@ def main(): df_MAGIC2 = magic_date(df_MAGIC2) df_MAGIC1 = magic_date(df_MAGIC1) - # df_MAGIC2 = df_MAGIC2.rename(columns={"Source": "source"}) M1_runs = df_MAGIC1["Run ID"].tolist() if (len(M1_runs) == 0) or (len(df_MAGIC2) == 0): diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index 5a18caec..79f8d5be 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -110,13 +110,13 @@ def main(): source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] - source_list = [] - if source_in is None: source_list = joblib.load("list_sources.dat") else: - source_list.append(source) + if source is None: + source=source_in + source_list = [source] for source_name in source_list: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 623f16bd..070eb714 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -250,12 +250,14 @@ def main(): source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] - source_list = [] + if source_in is None: source_list = joblib.load("list_sources.dat") else: - source_list.append(source) + if source is None: + source=source_in + source_list = [source] if not NSB_match: if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): # Here we slice the proton MC data into "train" and "test" (but first we check if the directory already exists): diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py 
b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 4ffd2530..2e9f509f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -244,6 +244,8 @@ def main(): if source_in is None: source_list = joblib.load("list_sources.dat") else: + if source is None: + source=source_in source_list = [source] if not NSB_match: if ( From 99365d66623f508070d0420ba37bc0b12279dee7 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 14:23:23 +0000 Subject: [PATCH 220/236] lint --- .../lst1_magic/semi_automatic_scripts/coincident_events.py | 2 +- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 4 ++-- .../scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py | 2 +- .../scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py | 2 +- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 3 +-- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index aac4c3b7..21a65a90 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -204,7 +204,7 @@ def main(): else: if source is None: - source=source_in + source = source_in source_list = [source] for source_name in source_list: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 9c6d6032..5513161d 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -447,13 +447,13 @@ def main(): source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] target_dir = Path(config["directories"]["workspace_dir"]) - + if source_in is None: source_list = joblib.load("list_sources.dat") else: if source is None: - source=source_in + source = source_in source_list = [source] noise_value = [0, 0, 0] if not NSB_match: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py index baa60796..f847fc71 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/list_from_h5.py @@ -231,7 +231,7 @@ def main(): source_in = config["data_selection"]["source_name_database"] source_out = config["data_selection"]["source_name_output"] if (source_out is None) and (source_in is not None): - source_out=source_in + source_out = source_in range = config["data_selection"]["time_range"] skip_LST = config["data_selection"]["skip_LST_runs"] skip_MAGIC = config["data_selection"]["skip_MAGIC_runs"] diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py index 79f8d5be..1833498f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merge_stereo.py @@ -115,7 +115,7 @@ def main(): else: if source is None: - source=source_in + source = source_in source_list = [source] for source_name 
in source_list: diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 070eb714..b2fb9086 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -250,13 +250,12 @@ def main(): source = config["data_selection"]["source_name_output"] cluster = config["general"]["cluster"] - if source_in is None: source_list = joblib.load("list_sources.dat") else: if source is None: - source=source_in + source = source_in source_list = [source] if not NSB_match: if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 2e9f509f..4cc04391 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -245,7 +245,7 @@ def main(): source_list = joblib.load("list_sources.dat") else: if source is None: - source=source_in + source = source_in source_list = [source] if not NSB_match: if ( From ba4af77a73a9090c0ee4e9c9531517f5183ec001 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Fri, 9 Aug 2024 15:01:54 +0000 Subject: [PATCH 221/236] use tel id from config.yaml --- .../semi_automatic_scripts/coincident_events.py | 13 +++++++------ .../semi_automatic_scripts/config_auto_MCP.yaml | 8 -------- .../database_production/LSTnsb.py | 2 +- .../database_production/nsb_level.py | 2 +- .../semi_automatic_scripts/dl1_production.py | 16 ++++++++-------- .../semi_automatic_scripts/stereo_events.py | 14 +++++++------- 6 files changed, 24 insertions(+), 31 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py index 7b8893c5..eb6450a4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/coincident_events.py @@ -38,7 +38,7 @@ logger.setLevel(logging.INFO) -def configfile_coincidence(target_dir, source_name, config_gen): +def configfile_coincidence(target_dir, source_name, config_file): """ This function creates the configuration file needed for the event coincidence step @@ -49,10 +49,10 @@ def configfile_coincidence(target_dir, source_name, config_gen): Path to the working directory source_name : str Name of the target source - config_gen : dict - Dictionary of the entries of the general configuration file + config_file : str + Path to MCP configuration file (e.g., resources/config.yaml) """ - config_file = config_gen["general"]["base_config_file"] + if config_file == "": config_file = resource_file("config.yaml") @@ -62,7 +62,7 @@ def configfile_coincidence(target_dir, source_name, config_gen): config_dict = yaml.safe_load(fc) conf = { - "mc_tel_ids": config_gen["mc_tel_ids"], + "mc_tel_ids": config_dict["mc_tel_ids"], "event_coincidence": config_dict["event_coincidence"], } @@ -193,6 +193,7 @@ def main(): env_name = config["general"]["env_name"] LST_version = config["general"]["LST_version"] + config_file = config["general"]["base_config_file"] source_in = config["data_selection"]["source_name_database"] source = config["data_selection"]["source_name_output"] @@ -207,7 +208,7 @@ def main(): for source_name in 
source_list: print("***** Generating file config_coincidence.yaml...") - configfile_coincidence(target_dir, source_name, config) + configfile_coincidence(target_dir, source_name, config_file) LST_runs_and_dates = f"{source_name}_LST_runs.txt" LST_runs = np.genfromtxt(LST_runs_and_dates, dtype=str, delimiter=",", ndmin=2) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml index 00380cfc..509585ba 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml @@ -1,11 +1,3 @@ -mc_tel_ids: - LST-1: 1 - LST-2: 0 - LST-3: 0 - LST-4: 0 - MAGIC-I: 2 - MAGIC-II: 3 - directories: workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. # MC paths below are ignored if you set NSB_matching = true. diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 126225ff..df4bafc1 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -89,7 +89,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_auto_MCP.yaml", + default="../config_auto_MCP.yaml", help="Path to a configuration file", ) parser.add_argument( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py index 58c4caaf..802b9964 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/nsb_level.py @@ -80,7 +80,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config_auto_MCP.yaml", + default="../config_auto_MCP.yaml", help="Path to a configuration file", ) parser.add_argument( diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 38edec0b..9b028806 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -43,7 +43,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen): +def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_file): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -58,10 +58,10 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen) If real data are matched to pre-processed MCs or not source_name : str Name of the target source - config_gen : dict - Dictionary of the entries of the general configuration file + config_file : str + Path to MCP configuration file (e.g., resources/config.yaml) """ - config_file = config_gen["general"]["base_config_file"] + if config_file == "": config_file = resource_file("config.yaml") with open( @@ -76,7 +76,7 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_gen) LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2] 
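        # NOTE (editorial annotation, not a patch line): main() builds
        # noise_value = [nsb, noisebright, biasdim] (patch 230 later in this
        # series restores exactly that), so the indices in these increase_nsb
        # assignments are intentionally non-sequential: noise_value[0] is the
        # extra noise in dim pixels, noise_value[2] the extra bias in dim
        # pixels, and noise_value[1] the extra noise in bright pixels; any
        # reordering of the list in main() must be mirrored here.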
LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1] conf = { - "mc_tel_ids": config_gen["mc_tel_ids"], + "mc_tel_ids": config_dict["mc_tel_ids"], "LST": LST_config, "MAGIC": MAGIC_config, } @@ -435,8 +435,8 @@ def main(): telescope_ids = list(config["mc_tel_ids"].values()) env_name = config["general"]["env_name"] NSB_match = config["general"]["NSB_matching"] + config_file = config["general"]["base_config_file"] - # LST_runs_and_dates = config["general"]["LST_runs"] MC_gammas = config["directories"]["MC_gammas"] MC_electrons = config["directories"]["MC_electrons"] MC_helium = config["directories"]["MC_helium"] @@ -468,7 +468,7 @@ def main(): str(target_dir), telescope_ids ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - target_dir, noise_value, NSB_match, "MC", config + target_dir, noise_value, NSB_match, "MC", config_file ) # TODO: fix here to_process = { "gammas": MC_gammas, @@ -519,7 +519,7 @@ def main(): str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - target_dir, noise_value, NSB_match, source_name, config + target_dir, noise_value, NSB_match, source_name, config_file ) # TODO: fix here # Below we run the analysis on the MAGIC data diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 4ffd2530..3bba62c0 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -40,7 +40,7 @@ logger.setLevel(logging.INFO) -def configfile_stereo(target_dir, source_name, config_gen): +def configfile_stereo(target_dir, source_name, config_file): """ This function creates the configuration file needed for the stereo reconstruction step @@ -51,11 +51,10 @@ def configfile_stereo(target_dir, source_name, config_gen): Path to the working directory source_name : str Name of the target source - config_gen : dict - Dictionary of the entries of the general configuration file + config_file : str + Path to MCP configuration file (e.g., resources/config.yaml) """ - config_file = config_gen["general"]["base_config_file"] if config_file == "": config_file = resource_file("config.yaml") @@ -64,7 +63,7 @@ def configfile_stereo(target_dir, source_name, config_gen): ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) conf = { - "mc_tel_ids": config_gen["mc_tel_ids"], + "mc_tel_ids": config_dict["mc_tel_ids"], "stereo_reco": config_dict["stereo_reco"], } if source_name == "MC": @@ -234,6 +233,7 @@ def main(): target_dir = Path(config["directories"]["workspace_dir"]) env_name = config["general"]["env_name"] + config_file = config["general"]["base_config_file"] NSB_match = config["general"]["NSB_matching"] source_in = config["data_selection"]["source_name_database"] @@ -251,7 +251,7 @@ def main(): or (args.analysis_type == "doEverything") and not NSB_match ): - configfile_stereo(target_dir, "MC", config) + configfile_stereo(target_dir, "MC", config_file) print("***** Generating the bashscript for MCs...") for part in [ "gammadiffuse", @@ -282,7 +282,7 @@ def main(): or (NSB_match) ): print("***** Generating file config_stereo.yaml...") - configfile_stereo(target_dir, source_name, config) + 
configfile_stereo(target_dir, source_name, config_file) # Below we run the analysis on the real data From 20474b68a4f437957b58b6b00c08d6fd7d72a7fc Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Mon, 12 Aug 2024 09:13:32 +0200 Subject: [PATCH 222/236] Update LSTnsb.py --- .../semi_automatic_scripts/database_production/LSTnsb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 0e2d7a38..1b1e1c50 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -44,7 +44,7 @@ def update_mod(mod, n_sub, denominator, index, n_noise): int Sampling step """ - if n_sub > denominator: + if (n_sub > denominator) and (denominator > n_noise): mod = (n_sub - index) // (denominator - n_noise) return mod From 0e6d98f593deba5e2321ed611b84157275b9f110 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Mon, 12 Aug 2024 12:09:35 +0200 Subject: [PATCH 223/236] exit for loop in NSB limits --- .../semi_automatic_scripts/database_production/LSTnsb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py index 1b1e1c50..a937976f 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/database_production/LSTnsb.py @@ -217,6 +217,7 @@ def main(): if median_NSB > nsb_limit[-1]: with open(f"nsb_LST_high_{run_number}.txt", "a+") as f: f.write(f"{date},{run_number},{median_NSB}\n") + break if __name__ == "__main__": From bbbf1123512cb8fdc843e11f2edb8f2eb6964b14 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 07:03:05 +0000 Subject: [PATCH 224/236] minor fix config --- .../semi_automatic_scripts/dl1_production.py | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 9b028806..7e3dc875 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -43,7 +43,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_file): +def config_file_gen(target_dir, NSB_match, source_name, config_file): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -52,8 +52,6 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_file ---------- target_dir : path Directory to store the results - noise_value : list - List of the noise correction values for LST NSB_match : bool If real data are matched to pre-processed MCs or not source_name : str @@ -68,17 +66,11 @@ def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_file config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) - LST_config = config_dict["LST"] - MAGIC_config = config_dict["MAGIC"] - if not 
NSB_match: - LST_config["increase_nsb"]["extra_noise_in_dim_pixels"] = noise_value[0] - LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2] - LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1] conf = { "mc_tel_ids": config_dict["mc_tel_ids"], - "LST": LST_config, - "MAGIC": MAGIC_config, + "LST": config_dict["LST"], + "MAGIC": config_dict["MAGIC"], } if source_name == "MC": file_name = f"{target_dir}/v{__version__}/MC/config_DL0_to_DL1.yaml" @@ -453,13 +445,6 @@ def main(): else: source_list.append(source) - noise_value = [0, 0, 0] - if not NSB_match: - nsb = config["general"]["NSB_MC"] - - noisebright = 1.15 * pow(nsb, 1.115) - biasdim = 0.358 * pow(nsb, 0.805) - noise_value = [nsb, noisebright, biasdim] if not NSB_match: # Below we run the analysis on the MC data @@ -467,9 +452,7 @@ def main(): directories_generator_MC( str(target_dir), telescope_ids ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen( - target_dir, noise_value, NSB_match, "MC", config_file - ) # TODO: fix here + config_file_gen(target_dir, NSB_match, "MC", config_file) # TODO: fix here to_process = { "gammas": MC_gammas, "electrons": MC_electrons, @@ -519,7 +502,7 @@ def main(): str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - target_dir, noise_value, NSB_match, source_name, config_file + target_dir, NSB_match, source_name, config_file ) # TODO: fix here # Below we run the analysis on the MAGIC data From c17a0ebfaaa8c8d74f7f011a242229c28900eeb5 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 07:12:20 +0000 Subject: [PATCH 225/236] merge commit lint --- .../scripts/lst1_magic/semi_automatic_scripts/dl1_production.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index c90625eb..8b7d9c71 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -447,7 +447,7 @@ def main(): if source is None: source = source_in source_list = [source] - + if not NSB_match: # Below we run the analysis on the MC data if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): From 45fc5b5eb137315c38f2bccbab18cbfc1067d839 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:19:18 +0200 Subject: [PATCH 226/236] Update dl1_production.py --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 4cd685a9..1613ef78 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -230,8 +230,7 @@ def main(): if source is None: source = source_in source_list = [source] - - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" From cc75273b685b5ef97dc763310bcf84e4d44d2ff3 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin 
<121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:20:20 +0200 Subject: [PATCH 227/236] Update merging_runs.py --- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index c53caf59..548ac4c4 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -128,8 +128,7 @@ def main(): if source is None: source = source_in source_list = [source] - - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( From 04c247012e3bf5647fa2df73f7be9daa4a052093 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:24:21 +0200 Subject: [PATCH 228/236] Update dl1_production.py --- .../scripts/lst1_magic/semi_automatic_scripts/dl1_production.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 1613ef78..b08153bd 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -230,7 +230,7 @@ def main(): if source is None: source = source_in source_list = [source] - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" From 4acd21c5eda211154fe1beee6b124f7463eef5de Mon Sep 17 00:00:00 2001 From: Elisa-Visentin <121040436+Elisa-Visentin@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:24:44 +0200 Subject: [PATCH 229/236] Update merging_runs.py --- .../scripts/lst1_magic/semi_automatic_scripts/merging_runs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py index 548ac4c4..ca80d7d5 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/merging_runs.py @@ -128,7 +128,7 @@ def main(): if source is None: source = source_in source_list = [source] - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( From da9cc7bf010d3de1367b2a67d3ea3eb69720ee58 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 09:18:47 +0000 Subject: [PATCH 230/236] revert config changes --- .../semi_automatic_scripts/dl1_production.py | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 8b7d9c71..e8043c0c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -43,7 +43,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, NSB_match, source_name, config_file): +def config_file_gen(target_dir, noise_value, NSB_match, source_name, config_file): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -52,6 
+52,8 @@ def config_file_gen(target_dir, NSB_match, source_name, config_file): ---------- target_dir : path Directory to store the results + noise_value : list + List of the noise correction values for LST NSB_match : bool If real data are matched to pre-processed MCs or not source_name : str @@ -67,10 +69,18 @@ def config_file_gen(target_dir, NSB_match, source_name, config_file): ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) + LST_config = config_dict["LST"] + MAGIC_config = config_dict["MAGIC"] + + if not NSB_match: + LST_config["increase_nsb"]["extra_noise_in_dim_pixels"] = noise_value[0] + LST_config["increase_nsb"]["extra_bias_in_dim_pixels"] = noise_value[2] + LST_config["increase_nsb"]["extra_noise_in_bright_pixels"] = noise_value[1] + conf = { "mc_tel_ids": config_dict["mc_tel_ids"], - "LST": config_dict["LST"], - "MAGIC": config_dict["MAGIC"], + "LST": LST_config, + "MAGIC": MAGIC_config, } if source_name == "MC": file_name = f"{target_dir}/v{__version__}/MC/config_DL0_to_DL1.yaml" @@ -448,13 +458,21 @@ def main(): source = source_in source_list = [source] + noise_value = [0, 0, 0] + if not NSB_match: + nsb = config["general"]["NSB_MC"] + + noisebright = 1.15 * pow(nsb, 1.115) + biasdim = 0.358 * pow(nsb, 0.805) + noise_value = [nsb, noisebright, biasdim] + if not NSB_match: # Below we run the analysis on the MC data if (args.analysis_type == "onlyMC") or (args.analysis_type == "doEverything"): directories_generator_MC( str(target_dir), telescope_ids ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen(target_dir, NSB_match, "MC", config_file) # TODO: fix here + config_file_gen(target_dir, noise_value, NSB_match, "MC", config_file) to_process = { "gammas": MC_gammas, "electrons": MC_electrons, @@ -504,9 +522,8 @@ def main(): str(target_dir), telescope_ids, MAGIC_runs, NSB_match, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target config_file_gen( - target_dir, NSB_match, source_name, config_file - ) # TODO: fix here - + target_dir, noise_value, NSB_match, source_name, config_file + ) # Below we run the analysis on the MAGIC data lists_and_bash_gen_MAGIC( From 7f91349ca5aff56b0b50beb775a1e49da2634cc4 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 09:21:29 +0000 Subject: [PATCH 231/236] fix config --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index b08153bd..9baf7269 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -62,13 +62,11 @@ def config_file_gen(target_dir, source_name, config_gen): config_file, "rb" ) as fc: # "rb" mode opens the file in binary format for reading config_dict = yaml.safe_load(fc) - LST_config = config_dict["LST"] - MAGIC_config = config_dict["MAGIC"] conf = { "mc_tel_ids": config_gen["mc_tel_ids"], - "LST": LST_config, - "MAGIC": MAGIC_config, + "LST": config_dict["LST"], + "MAGIC": config_dict["MAGIC"], } file_name = f"{target_dir}/v{__version__}/{source_name}/config_DL0_to_DL1.yaml" From fccd9508a04b67bffddd443607e0b5005ac9b943 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 
13 Aug 2024 09:28:49 +0000 Subject: [PATCH 232/236] swap lines --- .../scripts/lst1_magic/semi_automatic_scripts/dl1_production.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index 9baf7269..a8633727 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -176,8 +176,8 @@ def directories_generator_real(target_dir, telescope_ids, MAGIC_runs, source_nam Name of the target source """ - os.makedirs(f"{target_dir}/v{__version__}/{source_name}/DL1", exist_ok=True) dl1_dir = str(f"{target_dir}/v{__version__}/{source_name}/DL1") + os.makedirs(dl1_dir, exist_ok=True) ########################################### # MAGIC From 71101c652cee548a33696191161946fd7b72c694 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 09:35:18 +0000 Subject: [PATCH 233/236] remove fixed TODO --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index a8633727..1cbcf78c 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -236,7 +236,6 @@ def main(): MAGIC_runs_and_dates, dtype=str, delimiter=",", ndmin=2 ) # READ LIST OF DATES AND RUNS: format table where each line is like "2020_11_19,5093174" - # TODO: fix here above print("*** Converting Calibrated into DL1 data ***") print(f"Process name: {source_name}") print( @@ -246,7 +245,7 @@ def main(): directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen(target_dir, source_name, config) # TODO: fix here + config_file_gen(target_dir, source_name, config) From c3c087319a31f4584a466e21994d7438d9ad9072 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 09:42:34 +0000 Subject: [PATCH 234/236] fix readme --- magicctapipe/scripts/lst1_magic/README.md | 27 +++++------------------ 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index 9da8b49e..d24d53ab 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -29,30 +29,17 @@ During the analysis, some files (i.e., bash scripts, lists of sources and runs) ### DL0 to DL1 -In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3) and MC DL0 to DL1. +In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3). In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. If you need non-standard parameters (e.g., for the cleaning), keep in mind that the `resources/config.yaml` file is installed together with the pipeline, so you will have to copy it, e.g.
in your workspace, modify it, and put the path to the new file in `config_auto_MCP.yaml` (this way you don't need to reinstall the pipeline). -The file `config_auto_MCP.yaml` must contain the telescope IDs, the directories with the MC data (ignored if you set NSB_matching = true), the data selection, and some information on the night sky background (NSB) level and software versions: +The file `config_auto_MCP.yaml` must contain parameters for data selection and some information on the night sky background (NSB) level and software versions: ``` - mc_tel_ids: - LST-1: 1 - LST-2: 0 - LST-3: 0 - LST-4: 0 - MAGIC-I: 2 - MAGIC-II: 3 - directories: workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. - # MC paths below are ignored if you set NSB_matching = true. - MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" # set to "" if you don't want to process these Monte Carlo simulations. - MC_electrons : "" - MC_helium : "" - MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" - MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. @@ -68,17 +55,13 @@ general: base_config_file: '' # path + name to a custom MCP config file. If not provided, the default config.yaml file will be used LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! LST_tailcut : "tailcut84" - focal_length : "effective" simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB - lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB - proton_train_fraction : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest. + lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] env_name : magic-lst # name of the conda environment to be used to process data. cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). - NSB_matching : true # Set to false to process also the MCs. Set to true if adequate MC productions (DLx) are already available on the IT Container. - NSB_MC : 0.5 # extra noise in dim pixels used to process MCs; e.g., you could put here the average NSB value of the processed LST runs. Ignored if NSB_matching=true. - + ``` WARNING: Only the runs for which the `LST_version` parameter matches the `processed_lstchain_file` version in the LST database (i.e., the version used to evaluate the NSB level; generally the last available and processable version of a run) will be processed.
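Before the final fixes below, it is worth pinning down the NSB-binning arithmetic that patches 217, 218 and 223 above adjust. The following is a minimal sketch rather than the verbatim script: the edge construction and the `-0.01` lower bound are taken from the quoted hunks, while the bin-matching helper is reconstructed under the assumption that a run is assigned to the bin whose (lower, upper] interval contains its median NSB, mirroring the `break` added in patch 223.

```python
# Minimal sketch of the NSB binning in LSTnsb.py, assuming the `nsb` list
# from config_auto_MCP.yaml shown above; the nsb_bin() helper is a
# reconstruction, not the verbatim script.
nsb_list = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]

# Half-distance between consecutive nominal NSB values (0.25 throughout here).
width = [a / 2 - b / 2 for a, b in zip(nsb_list[1:], nsb_list[:-1])]
width.append(0.25)  # fixed half-width above the last nominal value

# Upper edge of each bin; the -0.01 head makes 0.0 fall inside the first bin.
nsb_limit = [a + b for a, b in zip(nsb_list, width)]
nsb_limit.insert(0, -0.01)  # -> [-0.01, 0.75, 1.25, 1.75, 2.25, 2.75, 3.25]


def nsb_bin(median_nsb):
    """Nominal NSB of the bin containing median_nsb, or None if it lies
    above the last edge (the nsb_LST_high case that also breaks the loop)."""
    for nominal, lo, hi in zip(nsb_list, nsb_limit[:-1], nsb_limit[1:]):
        if lo < median_nsb <= hi:
            return nominal
    return None


assert nsb_bin(0.0) == 0.5   # 0.0 > -0.01, so it lands in the first bin
assert nsb_bin(1.3) == 1.5
assert nsb_bin(3.4) is None  # above nsb_limit[-1]: flagged as high NSB
```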
From 34cc7d79e17e1e30d6a5b337c31ef1723f8dcc4c Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 13:08:11 +0000 Subject: [PATCH 235/236] fix merge problems --- .../lst1_magic/semi_automatic_scripts/dl1_production.py | 8 ++++---- .../lst1_magic/semi_automatic_scripts/stereo_events.py | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py index e0125c55..bfd17707 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/dl1_production.py @@ -41,7 +41,7 @@ logger.setLevel(logging.INFO) -def config_file_gen(target_dir, source_name, config_gen): +def config_file_gen(target_dir, source_name, config_file): """ Here we create the configuration file needed for transforming DL0 into DL1 @@ -64,7 +64,7 @@ def config_file_gen(target_dir, source_name, config_gen): config_dict = yaml.safe_load(fc) conf = { - "mc_tel_ids": config_gen["mc_tel_ids"], + "mc_tel_ids": config_dict["mc_tel_ids"], "LST": config_dict["LST"], "MAGIC": config_dict["MAGIC"], } @@ -228,7 +228,7 @@ def main(): if source is None: source = source_in source_list = [source] - + for source_name in source_list: MAGIC_runs_and_dates = f"{source_name}_MAGIC_runs.txt" MAGIC_runs = np.genfromtxt( @@ -244,7 +244,7 @@ def main(): directories_generator_real( str(target_dir), telescope_ids, MAGIC_runs, source_name ) # Here we create all the necessary directories in the given workspace and collect the main directory of the target - config_file_gen(target_dir, source_name, config) + config_file_gen(target_dir, source_name, config_file) # Below we run the analysis on the MAGIC data diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py index 2a97990d..a2f8a4ee 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/stereo_events.py @@ -177,7 +177,7 @@ def main(): for source_name in source_list: print("***** Generating file config_stereo.yaml...") - configfile_stereo(target_dir, source_name, config) + configfile_stereo(target_dir, source_name, config_file) # Below we run the analysis on the real data @@ -202,6 +202,5 @@ def main(): os.system(launch_jobs) - if __name__ == "__main__": main() From 827239c3e7e2e8d5fef918ead7d7b05edb063943 Mon Sep 17 00:00:00 2001 From: Elisa-Visentin Date: Tue, 13 Aug 2024 13:16:15 +0000 Subject: [PATCH 236/236] fixed workspace path (and spaces in the config) --- magicctapipe/scripts/lst1_magic/README.md | 10 ++++---- .../config_auto_MCP.yaml | 24 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index d24d53ab..c494d0d7 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -31,7 +31,7 @@ During the analysis, some files (i.e., bash scripts, lists of sources and runs) In this step, we will convert the MAGIC Calibrated data to Data Level (DL) 1 (our goal is to reach DL3). -In your working IT Container directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. 
If you need non-standard parameters (e.g., for the cleaning), take care that the `resources/config.yaml` file gets installed when you install the pipeline, so you will have to copy it, e.g. in your workspace, modify it and put the path to this new file in the `config_auto_MCP.yaml` (this way you don't need to install again the pipeline). +In your working IT Container directory (i.e., `workspace_dir`), open your environment with the command `conda activate {env_name}` and update the file `config_auto_MCP.yaml` according to your analysis. If you need non-standard parameters (e.g., for the cleaning), take care that the `resources/config.yaml` file gets installed when you install the pipeline, so you will have to copy it, e.g. in your workspace, modify it and put the path to this new file in the `config_auto_MCP.yaml` (this way you don't need to install again the pipeline). The file `config_auto_MCP.yaml` must contain parameters for data selection and some information on the night sky background (NSB) level and software versions: @@ -96,9 +96,9 @@ The command `dl1_production` does a series of things: - Creates a directory with the target name within the directory `yourprojectname/{MCP_version}` and several subdirectories inside it that are necessary for the rest of the data reduction. The main directories are: ``` -/fefs/aswg/workspace/yourname/yourprojectname/VERSION/ -/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1 -/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1/[subdirectories] +workspace_dir/VERSION/ +workspace_dir/VERSION/{source}/DL1 +workspace_dir/VERSION/{source}/DL1/[subdirectories] ``` where [subdirectories] stands for several subdirectories containing the MAGIC subruns in the DL1 format. - Generates a configuration file called `config_DL0_to_DL1.yaml` with telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory `[...]/yourprojectname/VERSION/{source}/` created in the previous step. @@ -114,7 +114,7 @@ or > $ squeue -u your_user_name -Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. +Once it is done, all of the subdirectories in `workspace_dir/VERSION/{source}/DL1` will be filled with files of the type `dl1_MX.RunXXXXXX.0XX.h5` for each MAGIC subrun. WARNING: some of these jobs could fail due to 'broken' input files: before moving to the next step, check for failed jobs (through `job_accounting` and/or log files) and remove the output files produced by these failed jobs (these output files will generally have a very small size, lower than few kB, and cannot be read in the following steps) diff --git a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml index 7ad02b50..1ce9c418 100644 --- a/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml +++ b/magicctapipe/scripts/lst1_magic/semi_automatic_scripts/config_auto_MCP.yaml @@ -1,24 +1,24 @@ directories: - workspace_dir : "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. + workspace_dir: "/fefs/aswg/workspace/elisa.visentin/auto_MCP_PR/" # Output directory where all the data products will be saved. 
data_selection: source_name_database: "CrabNebula" # MUST BE THE SAME AS IN THE DATABASE; Set to null to process all sources in the given time range. source_name_output: 'Crabtest' # Name tag of your target. Used only if source_name_database != null. - time_range : True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). - min : "2023_11_17" - max : "2024_03_03" - date_list : ['2020_12_15','2021_03_11'] # LST list of days to be processed (only if time_range=False), format: YYYY_MM_DD. + time_range: True # Search for all runs in a LST time range (e.g., 2020_01_01 -> 2022_01_01). + min: "2023_11_17" + max: "2024_03_03" + date_list: ['2020_12_15','2021_03_11'] # LST list of days to be processed (only if time_range=False), format: YYYY_MM_DD. skip_LST_runs: [3216,3217] # LST runs to ignore. skip_MAGIC_runs: [5094658] # MAGIC runs to ignore. general: base_config_file: '' # path + name to a custom MCP config file. If not provided, the default config.yaml file will be used - LST_version : "v0.10" # check the `processed_lstchain_file` version in the LST database! - LST_tailcut : "tailcut84" - simtel_nsb : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB - lstchain_modified_config : true # use_flatfield_heuristic = True to evaluate NSB - nsb : [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] - env_name : magic-lst # name of the conda environment to be used to process data. - cluster : "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF). + LST_version: "v0.10" # check the `processed_lstchain_file` version in the LST database! + LST_tailcut: "tailcut84" + simtel_nsb: "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray/node_theta_14.984_az_355.158_/output_v1.4/simtel_corsika_theta_14.984_az_355.158_run10.simtel.gz" # simtel file (DL0) to evaluate NSB + lstchain_modified_config: true # use_flatfield_heuristic = True to evaluate NSB + nsb: [0.5, 1.0, 1.5, 2.0, 2.5, 3.0] + env_name: magic-lst # name of the conda environment to be used to process data. + cluster: "SLURM" # cluster management system on which data are processed. At the moment we have only SLURM available, in the future maybe also condor (PIC, CNAF).
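Taken together, patches 219, 220 and 226 through 235 converge on a single source-selection idiom repeated across the scripts. As a compact reference, here is a self-contained sketch of the resolved logic; it assumes the `config_auto_MCP.yaml` layout shown above and that `list_sources.dat` is the joblib dump produced by `list_from_h5` when no single source is requested (neither file is created by this snippet).

```python
# Sketch of the source-selection logic the patches above converge on.
import joblib
import yaml

with open("config_auto_MCP.yaml", "rb") as f:
    config = yaml.safe_load(f)

source_in = config["data_selection"]["source_name_database"]
source = config["data_selection"]["source_name_output"]

if source_in is None:
    # A null database name means "process every source in the time range":
    # list_from_h5 has dumped them all into list_sources.dat.
    source_list = joblib.load("list_sources.dat")
else:
    if source is None:
        # Patch 219 behaviour: the output tag defaults to the database name.
        source = source_in
    source_list = [source]

for source_name in source_list:
    print(f"Processing {source_name}")
```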