From 46a81596a2c1e394b75540007d2fe5c21d135ec8 Mon Sep 17 00:00:00 2001 From: Adam Myers Date: Tue, 21 May 2024 15:11:56 -0700 Subject: [PATCH 1/2] add new function to check overall format of secondary targeting files --- py/desitarget/secondary.py | 157 +++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/py/desitarget/secondary.py b/py/desitarget/secondary.py index 78e0a81c..fb588c0e 100644 --- a/py/desitarget/secondary.py +++ b/py/desitarget/secondary.py @@ -33,6 +33,8 @@ should be True (override) or False (do not override) for each target. In .txt files it should be 1 or 0 instead of True/False, and will be loaded from the text file as the corresponding Boolean. + +.. _`second DESI call for secondary targets`: https://desi.lbl.gov/trac/wiki/TargetSelectionWG/SpareFiber#Fileformats """ import os import re @@ -69,11 +71,21 @@ log = get_logger() start = time() +# ADM this was the original SV/main data model for secondary targets. indatamodel = np.array([], dtype=[ ('RA', '>f8'), ('DEC', '>f8'), ('PMRA', '>f4'), ('PMDEC', '>f4'), ('REF_EPOCH', '>f4'), ('OVERRIDE', '?') ]) +# ADM this is the updated, circa 2024 data model. +newindatamodel = np.array([], dtype=[ + ('RELEASE', '>i2'), ('BRICKID', '>i4'), ('OBJID', '>i4'), + ('RA', '>f8'), ('DEC', '>f8'), ('PMRA', '>f4'), + ('PMDEC', '>f4'), ('REF_EPOCH', '>f4'), ('FLUX_G', '>f4'), + ('FLUX_R', '>f4'), ('FLUX_Z', '>f4'), ('OVERRIDE', '?') +]) + + # ADM the columns not in the primary target files are: # OVERRIDE - If True/1 force as a target even if there is a primary. # - If False/0 allow this to be replaced by a primary target. @@ -503,6 +515,151 @@ def read_files(scxdir, scnd_mask, subset=False): return np.concatenate(scxall) +def check_file_format(filename, docfilename=None, new_program=True): + """Check a (circa 2024) input secondary file is correctly formatted. + + Parameters + ---------- + filename : :class:`str` + The full path to the file to check. + docfilename : :class:`str`, optional + If passed, then also check the format of the documentation file. + Again, this should be the full path to the documentation file. + new_program : :class:`bool`, optional, defaults to ``True`` + Pass ``False`` if your targets correspond to an existing program. + Passing ``False`` will skip the check that you aren't duplicating + an existing target bit-name. + + Returns + ------- + :class:`bool` + ``True`` if the file is correctly formatted. + + Notes + ----- + - This specifically checks the 2024 file format from the + `second DESI call for secondary targets`_ + """ + # ADM check the filename has an allowed extension and is upper-case. + basename = os.path.basename(filename) + bitname = basename.strip(".fits") + if bitname != bitname.upper() or ".fits" not in filename: + msg = (f"Filename (omitting extension) should be upper-case " + f"but filename is actually {basename}") + log.error(msg) + raise ValueError(msg) + + # ADM If the docfile was passed, check it has an allowed extension, + # ADM is upper-case, and that the doc and data filenames correspond. + if docfilename is not None: + docbasename = os.path.basename(docfilename) + docbitname, docext = os.path.splitext(docbasename) + if docext not in [".txt", ".ipynb"]: + msg = (f"Documentation file should be in .txt or .ipynb format but " + f"filename is {docfilename}") + log.error(msg) + raise ValueError(msg) + if docbitname != docbitname.upper(): + msg = (f"Documentation file (omitting extension) should be upper-" + f"case but filename is actually {docbasename}") + log.error(msg) + raise ValueError(msg) + # ADM check that the doc and data filenames correspond. + if docbitname != bitname: + msg = ("Data and documentation filenames should correspond (omitting" + f" extensions). But they are {docbasename} and {basename}") + log.error(msg) + raise ValueError(msg) + + # ADM check bit-part of filename isn't longer than 20 characters. + if len(bitname) > 20: + msg = (f"length of filename (omitting extension) should not exceed " + f"20 characters, but filename is actually {basename}") + log.error(msg) + raise ValueError(msg) + + # ADM check bit name doesn't match any previous bit name. + bitlist = [] + for surv in "main", "sv1", "sv2", "sv3": + if surv == "main": + Mx = import_module("desitarget.targetmask") + else: + Mx = import_module(f"desitarget.{surv}.{surv}_targetmask") + bitlist += Mx.desi_mask.names() + Mx.bgs_mask.names() + bitlist += Mx.mws_mask.names() + Mx.scnd_mask.names() + if bitname in bitlist and new_program: + msg = (f"Your filename {basename} matches an existing DESI target bit. " + f"Please choose a different filename.") + log.error(msg) + raise ValueError(msg) + + # ADM check the FITS file doesn't have data in extension 0. + data_in_zero = fitsio.read(filename, 0) + if data_in_zero is not None: + msg = (f"Only extension 1 in {filename} should contain data, but " + f"the following data is in extension 0: {data_in_zero}") + log.error(msg) + raise OSError(msg) + + # ADM check the FITS file doesn't have more than 1 extension. + n_ext = len(fitsio.FITS(filename)) + if n_ext > 2: + msg = (f"{filename} should only contain extensions 0 and 1, but " + f"{filename} actually includes {n_ext} total extensions") + log.error(msg) + raise OSError(msg) + + # ADM read in the FITS file. fitsio will flag its own meaningful + # ADM exception if the data model is wrong. + datamodel = newindatamodel.dtype + scxin = fitsio.read(filename, + columns=datamodel.names) + + # ADM check the column names are all correct (and upper-case). + if set(datamodel.names) != set(scxin.dtype.names): + msg = (f"Columns in {filename} should be {datamodel.names} but are " + f"actually {scxin.dtype.names}.") + log.error(msg) + raise ValueError(msg) + + # ADM check the overall data model. + msg = f"Data model mismatch: {datamodel.descr} in {filename} column " + for col in datamodel.names: + if scxin[col].dtype != newindatamodel[col].dtype: + msg += f"{col} is {scxin[col].dtype} not {newindatamodel[col].dtype}" + log.error(msg) + raise ValueError(msg) + + # ADM check RA/Dec are reasonable. + outofbounds = ((scxin["RA"] >= 360.) | (scxin["RA"] < 0) | + (scxin["DEC"] > 90) | (scxin["DEC"] < -90)) + if np.any(outofbounds): + msg = (f"RA/Dec outside of range in {filename}: " + f"RA={scxin['RA'][outofbounds]}, Dec={scxin['DEC'][outofbounds]}") + log.error(msg) + raise ValueError(msg) + + # ADM check for NaNs which are not allowed in downstream DESI code. + for col in datamodel.names: + if np.any(np.isnan(scxin[col])): + msg = f"Column {col} in {filename} includes at least one NaN value." + log.error(msg) + raise ValueError(msg) + + # ADM check fluxes are not zero. + zeroval = False + for col in ["FLUX_G", "FLUX_R", "FLUX_Z"]: + zeroval |= np.any((scxin[col] < 1e-16) & (scxin[col] > -1e-16)) + if zeroval: + msg = (f"Some fluxes in {filename} have values of zero. These targets " + f"will be interpreted as very bright and discarded. Please " + f"provide meaningful (and honest!) values of flux.") + log.error(msg) + raise ValueError(msg) + + return True + + def add_primary_info(scxtargs, priminfodir): """Add TARGETIDs to secondaries from directory of primary matches. From 450fc28effdecd6afe9811130b5f7c1e792144ca Mon Sep 17 00:00:00 2001 From: Adam Myers Date: Tue, 21 May 2024 15:15:23 -0700 Subject: [PATCH 2/2] Update changes docs --- doc/changes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/changes.rst b/doc/changes.rst index 38f46750..858b55f3 100644 --- a/doc/changes.rst +++ b/doc/changes.rst @@ -5,11 +5,13 @@ desitarget Change Log 2.7.1 (unreleased) ------------------ +* Add function to check format of secondary target files [`PR #824`_]. * Function to match RA/Dec positions to Main Survey targets [`PR #820`_]. * Bump astropy from 5.0 to 5.3.3 (dependabot) [`PR #815`_]. .. _`PR #815`: https://github.com/desihub/desitarget/pull/815 .. _`PR #820`: https://github.com/desihub/desitarget/pull/820 +.. _`PR #824`: https://github.com/desihub/desitarget/pull/824 2.7.0 (2023-12-05) ------------------