diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index af0dcf53..75c3f80b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,10 +28,10 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: stack python-version: ${{ matrix.py }} @@ -45,6 +45,11 @@ jobs: conda install -y --file etc/conda_requirements.txt conda info + - name: Install dust_extinction + run: | + conda install -y dust_extinction + conda info + - name: Install rubin_sim_data run: | mkdir rubin_sim_data diff --git a/etc/conda_requirements.txt b/etc/conda_requirements.txt index 9516ed03..72ebe0b5 100644 --- a/etc/conda_requirements.txt +++ b/etc/conda_requirements.txt @@ -2,5 +2,4 @@ stackvana>=0.2023.32 gitpython -dust_extinction sncosmo diff --git a/pyproject.toml b/pyproject.toml index 22381d99..ea881def 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,6 @@ dependencies = [ 'pyarrow', 'pandas', 'sncosmo', - 'pyyaml-include' ] requires-python = ">=3.7" # For setuptools >= 61.0 support diff --git a/skycatalogs/_version.py b/skycatalogs/_version.py index 9ef12a9d..773f565f 100644 --- a/skycatalogs/_version.py +++ b/skycatalogs/_version.py @@ -1 +1 @@ -__version__ = "1.7.0-rc2" +__version__ = "1.7.0-rc3" diff --git a/skycatalogs/data/ci_yamlinclude/gaia_sdf.yaml b/skycatalogs/data/ci_yamlinclude/gaia_sdf.yaml new file mode 100644 index 00000000..7f4753a9 --- /dev/null +++ b/skycatalogs/data/ci_yamlinclude/gaia_sdf.yaml @@ -0,0 +1,7 @@ +# butler refcat available only at SLAC sdf + area_partition: None + butler_parameters: + collections: HSC/defaults + dstype: gaia_dr2_20200414 + data_file_type: butler_refcat + sed_method: use_lut diff --git a/skycatalogs/data/ci_yamlinclude/star.yaml b/skycatalogs/data/ci_yamlinclude/star.yaml new file mode 100644 index 00000000..8718794e --- 
/dev/null +++ b/skycatalogs/data/ci_yamlinclude/star.yaml @@ -0,0 +1,13 @@ + MW_extinction: F19 + area_partition: + nside: 32 + ordering: ring + type: healpix + data_file_type: parquet + file_template: pointsource_(?P\d+).parquet + flux_file_template: pointsource_flux_(?P\d+).parquet + internal_extinction: None + sed_file_root_env_var: SIMS_SED_LIBRARY_DIR + sed_model: file_nm + inputs: + star_truth: /global/cfs/cdirs/lsst/groups/SSim/DC2/dc2_stellar_healpixel.db diff --git a/skycatalogs/data/ci_yamlinclude/subdir/galaxy_include.yaml b/skycatalogs/data/ci_yamlinclude/subdir/galaxy_include.yaml new file mode 100644 index 00000000..098293eb --- /dev/null +++ b/skycatalogs/data/ci_yamlinclude/subdir/galaxy_include.yaml @@ -0,0 +1,11 @@ + area_partition: !include subsub/area_partition.yaml + attribute_aliases: + size_knots_true: size_disk_true + size_minor_knots_true: size_minor_disk_true + composite: + bulge: required + disk: required + knots: optional + data_file_type: parquet + file_template: galaxy_(?P\d+).parquet + flux_file_template: galaxy_flux_(?P\d+).parquet diff --git a/skycatalogs/data/ci_yamlinclude/subdir/subsub/area_partition.yaml b/skycatalogs/data/ci_yamlinclude/subdir/subsub/area_partition.yaml new file mode 100644 index 00000000..5734b100 --- /dev/null +++ b/skycatalogs/data/ci_yamlinclude/subdir/subsub/area_partition.yaml @@ -0,0 +1,3 @@ +nside: 32 +ordering: ring +type: healpix diff --git a/skycatalogs/data/ci_yamlinclude/top.yaml b/skycatalogs/data/ci_yamlinclude/top.yaml new file mode 100644 index 00000000..944c9e03 --- /dev/null +++ b/skycatalogs/data/ci_yamlinclude/top.yaml @@ -0,0 +1,18 @@ +catalog_dir: for_imsim_subpixel +catalog_name: skyCatalog +galaxy_magnitude_cut: 29.0 +knots_magnitude_cut: 27.0 +object_types: + gaia_star: !include gaia_sdf.yaml + galaxy: !include subdir/galaxy_include.yaml + star: !include star.yaml +provenance: + skyCatalogs_repo: + git_branch: u/jrbogart/nsides + git_hash: f0539b4397159f87010bc47785d03434b541232d + 
class YamlIncludeLoader(yaml.SafeLoader):

    """YAML Loader that supports file include directives.

    Uses the ``!include`` directive in a YAML file to point to another
    YAML file to be included.  A path starting with ``/`` (or otherwise
    absolute) is used as is.  Any other path is resolved relative to the
    directory of the file that contains the directive: every included
    file is parsed by its own ``YamlIncludeLoader``, so nested includes
    are relative to the including file, not to the top-level file.

        star: !include star.yaml

    The directive may be applied to a single path (scalar), to a list of
    paths, or to a mapping whose values are paths; the result is the
    parsed content, a list of parsed contents, or a dict of parsed
    contents respectively.

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
            data = yaml.load(f, Loader=YamlIncludeLoader)

    Parameters
    ----------
    filestream : text io stream
        The stream to parse.  Its ``name`` attribute, when it is a path,
        supplies the directory used to resolve relative includes; a
        stream without a usable ``name`` resolves them against the
        current working directory.

    This code was adapted from the LSST Science Pipelines Butler.
    See in particular the Loader class in
    daf_butler/python/lsst/daf/butler/_config.py in the daf_butler repo
    https://github.com/lsst/daf_butler
    """
    def __init__(self, filestream):
        super().__init__(filestream)
        self._logger = logging.getLogger('YamlIncludeLoader')

        # Strings, in-memory streams and fd-based files have no path-like
        # ``name``; fall back to '' so that os.path.join leaves relative
        # include paths untouched (i.e. relative to the working directory).
        stream_name = getattr(filestream, 'name', None)
        if isinstance(stream_name, (str, bytes, os.PathLike)):
            self._current_dir = os.path.dirname(os.fsdecode(stream_name))
        else:
            self._current_dir = ''

        # add_constructor is a classmethod; repeating the registration for
        # each instance is harmless and keeps the class self-contained.
        self.add_constructor("!include", YamlIncludeLoader.include)

    def include(self, node: yaml.Node) -> "list[Any] | dict[str, Any]":
        """Construct the value of an ``!include`` node.

        The return annotation is a string so that it is not evaluated at
        definition time (``X | Y`` on builtin generics needs Python 3.10).

        Parameters
        ----------
        node : yaml.Node
            Scalar (one path), sequence (list of paths) or mapping
            (string keys, path values).

        Returns
        -------
        Parsed content of the file for a scalar node, a list of parsed
        contents for a sequence node, a dict of parsed contents for a
        mapping node.

        Raises
        ------
        TypeError
            A mapping node has a key which is not a string.
        yaml.constructor.ConstructorError
            The node is none of the three supported kinds.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename)
                    for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        # Logger.error takes no ``file`` keyword; passing one raised a
        # TypeError which masked the intended ConstructorError.
        message = "Unrecognised node type in !include statement"
        self._logger.error(message)
        raise yaml.constructor.ConstructorError(
            None, None, message, getattr(node, 'start_mark', None))

    def extractFile(self, filepath: str) -> Any:
        """Parse the YAML file named in an ``!include`` directive.

        Parameters
        ----------
        filepath : str
            Absolute path, or path relative to the directory of the file
            currently being parsed.

        Returns
        -------
        The parsed content of the file; ``!include`` directives inside it
        are expanded as well.
        """
        # The leading-'/' test is kept alongside isabs so that paths which
        # were treated as absolute before still are on every platform.
        if filepath.startswith('/') or os.path.isabs(filepath):
            actual_path = filepath
        else:
            actual_path = os.path.join(self._current_dir, filepath)
        self._logger.info("Opening YAML file via !include: %s", actual_path)

        # A fresh loader is created for the included file, so its own
        # relative includes resolve against that file's directory.
        # yaml.load is safe here: the loader derives from SafeLoader.
        with open(actual_path) as f:
            return yaml.load(f, YamlIncludeLoader)


def open_config_file(config_file):
    '''
    Given path to config file, return a Config object.  ``!include``
    directives in the file are expanded by YamlIncludeLoader.
    '''
    with open(config_file) as f:
        content = yaml.load(f, Loader=YamlIncludeLoader)
    return Config(content)
class skyCatalogsTestCase(unittest.TestCase):
    """Check that YamlIncludeLoader expands the ci_yamlinclude fixtures."""

    def setUp(self):
        # Get directory containing top-level file
        repo_root = Path(__file__).resolve().parents[1]
        self._yaml_dir = os.path.join(repo_root, 'skycatalogs', 'data',
                                      'ci_yamlinclude')

    def test_include(self):
        # Top level file references to
        #    file in the same directory
        #    file in subdirectory
        #    file which itself has an !include directive
        top_path = os.path.join(self._yaml_dir, 'top.yaml')

        # If the load succeeds, all required files have been found
        with open(top_path) as f:
            data = yaml.load(f, Loader=YamlIncludeLoader)

        # unittest assertions are used instead of bare assert statements:
        # they are not stripped under ``python -O`` and report the
        # offending container on failure.
        self.assertIn('object_types', data)
        objs = data['object_types']
        for object_type in ('gaia_star', 'galaxy', 'star'):
            self.assertIn(object_type, objs)

        # galaxy comes from a file in a subdirectory which in turn
        # includes subsub/area_partition.yaml (nside: 32)
        self.assertIn('area_partition', objs['galaxy'])
        area_partition = objs['galaxy']['area_partition']
        self.assertIn('nside', area_partition)
        self.assertEqual(area_partition['nside'], 32)


if __name__ == '__main__':
    unittest.main()