Skip to content

Commit

Permalink
Merge pull request #89 from LSSTDESC/u/jrbogart/yaml-include-class
Browse files Browse the repository at this point in the history
Eliminate dependence on pyyaml-include by adapting Butler loader
  • Loading branch information
JoanneBogart authored Apr 23, 2024
2 parents d1a98e5 + 0a34f61 commit 7a748c7
Show file tree
Hide file tree
Showing 12 changed files with 182 additions and 18 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ jobs:
shell: bash -l {0}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Setup conda
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: stack
python-version: ${{ matrix.py }}
Expand All @@ -45,6 +45,11 @@ jobs:
conda install -y --file etc/conda_requirements.txt
conda info
- name: Install dust_extinction
run: |
conda install -y dust_extinction
conda info
- name: Install rubin_sim_data
run: |
mkdir rubin_sim_data
Expand Down
1 change: 0 additions & 1 deletion etc/conda_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@

stackvana>=0.2023.32
gitpython
dust_extinction
sncosmo
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ dependencies = [
'pyarrow',
'pandas',
'sncosmo',
'pyyaml-include'
]
requires-python = ">=3.7" # For setuptools >= 61.0 support

Expand Down
2 changes: 1 addition & 1 deletion skycatalogs/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.7.0-rc2"
__version__ = "1.7.0-rc3"
7 changes: 7 additions & 0 deletions skycatalogs/data/ci_yamlinclude/gaia_sdf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# butler refcat available only at SLAC sdf
area_partition: None
butler_parameters:
collections: HSC/defaults
dstype: gaia_dr2_20200414
data_file_type: butler_refcat
sed_method: use_lut
13 changes: 13 additions & 0 deletions skycatalogs/data/ci_yamlinclude/star.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
MW_extinction: F19
area_partition:
nside: 32
ordering: ring
type: healpix
data_file_type: parquet
file_template: pointsource_(?P<healpix>\d+).parquet
flux_file_template: pointsource_flux_(?P<healpix>\d+).parquet
internal_extinction: None
sed_file_root_env_var: SIMS_SED_LIBRARY_DIR
sed_model: file_nm
inputs:
star_truth: /global/cfs/cdirs/lsst/groups/SSim/DC2/dc2_stellar_healpixel.db
11 changes: 11 additions & 0 deletions skycatalogs/data/ci_yamlinclude/subdir/galaxy_include.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
area_partition: !include subsub/area_partition.yaml
attribute_aliases:
size_knots_true: size_disk_true
size_minor_knots_true: size_minor_disk_true
composite:
bulge: required
disk: required
knots: optional
data_file_type: parquet
file_template: galaxy_(?P<healpix>\d+).parquet
flux_file_template: galaxy_flux_(?P<healpix>\d+).parquet
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nside: 32
ordering: ring
type: healpix
18 changes: 18 additions & 0 deletions skycatalogs/data/ci_yamlinclude/top.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
catalog_dir: for_imsim_subpixel
catalog_name: skyCatalog
galaxy_magnitude_cut: 29.0
knots_magnitude_cut: 27.0
object_types:
gaia_star: !include gaia_sdf.yaml
galaxy: !include subdir/galaxy_include.yaml
star: !include star.yaml
provenance:
skyCatalogs_repo:
git_branch: u/jrbogart/nsides
git_hash: f0539b4397159f87010bc47785d03434b541232d
git_status:
- UNTRACKED_FILES
versioning:
code_version: 1.7.0-rc3
schema_version: 1.2.0
skycatalog_root: /pscratch/sd/j/jrbogart/desc/skycatalogs
14 changes: 6 additions & 8 deletions skycatalogs/skyCatalogs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import os
import sys
import re
import yaml
from yamlinclude import YamlIncludeConstructor
import logging
import healpy
import numpy as np
Expand Down Expand Up @@ -845,12 +843,12 @@ def open_catalog(config_file, mp=False, skycatalog_root=None, verbose=False):
# Get bandpasses in case we need to compute fluxes
_ = load_lsst_bandpasses()
_ = load_roman_bandpasses()
base_dir = os.path.dirname(config_file)
YamlIncludeConstructor.add_to_loader_class(loader_class=yaml.SafeLoader,
base_dir=base_dir)
with open(config_file) as f:
return SkyCatalog(yaml.safe_load(f), skycatalog_root=skycatalog_root,
mp=mp, verbose=verbose)

from skycatalogs.utils.config_utils import open_config_file

config_dict = open_config_file(config_file)
return SkyCatalog(config_dict, skycatalog_root=skycatalog_root, mp=mp,
verbose=verbose)


if __name__ == '__main__':
Expand Down
80 changes: 75 additions & 5 deletions skycatalogs/utils/config_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import sys
import yaml
import git
import logging
from typing import Any
from .exceptions import ConfigDuplicateKeyError
# import jsonschema

from collections import namedtuple

__all__ = ['Config', 'open_config_file', 'Tophat', 'create_config',
Expand All @@ -16,12 +16,81 @@
CURRENT_SCHEMA_VERSION = '1.2.0'


class YamlIncludeLoader(yaml.SafeLoader):
    """YAML loader that supports file include directives.

    An ``!include`` tag in a YAML file points to one or more other YAML
    files whose parsed content replaces the tagged node, e.g.::

        storageClasses: !include storageClasses.yaml

    A relative path is resolved against the directory of the file that
    contains the directive.  Included files are parsed with this same
    loader, so a nested ``!include`` is resolved relative to the included
    file, not relative to the top-level file.  Absolute paths are used
    unchanged.

    The tag may be applied to a scalar (a single path), a sequence of
    paths (yielding a list of file contents) or a mapping whose values
    are paths (yielding a dict of file contents).

    Parameters
    ----------
    filestream : text io stream
        The stream to parse.  When its ``name`` attribute is a string it
        supplies the directory used to resolve relative include paths;
        otherwise relative paths are resolved against the current
        working directory.

    Examples
    --------
    >>> with open("document.yaml", "r") as f:  # doctest: +SKIP
    ...     data = yaml.load(f, Loader=YamlIncludeLoader)

    Notes
    -----
    This code was adapted from the LSST Science Pipelines Butler.
    See in particular the Loader class in
    daf_butler/python/lsst/daf/butler/_config.py in the daf_butler repo
    https://github.com/lsst/daf_butler
    """
    def __init__(self, filestream):
        super().__init__(filestream)
        self._logger = logging.getLogger('YamlIncludeLoader')

        # Only a string ``name`` is a usable path: in-memory streams have
        # no name, and files opened from a descriptor have an integer one.
        stream_name = getattr(filestream, 'name', None)
        if isinstance(stream_name, str):
            self._current_dir = os.path.dirname(stream_name)
        else:
            self._current_dir = ''

        # add_constructor registers on the class; repeating it for each
        # instance simply rebinds the same handler.
        self.add_constructor("!include", YamlIncludeLoader.include)

    # The return annotation is quoted so that it is not evaluated when
    # the class is defined; the ``X | Y`` form needs Python >= 3.10.
    def include(self, node: yaml.Node) -> "list[Any] | dict[str, Any]":
        """Construct the value for a node tagged with ``!include``.

        Parameters
        ----------
        node : yaml.Node
            Scalar, sequence or mapping node naming the file(s) to load.

        Returns
        -------
        The content of the named file for a scalar node, a list of file
        contents for a sequence node, or a dict of file contents (same
        keys as the node) for a mapping node.

        Raises
        ------
        TypeError
            If a mapping node has a key that is not a string.
        yaml.constructor.ConstructorError
            If the node is not a scalar, sequence or mapping node.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename)
                    for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        # Logger methods do not accept a ``file`` keyword, so log the
        # plain message and attach it (plus the node position, when
        # available) to the exception.
        message = "Unrecognised node type in !include statement"
        self._logger.error(message)
        raise yaml.constructor.ConstructorError(
            problem=message,
            problem_mark=getattr(node, 'start_mark', None))

    def extractFile(self, filepath: str) -> Any:
        """Load and return the content of the YAML file at ``filepath``.

        Parameters
        ----------
        filepath : str
            Path taken from an ``!include`` directive.  A relative path
            is joined to the directory of the file being parsed.

        Returns
        -------
        The parsed content of the file; any ``!include`` directives it
        contains are expanded as well.
        """
        if os.path.isabs(filepath):
            actual_path = filepath
        else:
            actual_path = os.path.join(self._current_dir, filepath)
        self._logger.info("Opening YAML file via !include: %s", actual_path)

        # Parse with this loader so nested includes are honoured
        with open(actual_path) as f:
            return yaml.load(f, YamlIncludeLoader)


def open_config_file(config_file):
    '''
    Given path to config file, return a Config object

    The file is parsed with YamlIncludeLoader, so any ``!include``
    directives it contains are expanded before the Config is built.
    '''
    with open(config_file) as f:
        content = yaml.load(f, Loader=YamlIncludeLoader)
    return Config(content)


Tophat = namedtuple('Tophat', ['start', 'width'])
Expand Down Expand Up @@ -91,6 +160,9 @@ def __getitem__(self, k):
'''
return self._cfg.__getitem__(k)

def __contains__(self, k):
    '''
    Support the ``in`` operator by testing membership of k in the
    underlying self._cfg object
    '''
    found = k in self._cfg
    return found

def list_sed_models(self):
    '''
    Return the keys of the 'SED_models' entry of the configuration
    '''
    sed_models = self._cfg['SED_models']
    return sed_models.keys()

Expand Down Expand Up @@ -210,8 +282,6 @@ def write_yaml(input_dict, outpath, overwrite=False, logname=None):

def create_config(catalog_name, logname=None):
    '''
    Return a Config whose initial content is just the catalog name;
    logname is handed to Config as its second argument
    '''
    initial_content = {'catalog_name': catalog_name}
    return Config(initial_content, logname)


def assemble_cosmology(cosmology):
Expand Down
41 changes: 41 additions & 0 deletions tests/test_yamlinclude.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Test yaml loader which handles !include tag
"""
import unittest
import yaml
import os
from pathlib import Path

from skycatalogs.utils.config_utils import YamlIncludeLoader

class skyCatalogsTestCase(unittest.TestCase):
    """Check that YamlIncludeLoader expands ``!include`` directives."""

    def setUp(self):
        # Get directory containing top-level file
        self._yaml_dir = os.path.join(Path(__file__).resolve().parents[1],
                                      'skycatalogs', 'data', 'ci_yamlinclude')

    def test_include(self):
        # The top-level file refers to
        #    a file in the same directory
        #    a file in a subdirectory
        #    a file which itself has an !include directive
        top_path = os.path.join(self._yaml_dir, 'top.yaml')

        # If the load succeeds, all required files have been found
        with open(top_path) as f:
            data = yaml.load(f, Loader=YamlIncludeLoader)

        # Verify that expected keys exist.  unittest assertions are used
        # rather than bare asserts so the checks still run under
        # ``python -O`` and report the offending container on failure.
        self.assertIn('object_types', data)
        objs = data['object_types']
        self.assertIn('gaia_star', objs)
        self.assertIn('galaxy', objs)
        self.assertIn('area_partition', objs['galaxy'])
        self.assertIn('nside', objs['galaxy']['area_partition'])

if __name__ == '__main__':
unittest.main()

0 comments on commit 7a748c7

Please sign in to comment.