Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eliminate dependence on pyyaml-include by adapting Butler loader #89

Merged
merged 11 commits into from
Apr 23, 2024
Merged
9 changes: 7 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ jobs:
shell: bash -l {0}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Setup conda
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: stack
python-version: ${{ matrix.py }}
Expand All @@ -45,6 +45,11 @@ jobs:
conda install -y --file etc/conda_requirements.txt
conda info

- name: Install dust_extinction
run: |
conda install -y dust_extinction
conda info

- name: Install rubin_sim_data
run: |
mkdir rubin_sim_data
Expand Down
1 change: 0 additions & 1 deletion etc/conda_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@

stackvana>=0.2023.32
gitpython
dust_extinction
sncosmo
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ dependencies = [
'pyarrow',
'pandas',
'sncosmo',
'pyyaml-include'
]
requires-python = ">=3.7" # For setuptools >= 61.0 support

Expand Down
2 changes: 1 addition & 1 deletion skycatalogs/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.7.0-rc2"
__version__ = "1.7.0-rc3"
7 changes: 7 additions & 0 deletions skycatalogs/data/ci_yamlinclude/gaia_sdf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# butler refcat available only at SLAC sdf
area_partition: None
butler_parameters:
collections: HSC/defaults
dstype: gaia_dr2_20200414
data_file_type: butler_refcat
sed_method: use_lut
13 changes: 13 additions & 0 deletions skycatalogs/data/ci_yamlinclude/star.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
MW_extinction: F19
area_partition:
nside: 32
ordering: ring
type: healpix
data_file_type: parquet
file_template: pointsource_(?P<healpix>\d+).parquet
flux_file_template: pointsource_flux_(?P<healpix>\d+).parquet
internal_extinction: None
sed_file_root_env_var: SIMS_SED_LIBRARY_DIR
sed_model: file_nm
inputs:
star_truth: /global/cfs/cdirs/lsst/groups/SSim/DC2/dc2_stellar_healpixel.db
11 changes: 11 additions & 0 deletions skycatalogs/data/ci_yamlinclude/subdir/galaxy_include.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
area_partition: !include subsub/area_partition.yaml
attribute_aliases:
size_knots_true: size_disk_true
size_minor_knots_true: size_minor_disk_true
composite:
bulge: required
disk: required
knots: optional
data_file_type: parquet
file_template: galaxy_(?P<healpix>\d+).parquet
flux_file_template: galaxy_flux_(?P<healpix>\d+).parquet
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nside: 32
ordering: ring
type: healpix
18 changes: 18 additions & 0 deletions skycatalogs/data/ci_yamlinclude/top.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
catalog_dir: for_imsim_subpixel
catalog_name: skyCatalog
galaxy_magnitude_cut: 29.0
knots_magnitude_cut: 27.0
object_types:
gaia_star: !include gaia_sdf.yaml
galaxy: !include subdir/galaxy_include.yaml
star: !include star.yaml
provenance:
skyCatalogs_repo:
git_branch: u/jrbogart/nsides
git_hash: f0539b4397159f87010bc47785d03434b541232d
git_status:
- UNTRACKED_FILES
versioning:
code_version: 1.7.0-rc3
schema_version: 1.2.0
skycatalog_root: /pscratch/sd/j/jrbogart/desc/skycatalogs
14 changes: 6 additions & 8 deletions skycatalogs/skyCatalogs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import os
import sys
import re
import yaml
from yamlinclude import YamlIncludeConstructor
import logging
import healpy
import numpy as np
Expand Down Expand Up @@ -845,12 +843,12 @@ def open_catalog(config_file, mp=False, skycatalog_root=None, verbose=False):
# Get bandpasses in case we need to compute fluxes
_ = load_lsst_bandpasses()
_ = load_roman_bandpasses()
base_dir = os.path.dirname(config_file)
YamlIncludeConstructor.add_to_loader_class(loader_class=yaml.SafeLoader,
base_dir=base_dir)
with open(config_file) as f:
return SkyCatalog(yaml.safe_load(f), skycatalog_root=skycatalog_root,
mp=mp, verbose=verbose)

from skycatalogs.utils.config_utils import open_config_file

config_dict = open_config_file(config_file)
return SkyCatalog(config_dict, skycatalog_root=skycatalog_root, mp=mp,
verbose=verbose)


if __name__ == '__main__':
Expand Down
80 changes: 75 additions & 5 deletions skycatalogs/utils/config_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import sys
import yaml
import git
import logging
from typing import Any
from .exceptions import ConfigDuplicateKeyError
# import jsonschema

from collections import namedtuple

__all__ = ['Config', 'open_config_file', 'Tophat', 'create_config',
Expand All @@ -16,12 +16,81 @@
CURRENT_SCHEMA_VERSION = '1.2.0'


class YamlIncludeLoader(yaml.SafeLoader):
    """YAML loader that supports ``!include`` directives.

    An ``!include`` tag names another YAML file whose parsed content is
    substituted at that point in the document, e.g. ::

        storageClasses: !include storageClasses.yaml

    The tag may be applied to a scalar (a single file), a sequence (a list
    of files, giving a list of their contents) or a mapping (key -> file,
    giving a dict of their contents).

    A relative path is resolved against the directory of the file that
    contains the directive: every included file is parsed by a fresh
    loader, so an included file may itself use ``!include`` with paths
    relative to its own location.  Absolute paths are used as given.

    Parameters
    ----------
    filestream : text io stream
        The stream to parse.  If it has a ``name`` attribute holding a
        path (as objects returned by ``open`` do), relative include paths
        are resolved against that path's directory; otherwise they are
        resolved against the current working directory.

    Examples
    --------
    >>> with open("document.yaml", "r") as f:  # doctest: +SKIP
    ...     data = yaml.load(f, Loader=YamlIncludeLoader)

    Notes
    -----
    This code was adapted from the LSST Science Pipelines Butler.
    See in particular the Loader class in
    daf_butler/python/lsst/daf/butler/_config.py in the daf_butler repo
    https://github.com/lsst/daf_butler
    """

    def __init__(self, filestream):
        super().__init__(filestream)
        self._logger = logging.getLogger('YamlIncludeLoader')

        # Plain strings and in-memory streams carry no usable path, so
        # fall back to '' (i.e. resolve relative to the working directory)
        # instead of failing on a missing or non-path ``name``.
        stream_name = getattr(filestream, "name", None)
        if isinstance(stream_name, (str, os.PathLike)):
            self._current_dir = os.path.dirname(stream_name)
        else:
            self._current_dir = ""

        self.add_constructor("!include", YamlIncludeLoader.include)

    def include(self, node: yaml.Node) -> "list[Any] | dict[str, Any]":
        """Construct the value of an ``!include`` node.

        The return annotation is a string so that it is not evaluated at
        definition time; the unquoted form fails on Python older than 3.10.

        Parameters
        ----------
        node : yaml.Node
            The node carrying the ``!include`` tag.

        Returns
        -------
        The loaded content of the named file for a scalar node, a list of
        loaded contents for a sequence node, or a dict mapping each key to
        the loaded content of its file for a mapping node.

        Raises
        ------
        TypeError
            If a mapping node has a key which is not a string.
        yaml.constructor.ConstructorError
            If the node is not a scalar, sequence or mapping node.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename)
                    for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        # Logger methods do not accept a ``file`` keyword; passing one
        # would raise TypeError and mask the intended ConstructorError.
        problem = "Unrecognised node type in !include statement"
        self._logger.error(problem)
        raise yaml.constructor.ConstructorError(
            None, None,
            f"{problem}: {type(node).__name__}",
            getattr(node, "start_mark", None))

    def extractFile(self, filepath: str) -> Any:
        """Load and return the content of one included YAML file.

        Parameters
        ----------
        filepath : str
            Path from the ``!include`` directive.  A relative path is
            taken relative to the directory of the file being parsed by
            this loader.

        Returns
        -------
        The parsed content of the file.
        """
        # os.path.join discards the base directory when ``filepath`` is
        # absolute, so no explicit absolute-path test is needed.
        actual_path = os.path.join(self._current_dir, filepath)

        # Lazy %-style arguments are the right form for a logger call
        # (a print statement would need explicit "%" interpolation).
        self._logger.info("Opening YAML file via !include: %s", actual_path)

        # Parse with a new loader so that nested !include paths resolve
        # relative to the included file rather than to this one.
        with open(actual_path) as f:
            return yaml.load(f, YamlIncludeLoader)


def open_config_file(config_file):
    '''
    Given path to config file, return a Config object

    The file is parsed with YamlIncludeLoader so that ``!include``
    directives in it are expanded before the Config is built.
    '''
    # Only the include-aware load is kept: an earlier plain
    # yaml.safe_load return made it unreachable and cannot construct
    # "!include" tags.
    with open(config_file) as f:
        content = yaml.load(f, Loader=YamlIncludeLoader)
    return Config(content)


Tophat = namedtuple('Tophat', ['start', 'width'])
Expand Down Expand Up @@ -91,6 +160,9 @@ def __getitem__(self, k):
'''
return self._cfg.__getitem__(k)

def __contains__(self, k):
    '''
    Support ``k in config`` by delegating the membership test to the
    underlying dict-like ``_cfg``
    '''
    is_present = k in self._cfg
    return is_present

def list_sed_models(self):
    '''
    Return the keys of the 'SED_models' entry of the underlying config
    '''
    sed_models = self._cfg['SED_models']
    return sed_models.keys()

Expand Down Expand Up @@ -210,8 +282,6 @@ def write_yaml(input_dict, outpath, overwrite=False, logname=None):

def create_config(catalog_name, logname=None):
    '''
    Return a new Config whose only initial entry is the catalog name

    Parameters
    ----------
    catalog_name   Value stored under the 'catalog_name' key
    logname        Passed through unchanged as the second Config argument
    '''
    initial_entries = {'catalog_name': catalog_name}
    return Config(initial_entries, logname)


def assemble_cosmology(cosmology):
Expand Down
41 changes: 41 additions & 0 deletions tests/test_yamlinclude.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Test yaml loader which handles !include tag
"""
import unittest
import yaml
import os
from pathlib import Path

from skycatalogs.utils.config_utils import YamlIncludeLoader

class skyCatalogsTestCase(unittest.TestCase):
    """Check that YamlIncludeLoader expands ``!include`` directives."""

    def setUp(self):
        # Directory containing the top-level file, located relative to the
        # parent of the directory holding this test module
        self._yaml_dir = os.path.join(Path(__file__).resolve().parents[1],
                                      'skycatalogs', 'data', 'ci_yamlinclude')

    def tearDown(self):
        pass

    def test_include(self):
        # Top level file references to
        #    file in the same directory
        #    file in subdirectory
        #    file which itself has an !include directive
        top_path = os.path.join(self._yaml_dir, 'top.yaml')

        # If the load succeeds, all required files have been found
        with open(top_path) as f:
            data = yaml.load(f, Loader=YamlIncludeLoader)

        # Verify that expected keys exist.  unittest assertions are used
        # rather than bare ``assert`` so the checks still run under -O
        # and report the offending container on failure.
        self.assertIn('object_types', data)
        objs = data['object_types']
        self.assertIn('gaia_star', objs)
        self.assertIn('star', objs)
        self.assertIn('galaxy', objs)

        # area_partition comes from a file included by an included file
        self.assertIn('area_partition', objs['galaxy'])
        self.assertIn('nside', objs['galaxy']['area_partition'])

if __name__ == '__main__':
unittest.main()