Skip to content

Commit

Permalink
WIP config schema
Browse files Browse the repository at this point in the history
Generate docs using json-schema-for-humans:
```
generate-schema-doc --config template_name=js config/schema.yaml public/schema.html
```
  • Loading branch information
jameshadfield committed Nov 25, 2024
1 parent 4078613 commit c7d5024
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,6 @@ Thumbs.db

# cluster logs
slurm-*

# generated docs
public/
3 changes: 3 additions & 0 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ if os.path.exists("config.yaml"):

from pprint import pp; pp(config, stream=sys.stderr) # TODO XXX remove

# Check `config` against the JSON Schema in config/schema.yaml. Any exception
# raised by `validate` is not caught here, so it propagates out of the Snakefile.
from scripts.validate_utils import validate
validate(config)

class InvalidConfigError(Exception):
    """Workflow-specific exception type.

    NOTE(review): presumably raised when the workflow config is unusable;
    the raise sites are outside this view -- confirm against the Snakefile.
    """

Expand Down
53 changes: 53 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# JSON Schema describing the workflow config. It is loaded and applied by
# `validate()` in scripts/validate_utils.py, and can be rendered to HTML docs
# with json-schema-for-humans (`generate-schema-doc`).
$schema: http://json-schema.org/draft-07/schema#

type: object
title: Avian-flu config schema
# Keys not listed under `properties` are currently accepted (see FIXME).
additionalProperties: True # FIXME XXX
# No config key is mandatory at this stage.
required: []
properties:
# `builds`: array whose items must each match `build_element` (defined below).
builds:
title: Target subtype/segment/time combinations
description: >
Each element defines one or more subtypes, segments and time resolutions
which are expanded to produce all combinations. You can supply multiple elements
here in order to define different combinations.
NOTE: H5N1 cattle-outbreak schemas should not define `time`.
examples:
- |
- subtype:
- h7n9
- h9n2
segment:
- ha
- na
time:
- all-time
type: array
items:
$ref: "#/$defs/build_element"
# `target_patterns`: non-empty array of strings.
target_patterns:
type: array
items:
type: string
minItems: 1

# Reusable sub-schemas referenced via `$ref` above.
# NOTE(review): draft-07 names this keyword `definitions`; `$defs` is the
# 2019-09+ spelling. The JSON-pointer `$ref`s used here should still resolve,
# but confirm with the validator and doc generator in use.
$defs:
# Either a single string or an array holding at least one string.
string_or_array_of_strings:
oneOf:
- type: string
- type: array
minItems: 1
items:
type: string
# One entry of `builds`: `subtype` and `segment` are required, `time` is
# optional, and no other keys are allowed.
build_element:
type: object
required: ['subtype', segment]
additionalProperties: False
properties:
subtype:
$ref: "#/$defs/string_or_array_of_strings"
segment:
$ref: "#/$defs/string_or_array_of_strings"
time:
$ref: "#/$defs/string_or_array_of_strings"
34 changes: 34 additions & 0 deletions scripts/validate_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

class ValidateError(Exception):
    """Raised by ``validate`` for problems detected by this module.

    For example, when the bundled config schema is itself not a valid
    JSON Schema.
    """

def validate(config):
    """Validate *config* against the JSON Schema in ``config/schema.yaml``.

    Prototype based on
    <https://github.com/nextstrain/augur/blob/master/augur/validate.py>.

    Args:
        config: The configuration object to check (the Snakefile passes its
            global ``config``).

    Raises:
        ValidateError: If the installed ``jsonschema`` is not a 3.x release,
            or if the schema file is itself not a valid JSON Schema.
        jsonschema.exceptions.ValidationError: If *config* does not conform
            to the schema.
    """
    import sys
    from importlib import metadata
    from os import path
    from pprint import pp

    # <https://github.com/nextstrain/augur/issues/1358>
    # Explicit raise rather than `assert`, which is stripped under `python -O`.
    installed_version = str(metadata.version("jsonschema"))
    if not installed_version.startswith('3.'):
        raise ValidateError(
            f"jsonschema must be version 3 (found {installed_version})"
        )

    # Imported only after the version guard, matching the original ordering.
    import jsonschema
    import jsonschema.exceptions
    import yte

    # The schema lives next to this script's parent directory, not the CWD.
    schema_path = path.join(
        path.dirname(path.realpath(__file__)), "../config/schema.yaml"
    )
    with open(schema_path, encoding='utf-8') as f:
        schema = yte.process_yaml(f, require_use_yte=True)

    # Pick the validator class matching the schema's declared `$schema`.
    Validator = jsonschema.validators.validator_for(schema)

    try:
        Validator.check_schema(schema)
    except jsonschema.exceptions.SchemaError as err:
        # Chain the cause so the underlying SchemaError stays in the traceback.
        raise ValidateError(f"Internal error: config schema is not a valid JSON Schema ({Validator.META_SCHEMA['$schema']}). Error: {err}") from err

    # Here we're validating the merged config. We could also validate the user
    # config on its own by making (all?) properties optional?

    # WIP debug output; sent to stderr (as the Snakefile's debug print does)
    # so it cannot pollute anything consuming stdout.
    pp(schema, stream=sys.stderr)

    # Note: augur uses a better approach using `iter_errors`
    jsonschema.validate(instance=config, schema=schema)

    # BEYOND SCHEMA:
    # all keys match provided subtype/segment/time

0 comments on commit c7d5024

Please sign in to comment.