From 8ab5515351a00eb6749680c0cace701dc4f92a9c Mon Sep 17 00:00:00 2001 From: Paul Madden <136389411+maddenp-noaa@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:12:21 -0700 Subject: [PATCH] Notebooks (#656) --- .github/scripts/test.sh | 3 +- Makefile | 5 +- binder/environment.yml | 6 + docs/sections/user_guide/api/config.rst | 3 + docs/sections/user_guide/api/fs.rst | 3 + docs/sections/user_guide/api/rocoto.rst | 3 + docs/sections/user_guide/api/template.rst | 3 + docs/sections/user_guide/index.rst | 2 + docs/shared/binder_links.rst | 13 + notebooks/.gitignore | 2 + notebooks/Makefile | 18 + notebooks/config.ipynb | 1906 +++++++++++++++++ notebooks/exp-config-cb.ipynb | 403 ++++ notebooks/fixtures/config/alt-config.nml | 5 + notebooks/fixtures/config/base-config.nml | 5 + notebooks/fixtures/config/fruit-config.ini | 4 + notebooks/fixtures/config/get-config.yaml | 2 + notebooks/fixtures/config/keys-config.yaml | 4 + notebooks/fixtures/config/validate.jsonschema | 15 + notebooks/fixtures/exp-config/base-file.yaml | 27 + .../fixtures/exp-config/fv3-rap-physics.yaml | 6 + notebooks/fixtures/exp-config/user.yaml | 6 + notebooks/fixtures/fs/copy-config.yaml | 3 + notebooks/fixtures/fs/copy-keys-config.yaml | 6 + notebooks/fixtures/fs/dir-config.yaml | 3 + notebooks/fixtures/fs/dir-keys-config.yaml | 6 + notebooks/fixtures/fs/file1.nml | 6 + notebooks/fixtures/fs/file2.txt | 1 + notebooks/fixtures/fs/file3.csv | 4 + notebooks/fixtures/fs/link-config.yaml | 3 + notebooks/fixtures/fs/link-keys-config.yaml | 6 + .../fixtures/rocoto/ent-cs-workflow.yaml | 16 + notebooks/fixtures/rocoto/ent-workflow.yaml | 14 + notebooks/fixtures/rocoto/err-workflow.xml | 10 + notebooks/fixtures/rocoto/err-workflow.yaml | 9 + .../fixtures/rocoto/meta-nested-workflow.yaml | 18 + notebooks/fixtures/rocoto/meta-workflow.yaml | 16 + notebooks/fixtures/rocoto/simple-workflow.xml | 11 + .../fixtures/rocoto/simple-workflow.yaml | 12 + .../fixtures/rocoto/tasks-deps-workflow.yaml | 31 + 
notebooks/fixtures/rocoto/tasks-workflow.yaml | 16 + .../fixtures/template/render-complete-1.yaml | 3 + .../fixtures/template/render-complete-2.yaml | 3 + .../fixtures/template/render-template.yaml | 3 + .../fixtures/template/render-values.yaml | 3 + .../fixtures/template/translate-complete.yaml | 3 + .../fixtures/template/translate-template.yaml | 3 + notebooks/fs.ipynb | 1586 ++++++++++++++ notebooks/install-deps | 1 + notebooks/pyproject.toml | 21 + notebooks/rocoto.ipynb | 1060 +++++++++ notebooks/template.ipynb | 511 +++++ notebooks/tests/test_config.py | 152 ++ notebooks/tests/test_exp_config_cb.py | 35 + notebooks/tests/test_fs.py | 100 + notebooks/tests/test_rocoto.py | 90 + notebooks/tests/test_template.py | 75 + 57 files changed, 6282 insertions(+), 2 deletions(-) create mode 100644 binder/environment.yml create mode 100644 docs/shared/binder_links.rst create mode 100644 notebooks/.gitignore create mode 100644 notebooks/Makefile create mode 100644 notebooks/config.ipynb create mode 100644 notebooks/exp-config-cb.ipynb create mode 100644 notebooks/fixtures/config/alt-config.nml create mode 100644 notebooks/fixtures/config/base-config.nml create mode 100644 notebooks/fixtures/config/fruit-config.ini create mode 100644 notebooks/fixtures/config/get-config.yaml create mode 100644 notebooks/fixtures/config/keys-config.yaml create mode 100644 notebooks/fixtures/config/validate.jsonschema create mode 100644 notebooks/fixtures/exp-config/base-file.yaml create mode 100644 notebooks/fixtures/exp-config/fv3-rap-physics.yaml create mode 100644 notebooks/fixtures/exp-config/user.yaml create mode 100644 notebooks/fixtures/fs/copy-config.yaml create mode 100644 notebooks/fixtures/fs/copy-keys-config.yaml create mode 100644 notebooks/fixtures/fs/dir-config.yaml create mode 100644 notebooks/fixtures/fs/dir-keys-config.yaml create mode 100644 notebooks/fixtures/fs/file1.nml create mode 100644 notebooks/fixtures/fs/file2.txt create mode 100644 
notebooks/fixtures/fs/file3.csv create mode 100644 notebooks/fixtures/fs/link-config.yaml create mode 100644 notebooks/fixtures/fs/link-keys-config.yaml create mode 100644 notebooks/fixtures/rocoto/ent-cs-workflow.yaml create mode 100644 notebooks/fixtures/rocoto/ent-workflow.yaml create mode 100644 notebooks/fixtures/rocoto/err-workflow.xml create mode 100644 notebooks/fixtures/rocoto/err-workflow.yaml create mode 100644 notebooks/fixtures/rocoto/meta-nested-workflow.yaml create mode 100644 notebooks/fixtures/rocoto/meta-workflow.yaml create mode 100644 notebooks/fixtures/rocoto/simple-workflow.xml create mode 100644 notebooks/fixtures/rocoto/simple-workflow.yaml create mode 100644 notebooks/fixtures/rocoto/tasks-deps-workflow.yaml create mode 100644 notebooks/fixtures/rocoto/tasks-workflow.yaml create mode 100644 notebooks/fixtures/template/render-complete-1.yaml create mode 100644 notebooks/fixtures/template/render-complete-2.yaml create mode 100644 notebooks/fixtures/template/render-template.yaml create mode 100644 notebooks/fixtures/template/render-values.yaml create mode 100644 notebooks/fixtures/template/translate-complete.yaml create mode 100644 notebooks/fixtures/template/translate-template.yaml create mode 100644 notebooks/fs.ipynb create mode 100644 notebooks/install-deps create mode 100644 notebooks/pyproject.toml create mode 100644 notebooks/rocoto.ipynb create mode 100644 notebooks/template.ipynb create mode 100644 notebooks/tests/test_config.py create mode 100644 notebooks/tests/test_exp_config_cb.py create mode 100644 notebooks/tests/test_fs.py create mode 100644 notebooks/tests/test_rocoto.py create mode 100644 notebooks/tests/test_template.py diff --git a/.github/scripts/test.sh b/.github/scripts/test.sh index 048fb5240..acaedeafc 100755 --- a/.github/scripts/test.sh +++ b/.github/scripts/test.sh @@ -8,11 +8,12 @@ run_tests() { devpkgs=$(jq .packages.dev[] recipe/meta.json | tr -d ' "') conda create --yes --name $env --quiet python=$PYTHON_VERSION 
$devpkgs conda activate $env + . notebooks/install-deps set -x python --version git clean -dfx pip install --editable src # sets new Python version in entry-point scripts - make test + make test && make test-nb status=$? set +x conda deactivate diff --git a/Makefile b/Makefile index f42433637..d0899a757 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ CHANNELS = $(addprefix -c ,$(shell tr '\n' ' ' <$(RECIPE_DIR)/channels)) -c local METADEPS = $(addprefix $(RECIPE_DIR)/,meta.yaml) src/uwtools/resources/info.json METAJSON = $(RECIPE_DIR)/meta.json -TARGETS = clean-devenv devshell docs env format lint meta package test typecheck unittest +TARGETS = clean-devenv devshell docs env format lint meta package test test-nb typecheck unittest export RECIPE_DIR := $(shell cd ./recipe && pwd) @@ -41,6 +41,9 @@ package: meta test: recipe/run_test.sh +test-nb: + $(MAKE) -C notebooks test-nb + typecheck: recipe/run_test.sh typecheck diff --git a/binder/environment.yml b/binder/environment.yml new file mode 100644 index 000000000..0ccdf58bd --- /dev/null +++ b/binder/environment.yml @@ -0,0 +1,6 @@ +name: default +channels: + - conda-forge + - ufs-community +dependencies: + - uwtools diff --git a/docs/sections/user_guide/api/config.rst b/docs/sections/user_guide/api/config.rst index 5f689e1e7..0de57f368 100644 --- a/docs/sections/user_guide/api/config.rst +++ b/docs/sections/user_guide/api/config.rst @@ -1,6 +1,9 @@ ``uwtools.api.config`` ====================== +.. image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Fconfig.ipynb + .. automodule:: uwtools.api.config :inherited-members: UserDict :members: diff --git a/docs/sections/user_guide/api/fs.rst b/docs/sections/user_guide/api/fs.rst index 0ac50fc87..63fe32be9 100644 --- a/docs/sections/user_guide/api/fs.rst +++ b/docs/sections/user_guide/api/fs.rst @@ -1,5 +1,8 @@ ``uwtools.api.fs`` ================== +.. 
image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Ffs.ipynb + .. automodule:: uwtools.api.fs :members: diff --git a/docs/sections/user_guide/api/rocoto.rst b/docs/sections/user_guide/api/rocoto.rst index dcacb9c0e..45dbfa06b 100644 --- a/docs/sections/user_guide/api/rocoto.rst +++ b/docs/sections/user_guide/api/rocoto.rst @@ -1,5 +1,8 @@ ``uwtools.api.rocoto`` ====================== +.. image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Frocoto.ipynb + .. automodule:: uwtools.api.rocoto :members: diff --git a/docs/sections/user_guide/api/template.rst b/docs/sections/user_guide/api/template.rst index ac89e7002..b75de604f 100644 --- a/docs/sections/user_guide/api/template.rst +++ b/docs/sections/user_guide/api/template.rst @@ -1,5 +1,8 @@ ``uwtools.api.template`` ======================== +.. image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Ftemplate.ipynb + .. automodule:: uwtools.api.template :members: diff --git a/docs/sections/user_guide/index.rst b/docs/sections/user_guide/index.rst index a385a1801..54ae9cd6d 100644 --- a/docs/sections/user_guide/index.rst +++ b/docs/sections/user_guide/index.rst @@ -8,3 +8,5 @@ User Guide cli/index api/index yaml/index + +.. 
include:: /shared/binder_links.rst diff --git a/docs/shared/binder_links.rst b/docs/shared/binder_links.rst new file mode 100644 index 000000000..7d0aba236 --- /dev/null +++ b/docs/shared/binder_links.rst @@ -0,0 +1,13 @@ +Jupyter Notebooks +----------------- + +* Jupyter Notebook Tutorials (API) + + * `Config Tool <https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Fconfig.ipynb>`_ + * `File System Tool <https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Ffs.ipynb>`_ + * `Rocoto Tool <https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Frocoto.ipynb>`_ + * `Template Tool <https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Ftemplate.ipynb>`_ + +* Cookbooks + + * `Configuring an Experiment with UW Tools <https://mybinder.org/v2/gh/ufs-community/uwtools/notebooks?labpath=notebooks%2Fexp-config-cb.ipynb>`_ diff --git a/notebooks/.gitignore b/notebooks/.gitignore new file mode 100644 index 000000000..3d51db06f --- /dev/null +++ b/notebooks/.gitignore @@ -0,0 +1,2 @@ +.ipynb_checkpoints +tmp diff --git a/notebooks/Makefile b/notebooks/Makefile new file mode 100644 index 000000000..3a0635579 --- /dev/null +++ b/notebooks/Makefile @@ -0,0 +1,18 @@ +TARGETS = format lint test-nb unittest + +.PHONY: $(TARGETS) + +all: + $(error Valid targets are: $(TARGETS)) + +format: + black tests + isort tests + +lint: + pylint tests + +test-nb: lint unittest + +unittest: + pytest tests diff --git a/notebooks/config.ipynb b/notebooks/config.ipynb new file mode 100644 index 000000000..740d056a4 --- /dev/null +++ b/notebooks/config.ipynb @@ -0,0 +1,1906 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d023d283-7e1d-4e75-95b8-5033bea42a59", + "metadata": {}, + "source": [ + "# Config Tool\n", + "\n", + "The `uwtools` API's `config` module provides functions to create and manipulate configuration files, objects, and dictionaries.\n", + "\n", + "Tested on `uwtools` version 2.4.2. 
For more information, please see the uwtools.api.config Read the Docs page.\n", + "\n", + "## Table of Contents\n", + "\n", + "* [Getting Config Objects](#Getting-Config-Objects)\n", + "* [Config Depth Limitations](#Config-Depth-Limitations)\n", + "* [Realizing Configs](#Realizing-Configs)\n", + " * [Updating Configs](#Updating-Configs)\n", + " * [Using the `key_path` Parameter](#Using-the-key_path-Parameter)\n", + " * [Using the `values_needed` Parameter](#Using-the-values_needed-Parameter)\n", + " * [Using the `total` Parameter](#Using-the-total-Parameter)\n", + "* [Realizing Configs to a Dictionary](#Realizing-Configs-to-a-Dictionary)\n", + "* [Comparing Configs](#Comparing-Configs)\n", + "* [Validating Configs](#Validating-Configs)\n", + "* [Working with Config Classes](#Working-with-Config-Classes)\n", + " * [Comparing Config Objects](#Comparing-Config-Objects)\n", + " * [Rendering Values](#Rendering-Values)\n", + " * [Writing Configs in a Specified Format](#Writing-Configs-in-a-Specified-Format)\n", + " * [Updating Values](#Updating-Values) \n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6d02d033-0992-4990-861d-3f80d09d7083", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from uwtools.api import config\n", + "from uwtools.api.logging import use_uwtools_logger\n", + "\n", + "use_uwtools_logger()" + ] + }, + { + "cell_type": "markdown", + "id": "212594bb-379f-4441-805e-af0dbabe1815", + "metadata": {}, + "source": [ + "## Getting Config Objects\n", + "\n", + "The `config` tool can create configuration objects given a Python ``dict`` or a file in one of five different formats: FieldTable, INI, Fortran namelist, Shell, or YAML. 
`config.get_yaml_config` is demonstrated here, but the config module also has similar functions for each of the other supported formats: `get_fieldtable_config()`, `get_ini_config()`, `get_nml_config()`, and `get_sh_config()`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ab0e21c3-a4b6-404c-bffd-e0d393d9b0a2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function get_yaml_config in module uwtools.api.config:\n", + "\n", + "get_yaml_config(config: Union[dict, str, pathlib.Path, NoneType] = None, stdin_ok: bool = False) -> uwtools.config.formats.yaml.YAMLConfig\n", + " Get a ``YAMLConfig`` object.\n", + "\n", + " :param config: YAML file or ``dict`` (``None`` => read ``stdin``).\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: An initialized ``YAMLConfig`` object.\n", + "\n" + ] + } + ], + "source": [ + "help(config.get_yaml_config)" + ] + }, + { + "cell_type": "markdown", + "id": "606da5b3-4bff-4148-a9a5-908aa7dd5e8c", + "metadata": {}, + "source": [ + "The `stdin_ok` argument can be used to permit reads from `stdin`, but this is a rare use case beyond the scope of this notebook that will not be discussed here.\n", + "\n", + "`get_yaml_config()` can take input from a Python `dict` or a YAML file like the one below.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c6e049df-38f6-4879-8e0d-68356226d94b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "greeting: Hello\n", + "recipient: World\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/get-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "c2f72448-d35e-4a0b-a371-cb47c7b3338b", + "metadata": {}, + "source": [ + "Paths to config files can be provided either as a string or Path object. 
Since `get_yaml_config()` is used here, a `YAMLConfig` object is returned.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cc3020a6-4eb4-4830-9263-a9fc8fac7450", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'uwtools.config.formats.yaml.YAMLConfig'>\n", + "greeting: Hello\n", + "recipient: World\n" + ] + } + ], + "source": [ + "config1 = config.get_yaml_config(\n", + " config=Path(\"fixtures/config/get-config.yaml\")\n", + ")\n", + "print(type(config1))\n", + "print(config1)" + ] + }, + { + "cell_type": "markdown", + "id": "b7bcd736-ff78-4e8b-957f-b348b812c5f6", + "metadata": {}, + "source": [ + "Providing a Python `dict` will create a UW `Config` object with format matching the function used.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f01ac223-4a02-40ba-822f-8e66ad39f313", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "message:\n", + " greeting: Hi\n", + " recipient: Earth\n" + ] + } + ], + "source": [ + "input_config = {\"message\": {\"greeting\":\"Hi\", \"recipient\":\"Earth\"}}\n", + "config2 = config.get_yaml_config(\n", + " config=input_config\n", + ")\n", + "print(config2)" + ] + }, + { + "cell_type": "markdown", + "id": "dc745e95-d1ce-435c-a488-13b761979e36", + "metadata": {}, + "source": [ + "## Config Depth Limitations\n", + "\n", + "Some config formats have limitations on the depth of their nested configs. 
Shell configs, for example, may only contain top-level, bash-syntax `key=value` pairs.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "28d23ac5-52a0-45bc-bfee-98d9ea518ca2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "greeting=Salutations\n", + "recipient=Mars" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.get_sh_config(\n", + " config={\"greeting\":\"Salutations\", \"recipient\":\"Mars\"}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3dd7292f-cbd1-4e45-b641-1e213a4ead07", + "metadata": {}, + "source": [ + "Shell configs cannot be nested, and any attempt to do so will raise a `UWConfigError`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b3a0a5bc-9d1b-4d48-a05f-be6f94fb6e1d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cannot instantiate depth-1 SHConfig with depth-2 config\n" + ] + } + ], + "source": [ + "try: \n", + " config.get_sh_config(\n", + " config={\"message\": {\"greeting\":\"Salutations\", \"recipient\":\"Mars\"}}\n", + " )\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "7677bdaa-8707-4dff-b812-91b4521f4820", + "metadata": {}, + "source": [ + "When creating INI configs, exactly one level of nesting is required so that each key-value pair is contained within a section. The top level keys become sections, which are contained within square brackets `[]`. 
Read more about INI configuration files here.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6837e75b-bd20-4c3b-bd33-650e4b4f9f23", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[message]\n", + "greeting = Salutations\n", + "recipient = Mars" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.get_ini_config(\n", + " config={\"message\": {\"greeting\":\"Salutations\", \"recipient\":\"Mars\"}}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "04ab0d52-75cc-4227-8058-a9a1faba7b54", + "metadata": {}, + "source": [ + "Either more or fewer levels of nesting will raise a `UWConfigError`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9655b36b-2d39-4fc1-b3b8-9cb3443cf4b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cannot instantiate depth-2 INIConfig with depth-1 config\n" + ] + } + ], + "source": [ + "try:\n", + " config.get_ini_config(\n", + " config={\"greeting\":\"Salutations\", \"recipient\":\"Mars\"}\n", + " )\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "e4403333-c247-465f-9a75-f96d1be914f6", + "metadata": {}, + "source": [ + "## Realizing Configs\n", + "\n", + "The `config.realize()` function writes config files to disk or `stdout` with the ability to render Jinja2 expressions and add/update values.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2c3f1b75-b26f-4893-beb7-37a58c09f511", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function realize in module uwtools.api.config:\n", + "\n", + "realize(input_config: Union[uwtools.config.formats.base.Config, pathlib.Path, dict, str, NoneType] = None, input_format: Optional[str] = None, update_config: Union[uwtools.config.formats.base.Config, pathlib.Path, dict, 
str, NoneType] = None, update_format: Optional[str] = None, output_file: Union[str, pathlib.Path, NoneType] = None, output_format: Optional[str] = None, key_path: Optional[list[Union[str, int]]] = None, values_needed: bool = False, total: bool = False, dry_run: bool = False, stdin_ok: bool = False) -> dict\n", + " Realize a config based on a base input config and an optional update config.\n", + "\n", + " The input config may be specified as a filesystem path, a ``dict``, or a ``Config`` object. When it\n", + " is not, it will be read from ``stdin``.\n", + "\n", + " If an update config is specified, it is merged onto the input config, augmenting or overriding base\n", + " values. It may be specified as a filesystem path, a ``dict``, or a ``Config`` object. When it is\n", + " not, it will be read from ``stdin``.\n", + "\n", + " At most one of the input config or the update config may be left unspecified, in which case the\n", + " other will be read from ``stdin``. If neither filename or format is specified for the update config, no\n", + " update will be performed.\n", + "\n", + " The output destination may be specified as a filesystem path. When it is not, it will be written to\n", + " ``stdout``.\n", + "\n", + " If ``values_needed`` is ``True``, a report of values needed to realize the config is logged. In\n", + " ``dry_run`` mode, output is written to ``stderr``.\n", + "\n", + " If ``total`` is ``True``, an exception will be raised if any Jinja2 variables/expressions cannot be\n", + " rendered. 
Otherwise, such variables/expressions will be passed through unchanged in the output.\n", + "\n", + " Recognized file extensions are: ini, nml, sh, yaml\n", + "\n", + " :param input_config: Input config file (``None`` => read ``stdin``).\n", + " :param input_format: Format of the input config (optional if file's extension is recognized).\n", + " :param update_config: Update config file (``None`` => read ``stdin``).\n", + " :param update_format: Format of the update config (optional if file's extension is recognized).\n", + " :param output_file: Output config file (``None`` => write to ``stdout``).\n", + " :param output_format: Format of the output config (optional if file's extension is recognized).\n", + " :param key_path: Path through keys to the desired output block.\n", + " :param values_needed: Report complete, missing, and template values.\n", + " :param total: Require rendering of all Jinja2 variables/expressions.\n", + " :param dry_run: Log output instead of writing to output.\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: The ``dict`` representation of the realized config.\n", + " :raises: UWConfigRealizeError if ``total`` is ``True`` and any Jinja2 variable/expression was not rendered.\n", + "\n" + ] + } + ], + "source": [ + "help(config.realize)" + ] + }, + { + "cell_type": "markdown", + "id": "501da514-a654-4511-928f-b2ad7db102b2", + "metadata": {}, + "source": [ + "The `input_config` parameter takes a config from a string path, Path object, Python `dict`, or UW `Config` object like the `YAMLConfig` object from the Getting Config Objects section. The `input_format` argument must be provided for `dict` inputs or for files without recognized extensions. Configs are written to `stdout` if `output_file` is unspecified or explicitly set to `None`, or to the file specified by `output_file`. The `output_format` argument must be provided when writing to `stdout` or to a file without a recognized extension. 
Recognized extensions are: `.ini`, `.nml`, `.sh`, and `.yaml`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "77873e14-db3c-417d-be7a-2ba12c9a38f6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'greeting': 'Hello', 'recipient': 'World'}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.realize(\n", + " input_config=config1,\n", + " output_file=Path('tmp/config1.yaml')\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "fd1823c4-6c3d-4d4a-a614-4d1238588bdd", + "metadata": {}, + "source": [ + "The `realize()` method returns a dict version of the config regardless of input type, and the file is written in the YAML format as indicated by the file extension.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4f237a73-da83-4632-990f-644632b15cd9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "greeting: Hello\n", + "recipient: World\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/config1.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "902c4863-e772-49c6-88f2-31e1ab76b418", + "metadata": {}, + "source": [ + "Input and output formats are not required to match. This can be used to convert some configs from one format to another. YAML configs can be converted to configs of other recognized formats so long as the depth restrictions of the output format are met. All configs of recognized formats can be converted into YAML configs. 
Keep in mind that some formats are unable to express some types (for example, Shell configs can't express a value as an `int` while a Fortran namelist can) so type information may be lost when converting between formats.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b8854dc6-9dd2-4843-99e4-278b116b9767", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'greeting': 'Hello', 'recipient': 'World'}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.realize(\n", + " input_config='fixtures/config/get-config.yaml',\n", + " input_format='yaml',\n", + " output_file='tmp/realize-config.sh',\n", + " output_format='sh'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4032ab38-f6c7-45d1-bff7-f70d23832f26", + "metadata": {}, + "source": [ + "Here a Shell config is created from a YAML config.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "0a59a3e8-27b5-4daa-a924-941aceaad157", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "greeting=Hello\n", + "recipient=World\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/realize-config.sh" + ] + }, + { + "cell_type": "markdown", + "id": "ef0136e4-f549-467f-9341-f77f84738bb0", + "metadata": {}, + "source": [ + "### Updating Configs\n", + "\n", + "Configs can be updated by providing a second config with the `update_config` parameter. If the update config contains keys that match the base config, the base config values for those keys will be overwritten. 
Once updated, if the config contains Jinja2 expressions, like the one below, they will be rendered in the config wherever possible.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2d72bbc1-e438-48b2-8bd7-554b598c6f24", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "&memo\n", + " sender_id = \"{{ id }}\"\n", + " message = \"{{ greeting }}, {{ recipient }}!\"\n", + " sent = .FALSE.\n", + "/\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/base-config.nml" + ] + }, + { + "cell_type": "markdown", + "id": "67b07a34-1bea-402f-9cec-f33a2e519d27", + "metadata": {}, + "source": [ + "Here, the update config provides values that will update two of the Jinja2 expressions and override one key with a new value.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7648fdd5-5752-4bf3-b366-db8da1eac601", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'memo': {'sender_id': '{{ id }}',\n", + " 'message': 'Salutations, Mars!',\n", + " 'sent': True,\n", + " 'greeting': 'Salutations',\n", + " 'recipient': 'Mars'}}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.realize(\n", + " input_config='fixtures/config/base-config.nml',\n", + " update_config={\"memo\": {\"greeting\":\"Salutations\", \"recipient\":\"Mars\", \"sent\": True}},\n", + " output_file='tmp/updated-config.nml'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "bdac7f6c-4452-4137-9104-784449584100", + "metadata": {}, + "source": [ + "All of the key-value pairs were added to the updated config, and the base config was rendered where the appropriate values were provided. 
However, not all Jinja2 expressions are required to be rendered: An `id` key was not provided in the update config, so the expression referencing it was not rendered.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f4538965-d3b0-4c0c-a878-b6852f8d8ab0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "&memo\n", + " sender_id = '{{ id }}'\n", + " message = 'Salutations, Mars!'\n", + " sent = .true.\n", + " greeting = 'Salutations'\n", + " recipient = 'Mars'\n", + "/\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/updated-config.nml" + ] + }, + { + "cell_type": "markdown", + "id": "a20024ac-0f33-4000-942d-29c99dc0502e", + "metadata": {}, + "source": [ + "### Using the `key_path` Parameter\n", + "\n", + "Consider the following config file, where the desired keys and values are not at the top level.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "7ce0e917-f0d0-4302-9c8c-b136ffc5410a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "keys:\n", + " to:\n", + " config:\n", + " message: \"{{ greeting }}, {{ recipient }}!\"\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/keys-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "633778ca-391c-4ba4-b3b4-b3f599af5d41", + "metadata": {}, + "source": [ + "The `key_path` parameter allows only a portion of the config, identified by following a given list of keys, to be written to a file or, in this case, to `stdout`. 
Note that the key-value pairs from the update config are used to render values, but don't appear in the config written to `stdout`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "00150efa-848c-44eb-ac0c-dab3845546b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "message: Good morning, Venus!\n" + ] + } + ], + "source": [ + "_ = config.realize(\n", + " input_config=\"fixtures/config/keys-config.yaml\",\n", + " update_config={\"greeting\": \"Good morning\", \"recipient\": \"Venus\"},\n", + " output_file=None,\n", + " output_format='yaml',\n", + " key_path=['keys', 'to', 'config']\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "903ea23f-4625-48f0-9efc-1c0a106d5bf6", + "metadata": {}, + "source": [ + "### Using the `values_needed` Parameter\n", + "\n", + "Consider the config file below, which contains unrendered Jinja2 expressions.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "928a23d7-8ba9-4217-935d-01563bb36cb6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "&memo\n", + " sender_id = \"{{ id }}\"\n", + " message = \"{{ greeting }}, {{ recipient }}!\"\n", + " sent = .FALSE.\n", + "/\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/base-config.nml" + ] + }, + { + "cell_type": "markdown", + "id": "7d878080-55ae-4233-bb65-37cfa5ef7cff", + "metadata": {}, + "source": [ + "Setting `values_needed` to `True` will allow logging of keys that contain unrendered Jinja2 expressions and their values. A logger needs to be initialized for this information to be displayed. 
The config is not written and the returned `dict` is empty.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "d22c692d-e98e-4f88-bdac-a369f0a1962f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:12:52] INFO Keys that are complete:\n", + "[2024-11-19T23:12:52] INFO memo\n", + "[2024-11-19T23:12:52] INFO memo.sent\n", + "[2024-11-19T23:12:52] INFO \n", + "[2024-11-19T23:12:52] INFO Keys with unrendered Jinja2 variables/expressions:\n", + "[2024-11-19T23:12:52] INFO memo.sender_id: {{ id }}\n", + "[2024-11-19T23:12:52] INFO memo.message: {{ greeting }}, {{ recipient }}!\n" + ] + }, + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.realize(\n", + " input_config='fixtures/config/base-config.nml',\n", + " output_file=None,\n", + " output_format='nml',\n", + " values_needed=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ddbc989a-a9ed-4649-8e87-f74d1ff43b89", + "metadata": {}, + "source": [ + "### Using the `total` Parameter\n", + "\n", + "The `total` parameter is used to specify that all Jinja2 expressions must be rendered before the final config is written. 
Consider the config below which contains multiple expressions.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "647538d9-cd22-4f94-b15b-c34d68a324da", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "&memo\n", + " sender_id = \"{{ id }}\"\n", + " message = \"{{ greeting }}, {{ recipient }}!\"\n", + " sent = .FALSE.\n", + "/\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/base-config.nml" + ] + }, + { + "cell_type": "markdown", + "id": "7c0d64be-c97c-4d07-b636-54b1c8bb5d0c", + "metadata": {}, + "source": [ + "As was shown in the Updating Configs section, by default not all Jinja2 expressions are required to be rendered. However, when `total` is set to `True` and not enough values are provided to fully realize the config, a `UWConfigRealizeError` is raised. Notice that values are provided for `greeting` and `recipient`, but not for `id`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "ba360102-f558-4dba-b0b6-c6f550c7d40f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Config could not be totally realized\n" + ] + } + ], + "source": [ + "try:\n", + " config.realize(\n", + " input_config='fixtures/config/base-config.nml',\n", + " update_config={\"memo\": {\"greeting\":\"Salutations\", \"recipient\":\"Mars\", \"sent\":True}},\n", + " output_file='tmp/config-total.nml',\n", + " total=True\n", + " )\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "4048efdd-42f4-4400-bdba-1c8e0001d8f6", + "metadata": {}, + "source": [ + "With all values provided to fully render the config, `realize()` writes the complete config without error.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e891a446-f699-460d-b4a8-568d9d4cf631", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'memo': {'sender_id': '321',\n", + 
" 'message': 'Salutations, Mars!',\n", + " 'sent': True,\n", + " 'greeting': 'Salutations',\n", + " 'recipient': 'Mars',\n", + " 'id': 321}}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.realize(\n", + " input_config='fixtures/config/base-config.nml',\n", + " update_config={\"memo\": {\"greeting\":\"Salutations\", \"recipient\":\"Mars\", \"sent\":True, \"id\":321}},\n", + " output_file='tmp/config-total.nml',\n", + " total=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "07876d41-dad2-4c30-adba-f635050708ed", + "metadata": {}, + "source": [ + "The newly created config file is free from any unrendered Jinja2 expressions.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "aeab0ec5-7e6e-4309-b484-4de5dd9324b5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "&memo\n", + " sender_id = '321'\n", + " message = 'Salutations, Mars!'\n", + " sent = .true.\n", + " greeting = 'Salutations'\n", + " recipient = 'Mars'\n", + " id = 321\n", + "/\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/config-total.nml" + ] + }, + { + "cell_type": "markdown", + "id": "e121dab6-5d16-40b0-aa16-161c329a8e9a", + "metadata": {}, + "source": [ + "## Realizing Configs to a Dictionary\n", + "\n", + "The `config.realize_to_dict()` function has the ability to manipulate config values, and returns the config as a Python `dict` just as `realize()` does. However, a config won't be written to a file or to `stdout`. 
Like `realize()`, input or update configs can be Python dictionaries, UW `Config` objects, or files like the one below.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "bdc98ce1-e213-41ac-b1f2-03bd52238e30", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "greeting: Hello\n", + "recipient: World\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/get-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "eeacc37f-b808-4d0e-9e29-ff8aa1fc2ff2", + "metadata": {}, + "source": [ + "`realize_to_dict()` has the same parameters as `realize()`, with the exception of `output_file` and `output_format`. Instead, configs can be manipulated or converted to a `dict` without the need to specify an output file or format.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "ae7f648e-6586-4700-87e7-492ca3a02a06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '456', 'greeting': 'Hello', 'recipient': 'World'}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.realize_to_dict(\n", + " input_config={\"id\": \"456\"},\n", + " update_config=\"fixtures/config/get-config.yaml\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "be66b870-a42c-46f2-a045-b769354403de", + "metadata": {}, + "source": [ + "For more details on usage and parameters, see the Realizing Configs section above.\n", + "\n", + "## Comparing Configs\n", + "\n", + "The `config` tool can be used to compare two configuration files using `config.compare()`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "249ae7a9-1ad3-4401-98b0-bc3c433f22f4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function compare in module uwtools.api.config:\n", + "\n", + "compare(config_1_path: Union[pathlib.Path, str], 
config_2_path: Union[pathlib.Path, str], config_1_format: Optional[str] = None, config_2_format: Optional[str] = None) -> bool\n", + " Compare two config files.\n", + "\n", + " Recognized file extensions are: ini, nml, sh, yaml\n", + "\n", + " :param config_1_path: Path to 1st config file.\n", + " :param config_2_path: Path to 2nd config file.\n", + " :param config_1_format: Format of 1st config file (optional if file's extension is recognized).\n", + " :param config_2_format: Format of 2nd config file (optional if file's extension is recognized).\n", + " :return: ``False`` if config files had differences, otherwise ``True``.\n", + "\n" + ] + } + ], + "source": [ + "help(config.compare)" + ] + }, + { + "cell_type": "markdown", + "id": "908b17fe-ae2d-4964-a659-0481d063c037", + "metadata": {}, + "source": [ + "Consider the following config files, which have similar values, with the exception of `sent`'s value.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "0218e5de-2c25-4d7a-a7b6-0f05ad81afb2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "&memo\n", + " sender_id = \"{{ id }}\"\n", + " message = \"{{ greeting }}, {{ recipient }}!\"\n", + " sent = .FALSE.\n", + "/\n", + "----------------------------------------------\n", + "&memo\n", + " sender_id = \"{{ id }}\"\n", + " message = \"{{ greeting }}, {{ recipient }}!\"\n", + " sent = .TRUE.\n", + "/\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/base-config.nml\n", + "echo ----------------------------------------------\n", + "cat fixtures/config/alt-config.nml " + ] + }, + { + "cell_type": "markdown", + "id": "c4361a7c-9c16-4bc0-b3e6-985d85f1450c", + "metadata": {}, + "source": [ + "`compare()` returns `True` if the configs contain identical key-value pairs, and `False` otherwise. If a logger has been initialized, information is logged on which files are being compared and the values that differ, if any. 
Files are passed to `config_1_path` and `config_2_path` as a string filename or Path object. Corresponding optional formats may be passed using `config_1_format` and `config_2_format` and are only needed if the format suffix is not recognized.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "988b72bc-7983-4d24-9d37-c9ba32a31ac4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:12:52] INFO - fixtures/config/base-config.nml\n", + "[2024-11-19T23:12:52] INFO + fixtures/config/alt-config.nml\n", + "[2024-11-19T23:12:52] INFO ---------------------------------------------------------------------\n", + "[2024-11-19T23:12:52] INFO ↓ ? = info | -/+ = line unique to - or + file | blank = matching line\n", + "[2024-11-19T23:12:52] INFO ---------------------------------------------------------------------\n", + "[2024-11-19T23:12:52] INFO memo:\n", + "[2024-11-19T23:12:52] INFO message: '{{ greeting }}, {{ recipient }}!'\n", + "[2024-11-19T23:12:52] INFO sender_id: '{{ id }}'\n", + "[2024-11-19T23:12:52] INFO - sent: false\n", + "[2024-11-19T23:12:52] INFO + sent: true\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.compare(\n", + " config_1_path=Path('fixtures/config/base-config.nml'),\n", + " config_2_path='fixtures/config/alt-config.nml',\n", + " config_1_format='nml',\n", + " config_2_format='nml'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "655717b7-b7db-4711-9dc4-c00557e6c2df", + "metadata": {}, + "source": [ + "To see the behavior of `compare()` when key-value pairs are identical, one of the configs from above is copied in the cell below.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ceb7f893-398b-41e4-818a-49cd036c2bfe", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "cp 
fixtures/config/base-config.nml tmp/config-copy.nml" + ] + }, + { + "cell_type": "markdown", + "id": "02c75cf7-9f24-44a0-97c6-abd7b44fb60f", + "metadata": {}, + "source": [ + "When these two files are compared, `True` is returned and the log reports no differences.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "8b495735-2396-436e-87db-d886ac6769fa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:12:52] INFO - fixtures/config/base-config.nml\n", + "[2024-11-19T23:12:52] INFO + tmp/config-copy.nml\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.compare(\n", + " config_1_path='fixtures/config/base-config.nml',\n", + " config_2_path='tmp/config-copy.nml',\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "eb4536f8-d198-46f8-ac74-ee9d2010704e", + "metadata": {}, + "source": [ + "If a comparison is attempted between two files whose formats don't match, `compare()` returns `False` and the mismatch is reported.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "438ccd5b-30d5-49b4-ba12-6e6e89f46d28", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:12:52] ERROR Formats do not match: yaml vs nml\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.compare(\n", + " config_1_path=Path('fixtures/config/get-config.yaml'),\n", + " config_2_path=Path('fixtures/config/base-config.nml')\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e14c0158-baa4-45d0-a86c-d9a245021229", + "metadata": {}, + "source": [ + "## Validating Configs\n", + "\n", + "The `config.validate()` function checks if a given config conforms to a specified JSON 
schema.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "2eb5b2ce-bebd-449d-90a5-764484aa03aa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function validate in module uwtools.api.config:\n", + "\n", + "validate(schema_file: Union[pathlib.Path, str], config: Union[dict, str, uwtools.config.formats.yaml.YAMLConfig, pathlib.Path, NoneType] = None, stdin_ok: bool = False) -> bool\n", + " Check whether the specified config conforms to the specified JSON Schema spec.\n", + "\n", + " If no config is specified, ``stdin`` is read and will be parsed as YAML and then validated. A\n", + " ``dict`` or a YAMLConfig instance may also be provided for validation.\n", + "\n", + " :param schema_file: The JSON Schema file to use for validation.\n", + " :param config: The config to validate.\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: ``True`` if the YAML file conforms to the schema, ``False`` otherwise.\n", + "\n" + ] + } + ], + "source": [ + "help(config.validate)" + ] + }, + { + "cell_type": "markdown", + "id": "57cdde4c-b3ab-40b1-8e52-1f99ea6d9696", + "metadata": {}, + "source": [ + "Consider the simple YAML config below. `validate()` used together with an appropriate JSON schema ensures that the config meets expectations before it's used elsewhere.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "dc7d2b80-ea92-4301-ae85-4edbf61bf510", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "greeting: Hello\n", + "recipient: World\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/get-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "1eeca5ad-e969-4670-bddd-8e20b38f3d6c", + "metadata": {}, + "source": [ + "Below is an example of a schema used to validate a config. It ensures that the required keys are present and the value types match expectations. 
For information on the keys used here and more, please refer to JSON Schema documentation.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "f9c66a24-2a7e-43be-839f-3ad5b136b646", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"additionalProperties\": false,\n", + " \"properties\": {\n", + " \"greeting\": {\n", + " \"type\": \"string\"\n", + " },\n", + " \"recipient\": {\n", + " \"type\": \"string\"\n", + " }\n", + " },\n", + " \"required\": [\n", + " \"greeting\", \"recipient\"\n", + " ],\n", + " \"type\": \"object\"\n", + "}\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/config/validate.jsonschema" + ] + }, + { + "cell_type": "markdown", + "id": "8c61a2d2-473c-45c6-9c6c-6c07fc5bf940", + "metadata": {}, + "source": [ + "The schema file and config from above are passed to the respective `schema_file` and `config` parameters. Config file paths should be passed as a string or Path object. Files should be of YAML format, or parseable as YAML. Alternatively, a `YAMLConfig` object or a Python `dict` can be provided. `validate()` returns `True` if the config conforms to the JSON schema, and `False` otherwise. 
With a logger initialized, details about any validation errors are reported.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "f3873050-d857-490d-aeeb-f9217a7f808c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:12:53] INFO 0 UW schema-validation errors found in config\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.validate(\n", + " schema_file='fixtures/config/validate.jsonschema',\n", + " config='fixtures/config/get-config.yaml'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8d151205-4a95-4b46-aa1d-30ec30d96e88", + "metadata": {}, + "source": [ + "The `config` argument also accepts a dictionary. In the next example, validation errors exist, and the logger reports the number of errors found along with their locations and details.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "b5664e58-8ddc-438c-8180-1e2911838744", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:12:53] ERROR 1 UW schema-validation error found in config\n", + "[2024-11-19T23:12:53] ERROR Error at recipient:\n", + "[2024-11-19T23:12:53] ERROR 47 is not of type 'string'\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.validate(\n", + " schema_file='fixtures/config/validate.jsonschema',\n", + " config={'greeting':'Hello', 'recipient':47}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "fffeb768-7109-472f-916b-1407520b0f5e", + "metadata": {}, + "source": [ + "## Working with Config Classes\n", + "\n", + "The `config` tool provides five classes that can be used to work with configs in an object-oriented way. 
The five different classes each work with a single format: `config.FieldTableConfig`, `config.INIConfig`, `config.NMLConfig`, `config.SHConfig`, and `config.YAMLConfig`. `config.INIConfig` is demonstrated here, but the other classes all use methods of the same names for working with each respective format.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "6f4df804-cda2-48c4-bf42-a5b57de5e066", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on class INIConfig in module uwtools.config.formats.ini:\n", + "\n", + "class INIConfig(uwtools.config.formats.base.Config)\n", + " | INIConfig(config: Union[dict, pathlib.Path, NoneType] = None)\n", + " |\n", + " | Work with INI configs.\n", + " |\n", + " | Method resolution order:\n", + " | INIConfig\n", + " | uwtools.config.formats.base.Config\n", + " | abc.ABC\n", + " | collections.UserDict\n", + " | collections.abc.MutableMapping\n", + " | collections.abc.Mapping\n", + " | collections.abc.Collection\n", + " | collections.abc.Sized\n", + " | collections.abc.Iterable\n", + " | collections.abc.Container\n", + " | builtins.object\n", + " |\n", + " | Methods defined here:\n", + " |\n", + " | __init__(self, config: Union[dict, pathlib.Path, NoneType] = None)\n", + " | :param config: Config file to load (None => read from stdin), or initial dict.\n", + " |\n", + " | as_dict(self) -> dict\n", + " | Returns a pure dict version of the config.\n", + " |\n", + " | dump(self, path: Optional[pathlib.Path] = None) -> None\n", + " | Dump the config in INI format.\n", + " |\n", + " | :param path: Path to dump config to (default: stdout).\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Class methods defined here:\n", + " |\n", + " | dump_dict(cfg: dict, path: Optional[pathlib.Path] = None) -> None\n", + " | Dump a provided config dictionary in INI format.\n", + " |\n", + " | :param cfg: The in-memory 
config object to dump.\n", + " | :param path: Path to dump config to (default: stdout).\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data and other attributes defined here:\n", + " |\n", + " | __abstractmethods__ = frozenset()\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Methods inherited from uwtools.config.formats.base.Config:\n", + " |\n", + " | __repr__(self) -> str\n", + " | Return the string representation of a Config object.\n", + " |\n", + " | compare_config(self, dict1: dict, dict2: Optional[dict] = None, header: Optional[bool] = True) -> bool\n", + " | Compare two config dictionaries.\n", + " |\n", + " | Assumes a section/key/value structure.\n", + " |\n", + " | :param dict1: The first dictionary.\n", + " | :param dict2: The second dictionary (default: this config).\n", + " | :return: True if the configs are identical, False otherwise.\n", + " |\n", + " | dereference(self, context: Optional[dict] = None) -> None\n", + " | Render as much Jinja2 syntax as possible.\n", + " |\n", + " | update_from(self, src: Union[dict, collections.UserDict]) -> None\n", + " | Update a config.\n", + " |\n", + " | :param src: The dictionary with new data to use.\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data descriptors inherited from uwtools.config.formats.base.Config:\n", + " |\n", + " | __dict__\n", + " | dictionary for instance variables\n", + " |\n", + " | __weakref__\n", + " | list of weak references to the object\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Methods inherited from collections.UserDict:\n", + " |\n", + " | __contains__(self, key)\n", + " | # Modify __contains__ and get() to work like dict\n", + " | # does when __missing__ is present.\n", + " |\n", + " | __copy__(self)\n", + " |\n", + " | __delitem__(self, key)\n", + " |\n", + " | 
__getitem__(self, key)\n", + " |\n", + " | __ior__(self, other)\n", + " |\n", + " | __iter__(self)\n", + " |\n", + " | __len__(self)\n", + " |\n", + " | __or__(self, other)\n", + " | Return self|value.\n", + " |\n", + " | __ror__(self, other)\n", + " | Return value|self.\n", + " |\n", + " | __setitem__(self, key, item)\n", + " |\n", + " | copy(self)\n", + " |\n", + " | get(self, key, default=None)\n", + " | D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Class methods inherited from collections.UserDict:\n", + " |\n", + " | fromkeys(iterable, value=None)\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Methods inherited from collections.abc.MutableMapping:\n", + " |\n", + " | clear(self)\n", + " | D.clear() -> None. Remove all items from D.\n", + " |\n", + " | pop(self, key, default=)\n", + " | D.pop(k[,d]) -> v, remove specified key and return the corresponding value.\n", + " | If key is not found, d is returned if given, otherwise KeyError is raised.\n", + " |\n", + " | popitem(self)\n", + " | D.popitem() -> (k, v), remove and return some (key, value) pair\n", + " | as a 2-tuple; but raise KeyError if D is empty.\n", + " |\n", + " | setdefault(self, key, default=None)\n", + " | D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D\n", + " |\n", + " | update(self, other=(), /, **kwds)\n", + " | D.update([E, ]**F) -> None. 
Update D from mapping/iterable E and F.\n", + " | If E present and has a .keys() method, does: for k in E: D[k] = E[k]\n", + " | If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v\n", + " | In either case, this is followed by: for k, v in F.items(): D[k] = v\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Methods inherited from collections.abc.Mapping:\n", + " |\n", + " | __eq__(self, other)\n", + " | Return self==value.\n", + " |\n", + " | items(self)\n", + " | D.items() -> a set-like object providing a view on D's items\n", + " |\n", + " | keys(self)\n", + " | D.keys() -> a set-like object providing a view on D's keys\n", + " |\n", + " | values(self)\n", + " | D.values() -> an object providing a view on D's values\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data and other attributes inherited from collections.abc.Mapping:\n", + " |\n", + " | __hash__ = None\n", + " |\n", + " | __reversed__ = None\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Class methods inherited from collections.abc.Collection:\n", + " |\n", + " | __subclasshook__(C)\n", + " | Abstract classes can override this to customize issubclass().\n", + " |\n", + " | This is invoked early on by abc.ABCMeta.__subclasscheck__().\n", + " | It should return True, False or NotImplemented. If it returns\n", + " | NotImplemented, the normal algorithm is used. Otherwise, it\n", + " | overrides the normal algorithm (and the outcome is cached).\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Class methods inherited from collections.abc.Iterable:\n", + " |\n", + " | __class_getitem__ = GenericAlias(...)\n", + " | Represent a PEP 585 generic type\n", + " |\n", + " | E.g. 
for t = list[int], t.__origin__ is list and t.__args__ is (int,).\n", + "\n" + ] + } + ], + "source": [ + "help(config.INIConfig)" + ] + }, + { + "cell_type": "markdown", + "id": "91a793e9-d1d1-41bb-84fd-c0c279606473", + "metadata": {}, + "source": [ + "An object can be initialized by providing a config either as a Python `dict` or a Path to the file.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "0e8d6c8a-cc3c-49d7-9b97-32187cd5f754", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[fruit count]\n", + "apples = 3\n", + "grapes = {{ grape_count }}\n", + "kiwis = 2\n" + ] + } + ], + "source": [ + "fruits = config.INIConfig(\n", + " config=Path('fixtures/config/fruit-config.ini')\n", + ")\n", + "print(fruits)" + ] + }, + { + "cell_type": "markdown", + "id": "1a50a362-d063-4ae1-9b6a-d98d0bf84c4f", + "metadata": {}, + "source": [ + "### Comparing Config Objects\n", + "\n", + "The `compare_config()` method compares two config `dict`s and returns `True` when they match and `False` otherwise. Two config `dict`s can be passed to the `dict1` and `dict2` parameters. Config objects of every format use the same method demonstrated here, and it stands as an alternative to `config.compare()`, which compares files rather than dictionaries. See the [Comparing Configs](#Comparing-Configs) section above for more details on `config.compare()`. 
The configs compared using `compare_config()` can be compared without regard for their intended format, since they are compared as dictionaries, but they must have a section/key/value structure.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "73925576-6b07-4af6-a9b0-7bd6f4235987", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fruits.compare_config(\n", + " dict1={'fruit count':{'apples':'3', 'grapes':'8', 'kiwis':'1'}},\n", + " dict2={'fruit count':{'apples':'3', 'grapes':'8', 'kiwis':'1'}}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3caa77b7-2eca-498b-841f-b37c91068974", + "metadata": {}, + "source": [ + "If `dict2` is left unspecified or set to `None`, the `dict1` config is compared to the config stored in the object itself. When there are differences between the two configs, as is the case here, `False` is returned. When a logger is initialized, the values that differ are displayed.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "9d94a325-93f6-43b4-bc56-0ce5a1830647", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:12:53] INFO ---------------------------------------------------------------------\n", + "[2024-11-19T23:12:53] INFO ↓ ? = info | -/+ = line unique to - or + file | blank = matching line\n", + "[2024-11-19T23:12:53] INFO ---------------------------------------------------------------------\n", + "[2024-11-19T23:12:53] INFO fruit count:\n", + "[2024-11-19T23:12:53] INFO apples: '3'\n", + "[2024-11-19T23:12:53] INFO - grapes: '{{ grape_count }}'\n", + "[2024-11-19T23:12:53] INFO + grapes: '8'\n", + "[2024-11-19T23:12:53] INFO - kiwis: '2'\n", + "[2024-11-19T23:12:53] INFO ? ^\n", + "[2024-11-19T23:12:53] INFO + kiwis: '1'\n", + "[2024-11-19T23:12:53] INFO ? 
^\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fruits.compare_config(\n", + " dict1={'fruit count':{'apples':'3', 'grapes':'8', 'kiwis':'1'}}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a9c0020f-db59-4af3-a099-ebcdfbb017c8", + "metadata": {}, + "source": [ + "### Rendering Values\n", + "\n", + "If the object's config contains unrendered Jinja2 expressions, the `dereference()` method will render as many as possible. The optional `context` parameter can be used to provide additional values with a Python `dict`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "65315885-28f7-4e34-b5e9-f07b51b85e42", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[fruit count]\n", + "apples = 3\n", + "grapes = 15\n", + "kiwis = 2\n" + ] + } + ], + "source": [ + "fruits.dereference(\n", + " context={'grape_count':'15'}\n", + ")\n", + "print(fruits)" + ] + }, + { + "cell_type": "markdown", + "id": "d9a330d1-35ff-4843-90fa-dd19e6eb98e9", + "metadata": {}, + "source": [ + "### Writing Configs in a Specified Format\n", + "\n", + "Each of the `config` tool's classes provides methods that write configs in its own format. With the `fruits` object, which is an instance of `INIConfig`, INI configs are written. `dump()` is one of these methods, which writes the config stored in the object to a file specified by providing the `path` parameter with a Path object. 
If `path` is `None`, the config is written to `stdout`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "7cd35285-9eb9-4796-b1d9-cf84160c0c0e", + "metadata": {}, + "outputs": [], + "source": [ + "fruits.dump(\n", + " path=Path('tmp/fruits.ini')\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0c0170d2-16ab-4b61-a3e5-2840b4f59271", + "metadata": {}, + "source": [ + "Below we can see that the config was written in the INI format at the specified path.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "f2ce9e59-f03c-4203-b929-87b4c0eae9de", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[fruit count]\n", + "apples = 3\n", + "grapes = 15\n", + "kiwis = 2\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/fruits.ini" + ] + }, + { + "cell_type": "markdown", + "id": "83a9d6b2-161f-4bc0-891c-477bd5c7a013", + "metadata": {}, + "source": [ + "To write a config that is not stored in the object, the `dump_dict()` method is used. This method takes a config in the form of a `dict` and, like `dump()`, writes the config in the INI format to `stdout` if `path` is `None` or to the path that a Path object indicates.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "b896632e-6a5a-4756-a32b-0630bc02d504", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[fruit count]\n", + "oranges = 4\n", + "blueberries = 9\n" + ] + } + ], + "source": [ + "other_fruits = {'fruit count':{'oranges':4, 'blueberries':9}}\n", + "fruits.dump_dict(\n", + " cfg=other_fruits,\n", + " path=None\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "945d1e71-2704-49de-9a7c-dc8bf58c12fe", + "metadata": {}, + "source": [ + "### Updating Values\n", + "\n", + "The `update_from()` method adds new or updated key-value pairs to the stored config, and these are provided as a dictionary via the `src` parameter. 
\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "7a3f8247-75f6-4cc6-9a9d-7e69896120e7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[fruit count]\n", + "apples = 3\n", + "grapes = 15\n", + "kiwis = 4\n", + "raspberries = 12\n" + ] + } + ], + "source": [ + "fruits.update_from(\n", + " src={'fruit count':{'kiwis': '4', 'raspberries': '12'}}\n", + ")\n", + "print(fruits)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/exp-config-cb.ipynb b/notebooks/exp-config-cb.ipynb new file mode 100644 index 000000000..823478052 --- /dev/null +++ b/notebooks/exp-config-cb.ipynb @@ -0,0 +1,403 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "46a75a26-d2b7-4e44-8c35-01851975dd69", + "metadata": {}, + "source": [ + "# Building and Validating an Experiment Configuration\n", + "\n", + "
Note: This notebook was tested using uwtools version 2.4.2.
\n", + "\n", + "This notebook demonstrates how to build up a configuration file for generating FV3 initial conditions (ICs) from a hierarchy of smaller, purpose-specific files; dereferencing Jinja2 expressions in the configuration; and validating the final configuration to check for potential errors. A larger, more complex experimental setup could be built up by applying similar techniques.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3a872d35-99f1-434c-927e-5c8fee3f0f2d", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime, timedelta\n", + "from uwtools.api.config import get_yaml_config\n", + "from uwtools.api import chgres_cube\n", + "from uwtools.api.logging import use_uwtools_logger\n", + "\n", + "use_uwtools_logger()" + ] + }, + { + "cell_type": "markdown", + "id": "8834e2e9-8c1e-4791-b110-2f1916f7289e", + "metadata": {}, + "source": [ + "We start with a base file that configures the `chgres_cube` component to generate FV3 ICs for use with the default physics suite, controlled by the `varmap_file` key:\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8b42bfca-18ae-48b1-a7ad-a1b76b9e24a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "task_make_ics:\n", + " chgres_cube:\n", + " execution:\n", + " executable: \"execdir/chgres_cube\"\n", + " namelist:\n", + " update_values:\n", + " config:\n", + " cycle_day: !int \"{{ cycle.strftime('%d') }}\"\n", + " cycle_hour: !int \"{{ cycle.strftime('%H') }}\"\n", + " cycle_mon: !int \"{{ cycle.strftime('%m') }}\"\n", + " convert_atm: true\n", + " convert_nst: true\n", + " convert_sfc: true\n", + " data_dir_input_grid: \"{{ task_make_ics.chgres_cube.rundir }}\"\n", + " external_model: \"GFS\"\n", + " input_type: \"gaussian_nemsio\"\n", + " mosaic_file_target_grid: \"path/to/example_mosaic.halo.nc\"\n", + " tg3_from_soil: false\n", + " tracers:\n", + " - sphum\n", + " - liq_wat\n", + 
" tracers_input:\n", + " - spfh\n", + " - clwmr\n", + " varmap_file: \"{{ user.PARMdir }}/ufs_utils/varmap_tables/GFSphys_var_map.txt\"\n", + " vcoord_file_target_grid: \"path/to/global_hyblev.165.txt\"\n", + " rundir: '{{ workflow.EXPTDIR }}/make_ics'\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/exp-config/base-file.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "dece324c-ee0e-4c0b-9913-79d63c36ec4e", + "metadata": {}, + "source": [ + "To produce ICs compatible with the FV3_RAP physics suite instead, this partial configuration can be used to update the base:\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e7fbe2c2-90af-446c-b398-621d91c763c9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "task_make_ics:\n", + " chgres_cube:\n", + " namelist:\n", + " update_values:\n", + " config:\n", + " varmap_file: \"{{ user.PARMdir }}/ufs_utils/varmap_tables/GSDphys_var_map.txt\"\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/exp-config/fv3-rap-physics.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "cfe8bf85-7f11-40d3-af95-f60ddf12318f", + "metadata": {}, + "source": [ + "User- and experiment-specific values can be supplied via a third configuration file:\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d3bbf762-b4fc-49d8-90e4-e7851c9da49a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user:\n", + " ACCOUNT: zrtrr\n", + " MACHINE: hera\n", + " PARMdir: /path/to/ufs-srweather-app/parm\n", + "workflow: \n", + " EXPTDIR: /path/to/my/output\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/exp-config/user.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "a3d1ca1f-1750-4a67-94d4-99d2c3c9db91", + "metadata": {}, + "source": [ + "Structuring the configuration as a hierarchy of increasing specificity provides a better user experience through separation of concerns: 
Users can see why certain values are changing, and can mix together app-supplied fragments with known-good values into larger experiment configurations.\n", + "\n", + "Here, we start by instantiating a `YAMLConfig` object from the most general base config file, which contains unrendered Jinja2 expressions and is missing certain user- and experiment-specific values:\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4bda78dc-33ee-4a23-82a8-271b40abca7b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "task_make_ics:\n", + " chgres_cube:\n", + " execution:\n", + " executable: execdir/chgres_cube\n", + " namelist:\n", + " update_values:\n", + " config:\n", + " cycle_day: !int '{{ cycle.strftime(''%d'') }}'\n", + " cycle_hour: !int '{{ cycle.strftime(''%H'') }}'\n", + " cycle_mon: !int '{{ cycle.strftime(''%m'') }}'\n", + " convert_atm: true\n", + " convert_nst: true\n", + " convert_sfc: true\n", + " data_dir_input_grid: '{{ task_make_ics.chgres_cube.rundir }}'\n", + " external_model: GFS\n", + " input_type: gaussian_nemsio\n", + " mosaic_file_target_grid: path/to/example_mosaic.halo.nc\n", + " tg3_from_soil: false\n", + " tracers:\n", + " - sphum\n", + " - liq_wat\n", + " tracers_input:\n", + " - spfh\n", + " - clwmr\n", + " varmap_file: '{{ user.PARMdir }}/ufs_utils/varmap_tables/GFSphys_var_map.txt'\n", + " vcoord_file_target_grid: path/to/global_hyblev.165.txt\n", + " rundir: '{{ workflow.EXPTDIR }}/make_ics'\n" + ] + } + ], + "source": [ + "experiment_config = get_yaml_config('fixtures/exp-config/base-file.yaml')\n", + "print(experiment_config)" + ] + }, + { + "cell_type": "markdown", + "id": "bc6e47f6-ed03-41ef-bacf-b1790b5bf56f", + "metadata": {}, + "source": [ + "Next, we define a list of additional config files, iterate over those, and update the base config with each, in turn. 
Note that, if the configs share any keys, the values from the update will override and replace existing ones. For example, the original `varmap_file:` path to file `GFSphys_var_map.txt` is updated with a path to file `GSDphys_var_map.txt`:\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e863d5ac-f727-4d91-a4bd-9bf813d35e6c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "task_make_ics:\n", + " chgres_cube:\n", + " execution:\n", + " executable: execdir/chgres_cube\n", + " namelist:\n", + " update_values:\n", + " config:\n", + " cycle_day: !int '{{ cycle.strftime(''%d'') }}'\n", + " cycle_hour: !int '{{ cycle.strftime(''%H'') }}'\n", + " cycle_mon: !int '{{ cycle.strftime(''%m'') }}'\n", + " convert_atm: true\n", + " convert_nst: true\n", + " convert_sfc: true\n", + " data_dir_input_grid: '{{ task_make_ics.chgres_cube.rundir }}'\n", + " external_model: GFS\n", + " input_type: gaussian_nemsio\n", + " mosaic_file_target_grid: path/to/example_mosaic.halo.nc\n", + " tg3_from_soil: false\n", + " tracers:\n", + " - sphum\n", + " - liq_wat\n", + " tracers_input:\n", + " - spfh\n", + " - clwmr\n", + " varmap_file: '{{ user.PARMdir }}/ufs_utils/varmap_tables/GSDphys_var_map.txt'\n", + " vcoord_file_target_grid: path/to/global_hyblev.165.txt\n", + " rundir: '{{ workflow.EXPTDIR }}/make_ics'\n", + "user:\n", + " ACCOUNT: zrtrr\n", + " MACHINE: hera\n", + " PARMdir: /path/to/ufs-srweather-app/parm\n", + "workflow:\n", + " EXPTDIR: /path/to/my/output\n" + ] + } + ], + "source": [ + "config_files = [\n", + " 'fixtures/exp-config/fv3-rap-physics.yaml',\n", + " 'fixtures/exp-config/user.yaml'\n", + "]\n", + "for config_file in config_files:\n", + " config = get_yaml_config(config_file)\n", + " experiment_config.update_from(config)\n", + "\n", + "print(experiment_config)" + ] + }, + { + "cell_type": "markdown", + "id": "aa43fe3c-6d35-49c2-b2fc-20c178ed30c3", + "metadata": {}, + "source": [ + "Once the 
hierarchy of configs is merged, we call the `dereference()` method to render Jinja2 expressions into final values. Keys like `varmap_file:` and `rundir:` have their values rendered using references to the `PARMdir` and `EXPTDIR` keys in the `user` and `workflow` sections, respectively. Expressions with cycle-specific references remain, and will be rendered at run time.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0fc07baf-1094-4d8c-a51e-c4e541ae4df6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "task_make_ics:\n", + " chgres_cube:\n", + " execution:\n", + " executable: execdir/chgres_cube\n", + " namelist:\n", + " update_values:\n", + " config:\n", + " cycle_day: !int '{{ cycle.strftime(''%d'') }}'\n", + " cycle_hour: !int '{{ cycle.strftime(''%H'') }}'\n", + " cycle_mon: !int '{{ cycle.strftime(''%m'') }}'\n", + " convert_atm: true\n", + " convert_nst: true\n", + " convert_sfc: true\n", + " data_dir_input_grid: /path/to/my/output/make_ics\n", + " external_model: GFS\n", + " input_type: gaussian_nemsio\n", + " mosaic_file_target_grid: path/to/example_mosaic.halo.nc\n", + " tg3_from_soil: false\n", + " tracers:\n", + " - sphum\n", + " - liq_wat\n", + " tracers_input:\n", + " - spfh\n", + " - clwmr\n", + " varmap_file: /path/to/ufs-srweather-app/parm/ufs_utils/varmap_tables/GSDphys_var_map.txt\n", + " vcoord_file_target_grid: path/to/global_hyblev.165.txt\n", + " rundir: /path/to/my/output/make_ics\n", + "user:\n", + " ACCOUNT: zrtrr\n", + " MACHINE: hera\n", + " PARMdir: /path/to/ufs-srweather-app/parm\n", + "workflow:\n", + " EXPTDIR: /path/to/my/output\n" + ] + } + ], + "source": [ + "experiment_config.dereference()\n", + "print(experiment_config)" + ] + }, + { + "cell_type": "markdown", + "id": "5103465d-1a64-4b66-b9cb-09910633f8e1", + "metadata": {}, + "source": [ + "To catch potential configuration errors as early as possible, the `uwtools` driver for `chgres_cube` is called 
to validate the config using a built-in schema. The driver requires a `cycle` parameter with a `datetime` value, and the current time is used here. As the output shows, no schema-validation errors are found.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f9f0c0df-821e-492a-9669-3ac5e43e3151", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:15] INFO Validating config against internal schema: chgres-cube\n", + "[2024-11-19T23:14:15] INFO 0 UW schema-validation errors found in chgres_cube config\n", + "[2024-11-19T23:14:15] INFO Validating config against internal schema: platform\n", + "[2024-11-19T23:14:15] INFO 0 UW schema-validation errors found in platform config\n", + "[2024-11-19T23:14:15] INFO 20241120 05:14:15 chgres_cube valid schema: State: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "Asset(ref=None, ready=. at 0xffff685ad6c0>)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "driver = chgres_cube.ChgresCube(\n", + " config=experiment_config,\n", + " key_path=['task_make_ics'],\n", + " cycle=datetime.now(),\n", + " leadtime=timedelta(hours=6),\n", + ")\n", + "driver.validate()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/fixtures/config/alt-config.nml b/notebooks/fixtures/config/alt-config.nml new file mode 100644 index 000000000..487670727 --- /dev/null +++ b/notebooks/fixtures/config/alt-config.nml @@ -0,0 +1,5 @@ +&memo + sender_id = "{{ id }}" + message = "{{ greeting }}, {{ 
recipient }}!" + sent = .TRUE. +/ diff --git a/notebooks/fixtures/config/base-config.nml b/notebooks/fixtures/config/base-config.nml new file mode 100644 index 000000000..9f30c6505 --- /dev/null +++ b/notebooks/fixtures/config/base-config.nml @@ -0,0 +1,5 @@ +&memo + sender_id = "{{ id }}" + message = "{{ greeting }}, {{ recipient }}!" + sent = .FALSE. +/ diff --git a/notebooks/fixtures/config/fruit-config.ini b/notebooks/fixtures/config/fruit-config.ini new file mode 100644 index 000000000..08973afb0 --- /dev/null +++ b/notebooks/fixtures/config/fruit-config.ini @@ -0,0 +1,4 @@ +[fruit count] +apples = 3 +grapes = {{ grape_count }} +kiwis = 2 diff --git a/notebooks/fixtures/config/get-config.yaml b/notebooks/fixtures/config/get-config.yaml new file mode 100644 index 000000000..be310733b --- /dev/null +++ b/notebooks/fixtures/config/get-config.yaml @@ -0,0 +1,2 @@ +greeting: Hello +recipient: World diff --git a/notebooks/fixtures/config/keys-config.yaml b/notebooks/fixtures/config/keys-config.yaml new file mode 100644 index 000000000..a8f524b40 --- /dev/null +++ b/notebooks/fixtures/config/keys-config.yaml @@ -0,0 +1,4 @@ +keys: + to: + config: + message: "{{ greeting }}, {{ recipient }}!" 
diff --git a/notebooks/fixtures/config/validate.jsonschema b/notebooks/fixtures/config/validate.jsonschema new file mode 100644 index 000000000..8593430f8 --- /dev/null +++ b/notebooks/fixtures/config/validate.jsonschema @@ -0,0 +1,15 @@ +{ + "additionalProperties": false, + "properties": { + "greeting": { + "type": "string" + }, + "recipient": { + "type": "string" + } + }, + "required": [ + "greeting", "recipient" + ], + "type": "object" +} diff --git a/notebooks/fixtures/exp-config/base-file.yaml b/notebooks/fixtures/exp-config/base-file.yaml new file mode 100644 index 000000000..e686a9480 --- /dev/null +++ b/notebooks/fixtures/exp-config/base-file.yaml @@ -0,0 +1,27 @@ +task_make_ics: + chgres_cube: + execution: + executable: "execdir/chgres_cube" + namelist: + update_values: + config: + cycle_day: !int "{{ cycle.strftime('%d') }}" + cycle_hour: !int "{{ cycle.strftime('%H') }}" + cycle_mon: !int "{{ cycle.strftime('%m') }}" + convert_atm: true + convert_nst: true + convert_sfc: true + data_dir_input_grid: "{{ task_make_ics.chgres_cube.rundir }}" + external_model: "GFS" + input_type: "gaussian_nemsio" + mosaic_file_target_grid: "path/to/example_mosaic.halo.nc" + tg3_from_soil: false + tracers: + - sphum + - liq_wat + tracers_input: + - spfh + - clwmr + varmap_file: "{{ user.PARMdir }}/ufs_utils/varmap_tables/GFSphys_var_map.txt" + vcoord_file_target_grid: "path/to/global_hyblev.165.txt" + rundir: '{{ workflow.EXPTDIR }}/make_ics' diff --git a/notebooks/fixtures/exp-config/fv3-rap-physics.yaml b/notebooks/fixtures/exp-config/fv3-rap-physics.yaml new file mode 100644 index 000000000..eff7ed21b --- /dev/null +++ b/notebooks/fixtures/exp-config/fv3-rap-physics.yaml @@ -0,0 +1,6 @@ +task_make_ics: + chgres_cube: + namelist: + update_values: + config: + varmap_file: "{{ user.PARMdir }}/ufs_utils/varmap_tables/GSDphys_var_map.txt" diff --git a/notebooks/fixtures/exp-config/user.yaml b/notebooks/fixtures/exp-config/user.yaml new file mode 100644 index 
000000000..c8815fc35 --- /dev/null +++ b/notebooks/fixtures/exp-config/user.yaml @@ -0,0 +1,6 @@ +user: + ACCOUNT: zrtrr + MACHINE: hera + PARMdir: /path/to/ufs-srweather-app/parm +workflow: + EXPTDIR: /path/to/my/output diff --git a/notebooks/fixtures/fs/copy-config.yaml b/notebooks/fixtures/fs/copy-config.yaml new file mode 100644 index 000000000..ed3879d58 --- /dev/null +++ b/notebooks/fixtures/fs/copy-config.yaml @@ -0,0 +1,3 @@ +file1-copy.nml: fixtures/fs/file1.nml +data/file2-copy.txt: fixtures/fs/file2.txt +data/file3-copy.csv: fixtures/fs/file3.csv diff --git a/notebooks/fixtures/fs/copy-keys-config.yaml b/notebooks/fixtures/fs/copy-keys-config.yaml new file mode 100644 index 000000000..9c4cdc673 --- /dev/null +++ b/notebooks/fixtures/fs/copy-keys-config.yaml @@ -0,0 +1,6 @@ +files: + to: + copy: + file1-copy.nml: fixtures/fs/file1.nml + data/file2-copy.txt: fixtures/fs/file2.txt + data/file3-copy.csv: fixtures/fs/file3.csv diff --git a/notebooks/fixtures/fs/dir-config.yaml b/notebooks/fixtures/fs/dir-config.yaml new file mode 100644 index 000000000..1faec9f52 --- /dev/null +++ b/notebooks/fixtures/fs/dir-config.yaml @@ -0,0 +1,3 @@ +makedirs: + - foo + - bar/baz diff --git a/notebooks/fixtures/fs/dir-keys-config.yaml b/notebooks/fixtures/fs/dir-keys-config.yaml new file mode 100644 index 000000000..c18ae1478 --- /dev/null +++ b/notebooks/fixtures/fs/dir-keys-config.yaml @@ -0,0 +1,6 @@ +path: + to: + dirs: + makedirs: + - foo/bar + - baz diff --git a/notebooks/fixtures/fs/file1.nml b/notebooks/fixtures/fs/file1.nml new file mode 100644 index 000000000..f85ada67c --- /dev/null +++ b/notebooks/fixtures/fs/file1.nml @@ -0,0 +1,6 @@ +&animal + name = 'zebra' + num_legs = 4 + diet_type = 'herbivore' + location = 'Africa' +/ diff --git a/notebooks/fixtures/fs/file2.txt b/notebooks/fixtures/fs/file2.txt new file mode 100644 index 000000000..36fd2ad7e --- /dev/null +++ b/notebooks/fixtures/fs/file2.txt @@ -0,0 +1 @@ +Fun Fact: A group of zebras is called a "zeal" 
or a "dazzle". diff --git a/notebooks/fixtures/fs/file3.csv b/notebooks/fixtures/fs/file3.csv new file mode 100644 index 000000000..6556f0598 --- /dev/null +++ b/notebooks/fixtures/fs/file3.csv @@ -0,0 +1,4 @@ +id,location,age +B524,Botswana,12 +N290,Namibia,4 +K296,Kenya,23 diff --git a/notebooks/fixtures/fs/link-config.yaml b/notebooks/fixtures/fs/link-config.yaml new file mode 100644 index 000000000..401e77fda --- /dev/null +++ b/notebooks/fixtures/fs/link-config.yaml @@ -0,0 +1,3 @@ +file1-link.nml: fixtures/fs/file1.nml +file2-link.txt: fixtures/fs/file2.txt +data/file3-link.csv: fixtures/fs/file3.csv diff --git a/notebooks/fixtures/fs/link-keys-config.yaml b/notebooks/fixtures/fs/link-keys-config.yaml new file mode 100644 index 000000000..6abffc60c --- /dev/null +++ b/notebooks/fixtures/fs/link-keys-config.yaml @@ -0,0 +1,6 @@ +files: + to: + link: + file1-link.nml: fixtures/fs/file1.nml + file2-link.txt: fixtures/fs/file2.txt + data/file3-link.csv: fixtures/fs/file3.csv diff --git a/notebooks/fixtures/rocoto/ent-cs-workflow.yaml b/notebooks/fixtures/rocoto/ent-cs-workflow.yaml new file mode 100644 index 000000000..4aad5d5b8 --- /dev/null +++ b/notebooks/fixtures/rocoto/ent-cs-workflow.yaml @@ -0,0 +1,16 @@ +workflow: + attrs: + realtime: false + scheduler: slurm + cycledef: + - spec: 202410290000 202410300000 06:00:00 + entities: + LOG: "@Y-@m-@d/test@X.log" + log: + cyclestr: + value: logs/&LOG; + tasks: + task_greet: + command: echo Hello, World! + cores: 1 + walltime: 00:00:10 diff --git a/notebooks/fixtures/rocoto/ent-workflow.yaml b/notebooks/fixtures/rocoto/ent-workflow.yaml new file mode 100644 index 000000000..e6020aabb --- /dev/null +++ b/notebooks/fixtures/rocoto/ent-workflow.yaml @@ -0,0 +1,14 @@ +workflow: + attrs: + realtime: false + scheduler: slurm + cycledef: + - spec: 202410290000 202410300000 06:00:00 + entities: + LOG: "2024-10-29/test06:00:00.log" + log: logs/&LOG; + tasks: + task_greet: + command: echo Hello, World! 
+ cores: 1 + walltime: 00:00:10 diff --git a/notebooks/fixtures/rocoto/err-workflow.xml b/notebooks/fixtures/rocoto/err-workflow.xml new file mode 100644 index 000000000..5f8f61473 --- /dev/null +++ b/notebooks/fixtures/rocoto/err-workflow.xml @@ -0,0 +1,10 @@ + + + logs/test.log + + 1 + 00:00:10 + echo Hello, World! + greet + + diff --git a/notebooks/fixtures/rocoto/err-workflow.yaml b/notebooks/fixtures/rocoto/err-workflow.yaml new file mode 100644 index 000000000..a6e37c180 --- /dev/null +++ b/notebooks/fixtures/rocoto/err-workflow.yaml @@ -0,0 +1,9 @@ +workflow: + attrs: + scheduler: slurm + cycledef: + - spec: 202410290000 202410300000 06:00:00 + tasks: + task_greet: + cores: 1 + walltime: 00:00:10 diff --git a/notebooks/fixtures/rocoto/meta-nested-workflow.yaml b/notebooks/fixtures/rocoto/meta-nested-workflow.yaml new file mode 100644 index 000000000..55a716d5f --- /dev/null +++ b/notebooks/fixtures/rocoto/meta-nested-workflow.yaml @@ -0,0 +1,18 @@ +workflow: + attrs: + realtime: false + scheduler: slurm + cycledef: + - spec: 202410290000 202410300000 06:00:00 + log: logs/test.log + tasks: + metatask_process: + var: + process: bake cool store + metatask_process_food: + var: + food: cookies cakes + task_#process#_#food#: + command: "echo It's time to #process# the #food#." + nodes: 1:ppn=4 + walltime: 00:00:30 diff --git a/notebooks/fixtures/rocoto/meta-workflow.yaml b/notebooks/fixtures/rocoto/meta-workflow.yaml new file mode 100644 index 000000000..1071b2653 --- /dev/null +++ b/notebooks/fixtures/rocoto/meta-workflow.yaml @@ -0,0 +1,16 @@ +workflow: + attrs: + realtime: false + scheduler: slurm + cycledef: + - spec: 202410290000 202410300000 06:00:00 + log: logs/test.log + tasks: + metatask_breakfast: + var: + food: biscuits OJ hashbrowns + prepare: bake pour fry + task_#food#: + command: "echo It's time for breakfast, #prepare# the #food#!" 
+ cores: 1 + walltime: 00:00:03 diff --git a/notebooks/fixtures/rocoto/simple-workflow.xml b/notebooks/fixtures/rocoto/simple-workflow.xml new file mode 100644 index 000000000..62326060b --- /dev/null +++ b/notebooks/fixtures/rocoto/simple-workflow.xml @@ -0,0 +1,11 @@ + + + 202410290000 202410300000 06:00:00 + logs/test.log + + 1 + 00:00:10 + echo Hello, World! + greet + + diff --git a/notebooks/fixtures/rocoto/simple-workflow.yaml b/notebooks/fixtures/rocoto/simple-workflow.yaml new file mode 100644 index 000000000..47e7c55b6 --- /dev/null +++ b/notebooks/fixtures/rocoto/simple-workflow.yaml @@ -0,0 +1,12 @@ +workflow: + attrs: + realtime: false + scheduler: slurm + cycledef: + - spec: 202410290000 202410300000 06:00:00 + log: logs/test.log + tasks: + task_greet: + command: echo Hello, World! + cores: 1 + walltime: 00:00:10 diff --git a/notebooks/fixtures/rocoto/tasks-deps-workflow.yaml b/notebooks/fixtures/rocoto/tasks-deps-workflow.yaml new file mode 100644 index 000000000..3161e86c5 --- /dev/null +++ b/notebooks/fixtures/rocoto/tasks-deps-workflow.yaml @@ -0,0 +1,31 @@ +workflow: + attrs: + realtime: false + scheduler: slurm + cycledef: + - spec: 202410290000 202410300000 06:00:00 + log: logs/test.log + tasks: + task_bacon: + command: "echo Cooking bacon..." + cores: 1 + walltime: 00:00:10 + task_eggs: + command: "echo Cooking eggs..." + nodes: 1:ppn=4 + walltime: 00:00:10 + dependency: + datadep: + value: eggs_recipe.txt + task_serve: + command: "echo Serving breakfast..." 
+ cores: 2 + walltime: 00:00:01 + dependency: + and: + taskdep_eggs: + attrs: + task: bacon + taskdep_bacon: + attrs: + task: eggs diff --git a/notebooks/fixtures/rocoto/tasks-workflow.yaml b/notebooks/fixtures/rocoto/tasks-workflow.yaml new file mode 100644 index 000000000..f415d700d --- /dev/null +++ b/notebooks/fixtures/rocoto/tasks-workflow.yaml @@ -0,0 +1,16 @@ +workflow: + attrs: + realtime: false + scheduler: slurm + cycledef: + - spec: 202410290000 202410300000 06:00:00 + log: logs/test.log + tasks: + task_bacon: + command: "echo Cooking bacon..." + cores: 1 + walltime: 00:00:10 + task_eggs: + command: "echo Cooking eggs..." + nodes: 1:ppn=4 + walltime: 00:00:10 diff --git a/notebooks/fixtures/template/render-complete-1.yaml b/notebooks/fixtures/template/render-complete-1.yaml new file mode 100644 index 000000000..2840700bf --- /dev/null +++ b/notebooks/fixtures/template/render-complete-1.yaml @@ -0,0 +1,3 @@ +user: + name: John Doe + favorite_food: burritos diff --git a/notebooks/fixtures/template/render-complete-2.yaml b/notebooks/fixtures/template/render-complete-2.yaml new file mode 100644 index 000000000..df6d5f0bd --- /dev/null +++ b/notebooks/fixtures/template/render-complete-2.yaml @@ -0,0 +1,3 @@ +user: + name: Jane Doe + favorite_food: tamales diff --git a/notebooks/fixtures/template/render-template.yaml b/notebooks/fixtures/template/render-template.yaml new file mode 100644 index 000000000..78ddc7977 --- /dev/null +++ b/notebooks/fixtures/template/render-template.yaml @@ -0,0 +1,3 @@ +user: + name: {{ first }} {{ last }} + favorite_food: {{ food }} diff --git a/notebooks/fixtures/template/render-values.yaml b/notebooks/fixtures/template/render-values.yaml new file mode 100644 index 000000000..5ae0cc34e --- /dev/null +++ b/notebooks/fixtures/template/render-values.yaml @@ -0,0 +1,3 @@ +first: John +last: Doe +food: burritos diff --git a/notebooks/fixtures/template/translate-complete.yaml b/notebooks/fixtures/template/translate-complete.yaml new 
file mode 100644 index 000000000..7841b3c8c --- /dev/null +++ b/notebooks/fixtures/template/translate-complete.yaml @@ -0,0 +1,3 @@ +flowers: + roses: {{ color1 }} + violets: {{ color2 }} diff --git a/notebooks/fixtures/template/translate-template.yaml b/notebooks/fixtures/template/translate-template.yaml new file mode 100644 index 000000000..0eab2bc00 --- /dev/null +++ b/notebooks/fixtures/template/translate-template.yaml @@ -0,0 +1,3 @@ +flowers: + roses: @[color1] + violets: @[color2] diff --git a/notebooks/fs.ipynb b/notebooks/fs.ipynb new file mode 100644 index 000000000..e07f078e0 --- /dev/null +++ b/notebooks/fs.ipynb @@ -0,0 +1,1586 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2c013e12-a6c6-4786-aa50-900f7da77e6b", + "metadata": {}, + "source": [ + "# File System Tool\n", + "\n", + "The `uwtools` API's `fs` module provides functions to copy and link files as well as create directories. \n", + "\n", + "For more information, please see the uwtools.api.fs Read the Docs page.\n", + "\n", + "## Table of Contents\n", + "\n", + "* [Copying Files](#Copying-Files)\n", + " * [Failing to copy](#Failing-to-copy)\n", + " * [Using the `keys` parameter](#Using-the-keys-parameter)\n", + " * [Using the `Copier` class](#Using-the-Copier-class)\n", + "* [Linking Files](#Linking-files)\n", + " * [Failing to link](#Failing-to-link)\n", + " * [Using the `keys` parameter](#Using-the-keys-parameter-)\n", + " * [Using the `Linker` class](#Using-the-Linker-class)\n", + "* [Creating directories](#Creating-directories)\n", + " * [Using the `keys` parameter](#Using-the-keys-parameter--)\n", + " * [Using the `MakeDirs` class](#Using-the-MakeDirs-class)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "77864d80-e6f4-48c2-a5d5-88fc512106a9", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from shutil import rmtree\n", + "from uwtools.api import fs\n", + "from uwtools.api.logging import use_uwtools_logger\n", + "\n", + 
"use_uwtools_logger()" + ] + }, + { + "cell_type": "markdown", + "id": "354cf476-720e-4352-8954-0752fd05250f", + "metadata": {}, + "source": [ + "## Copying Files\n", + "\n", + "The `copy()` function copies files, automatically creating parent directories as needed." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "834654da-dfa9-4997-bcc5-846420381b18", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function copy in module uwtools.api.fs:\n", + "\n", + "copy(config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False, stdin_ok: bool = False) -> bool\n", + " Copy files.\n", + "\n", + " :param config: YAML-file path, or ``dict`` (read ``stdin`` if missing or ``None``).\n", + " :param target_dir: Path to target directory.\n", + " :param cycle: A datetime object to make available for use in the config.\n", + " :param leadtime: A timedelta object to make available for use in the config.\n", + " :param keys: YAML keys leading to file dst/src block.\n", + " :param dry_run: Do not copy files.\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: ``True`` if all copies were created.\n", + "\n" + ] + } + ], + "source": [ + "help(fs.copy)" + ] + }, + { + "cell_type": "markdown", + "id": "0585971b-47c6-48aa-9f1f-d5890cbb2061", + "metadata": {}, + "source": [ + "Files to be copied are specified by a mapping from destination-pathname keys to source-pathname values, either in a YAML file or a Python ``dict``." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a959522f-d769-48c6-918d-d42776b3600a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "file1-copy.nml: fixtures/fs/file1.nml\n", + "data/file2-copy.txt: fixtures/fs/file2.txt\n", + "data/file3-copy.csv: fixtures/fs/file3.csv\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/fs/copy-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "5885fd76-78de-4814-ad7b-dfd6df18a07d", + "metadata": {}, + "source": [ + "With these instructions, `copy()` creates a copy of each given file with the given name and in the given subdirectory. Copies are created in the directory indicated by `target_dir`. Paths can be provided either as a string or Path object. Any directories in the targeted paths for copying will be created if they don't already exist. `True` is returned upon a successful copy." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a6aff6e3-815c-496e-81d7-d8756be9c232", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: files-to-stage\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO File copies: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO File copies: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-target/file1-copy.nml: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-target/file1-copy.nml: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-target/file1-copy.nml: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-target/file1-copy.nml: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> 
tmp/copy-target/file1-copy.nml: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-target/data/file2-copy.txt: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-target/data/file2-copy.txt: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-target/data/file2-copy.txt: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-target/data/file2-copy.txt: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-target/data/file2-copy.txt: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-target/data/file3-copy.csv: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-target/data/file3-copy.csv: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-target/data/file3-copy.csv: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-target/data/file3-copy.csv: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-target/data/file3-copy.csv: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO File copies: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/copy-target\", ignore_errors=True)\n", + "fs.copy(\n", + " config=\"fixtures/fs/copy-config.yaml\",\n", + " target_dir=Path(\"tmp/copy-target\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5c05369f-3cf6-4576-8689-df98ed9d151d", + "metadata": {}, + "source": [ + "Examining the target directory, we can see that the copies of the files have been made with their specified names and in their specified directories." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e67fdb49-beef-4006-9e36-1a22829f21fc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/copy-target\u001b[0m\n", + "├── \u001b[01;34mdata\u001b[0m\n", + "│   ├── \u001b[00mfile2-copy.txt\u001b[0m\n", + "│   └── \u001b[00mfile3-copy.csv\u001b[0m\n", + "└── \u001b[00mfile1-copy.nml\u001b[0m\n", + "\n", + "1 directory, 3 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/copy-target" + ] + }, + { + "cell_type": "markdown", + "id": "90e17445-3d87-4894-8211-8c737f7579d6", + "metadata": {}, + "source": [ + "### Failing to copy\n", + "\n", + "A configuration can instead be provided as a dictionary, as this example demonstrates. However, `missing-file.nml` does not exist. The function provides a warning and returns `False`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b1fa6662-c4f3-4f7a-9b5d-8ee258cd6e0e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: files-to-stage\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO File copies: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO File copies: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/missing-file.nml -> tmp/copy-target/missing-copy.nml: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/missing-file.nml -> tmp/copy-target/missing-copy.nml: Checking requirements\n", + "[2024-11-19T23:14:42] WARNING File fixtures/fs/missing-file.nml: State: Not Ready (external asset)\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/missing-file.nml -> tmp/copy-target/missing-copy.nml: Requirement(s) not ready\n", + "[2024-11-19T23:14:42] WARNING Copy fixtures/fs/missing-file.nml -> tmp/copy-target/missing-copy.nml: Final
state: Not Ready\n", + "[2024-11-19T23:14:42] WARNING File copies: Final state: Not Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fs.copy(\n", + " config={\"missing-copy.nml\":\"fixtures/fs/missing-file.nml\"},\n", + " target_dir=\"tmp/copy-target\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6f93ced5-0953-462b-9739-e74333c94e64", + "metadata": {}, + "source": [ + "The missing copy does not appear in the target directory." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ab122bde-f483-4981-8308-fc6d4a90e50d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/copy-target\u001b[0m\n", + "├── \u001b[01;34mdata\u001b[0m\n", + "│   ├── \u001b[00mfile2-copy.txt\u001b[0m\n", + "│   └── \u001b[00mfile3-copy.csv\u001b[0m\n", + "└── \u001b[00mfile1-copy.nml\u001b[0m\n", + "\n", + "1 directory, 3 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/copy-target" + ] + }, + { + "cell_type": "markdown", + "id": "b2527839-c217-428d-a686-c684a682c0e8", + "metadata": {}, + "source": [ + "### Using the `keys` parameter\n", + "\n", + "Consider the following configuration, in which the destination/source mapping is not located at the top level of the configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1f567844-ff8d-4e7f-87be-dffae9e15643", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "files:\n", + " to:\n", + " copy:\n", + " file1-copy.nml: fixtures/fs/file1.nml\n", + " data/file2-copy.txt: fixtures/fs/file2.txt\n", + " data/file3-copy.csv: fixtures/fs/file3.csv\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/fs/copy-keys-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "5311866f-a1f5-4243-81a8-2c52172e091a", + "metadata": {}, + "source": [ + 
"Without additional information, `copy()` would raise a `UWConfigError` given this configuration. However, the list of keys leading to the destination/source mapping can be provided with the `keys` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "dda3e407-a1a2-4b11-823a-3b6fdc39f67a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: files-to-stage\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO File copies: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO File copies: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-keys-target/file1-copy.nml: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-keys-target/file1-copy.nml: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-keys-target/file1-copy.nml: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-keys-target/file1-copy.nml: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copy-keys-target/file1-copy.nml: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-keys-target/data/file2-copy.txt: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-keys-target/data/file2-copy.txt: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-keys-target/data/file2-copy.txt: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-keys-target/data/file2-copy.txt: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copy-keys-target/data/file2-copy.txt: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Copy 
fixtures/fs/file3.csv -> tmp/copy-keys-target/data/file3-copy.csv: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-keys-target/data/file3-copy.csv: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-keys-target/data/file3-copy.csv: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-keys-target/data/file3-copy.csv: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copy-keys-target/data/file3-copy.csv: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO File copies: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/copy-keys-target\", ignore_errors=True)\n", + "fs.copy(\n", + " config=\"fixtures/fs/copy-keys-config.yaml\",\n", + " target_dir=\"tmp/copy-keys-target\",\n", + " keys=[\"files\",\"to\",\"copy\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d62ae369-69f9-4003-b5d3-d7b5908f23af", + "metadata": {}, + "source": [ + "With this information provided, the copy is successful." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "59c67e22-fe98-4e74-8b0b-b40e24a804e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/copy-keys-target\u001b[0m\n", + "├── \u001b[01;34mdata\u001b[0m\n", + "│   ├── \u001b[00mfile2-copy.txt\u001b[0m\n", + "│   └── \u001b[00mfile3-copy.csv\u001b[0m\n", + "└── \u001b[00mfile1-copy.nml\u001b[0m\n", + "\n", + "1 directory, 3 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/copy-keys-target" + ] + }, + { + "cell_type": "markdown", + "id": "1a1adba8-2daf-4fb6-b224-980b134f011c", + "metadata": {}, + "source": [ + "### Using the `Copier` class\n", + "\n", + "An alternative to using `copy()` is to instantiate a `Copier` object, then call its `go()` method." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7434dee7-fb52-4d9b-b2a1-d414165f3186", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on class Copier in module uwtools.fs:\n", + "\n", + "class Copier(FileStager)\n", + " | Copier(config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False) -> None\n", + " |\n", + " | Stage files by copying.\n", + " |\n", + " | Method resolution order:\n", + " | Copier\n", + " | FileStager\n", + " | Stager\n", + " | abc.ABC\n", + " | builtins.object\n", + " |\n", + " | Methods defined here:\n", + " |\n", + " | go(self)\n", + " | Copy files.\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data and other attributes defined here:\n", + " |\n", + " | __abstractmethods__ = frozenset()\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Methods inherited from Stager:\n", +
" |\n", + " | __init__(self, config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False) -> None\n", + " | Stage files and directories.\n", + " |\n", + " | :param config: YAML-file path, or ``dict`` (read ``stdin`` if missing or ``None``).\n", + " | :param target_dir: Path to target directory.\n", + " | :param cycle: A ``datetime`` object to make available for use in the config.\n", + " | :param leadtime: A ``timedelta`` object to make available for use in the config.\n", + " | :param keys: YAML keys leading to file dst/src block.\n", + " | :param dry_run: Do not copy files.\n", + " | :raises: ``UWConfigError`` if config fails validation.\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data descriptors inherited from Stager:\n", + " |\n", + " | __dict__\n", + " | dictionary for instance variables\n", + " |\n", + " | __weakref__\n", + " | list of weak references to the object\n", + "\n" + ] + } + ], + "source": [ + "help(fs.Copier)" + ] + }, + { + "cell_type": "markdown", + "id": "061ac341-96cb-4af6-94ce-4f1e4d342b63", + "metadata": {}, + "source": [ + "A `Copier` object is instantiated using the same parameters as `copy()`, but copying is not performed until `Copier.go()` is called." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "578cc091-c0eb-4293-8dbd-ee74a69a0940", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: files-to-stage\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO File copies: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO File copies: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copier-target/file1-copy.nml: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copier-target/file1-copy.nml: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copier-target/file1-copy.nml: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copier-target/file1-copy.nml: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file1.nml -> tmp/copier-target/file1-copy.nml: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copier-target/data/file2-copy.txt: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copier-target/data/file2-copy.txt: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copier-target/data/file2-copy.txt: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copier-target/data/file2-copy.txt: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file2.txt -> tmp/copier-target/data/file2-copy.txt: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copier-target/data/file3-copy.csv: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copier-target/data/file3-copy.csv: Checking requirements\n", + "[2024-11-19T23:14:42] 
INFO Copy fixtures/fs/file3.csv -> tmp/copier-target/data/file3-copy.csv: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copier-target/data/file3-copy.csv: Executing\n", + "[2024-11-19T23:14:42] INFO Copy fixtures/fs/file3.csv -> tmp/copier-target/data/file3-copy.csv: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO File copies: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "[Asset(ref=PosixPath('tmp/copier-target/file1-copy.nml'), ready=),\n", + " Asset(ref=PosixPath('tmp/copier-target/data/file2-copy.txt'), ready=),\n", + " Asset(ref=PosixPath('tmp/copier-target/data/file3-copy.csv'), ready=)]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/copier-target\", ignore_errors=True)\n", + "copier = fs.Copier(\n", + " config=\"fixtures/fs/copy-config.yaml\",\n", + " target_dir=\"tmp/copier-target\"\n", + ")\n", + "copier.go()" + ] + }, + { + "cell_type": "markdown", + "id": "842f638a-bf97-4d40-bb40-0f37cc03ad9b", + "metadata": {}, + "source": [ + "Once `Copier.go()` is called, copies are created in the same way as they would have with `copy()`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c6aaac2b-bb72-433d-8ad4-349a1056cfa3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/copier-target\u001b[0m\n", + "├── \u001b[01;34mdata\u001b[0m\n", + "│   ├── \u001b[00mfile2-copy.txt\u001b[0m\n", + "│   └── \u001b[00mfile3-copy.csv\u001b[0m\n", + "└── \u001b[00mfile1-copy.nml\u001b[0m\n", + "\n", + "1 directory, 3 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/copier-target" + ] + }, + { + "cell_type": "markdown", + "id": "76f144f9-0a2f-48ad-ae83-14bd7a97353e", + "metadata": {}, + "source": [ + "## Linking files\n", + "\n", + "The `link()` function creates symbolic links to files, automatically creating parent directories as needed." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "404d051e-18e1-4927-a24f-cbe98ab01ce9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function link in module uwtools.api.fs:\n", + "\n", + "link(config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False, stdin_ok: bool = False) -> bool\n", + " Link files.\n", + "\n", + " :param config: YAML-file path, or ``dict`` (read ``stdin`` if missing or ``None``).\n", + " :param target_dir: Path to target directory.\n", + " :param cycle: A datetime object to make available for use in the config.\n", + " :param leadtime: A timedelta object to make available for use in the config.\n", + " :param keys: YAML keys leading to file dst/src block.\n", + " :param dry_run: Do not link files.\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: ``True`` if all links were created.\n", + "\n" + ] + } + ], + "source": [ + "help(fs.link)" + ] + }, + { + "cell_type":
"markdown", + "id": "710edac4-ba97-4599-a0f3-bc75ba2210e2", + "metadata": {}, + "source": [ + "Links to be created are specified by a mapping from destination-pathname keys to source-pathname values, either in a YAML file or a Python ``dict``." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "097b896c-aef4-48ac-aea5-eb2d463d172b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "file1-link.nml: fixtures/fs/file1.nml\n", + "file2-link.txt: fixtures/fs/file2.txt\n", + "data/file3-link.csv: fixtures/fs/file3.csv\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/fs/link-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "1acd36eb-a5e7-4451-9d22-bfe8798cb4b0", + "metadata": {}, + "source": [ + "With these instructions, `link()` creates a symbolic link of each given file with the given name and in the given subdirectory. Links are created in the directory indicated by `target_dir`. Paths can be provided either as a string or Path object. Any directories in the targeted paths will be created if they don't already exist. `True` is returned upon a successful run."
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "b6317f8a-c5fb-4114-93fa-236df3fd8805", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: files-to-stage\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO File links: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO File links: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file1-link.nml -> fixtures/fs/file1.nml: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file1-link.nml -> fixtures/fs/file1.nml: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file1-link.nml -> fixtures/fs/file1.nml: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file1-link.nml -> fixtures/fs/file1.nml: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file1-link.nml -> fixtures/fs/file1.nml: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file2-link.txt -> fixtures/fs/file2.txt: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file2-link.txt -> fixtures/fs/file2.txt: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file2-link.txt -> fixtures/fs/file2.txt: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file2-link.txt -> fixtures/fs/file2.txt: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/file2-link.txt -> fixtures/fs/file2.txt: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/data/file3-link.csv -> fixtures/fs/file3.csv: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/data/file3-link.csv -> fixtures/fs/file3.csv: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/data/file3-link.csv -> 
fixtures/fs/file3.csv: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/data/file3-link.csv -> fixtures/fs/file3.csv: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/data/file3-link.csv -> fixtures/fs/file3.csv: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO File links: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/link-target\", ignore_errors=True)\n", + "fs.link(\n", + " config=Path(\"fixtures/fs/link-config.yaml\"),\n", + " target_dir=\"tmp/link-target\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "28fe0009-11cb-4ec4-b203-221e2a59cedb", + "metadata": {}, + "source": [ + "Examining the target directory, we can see that the links have been created with their specified names and in their specified directories." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b31ca50e-01c4-4665-81e0-de70a75ceb2a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/link-target\u001b[0m\n", + "├── \u001b[01;34mdata\u001b[0m\n", + "│   └── \u001b[01;36mfile3-link.csv\u001b[0m -> \u001b[00m../../../fixtures/fs/file3.csv\u001b[0m\n", + "├── \u001b[01;36mfile1-link.nml\u001b[0m -> \u001b[00m../../fixtures/fs/file1.nml\u001b[0m\n", + "└── \u001b[01;36mfile2-link.txt\u001b[0m -> \u001b[00m../../fixtures/fs/file2.txt\u001b[0m\n", + "\n", + "1 directory, 3 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/link-target" + ] + }, + { + "cell_type": "markdown", + "id": "e0661083-6e83-490f-82c5-19098b0f1b3c", + "metadata": {}, + "source": [ + "### Failing to link\n", + "\n", + "A configuration can instead be provided as a dictionary, as this example demonstrates. However, `missing-file.nml` does not exist. The function provides a warning and returns `False`."
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "59d93133-891d-4903-a965-23607cc72474", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: files-to-stage\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO File links: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO File links: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/missing-link.nml -> fixtures/fs/missing-file.nml: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/missing-link.nml -> fixtures/fs/missing-file.nml: Checking requirements\n", + "[2024-11-19T23:14:42] WARNING Filesystem item fixtures/fs/missing-file.nml: State: Not Ready (external asset)\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-target/missing-link.nml -> fixtures/fs/missing-file.nml: Requirement(s) not ready\n", + "[2024-11-19T23:14:42] WARNING Link tmp/link-target/missing-link.nml -> fixtures/fs/missing-file.nml: Final state: Not Ready\n", + "[2024-11-19T23:14:42] WARNING File links: Final state: Not Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fs.link(\n", + " config={\"missing-link.nml\":\"fixtures/fs/missing-file.nml\"},\n", + " target_dir=\"tmp/link-target\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7a3ca573-c160-4afa-8a96-4165b01eecfe", + "metadata": {}, + "source": [ + "The missing link does not appear in the target directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "7a6e94b9-1161-4f41-9333-55736aec07b3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/link-target\u001b[0m\n", + "├── \u001b[01;34mdata\u001b[0m\n", + "│   └── \u001b[01;36mfile3-link.csv\u001b[0m -> \u001b[00m../../../fixtures/fs/file3.csv\u001b[0m\n", + "├── \u001b[01;36mfile1-link.nml\u001b[0m -> \u001b[00m../../fixtures/fs/file1.nml\u001b[0m\n", + "└── \u001b[01;36mfile2-link.txt\u001b[0m -> \u001b[00m../../fixtures/fs/file2.txt\u001b[0m\n", + "\n", + "1 directory, 3 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/link-target" + ] + }, + { + "cell_type": "markdown", + "id": "b887c95e-f71f-4a26-b709-d410a3c30c2e", + "metadata": {}, + "source": [ + "### Using the `keys` parameter \n", + "\n", + "Consider the following configuration, in which the destination/source mapping is not located at the top level of the configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1de6cbd4-3b10-4b18-a8a5-c0cd21064bd3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "files:\n", + " to:\n", + " link:\n", + " file1-link.nml: fixtures/fs/file1.nml\n", + " file2-link.txt: fixtures/fs/file2.txt\n", + " data/file3-link.csv: fixtures/fs/file3.csv\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/fs/link-keys-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "9977ee46-17da-419e-821b-a32fac5139f8", + "metadata": {}, + "source": [ + "Without additional information, `link()` would raise a `UWConfigError` given this configuration. 
However, the list of keys leading to the destination/source mapping can be provided with the `keys` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "ee4bf2a3-4101-4d95-afd5-120e95e64550", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: files-to-stage\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO File links: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO File links: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file1-link.nml -> fixtures/fs/file1.nml: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file1-link.nml -> fixtures/fs/file1.nml: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file1-link.nml -> fixtures/fs/file1.nml: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file1-link.nml -> fixtures/fs/file1.nml: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file1-link.nml -> fixtures/fs/file1.nml: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file2-link.txt -> fixtures/fs/file2.txt: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file2-link.txt -> fixtures/fs/file2.txt: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file2-link.txt -> fixtures/fs/file2.txt: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file2-link.txt -> fixtures/fs/file2.txt: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/file2-link.txt -> fixtures/fs/file2.txt: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/data/file3-link.csv -> fixtures/fs/file3.csv: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO 
Link tmp/link-keys-target/data/file3-link.csv -> fixtures/fs/file3.csv: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/data/file3-link.csv -> fixtures/fs/file3.csv: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/data/file3-link.csv -> fixtures/fs/file3.csv: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/link-keys-target/data/file3-link.csv -> fixtures/fs/file3.csv: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO File links: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/link-keys-target\", ignore_errors=True)\n", + "fs.link(\n", + " config=\"fixtures/fs/link-keys-config.yaml\",\n", + " target_dir=\"tmp/link-keys-target\",\n", + " keys=[\"files\",\"to\",\"link\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d331d715-b6f1-4a9a-a207-2fb296aec4af", + "metadata": {}, + "source": [ + "With this information provided, the links are successfully created." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "1393ae73-798b-49c0-9b68-e8ed28ad1df0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/link-keys-target\u001b[0m\n", + "├── \u001b[01;34mdata\u001b[0m\n", + "│   └── \u001b[01;36mfile3-link.csv\u001b[0m -> \u001b[00m../../../fixtures/fs/file3.csv\u001b[0m\n", + "├── \u001b[01;36mfile1-link.nml\u001b[0m -> \u001b[00m../../fixtures/fs/file1.nml\u001b[0m\n", + "└── \u001b[01;36mfile2-link.txt\u001b[0m -> \u001b[00m../../fixtures/fs/file2.txt\u001b[0m\n", + "\n", + "1 directory, 3 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/link-keys-target" + ] + }, + { + "cell_type": "markdown", + "id": "29a9457a-e4f3-460a-b873-cf1bf236c9de", + "metadata": {}, + "source": [ + "### Using the `Linker` class\n", + "\n", + "An alternative to using `link()` is to instantiate a `Linker` object, then call its `go()` method." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "b1e5d3a2-7003-4449-9483-440236f66df7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on class Linker in module uwtools.fs:\n", + "\n", + "class Linker(FileStager)\n", + " | Linker(config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False) -> None\n", + " |\n", + " | Stage files by linking.\n", + " |\n", + " | Method resolution order:\n", + " | Linker\n", + " | FileStager\n", + " | Stager\n", + " | abc.ABC\n", + " | builtins.object\n", + " |\n", + " | Methods defined here:\n", + " |\n", + " | go(self)\n", + " | Link files.\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data and other attributes defined here:\n", + " |\n", + " |
__abstractmethods__ = frozenset()\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Methods inherited from Stager:\n", + " |\n", + " | __init__(self, config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False) -> None\n", + " | Stage files and directories.\n", + " |\n", + " | :param config: YAML-file path, or ``dict`` (read ``stdin`` if missing or ``None``).\n", + " | :param target_dir: Path to target directory.\n", + " | :param cycle: A ``datetime`` object to make available for use in the config.\n", + " | :param leadtime: A ``timedelta`` object to make available for use in the config.\n", + " | :param keys: YAML keys leading to file dst/src block.\n", + " | :param dry_run: Do not copy files.\n", + " | :raises: ``UWConfigError`` if config fails validation.\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data descriptors inherited from Stager:\n", + " |\n", + " | __dict__\n", + " | dictionary for instance variables\n", + " |\n", + " | __weakref__\n", + " | list of weak references to the object\n", + "\n" + ] + } + ], + "source": [ + "help(fs.Linker)" + ] + }, + { + "cell_type": "markdown", + "id": "3312a98b-9f5d-41bd-ad02-f69d291cc947", + "metadata": {}, + "source": [ + "A `Linker` object is instantiated using the same parameters as `link()`, but links are not created until `Linker.go()` is called." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ecfa9e89-9fbd-4352-babc-dfa5b91afe6a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: files-to-stage\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO File links: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO File links: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file1-link.nml -> fixtures/fs/file1.nml: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file1-link.nml -> fixtures/fs/file1.nml: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file1-link.nml -> fixtures/fs/file1.nml: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file1-link.nml -> fixtures/fs/file1.nml: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file1-link.nml -> fixtures/fs/file1.nml: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file2-link.txt -> fixtures/fs/file2.txt: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file2-link.txt -> fixtures/fs/file2.txt: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file2-link.txt -> fixtures/fs/file2.txt: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file2-link.txt -> fixtures/fs/file2.txt: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/file2-link.txt -> fixtures/fs/file2.txt: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/data/file3-link.csv -> fixtures/fs/file3.csv: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/data/file3-link.csv -> fixtures/fs/file3.csv: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Link 
tmp/linker-target/data/file3-link.csv -> fixtures/fs/file3.csv: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/data/file3-link.csv -> fixtures/fs/file3.csv: Executing\n", + "[2024-11-19T23:14:42] INFO Link tmp/linker-target/data/file3-link.csv -> fixtures/fs/file3.csv: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO File links: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "[Asset(ref=PosixPath('tmp/linker-target/file1-link.nml'), ready=),\n", + " Asset(ref=PosixPath('tmp/linker-target/file2-link.txt'), ready=),\n", + " Asset(ref=PosixPath('tmp/linker-target/data/file3-link.csv'), ready=)]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/linker-target\", ignore_errors=True)\n", + "linker = fs.Linker(\n", + " config=\"fixtures/fs/link-config.yaml\",\n", + " target_dir=\"tmp/linker-target\"\n", + ")\n", + "linker.go()" + ] + }, + { + "cell_type": "markdown", + "id": "8d2cbb32-cabb-498e-b4db-414e3ac2cf1d", + "metadata": {}, + "source": [ + "Once `Linker.go()` is called, links are created in the same way as they would have with `link()`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "6695f7bb-7ab7-42d1-9d2c-0bef7341147d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/linker-target\u001b[0m\n", + "├── \u001b[01;34mdata\u001b[0m\n", + "│   └── \u001b[01;36mfile3-link.csv\u001b[0m -> \u001b[00m../../../fixtures/fs/file3.csv\u001b[0m\n", + "├── \u001b[01;36mfile1-link.nml\u001b[0m -> \u001b[00m../../fixtures/fs/file1.nml\u001b[0m\n", + "└── \u001b[01;36mfile2-link.txt\u001b[0m -> \u001b[00m../../fixtures/fs/file2.txt\u001b[0m\n", + "\n", + "1 directory, 3 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/linker-target" + ] + }, + { + "cell_type": "markdown", + "id": "bd367e2a-e44c-4a5c-9600-4d86719f7d36", + "metadata": {}, + "source": [ + "## Creating directories\n", + "\n", + "The `makedirs()` function creates directories." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "43b381d1-8dc2-4ea6-924c-e21149f05e7f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function makedirs in module uwtools.api.fs:\n", + "\n", + "makedirs(config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False, stdin_ok: bool = False) -> bool\n", + " Make directories.\n", + "\n", + " :param config: YAML-file path, or ``dict`` (read ``stdin`` if missing or ``None``).\n", + " :param target_dir: Path to target directory.\n", + " :param cycle: A datetime object to make available for use in the config.\n", + " :param leadtime: A timedelta object to make available for use in the config.\n", + " :param keys: YAML keys leading to file dst/src block.\n", + " :param dry_run: Do not link files.\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: 
``True`` if all directories were made.\n", + "\n" + ] + } + ], + "source": [ + "help(fs.makedirs)" + ] + }, + { + "cell_type": "markdown", + "id": "83b88d7e-f4cf-4358-98f9-106b47bd5d9f", + "metadata": {}, + "source": [ + "Directories to be created are specified by either a configuration YAML file or a Python ``dict``. A `makedirs` key must be included with a list of directories to create as its value." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2f946927-509f-4cd6-a7ec-2d36f4d17318", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "makedirs:\n", + " - foo\n", + " - bar/baz\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/fs/dir-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "eec8938c-1e2a-482b-aa0e-9e6e89dcf200", + "metadata": {}, + "source": [ + "With these instructions, `makedirs()` creates each directory in the list within the directory indicated by `target_dir`. Paths can be provided either as a string or Path object. `True` is returned upon a successful run." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "fdd4e832-3bc5-4c7a-9b31-e387a4e7d48b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: makedirs\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO Directories: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directories: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/foo: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/foo: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/foo: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/foo: Executing\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/foo: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/bar/baz: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/bar/baz: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/bar/baz: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/bar/baz: Executing\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-target/bar/baz: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Directories: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/dir-target\", ignore_errors=True)\n", + "fs.makedirs(\n", + " config=\"fixtures/fs/dir-config.yaml\",\n", + " target_dir=Path(\"tmp/dir-target\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "df4245f6-7083-45ee-b56e-9697a50db5da", + "metadata": {}, + "source": [ + "Examining the target directory, we can see that the directories have been created with their specified 
names." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "c00ec8cc-964a-498e-bd8f-a3686a468dc3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/dir-target\u001b[0m\n", + "├── \u001b[01;34mbar\u001b[0m\n", + "│   └── \u001b[01;34mbaz\u001b[0m\n", + "└── \u001b[01;34mfoo\u001b[0m\n", + "\n", + "3 directories, 0 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/dir-target" + ] + }, + { + "cell_type": "markdown", + "id": "329e939d-0f6d-412c-a36a-4682fe99609a", + "metadata": {}, + "source": [ + "### Using the `keys` parameter \n", + "\n", + "Consider the following configuration, in which the destination/source mapping is not located at the top level of the configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "3a93956d-0acf-4c37-87bf-83c0d5287644", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "path:\n", + " to:\n", + " dirs:\n", + " makedirs:\n", + " - foo/bar\n", + " - baz\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/fs/dir-keys-config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "909ff3ea-8577-4b91-94fc-6ce6effe4bec", + "metadata": {}, + "source": [ + "Without additional information, `makedirs()` would raise a `UWConfigError` given this configuration. 
However, the list of keys leading to the `makedirs` block can be provided with the `keys` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "91549822-e85e-4d41-8860-1da05d713f75", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: makedirs\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO Directories: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directories: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/foo/bar: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/foo/bar: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/foo/bar: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/foo/bar: Executing\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/foo/bar: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/baz: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/baz: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/baz: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/baz: Executing\n", + "[2024-11-19T23:14:42] INFO Directory tmp/dir-keys-target/baz: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Directories: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/dir-keys-target\", ignore_errors=True)\n", + "fs.makedirs(\n", + " config=\"fixtures/fs/dir-keys-config.yaml\",\n", + " target_dir=\"tmp/dir-keys-target\",\n", + " keys=[\"path\",\"to\",\"dirs\"]\n", + ")" + ] + }, + { + 
"cell_type": "markdown", + "id": "5b7ed362-2edb-438a-bb9f-e2dd7d505379", + "metadata": {}, + "source": [ + "With this information provided, the directories are successfully created." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "cb4ded9c-0de1-4010-af75-fbb7becd3fbc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/dir-keys-target\u001b[0m\n", + "├── \u001b[01;34mbaz\u001b[0m\n", + "└── \u001b[01;34mfoo\u001b[0m\n", + " └── \u001b[01;34mbar\u001b[0m\n", + "\n", + "3 directories, 0 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/dir-keys-target" + ] + }, + { + "cell_type": "markdown", + "id": "742ce55e-fded-4961-931d-49bd75c09901", + "metadata": {}, + "source": [ + "### Using the `MakeDirs` class\n", + "\n", + "An alternative to using `makedirs()` is to instantiate a `MakeDirs` object , then call its `go()` method." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "7fe53736-d8e8-4ca9-ab2b-87729934fc19", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on class MakeDirs in module uwtools.fs:\n", + "\n", + "class MakeDirs(Stager)\n", + " | MakeDirs(config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False) -> None\n", + " |\n", + " | Make directories.\n", + " |\n", + " | Method resolution order:\n", + " | MakeDirs\n", + " | Stager\n", + " | abc.ABC\n", + " | builtins.object\n", + " |\n", + " | Methods defined here:\n", + " |\n", + " | go(self)\n", + " | Make directories.\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data and other attributes defined here:\n", + " |\n", + " | __abstractmethods__ = frozenset()\n", + " |\n", + " | 
----------------------------------------------------------------------\n", + " | Methods inherited from Stager:\n", + " |\n", + " | __init__(self, config: Union[dict, str, pathlib.Path, NoneType] = None, target_dir: Union[str, pathlib.Path, NoneType] = None, cycle: Optional[datetime.datetime] = None, leadtime: Optional[datetime.timedelta] = None, keys: Optional[list[str]] = None, dry_run: bool = False) -> None\n", + " | Stage files and directories.\n", + " |\n", + " | :param config: YAML-file path, or ``dict`` (read ``stdin`` if missing or ``None``).\n", + " | :param target_dir: Path to target directory.\n", + " | :param cycle: A ``datetime`` object to make available for use in the config.\n", + " | :param leadtime: A ``timedelta`` object to make available for use in the config.\n", + " | :param keys: YAML keys leading to file dst/src block.\n", + " | :param dry_run: Do not copy files.\n", + " | :raises: ``UWConfigError`` if config fails validation.\n", + " |\n", + " | ----------------------------------------------------------------------\n", + " | Data descriptors inherited from Stager:\n", + " |\n", + " | __dict__\n", + " | dictionary for instance variables\n", + " |\n", + " | __weakref__\n", + " | list of weak references to the object\n", + "\n" + ] + } + ], + "source": [ + "help(fs.MakeDirs)" + ] + }, + { + "cell_type": "markdown", + "id": "cf3b50de-4a8b-4f51-96bb-a477b2c53430", + "metadata": {}, + "source": [ + "A `MakeDirs` object is instantiated using the same parameters as `makedirs()`, but directories are not created until `MakeDirs.go()` is called." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "950d6b43-6db7-40df-b645-beaa1369cfa4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:14:42] INFO Validating config against internal schema: makedirs\n", + "[2024-11-19T23:14:42] INFO 0 UW schema-validation errors found in fs config\n", + "[2024-11-19T23:14:42] INFO Directories: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directories: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/foo: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/foo: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/foo: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/foo: Executing\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/foo: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/bar/baz: Initial state: Not Ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/bar/baz: Checking requirements\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/bar/baz: Requirement(s) ready\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/bar/baz: Executing\n", + "[2024-11-19T23:14:42] INFO Directory tmp/makedirs-target/bar/baz: Final state: Ready\n", + "[2024-11-19T23:14:42] INFO Directories: Final state: Ready\n" + ] + }, + { + "data": { + "text/plain": [ + "[Asset(ref=PosixPath('tmp/makedirs-target/foo'), ready=),\n", + " Asset(ref=PosixPath('tmp/makedirs-target/bar/baz'), ready=)]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmtree(\"tmp/makedirs-target\", ignore_errors=True)\n", + "dirs_stager = fs.MakeDirs(\n", + " config=\"fixtures/fs/dir-config.yaml\",\n", + " target_dir=\"tmp/makedirs-target\"\n", + ")\n", + "dirs_stager.go()" + ] + }, + { + 
"cell_type": "markdown", + "id": "9ae5f357-1d56-4670-a8d7-8546e73c4efa", + "metadata": {}, + "source": [ + "Once `MakeDirs.go()` is called, directories are created in the same way as they would have with `makedirs()`." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "a61fb9ac-df2f-4e39-9f66-bfb789c39117", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34mtmp/makedirs-target\u001b[0m\n", + "├── \u001b[01;34mbar\u001b[0m\n", + "│   └── \u001b[01;34mbaz\u001b[0m\n", + "└── \u001b[01;34mfoo\u001b[0m\n", + "\n", + "3 directories, 0 files\n" + ] + } + ], + "source": [ + "%%bash\n", + "tree tmp/makedirs-target" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/install-deps b/notebooks/install-deps new file mode 100644 index 000000000..54a214b38 --- /dev/null +++ b/notebooks/install-deps @@ -0,0 +1 @@ +conda install -q -y --repodata-fn repodata.json "jupyterlab<4.4" "testbook<0.5" diff --git a/notebooks/pyproject.toml b/notebooks/pyproject.toml new file mode 100644 index 000000000..c3096a235 --- /dev/null +++ b/notebooks/pyproject.toml @@ -0,0 +1,21 @@ +[tool.black] +line-length = 100 + +[tool.isort] +line_length = 100 +profile = "black" + +[tool.pylint.main] +recursive = true + +[tool.pylint."messages control"] +disable = [ + "missing-function-docstring", + "missing-module-docstring", +] +enable = [ + "useless-suppression", +] + +[tool.pytest.ini_options] +filterwarnings = ["ignore::DeprecationWarning"] diff --git a/notebooks/rocoto.ipynb b/notebooks/rocoto.ipynb new file mode 100644 
index 000000000..9b36c6d8a --- /dev/null +++ b/notebooks/rocoto.ipynb @@ -0,0 +1,1060 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c787c8df-e973-44d6-aa64-1f09c7a9a9d4", + "metadata": {}, + "source": [ + "# Rocoto Tool\n", + "\n", + "The `uwtools` API's `rocoto` module provides functions to build and validate Rocoto workflows. For more information on the UW YAML language than what is discussed here, see the Defining a Rocoto Workflow page. For more on Rocoto XML documents, see the Rocoto Documentation.\n", + "\n", + "Tested on `uwtools` version 2.4.2. For more information, please see the uwtools.api.rocoto Read the Docs page.\n", + "\n", + "## Table of Contents\n", + "* [Building Rocoto Workflows with UW YAML](#Building-Rocoto-Workflows-with-UW-YAML)\n", + " * [Entities and Cyclestrings](#Entities-and-Cyclestrings)\n", + " * [Tasks and Dependencies](#Tasks-and-Dependencies)\n", + " * [Metatasks](#Metatasks)\n", + "* [Validating Workflows](#Validating-Workflows)\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7dd67340-6553-40e9-be68-d79c1979280c", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from uwtools.api import rocoto\n", + "from uwtools.api.logging import use_uwtools_logger\n", + "\n", + "use_uwtools_logger()" + ] + }, + { + "cell_type": "markdown", + "id": "14ac9321-c59e-4149-bd08-7f2bcef1113e", + "metadata": {}, + "source": [ + "## Building Rocoto Workflows with UW YAML\n", + "\n", + "The `rocoto.realize()` function uses a UW YAML language to create Rocoto workflows in XML format.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1d10d514-d918-4cbd-aa61-c2be8ee9e298", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function realize in module uwtools.api.rocoto:\n", + "\n", + "realize(config: Union[uwtools.config.formats.yaml.YAMLConfig, pathlib.Path, str, NoneType], output_file: Union[str, 
pathlib.Path, NoneType] = None, stdin_ok: bool = False) -> bool\n", + " Realize the Rocoto workflow defined in the given YAML as XML.\n", + "\n", + " If no input file is specified, ``stdin`` is read. A ``YAMLConfig`` object may also be provided\n", + " as input. If no output file is specified, ``stdout`` is written to. Both the input config and\n", + " output Rocoto XML will be validated against appropriate schemas.\n", + "\n", + " :param config: YAML input file or ``YAMLConfig`` object (``None`` => read ``stdin``).\n", + " :param output_file: XML output file path (``None`` => write to ``stdout``).\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: ``True``.\n", + "\n" + ] + } + ], + "source": [ + "help(rocoto.realize)" + ] + }, + { + "cell_type": "markdown", + "id": "65694bbf-cce1-4979-b872-17d4aac8ae84", + "metadata": {}, + "source": [ + "The following is an example of a simple workflow written in the UW YAML language. It uses a top-level `workflow:` block that contains all other blocks in the workflow. The workflow's global attributes are set within an `attrs:` block, and each workflow has two required attributes: `realtime` and `scheduler`. The `realtime` key indicates whether the workflow will be run in realtime or in retrospective mode, where a value of `true` means that the workflow will be run in realtime mode. The `scheduler` key tells Rocoto which batch system to use when submitting and monitoring jobs. Each workflow must contain a `cycledef:` block that defines one or more sets of cycles the workflow will iterate over. A set of cycles must be given using the `spec` key. This key may define a set of cycles using either the \"start stop step\" method or the \"crontab-like\" method. The \"start stop step\" method is used below. A `log:` block is required to define the path where Rocoto logs are written. 
At least one task must be defined in the `tasks:` block, which is discussed in the [Tasks and Dependencies](#Tasks-and-Dependencies) section of this notebook.\n", + "\n", + "The simple workflow below contains a minimal set of keys. For more on the UW YAML language, see the Defining a Rocoto Workflow page.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9281ae4f-4d78-4401-bf6f-87d4b873e846", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workflow:\n", + " attrs:\n", + " realtime: false\n", + " scheduler: slurm\n", + " cycledef:\n", + " - spec: 202410290000 202410300000 06:00:00\n", + " log: logs/test.log\n", + " tasks:\n", + " task_greet:\n", + " command: echo Hello, World!\n", + " cores: 1\n", + " walltime: 00:00:10\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/simple-workflow.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "da06b1f6-7699-4e89-bbd0-de6a5e9ea0b7", + "metadata": {}, + "source": [ + "Using `rocoto.realize()`, the UW YAML from above is translated to Rocoto XML. A `config` may be given as a string path, Path object, or `YAMLConfig` object. Likewise, the path to the XML output file may be defined by providing `output_file` with a string path or Path object. If `output_file` is omitted or set to `None`, the XML will be written to `stdout`. Both the input config and the output Rocoto XML are validated against appropriate schemas. 
The number of schema-validation errors, as well as details on the errors (if any), are reported.\n", + "\n", + "The `stdin_ok` argument can be used to permit configs to be read from `stdin` when `config` is omitted or set to `None`, but this is a rare use case beyond the scope of this notebook that will not be discussed here.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "983636f9-7c39-4f0e-a76e-e35129d2b9fe", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:15:43] INFO 0 UW schema-validation errors found in Rocoto config\n", + "[2024-11-19T23:15:43] INFO 0 Rocoto XML validation errors found\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rocoto.realize(\n", + " config=Path('fixtures/rocoto/simple-workflow.yaml'),\n", + " output_file='tmp/simple-workflow.xml'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "12b07a2c-7f9c-4480-b165-9e3b5a0ced6d", + "metadata": {}, + "source": [ + "The resulting Rocoto XML file is shown below. An XML header is automatically added without the need to explicitly define it in the UW YAML. Note how blocks from UW YAML language have been transformed into XML tags along with their attributes and values. 
For example, attributes defined by the `attrs:` block in the UW YAML have become attributes of the `<workflow>` tag in the XML.\n", + "\n", + "For more information on Rocoto workflows, including tags like the ones shown here and their attributes, see the Rocoto Documentation.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d895e1cf-e8af-437b-9a38-2b03ec34f527", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " 202410290000 202410300000 06:00:00\n", + " logs/test.log\n", + " \n", + " 1\n", + " 00:00:10\n", + " echo Hello, World!\n", + " greet\n", + " \n", + "\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/simple-workflow.xml" + ] + }, + { + "cell_type": "markdown", + "id": "39f50dbd-e3f3-4e1e-a10b-f622f635b16b", + "metadata": {}, + "source": [ + "The following workflow is missing required components: `workflow` doesn't contain a `realtime` attribute, a `log:` block isn't included, and `task_greet` doesn't include a `command`.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "56d04e6b-5fa2-4c93-ac2f-c95fa23c888e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workflow:\n", + " attrs:\n", + " scheduler: slurm\n", + " cycledef:\n", + " - spec: 202410290000 202410300000 06:00:00\n", + " tasks:\n", + " task_greet:\n", + " cores: 1\n", + " walltime: 00:00:10\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/err-workflow.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "f102c6ca-c8a5-4f9c-8e40-b5fa040bbeab", + "metadata": {}, + "source": [ + "When validation errors occur, `realize()` raises an exception indicating what type of error occurred. Here, the YAML validation errors cause a `UWConfigError` to be raised. 
The number of validation errors present and their locations within the workflow structure are also shown.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "16704f19-cbca-4765-8c72-16512fc96e9b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:15:43] ERROR 3 UW schema-validation errors found in Rocoto config\n", + "[2024-11-19T23:15:43] ERROR Error at workflow -> attrs:\n", + "[2024-11-19T23:15:43] ERROR 'realtime' is a required property\n", + "[2024-11-19T23:15:43] ERROR Error at workflow -> tasks -> task_greet:\n", + "[2024-11-19T23:15:43] ERROR 'command' is a required property\n", + "[2024-11-19T23:15:43] ERROR Error at workflow:\n", + "[2024-11-19T23:15:43] ERROR 'log' is a required property\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "YAML validation errors \n" + ] + } + ], + "source": [ + "try:\n", + " rocoto.realize(\n", + " config=Path('fixtures/rocoto/err-workflow.yaml'),\n", + " output_file='tmp/err-workflow.xml'\n", + " )\n", + "except Exception as e:\n", + " print(e, type(e))" + ] + }, + { + "cell_type": "markdown", + "id": "cbad7aef-02b6-43a3-9241-349affa2f71c", + "metadata": {}, + "source": [ + "### Entities and Cyclestrings\n", + "\n", + "Constants called entities may be defined so that their values can be referenced throughout the rest of the Rocoto XML. These are defined in an `entities:` block, with their names and values given as keys and values in the YAML. Below, an entity named `LOG` is defined with a string value. This value is referred to elsewhere in the Rocoto XML with the syntax `&ENTITY_NAME;`. 
In this case, note the `&LOG;` entity within the `log:` block.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "074a6e7e-7c05-4037-b954-06eba8ae2241", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workflow:\n", + " attrs:\n", + " realtime: false\n", + " scheduler: slurm\n", + " cycledef:\n", + " - spec: 202410290000 202410300000 06:00:00\n", + " entities:\n", + " LOG: \"2024-10-29/test06:00:00.log\"\n", + " log: logs/&LOG;\n", + " tasks:\n", + " task_greet:\n", + " command: echo Hello, World!\n", + " cores: 1\n", + " walltime: 00:00:10\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/ent-workflow.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "34cbb0e1-ac5f-4764-afbb-9574966542c6", + "metadata": {}, + "source": [ + "Cycle strings represent dynamic cycle time components that are written using specific flags and are rendered when Rocoto runs the XML. Here, the `LOG` entity contains `@Y`, `@m`, `@d` and `@X` flags that represent the year, month, day, and time relative to a cycle defined by the `cycledef:` entry. For more information on these flags, see the Rocoto Documentation. A `cyclestr:` block is used to mark a string containing cycle string flags for rendering when Rocoto runs. Here, since the `LOG` entity contains these flags, a `cyclestr:` block within the `log:` block indicates that the flags should be rendered when Rocoto runs. 
This string itself is contained in a `value` key.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e4df500f-6b11-4c0e-ac44-3a5443d0ee02", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workflow:\n", + " attrs:\n", + " realtime: false\n", + " scheduler: slurm\n", + " cycledef:\n", + " - spec: 202410290000 202410300000 06:00:00\n", + " entities:\n", + " LOG: \"@Y-@m-@d/test@X.log\"\n", + " log: \n", + " cyclestr:\n", + " value: logs/&LOG;\n", + " tasks:\n", + " task_greet:\n", + " command: echo Hello, World!\n", + " cores: 1\n", + " walltime: 00:00:10\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/ent-cs-workflow.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "305485ae-1676-473c-ad4e-5521145b5663", + "metadata": {}, + "source": [ + "As before, the `realize()` function transforms the UW YAML into Rocoto XML.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5f1b9dcd-87cf-41c3-ab3c-e581e2967214", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:15:43] INFO 0 UW schema-validation errors found in Rocoto config\n", + "[2024-11-19T23:15:43] INFO 0 Rocoto XML validation errors found\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rocoto.realize(\n", + " config='fixtures/rocoto/ent-cs-workflow.yaml',\n", + " output_file='tmp/ent-cs-workflow.xml'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3f28d943-5e39-412b-8708-865a63030e29", + "metadata": {}, + "source": [ + "Here we see the Rocoto XML with the addition of an entity and a `` tag. 
The entity is defined in the header of the XML document, and the `<cyclestr>` tag is added within the `<log>` tag.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1a1bea14-dbae-4e7d-96f7-0ec552b0e25a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "]>\n", + "\n", + " 202410290000 202410300000 06:00:00\n", + " \n", + " logs/&LOG;\n", + " \n", + " \n", + " 1\n", + " 00:00:10\n", + " echo Hello, World!\n", + " greet\n", + " \n", + "\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/ent-cs-workflow.xml" + ] + }, + { + "cell_type": "markdown", + "id": "6b339d55-8506-443e-9b7b-0d943aaf4641", + "metadata": {}, + "source": [ + "### Tasks and Dependencies\n", + "\n", + "A `tasks:` block defines all tasks in a Rocoto workflow. Each task is contained within its own block, where the key is `task_` followed by the name of the task. There are two tasks in the example below, `task_bacon` and `task_eggs`. In the Rocoto XML, two separate `<task>` tags will be created with their `name` attributes set to \"bacon\" and \"eggs\" respectively. Each task must contain a command to execute indicated by the `command` key and an amount of time to request when submitting the task for execution indicated by the `walltime` key. Each task must also contain either a `cores`, `nodes`, or `native` key to request a given number of nodes/cores used to execute the task. 
The `task_bacon:` block below requests 1 core, while the `task_eggs:` block requests 4 cores on 1 node.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "33c67471-84a1-4b0f-b0bd-40f805e6615f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workflow:\n", + " attrs:\n", + " realtime: false\n", + " scheduler: slurm\n", + " cycledef:\n", + " - spec: 202410290000 202410300000 06:00:00\n", + " log: logs/test.log\n", + " tasks:\n", + " task_bacon:\n", + " command: \"echo Cooking bacon...\"\n", + " cores: 1\n", + " walltime: 00:00:10\n", + " task_eggs:\n", + " command: \"echo Cooking eggs...\"\n", + " nodes: 1:ppn=4\n", + " walltime: 00:00:10\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/tasks-workflow.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "08e9c593-7ba1-45da-8f1a-99389c9dfdf5", + "metadata": {}, + "source": [ + "Each task may optionally have one or more dependencies that must be accounted for before a task runs. These are specified using a `dependency:` block within the `task_*` block that the dependencies apply to. Dependencies are structured as boolean expressions using a variety of keys that may define specific types of dependencies like task or data dependencies. They may also group dependencies together using boolean operator keys like `and` or `or`. For a full list of possible tags, see the Rocoto Documentation. \n", + "\n", + "Below, the `task_eggs:` block includes one data dependency indicated by the `datadep` key, plus a `value` key that identifies the required data. The `task_serve:` block includes two task dependencies for the bacon and eggs tasks. Since there are multiple dependencies here, they need to be contained within a boolean operator block that describes how to deal with the group of dependencies which may not all have the same level of completion. Here the `and:` block indicates that all of the individual tasks (i.e. 
`task_eggs`) within need to be completed. The two task dependencies must have unique names since they exist at the same level, and they are differentiated here using the `_name` suffix. To prevent circular dependencies, task dependencies must have a `task` attribute that indicates the name of a task that is already defined above it. Similar to the `workflow:` block, an `attrs:` block is used here to add attributes to `taskdep`, and the `task` key specifies the value of the task attribute.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "be4d4488-9ff6-4a6d-ace0-464e21f31116", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workflow:\n", + " attrs:\n", + " realtime: false\n", + " scheduler: slurm\n", + " cycledef:\n", + " - spec: 202410290000 202410300000 06:00:00\n", + " log: logs/test.log\n", + " tasks:\n", + " task_bacon:\n", + " command: \"echo Cooking bacon...\"\n", + " cores: 1\n", + " walltime: 00:00:10\n", + " task_eggs:\n", + " command: \"echo Cooking eggs...\"\n", + " nodes: 1:ppn=4\n", + " walltime: 00:00:10\n", + " dependency:\n", + " datadep:\n", + " value: eggs_recipe.txt\n", + " task_serve:\n", + " command: \"echo Serving breakfast...\"\n", + " cores: 2\n", + " walltime: 00:00:01\n", + " dependency:\n", + " and:\n", + " taskdep_eggs:\n", + " attrs:\n", + " task: bacon\n", + " taskdep_bacon:\n", + " attrs:\n", + " task: eggs\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/tasks-deps-workflow.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "c6015712-f144-4640-9be8-1cd651df74b9", + "metadata": {}, + "source": [ + "Here, the `realize()` function transforms this UW YAML into Rocoto XML.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "001ac8ed-0f31-4012-bc63-55d63848e1d4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:15:43] INFO 0 UW schema-validation 
errors found in Rocoto config\n", + "[2024-11-19T23:15:43] INFO 0 Rocoto XML validation errors found\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rocoto.realize(\n", + " config='fixtures/rocoto/tasks-deps-workflow.yaml',\n", + " output_file='tmp/tasks-deps-workflow.xml'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "dc87e5aa-aa21-4bbf-8204-389c355a1bbd", + "metadata": {}, + "source": [ + "Note how each task has its own tag in the Rocoto XML document, with name attributes that came from the unique suffixes of the `task_` keys. While the bacon task contains no `` tag, the eggs and serve tasks do. Within the serve task's dependencies, the `` tag describes the need for both of the two task dependencies to be fulfilled. Each `` task dependency uses the `task` attribute to point to a previously named task. \n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "7b9ecc04-9851-4e34-985f-d908285dc8e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " 202410290000 202410300000 06:00:00\n", + " logs/test.log\n", + " \n", + " 1\n", + " 00:00:10\n", + " echo Cooking bacon...\n", + " bacon\n", + " \n", + " \n", + " 1:ppn=4\n", + " 00:00:10\n", + " echo Cooking eggs...\n", + " eggs\n", + " \n", + " eggs_recipe.txt\n", + " \n", + " \n", + " \n", + " 2\n", + " 00:00:01\n", + " echo Serving breakfast...\n", + " serve\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/tasks-deps-workflow.xml" + ] + }, + { + "cell_type": "markdown", + "id": "f88d22e0-3433-482d-aeb3-44bbb8cf840f", + "metadata": {}, + "source": [ + "### Metatasks\n", + "\n", + "Metatasks define one or more tasks that are similar to one another using a substitution of values. 
Like tasks, metatask block keys use a suffix after an underscore to name a particular metatask. The metatask in the example below will have a `name=breakfast` attribute in its `` tag in the XML document. The values to substitute are defined in a `var:` block, and this block contains one or more keys representing the name of a list of values. The values in the list are separated by spaces. The number of tasks defined by a metatask is equal to the number of values in any list in the `var:` block. In the example below, two lists named `food` and `prepare` contain three values each, so three tasks are defined by this metatask. It is necessary that each list defined in a metatask has the same number of values. The values are referenced using the name of the list that contains the values bracketed by pound signs, as seen in the `task_#food#` key and in the following `command` string. \n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6c8a8b8e-62b0-47f1-b5e3-763aef2e71ea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workflow:\n", + " attrs:\n", + " realtime: false\n", + " scheduler: slurm\n", + " cycledef:\n", + " - spec: 202410290000 202410300000 06:00:00\n", + " log: logs/test.log\n", + " tasks:\n", + " metatask_breakfast:\n", + " var:\n", + " food: biscuits OJ hashbrowns\n", + " prepare: bake pour fry\n", + " task_#food#:\n", + " command: \"echo It's time for breakfast, #prepare# the #food#!\"\n", + " cores: 1\n", + " walltime: 00:00:03\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/meta-workflow.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "8c2457c5-e305-4c4c-9020-3eb5553d2ba6", + "metadata": {}, + "source": [ + "Similar to previous examples, `realize()` transforms the metatask workflow to Rocoto XML.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "112a7586-5fef-46a0-83a5-f7016257fe9b", + "metadata": {}, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:15:43] INFO 0 UW schema-validation errors found in Rocoto config\n", + "[2024-11-19T23:15:43] INFO 0 Rocoto XML validation errors found\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rocoto.realize(\n", + " config='fixtures/rocoto/meta-workflow.yaml',\n", + " output_file='tmp/meta-workflow.xml'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0b078044-50fb-4158-b68e-aa0001621f7b", + "metadata": {}, + "source": [ + "The XML document below shows how the `` tag and each of its child tags efficiently define multiple similar tasks. Like previous examples, name attributes for task-related tags are created here from the suffixes of their keys in the UW YAML. Note that `` names were derived from full key names in the `var:` block. The ``, ``, and `` tags each contain strings that will receive substitute values wherever the placeholders `#food#` or `#prepare#` appear.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "3ac7f1af-44cf-440a-8b4f-6f63e4e98fee", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " 202410290000 202410300000 06:00:00\n", + " logs/test.log\n", + " \n", + " biscuits OJ hashbrowns\n", + " bake pour fry\n", + " \n", + " 1\n", + " 00:00:03\n", + " echo It's time for breakfast, #prepare# the #food#!\n", + " #food#\n", + " \n", + " \n", + "\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat tmp/meta-workflow.xml" + ] + }, + { + "cell_type": "markdown", + "id": "d9c5ccad-07c4-4aad-aadd-e0343949f9fa", + "metadata": {}, + "source": [ + "Metatasks may be nested to create tasks using combinatorial lists of variables. This will create sets of tasks where each `var` value in a parent metatask applies to every child metatask. 
In the example below, a parent metatask contains a `var` named `process` with values `bake`, `cool`, and `store`. Its child metatask contains a `var` named `food` with values `cookies` and `cakes`. Tasks will be created to bake, cool, and store both cookies and cakes. Note that `var:` blocks at different levels do not necessarily contain the same number of values. \n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "69797f73-2475-449f-b036-2529f4379440", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workflow:\n", + " attrs:\n", + " realtime: false\n", + " scheduler: slurm\n", + " cycledef:\n", + " - spec: 202410290000 202410300000 06:00:00\n", + " log: logs/test.log\n", + " tasks:\n", + " metatask_process:\n", + " var:\n", + " process: bake cool store\n", + " metatask_process_food:\n", + " var:\n", + " food: cookies cakes\n", + " task_#process#_#food#:\n", + " command: \"echo It's time to #process# the #food#.\"\n", + " nodes: 1:ppn=4\n", + " walltime: 00:00:30\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/meta-nested-workflow.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "40e85303-be55-4ebc-8f78-d9a0373cbf09", + "metadata": {}, + "source": [ + "## Validating Workflows\n", + "\n", + "The `rocoto.validate()` function checks the content of a Rocoto XML file against its schema, detecting and reporting any errors.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "6a681e6d-800c-4d9f-87a0-270e72dcb7be", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function validate in module uwtools.api.rocoto:\n", + "\n", + "validate(xml_file: Union[str, pathlib.Path, NoneType] = None, stdin_ok: bool = False) -> bool\n", + " Validate purported Rocoto XML file against its schema.\n", + "\n", + " :param xml_file: Path to XML file (``None`` or unspecified => read ``stdin``).\n", + " :param 
stdin_ok: OK to read from ``stdin``?\n", + " :return: ``True`` if the XML conforms to the schema, ``False`` otherwise.\n", + "\n" + ] + } + ], + "source": [ + "help(rocoto.validate)" + ] + }, + { + "cell_type": "markdown", + "id": "91f72e92-083c-4c36-90d8-1860133fe33b", + "metadata": {}, + "source": [ + "The following Rocoto XML is identical to that generated in the [Building Rocoto Workflows with UW YAML](#Building-Rocoto-Workflows-with-UW-YAML) section above.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "22a8fe77-2094-4139-9ff2-91dc897c3af3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " 202410290000 202410300000 06:00:00\n", + " logs/test.log\n", + " \n", + " 1\n", + " 00:00:10\n", + " echo Hello, World!\n", + " greet\n", + " \n", + "\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/simple-workflow.xml" + ] + }, + { + "cell_type": "markdown", + "id": "bb04cf78-1afd-48eb-8673-803fbabac836", + "metadata": {}, + "source": [ + "`validate()` accepts Path objects or string paths passed via the `xml_file` parameter. (If `xml_file` is omitted or `None`, and `stdin_ok` is `True`, XML will be read from `stdin`, but this is a rare use case that won't be covered here.) The function returns `True` if the XML is validated without any errors, and `False` otherwise. 
The number of schema-validation errors, as well as details on the errors (if any), are reported.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "fcbf5ffd-7722-4801-b6f7-5867248d471d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:15:43] INFO 0 Rocoto XML validation errors found\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rocoto.validate(\n", + " xml_file=\"fixtures/rocoto/simple-workflow.xml\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "57e975e2-c46e-421b-8b5a-45c4f62afdd0", + "metadata": {}, + "source": [ + "The following Rocoto XML is missing two required components: `<workflow>`'s `scheduler` attribute and a `<cycledef>` tag.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "9788207b-3c1f-4b60-bd4d-9c8a75666b24", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " logs/test.log\n", + " \n", + " 1\n", + " 00:00:10\n", + " echo Hello, World!\n", + " greet\n", + " \n", + "\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/rocoto/err-workflow.xml" + ] + }, + { + "cell_type": "markdown", + "id": "f407c4a5-a5a6-4546-b23b-390c9bb52f9b", + "metadata": {}, + "source": [ + "When Rocoto validation errors are found, `validate()` returns `False`. Details are reported regarding the types of errors and number of errors found. 
For more information on required Rocoto XML components, see the Rocoto Documentation.\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "54fcfb54-361d-47ef-9379-4b235fa54316", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:15:43] ERROR 4 Rocoto XML validation errors found\n", + "[2024-11-19T23:15:43] ERROR :2:0:ERROR:RELAXNGV:RELAXNG_ERR_ATTRVALID: Element workflow failed to validate attributes\n", + "[2024-11-19T23:15:43] ERROR :2:0:ERROR:RELAXNGV:RELAXNG_ERR_NOELEM: Expecting an element cycledef, got nothing\n", + "[2024-11-19T23:15:43] ERROR :2:0:ERROR:RELAXNGV:RELAXNG_ERR_INTERSEQ: Invalid sequence in interleave\n", + "[2024-11-19T23:15:43] ERROR :2:0:ERROR:RELAXNGV:RELAXNG_ERR_CONTENTVALID: Element workflow failed to validate content\n", + "[2024-11-19T23:15:43] ERROR Invalid Rocoto XML:\n", + "[2024-11-19T23:15:43] ERROR 1 \n", + "[2024-11-19T23:15:43] ERROR 2 \n", + "[2024-11-19T23:15:43] ERROR 3 logs/test.log\n", + "[2024-11-19T23:15:43] ERROR 4 \n", + "[2024-11-19T23:15:43] ERROR 5 1\n", + "[2024-11-19T23:15:43] ERROR 6 00:00:10\n", + "[2024-11-19T23:15:43] ERROR 7 echo Hello, World!\n", + "[2024-11-19T23:15:43] ERROR 8 greet\n", + "[2024-11-19T23:15:43] ERROR 9 \n", + "[2024-11-19T23:15:43] ERROR 10 \n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rocoto.validate(\n", + " xml_file=Path(\"fixtures/rocoto/err-workflow.xml\")\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} diff --git a/notebooks/template.ipynb b/notebooks/template.ipynb new file mode 100644 index 000000000..5e5627d05 --- /dev/null +++ b/notebooks/template.ipynb @@ -0,0 +1,511 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "50994576-4783-4e8c-a6b0-21b1f258685d", + "metadata": {}, + "source": [ + "# Template Tool\n", + "\n", + "The `uwtools` API's `template` module provides functions to render Jinja2 templates and to translate atparse templates to Jinja2.\n", + "\n", + "Tested on `uwtools` version 2.4.2. For more information, please see the uwtools.api.template Read the Docs page." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cbec4cc0-369e-41ff-a8a6-8a2699cb5759", + "metadata": {}, + "outputs": [], + "source": [ + "from uwtools.api import template\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "markdown", + "id": "017c777a-1ca0-4fef-873f-89924e209da8", + "metadata": {}, + "source": [ + "## render\n", + "\n", + "`template.render()` renders a Jinja2 template using values provided by the specified values source." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "54e88f1b-0b9f-4011-b070-df107f928cf9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function render in module uwtools.api.template:\n", + "\n", + "render(values_src: Union[dict, str, pathlib.Path, NoneType] = None, values_format: Optional[str] = None, input_file: Union[str, pathlib.Path, NoneType] = None, output_file: Union[str, pathlib.Path, NoneType] = None, overrides: Optional[dict[str, str]] = None, env: bool = False, searchpath: Optional[list[str]] = None, values_needed: bool = False, dry_run: bool = False, stdin_ok: bool = False) -> str\n", + " Render a Jinja2 template to a file, based on specified values.\n", + "\n", + " Primary values used to render the template are taken from the specified file. 
The format of the\n", + " values source will be deduced from the filename extension, if possible. This can be overridden\n", + " via the ``values_format`` argument. A ``dict`` object may alternatively be provided as the\n", + " primary values source. If no input file is specified, ``stdin`` is read. If no output file is\n", + " specified, ``stdout`` is written to.\n", + "\n", + " :param values_src: Source of values to render the template.\n", + " :param values_format: Format of values when sourced from file.\n", + " :param input_file: Raw input template file (``None`` => read ``stdin``).\n", + " :param output_file: Rendered template output file (``None`` => write to ``stdout``).\n", + " :param overrides: Supplemental override values.\n", + " :param env: Supplement values with environment variables?\n", + " :param searchpath: Paths to search for extra templates.\n", + " :param values_needed: Just report variables needed to render the template?\n", + " :param dry_run: Run in dry-run mode?\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: The rendered template string.\n", + " :raises: UWTemplateRenderError if template could not be rendered.\n", + "\n" + ] + } + ], + "source": [ + "help(template.render)" + ] + }, + { + "cell_type": "markdown", + "id": "28a3415f-3f0e-42c7-8be2-bc94057e8510", + "metadata": {}, + "source": [ + "Consider the following template, to be rendered as YAML data:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "91bd29fd-77ba-4ea2-946f-cd7a2d9301f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user:\n", + " name: {{ first }} {{ last }}\n", + " favorite_food: {{ food }}\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/template/render-template.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "d335aec6-5fcd-4df2-ae2e-8afa1a510683", + "metadata": {}, + "source": [ + "The `values_needed` parameter can be used to display which values are needed 
to complete the template. A logger needs to be initialized for the log of the missing values to be displayed." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "040eceb1-0821-4e82-825a-5be18f06397d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-11-19T23:16:37] INFO Value(s) needed to render this template are:\n", + "[2024-11-19T23:16:37] INFO first\n", + "[2024-11-19T23:16:37] INFO food\n", + "[2024-11-19T23:16:37] INFO last\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user:\n", + " name: {{ first }} {{ last }}\n", + " favorite_food: {{ food }}\n", + "\n" + ] + } + ], + "source": [ + "import uwtools.logging\n", + "uwtools.logging.setup_logging(verbose=False)\n", + "\n", + "print(\n", + " template.render(\n", + " input_file='fixtures/template/render-template.yaml',\n", + " values_needed=True\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6d6913b9-a375-447a-b729-566afe84f694", + "metadata": {}, + "source": [ + "The log messages indicate that values are needed for keys `first`, `food`, and `last`. These values can be sourced from a Python dictionary or from a file. The following file provides the needed values:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7f794c66-8840-419a-adf5-20efddb85708", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "first: John\n", + "last: Doe\n", + "food: burritos\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/template/render-values.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "9c809a20-b09d-438a-a1da-3cb9986ce9fa", + "metadata": {}, + "source": [ + "With these values, we can render the template to a file. When the source of values is a file, its path can be given either as a string or a Path object. If it has an unrecognized (or no) extension, its format can be specified with `values_format`. 
The rendered template can be written to a file specified with `output_file`; otherwise, it will be written to `stdout`. " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "834b7a40-293e-4d35-81e8-121eed4cf8f2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user:\n", + " name: John Doe\n", + " favorite_food: burritos\n" + ] + } + ], + "source": [ + "print(\n", + " template.render(\n", + " values_src=Path('fixtures/template/render-values.yaml'),\n", + " values_format='yaml',\n", + " input_file='fixtures/template/render-template.yaml',\n", + " output_file='fixtures/template/render-complete-1.yaml'\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c45b0ac1-23d6-4d25-a691-7bc4f482429e", + "metadata": {}, + "source": [ + "Values can be selectively overridden with a dictionary passed via the optional `overrides` argument." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "55eec4f4-4f91-4618-8382-78061907bd2a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user:\n", + " name: Jane Doe\n", + " favorite_food: tamales\n" + ] + } + ], + "source": [ + "print(\n", + " template.render(\n", + " values_src=Path('fixtures/template/render-values.yaml'),\n", + " values_format='yaml',\n", + " input_file='fixtures/template/render-template.yaml',\n", + " output_file='fixtures/template/render-complete-2.yaml',\n", + " overrides={'first':'Jane', 'food':'tamales'}\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "861e1e96-58b6-4537-bc7d-7986b450e774", + "metadata": {}, + "source": [ + "Let's take a look at the two newly rendered files." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8601d4d9-5e53-44b7-880c-666ab810d8b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user:\n", + " name: John Doe\n", + " favorite_food: burritos\n", + "---------------------------------------\n", + "user:\n", + " name: Jane Doe\n", + " favorite_food: tamales\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat fixtures/template/render-complete-1.yaml\n", + "echo ---------------------------------------\n", + "cat fixtures/template/render-complete-2.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "58cbbfbf-b47d-485d-9ef3-80b067316d11", + "metadata": {}, + "source": [ + "## render_to_str\n", + "\n", + "`template.render_to_str()` is identical to `template.render()` except that it does not accept an `output_file` parameter: It returns the rendered template as a string and does not write to a file or to `stdout`." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ff8b80b2-590c-476f-94f7-37c4f34932f7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function render_to_str in module uwtools.api.template:\n", + "\n", + "render_to_str(values_src: Union[dict, str, pathlib.Path, NoneType] = None, values_format: Optional[str] = None, input_file: Union[str, pathlib.Path, NoneType] = None, overrides: Optional[dict[str, str]] = None, env: bool = False, searchpath: Optional[list[str]] = None, values_needed: bool = False, dry_run: bool = False) -> str\n", + " Render a Jinja2 template to a string, based on specified values.\n", + "\n", + " See ``render()`` for details on arguments, etc.\n", + "\n" + ] + } + ], + "source": [ + "help(template.render_to_str)" + ] + }, + { + "cell_type": "markdown", + "id": "65905cf1-679d-46ef-96e6-23e0c952947c", + "metadata": {}, + "source": [ + "We can see the resulting string using the same template and values from the first `template.render()` 
example." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1f2cec30-0761-42f4-85fc-05593e215b23", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user:\n", + " name: John Doe\n", + " favorite_food: burritos\n" + ] + } + ], + "source": [ + "result = template.render_to_str(\n", + " values_src=Path('fixtures/template/render-values.yaml'),\n", + " values_format='yaml',\n", + " input_file='fixtures/template/render-template.yaml'\n", + ")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "fe45be03-c1aa-4c50-a21b-3f35180569b4", + "metadata": {}, + "source": [ + "For more examples, please refer to the render section above.\n", + "\n", + "## translate\n", + "\n", + "This function can be used to translate atparse templates into Jinja2 templates by replacing `@[]` tokens with their corresponding `{{}}` Jinja2 equivalents. " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2ddcefac-030d-415c-a97f-eab9e176e811", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function translate in module uwtools.api.template:\n", + "\n", + "translate(input_file: Union[str, pathlib.Path, NoneType] = None, output_file: Union[str, pathlib.Path, NoneType] = None, dry_run: bool = False, stdin_ok: bool = False) -> bool\n", + " Translate an atparse template to a Jinja2 template.\n", + "\n", + " ``@[]`` tokens are replaced with Jinja2 ``{{}}`` equivalents. If no input file is specified,\n", + " ``stdin`` is read. If no output file is specified, ``stdout`` is written to. 
In ``dry_run``\n", + " mode, output is written to ``stderr``.\n", + "\n", + " :param input_file: Path to atparse file (``None`` => read ``stdin``).\n", + " :param output_file: Path to the file to write the converted template to.\n", + " :param dry_run: Run in dry-run mode?\n", + " :param stdin_ok: OK to read from ``stdin``?\n", + " :return: ``True``.\n", + "\n" + ] + } + ], + "source": [ + "help(template.translate)" + ] + }, + { + "cell_type": "markdown", + "id": "1340097f-5ace-482d-bd13-01b426e768a1", + "metadata": {}, + "source": [ + "The template tool works with atparse templates like the one shown below." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "66fbde65-2c4e-48fa-bc49-c4faec78f944", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "flowers:\n", + " roses: @[color1]\n", + " violets: @[color2]\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat 'fixtures/template/translate-template.yaml'" + ] + }, + { + "cell_type": "markdown", + "id": "62d87063-2cd0-40de-bf02-dee0ace11d5a", + "metadata": {}, + "source": [ + "We can translate this file to a Jinja2 template by passing appropriate `input_file` and `output_file` (either `str` or Path) values to `template.translate()`." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "bab9026c-9f5a-435d-b8a2-71fa2a325109", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "template.translate(\n", + " input_file=Path('fixtures/template/translate-template.yaml'),\n", + " output_file='fixtures/template/translate-complete.yaml'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e8712324-39a1-49d1-b2c6-dce2907b149e", + "metadata": {}, + "source": [ + "Now we have created a Jinja2 template that can be rendered using `template.render()` or `template.render_to_str()`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5f30435c-e253-4f8a-a8e7-6bdbd8be92c9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "flowers:\n", + " roses: {{ color1 }}\n", + " violets: {{ color2 }}\n" + ] + } + ], + "source": [ + "%%bash\n", + "cat 'fixtures/template/translate-complete.yaml'" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tests/test_config.py b/notebooks/tests/test_config.py new file mode 100644 index 000000000..4f09b24fb --- /dev/null +++ b/notebooks/tests/test_config.py @@ -0,0 +1,152 @@ +from textwrap import dedent + +import yaml +from testbook import testbook + + +def test_get_config(): + with open("fixtures/config/get-config.yaml", "r", encoding="utf-8") as f: + config1_str = f.read().rstrip() + config1_dict = yaml.safe_load(config1_str) + with testbook("config.ipynb", execute=True) as tb: + assert tb.ref("config1") == config1_dict + assert tb.cell_output_text(5) == config1_str + assert config1_str in tb.cell_output_text(7) + assert tb.cell_output_text(9) == "message:\n greeting: Hi\n recipient: Earth" + + +def test_depth(): + with testbook("config.ipynb", execute=True) as tb: + assert tb.cell_output_text(11) == "greeting=Salutations\nrecipient=Mars" + assert tb.cell_output_text(13) == "Cannot instantiate depth-1 SHConfig with depth-2 config" + assert tb.cell_output_text(15) == "[message]\ngreeting = Salutations\nrecipient = Mars" + assert tb.cell_output_text(17) == "Cannot instantiate depth-2 INIConfig with depth-1 config" + + +def test_realize(): + # Get 
config file data to compare to cell output. + with open("fixtures/config/get-config.yaml", "r", encoding="utf-8") as f: + config_str = f.read().rstrip() + config_dict = yaml.safe_load(config_str) + with open("fixtures/config/base-config.nml", "r", encoding="utf-8") as f: + update_config_str = f.read().rstrip() + with open("fixtures/config/keys-config.yaml", "r", encoding="utf-8") as f: + keys_config_str = f.read().rstrip() + with testbook("config.ipynb", execute=True) as tb: + with open("tmp/updated-config.nml", "r", encoding="utf-8") as f: + updated_config = f.read().rstrip() + with open("tmp/config-total.nml", "r", encoding="utf-8") as f: + total_config = f.read().rstrip() + # Ensure that cell output text matches expectations. + assert tb.cell_output_text(21) == str(config_dict) + assert tb.cell_output_text(23) == config_str + assert tb.cell_output_text(25) == str(config_dict) + for item in config_dict.items(): + assert item[0] + "=" + item[1] in tb.cell_output_text(27) + assert tb.cell_output_text(29) == update_config_str + updated_dict = ( + "'sender_id': '{{ id }}'", + "'message': 'Salutations, Mars!'", + "'sent': True", + ) + assert all(x in tb.cell_output_text(31) for x in updated_dict) + assert tb.cell_output_text(33) == updated_config + assert tb.cell_output_text(35) == keys_config_str + assert tb.cell_output_text(37) == "message: Good morning, Venus!" 
+ assert tb.cell_output_text(39) == update_config_str + expected_log = ( + "memo.sender_id: {{ id }}", + "memo.message: {{ greeting }}, {{ recipient }}!", + ) + assert all(x in tb.cell_output_text(41) for x in expected_log) + assert tb.cell_output_text(43) == update_config_str + assert tb.cell_output_text(45) == "Config could not be totally realized" + total_dict = ("'sender_id': '321'", "'message': 'Salutations, Mars!'", "'sent': True") + assert all(x in tb.cell_output_text(47) for x in total_dict) + assert tb.cell_output_text(49) == total_config + + +def test_realize_to_dict(): + with open("fixtures/config/get-config.yaml", "r", encoding="utf-8") as f: + config_str = f.read().rstrip() + with testbook("config.ipynb", execute=True) as tb: + assert tb.cell_output_text(51) == config_str + config_out = ("'id': '456'", "'greeting': 'Hello'", "'recipient': 'World'") + assert all(x in tb.cell_output_text(53) for x in config_out) + + +def test_compare(): + with open("fixtures/config/base-config.nml", "r", encoding="utf-8") as f: + base_cfg = f.read().rstrip() + with open("fixtures/config/alt-config.nml", "r", encoding="utf-8") as f: + alt_cfg = f.read().rstrip() + with open("tmp/config-copy.nml", "r", encoding="utf-8") as f: + cp_cfg = f.read().rstrip() + with testbook("config.ipynb", execute=True) as tb: + assert base_cfg in tb.cell_output_text(57) + assert alt_cfg in tb.cell_output_text(57) + diff_cmp = """ + INFO - fixtures/config/base-config.nml + INFO + fixtures/config/alt-config.nml + INFO --------------------------------------------------------------------- + INFO ↓ ? = info | -/+ = line unique to - or + file | blank = matching line + INFO --------------------------------------------------------------------- + INFO memo: + INFO message: '{{ greeting }}, {{ recipient }}!' 
+ INFO sender_id: '{{ id }}' + INFO - sent: false + INFO + sent: true + """ + assert all(x in tb.cell_output_text(59) for x in dedent(diff_cmp).strip().split("\n")) + assert base_cfg == cp_cfg # cell 61 creates this copy + same_cmp = ("INFO - fixtures/config/base-config.nml", "INFO + tmp/config-copy.nml", "True") + assert all(x in tb.cell_output_text(63) for x in same_cmp) + assert "ERROR Formats do not match: yaml vs nml" in tb.cell_output_text(65) + + +def test_validate(): + with open("fixtures/config/get-config.yaml", "r", encoding="utf-8") as f: + cfg = f.read().rstrip() + with open("fixtures/config/validate.jsonschema", "r", encoding="utf-8") as f: + schema = f.read().rstrip() + with testbook("config.ipynb", execute=True) as tb: + assert tb.cell_output_text(69) == cfg + assert tb.cell_output_text(71) == schema + valid_out = ("INFO 0 UW schema-validation errors found", "True") + assert all(x in tb.cell_output_text(73) for x in valid_out) + invalid_out = ( + "ERROR 1 UW schema-validation error found", + "ERROR 47 is not of type 'string'", + "False", + ) + assert all(x in tb.cell_output_text(75) for x in invalid_out) + + +def test_cfg_classes(): + with open("fixtures/config/fruit-config.ini", "r", encoding="utf-8") as f: + cfg = f.read().rstrip() + with testbook("config.ipynb", execute=True) as tb: + with open("tmp/fruits.ini", "r", encoding="utf-8") as f: + dump = f.read().rstrip() + assert tb.cell_output_text(79) == cfg + assert tb.cell_output_text(81) == "True" + diff_cmp = """ + INFO --------------------------------------------------------------------- + INFO ↓ ? = info | -/+ = line unique to - or + file | blank = matching line + INFO --------------------------------------------------------------------- + INFO fruit count: + INFO apples: '3' + INFO - grapes: '{{ grape_count }}' + INFO + grapes: '8' + INFO - kiwis: '2' + INFO ? ^ + INFO + kiwis: '1' + INFO ? 
^ + """ + assert all(x in tb.cell_output_text(83) for x in dedent(diff_cmp).strip().split("\n")) + assert "grapes = 15" in tb.cell_output_text(85) + assert tb.cell_output_text(89) == dump + dump_dict = ("[fruit count]", "oranges = 4", "blueberries = 9") + assert all(x in tb.cell_output_text(91) for x in dump_dict) + updated_vals = ("kiwis = 4", "raspberries = 12") + assert all(x in tb.cell_output_text(93) for x in updated_vals) diff --git a/notebooks/tests/test_exp_config_cb.py b/notebooks/tests/test_exp_config_cb.py new file mode 100644 index 000000000..b806948a8 --- /dev/null +++ b/notebooks/tests/test_exp_config_cb.py @@ -0,0 +1,35 @@ +from testbook import testbook +from uwtools.config.formats.yaml import YAMLConfig + + +def test_exp_config(): + with open("fixtures/exp-config/base-file.yaml", "r", encoding="utf-8") as f: + base_cfg = f.read().rstrip() + with open("fixtures/exp-config/fv3-rap-physics.yaml", "r", encoding="utf-8") as f: + fv3_rap_phys = f.read().rstrip() + with open("fixtures/exp-config/user.yaml", "r", encoding="utf-8") as f: + user_cfg = f.read().rstrip() + with testbook("exp-config-cb.ipynb", execute=True) as tb: + assert tb.cell_output_text(1) == "" + assert tb.cell_output_text(3) == base_cfg + assert tb.cell_output_text(5) == fv3_rap_phys + assert tb.cell_output_text(7) == user_cfg + assert tb.cell_output_text(9) == str(YAMLConfig("fixtures/exp-config/base-file.yaml")) + updated_cfg = ( + "cycle_day: !int '{{ cycle.strftime(''%d'') }}'", + "varmap_file: '{{ user.PARMdir }}/ufs_utils/varmap_tables/GSDphys_var_map.txt'", + "PARMdir: /path/to/ufs-srweather-app/parm", + ) + assert all(x in tb.cell_output_text(11) for x in updated_cfg) + deref_cfg = ( + "data_dir_input_grid: /path/to/my/output/make_ics", + "rundir: /path/to/my/output/make_ics", + ) + assert all(x in tb.cell_output_text(13) for x in deref_cfg) + validate_out = ( + "INFO Validating config against internal schema: chgres-cube", + "INFO 0 UW schema-validation errors found", + "INFO 
Validating config against internal schema: platform", + "chgres_cube valid schema: State: Ready", + ) + assert all(x in tb.cell_output_text(15) for x in validate_out) diff --git a/notebooks/tests/test_fs.py b/notebooks/tests/test_fs.py new file mode 100644 index 000000000..b5b0b2607 --- /dev/null +++ b/notebooks/tests/test_fs.py @@ -0,0 +1,100 @@ +import os + +import yaml +from testbook import testbook + + +def test_copy(): + # Get the config files as text and dictionaries. + with open("fixtures/fs/copy-config.yaml", "r", encoding="utf-8") as f: + config_str = f.read().rstrip() + config_dict = yaml.safe_load(config_str) + with open("fixtures/fs/copy-keys-config.yaml", "r", encoding="utf-8") as f: + config_keys_str = f.read().rstrip() + config_keys_dict = yaml.safe_load(config_keys_str) + with testbook("fs.ipynb", execute=True) as tb: + # Each key in each config should have created a copy of the file given by each value. + for item in config_dict.items(): + with open("tmp/copy-target/" + item[0], "r", encoding="utf-8") as f: + copy_dst_txt = f.read().rstrip() + with open("tmp/copier-target/" + item[0], "r", encoding="utf-8") as f: + copier_dst_txt = f.read().rstrip() + with open(item[1], "r", encoding="utf-8") as f: + src_txt = f.read().rstrip() + assert copy_dst_txt == src_txt + assert copier_dst_txt == src_txt + for item in config_keys_dict["files"]["to"]["copy"].items(): + with open("tmp/copy-keys-target/" + item[0], "r", encoding="utf-8") as f: + copy_keys_dst_txt = f.read().rstrip() + with open(item[1], "r", encoding="utf-8") as f: + src_txt = f.read().rstrip() + assert copy_keys_dst_txt == src_txt + # Ensure that cell output text matches expectations. 
+ assert tb.cell_output_text(5) == config_str + assert "True" in tb.cell_output_text(7) + assert "False" in tb.cell_output_text(11) + assert tb.cell_output_text(13) == tb.cell_output_text(9) + assert tb.cell_output_text(15) == config_keys_str + assert "True" in tb.cell_output_text(17) + + +def test_link(): + # Get the config files as text and dictionaries. + with open("fixtures/fs/link-config.yaml", "r", encoding="utf-8") as f: + config_str = f.read().rstrip() + config_dict = yaml.safe_load(config_str) + with open("fixtures/fs/link-keys-config.yaml", "r", encoding="utf-8") as f: + config_keys_str = f.read().rstrip() + config_keys_dict = yaml.safe_load(config_keys_str) + with testbook("fs.ipynb", execute=True) as tb: + # Each key in each config should have created a symlink of the file given by each value. + for item in config_dict.items(): + link_path = "tmp/link-target/" + item[0] + linker_path = "tmp/linker-target/" + item[0] + with open(link_path, "r", encoding="utf-8") as f: + link_dst_txt = f.read().rstrip() + with open(linker_path, "r", encoding="utf-8") as f: + linker_dst_txt = f.read().rstrip() + with open(item[1], "r", encoding="utf-8") as f: + src_txt = f.read().rstrip() + assert os.path.islink(link_path) + assert link_dst_txt == src_txt + assert os.path.islink(linker_path) + assert linker_dst_txt == src_txt + for item in config_keys_dict["files"]["to"]["link"].items(): + link_keys_path = "tmp/link-keys-target/" + item[0] + with open(link_keys_path, "r", encoding="utf-8") as f: + link_keys_dst_txt = f.read().rstrip() + with open(item[1], "r", encoding="utf-8") as f: + src_txt = f.read().rstrip() + assert os.path.islink(link_keys_path) + assert link_keys_dst_txt == src_txt + # Ensure that cell output text matches expectations. 
+ assert tb.cell_output_text(29) == config_str + assert "True" in tb.cell_output_text(31) + assert "False" in tb.cell_output_text(35) + assert tb.cell_output_text(37) == tb.cell_output_text(33) + assert tb.cell_output_text(39) == config_keys_str + assert "True" in tb.cell_output_text(41) + + +def test_makedirs(): + # Get the config files as text and dictionaries. + with open("fixtures/fs/dir-config.yaml", "r", encoding="utf-8") as f: + config_str = f.read().rstrip() + config_dict = yaml.safe_load(config_str) + with open("fixtures/fs/dir-keys-config.yaml", "r", encoding="utf-8") as f: + config_keys_str = f.read().rstrip() + config_keys_dict = yaml.safe_load(config_keys_str) + with testbook("fs.ipynb", execute=True) as tb: + # Each value in each config should have been created as one or more subdirectories. + for subdir in config_dict["makedirs"]: + assert os.path.exists("tmp/dir-target/" + subdir) + assert os.path.exists("tmp/makedirs-target/" + subdir) + for subdir in config_keys_dict["path"]["to"]["dirs"]["makedirs"]: + assert os.path.exists("tmp/dir-keys-target/" + subdir) + # Ensure that cell output text matches expectations. 
+ assert tb.cell_output_text(53) == config_str + assert "True" in tb.cell_output_text(55) + assert tb.cell_output_text(59) == config_keys_str + assert "True" in tb.cell_output_text(61) diff --git a/notebooks/tests/test_rocoto.py b/notebooks/tests/test_rocoto.py new file mode 100644 index 000000000..389b8851d --- /dev/null +++ b/notebooks/tests/test_rocoto.py @@ -0,0 +1,90 @@ +from testbook import testbook + + +def test_building_simple_workflow(): + with open("fixtures/rocoto/simple-workflow.yaml", "r", encoding="utf-8") as f: + simple_yaml = f.read().rstrip() + with open("fixtures/rocoto/err-workflow.yaml", "r", encoding="utf-8") as f: + err_yaml = f.read().rstrip() + with testbook("rocoto.ipynb", execute=True) as tb: + with open("tmp/simple-workflow.xml", "r", encoding="utf-8") as f: + simple_xml = f.read().rstrip() + assert tb.cell_output_text(5) == simple_yaml + valid_out = ( + "INFO 0 UW schema-validation errors found", + "INFO 0 Rocoto XML validation errors found", + "True", + ) + assert all(x in tb.cell_output_text(7) for x in valid_out) + assert tb.cell_output_text(9) == simple_xml + assert tb.cell_output_text(11) == err_yaml + err_out = ( + "ERROR 3 UW schema-validation errors found", + "ERROR Error at workflow -> attrs:", + "ERROR 'realtime' is a required property", + "ERROR Error at workflow -> tasks -> task_greet:", + "ERROR 'command' is a required property", + "ERROR Error at workflow:", + "ERROR 'log' is a required property", + "YAML validation errors", + ) + assert all(x in tb.cell_output_text(13) for x in err_out) + + +def test_building_workflows(): + with open("fixtures/rocoto/ent-workflow.yaml", "r", encoding="utf-8") as f: + ent_yaml = f.read().rstrip() + with open("fixtures/rocoto/ent-cs-workflow.yaml", "r", encoding="utf-8") as f: + ent_cs_yaml = f.read().rstrip() + with open("fixtures/rocoto/tasks-workflow.yaml", "r", encoding="utf-8") as f: + tasks_yaml = f.read().rstrip() + with open("fixtures/rocoto/tasks-deps-workflow.yaml", "r", 
encoding="utf-8") as f: + tasks_deps_yaml = f.read().rstrip() + with open("fixtures/rocoto/meta-workflow.yaml", "r", encoding="utf-8") as f: + meta_yaml = f.read().rstrip() + with open("fixtures/rocoto/meta-nested-workflow.yaml", "r", encoding="utf-8") as f: + meta_nested_yaml = f.read().rstrip() + with testbook("rocoto.ipynb", execute=True) as tb: + with open("tmp/ent-cs-workflow.xml", "r", encoding="utf-8") as f: + ent_cs_xml = f.read().rstrip() + with open("tmp/tasks-deps-workflow.xml", "r", encoding="utf-8") as f: + tasks_deps_xml = f.read().rstrip() + with open("tmp/meta-workflow.xml", "r", encoding="utf-8") as f: + meta_xml = f.read().rstrip() + assert tb.cell_output_text(15) == ent_yaml + assert tb.cell_output_text(17) == ent_cs_yaml + valid_out = ( + "INFO 0 UW schema-validation errors found", + "INFO 0 Rocoto XML validation errors found", + "True", + ) + assert all(x in tb.cell_output_text(19) for x in valid_out) + assert tb.cell_output_text(21) == ent_cs_xml + assert tb.cell_output_text(23) == tasks_yaml + assert tb.cell_output_text(25) == tasks_deps_yaml + assert all(x in tb.cell_output_text(27) for x in valid_out) + assert tb.cell_output_text(29) == tasks_deps_xml + assert tb.cell_output_text(31) == meta_yaml + assert all(x in tb.cell_output_text(33) for x in valid_out) + assert tb.cell_output_text(35) == meta_xml + assert tb.cell_output_text(37) == meta_nested_yaml + + +def test_validate(): + with open("fixtures/rocoto/simple-workflow.xml", "r", encoding="utf-8") as f: + simple_xml = f.read().rstrip() + with open("fixtures/rocoto/err-workflow.xml", "r", encoding="utf-8") as f: + err_xml = f.read().rstrip() + with testbook("rocoto.ipynb", execute=True) as tb: + assert tb.cell_output_text(41) == simple_xml + valid_out = ("INFO 0 Rocoto XML validation errors found", "True") + assert all(x in tb.cell_output_text(43) for x in valid_out) + assert tb.cell_output_text(45) == err_xml + err_out = ( + "ERROR 4 Rocoto XML validation errors found", + "Element 
workflow failed to validate attributes", + "Expecting an element cycledef, got nothing", + "Invalid sequence in interleave", + "Element workflow failed to validate content", + ) + assert all(x in tb.cell_output_text(47) for x in err_out) diff --git a/notebooks/tests/test_template.py b/notebooks/tests/test_template.py new file mode 100644 index 000000000..d0395573f --- /dev/null +++ b/notebooks/tests/test_template.py @@ -0,0 +1,75 @@ +import os + +import yaml +from testbook import testbook + + +def test_render(): + + template = "fixtures/template/render-template.yaml" + values = "fixtures/template/render-values.yaml" + rendered_template1 = "fixtures/template/render-complete-1.yaml" + rendered_template2 = "fixtures/template/render-complete-2.yaml" + + for file in rendered_template1, rendered_template2: + if os.path.exists(file): + os.remove(file) + + with open(template, "r", encoding="utf-8") as f: + template_str = f.read().rstrip() + with open(values, "r", encoding="utf-8") as f: + values_str = f.read().rstrip() + + with testbook("template.ipynb", execute=True) as tb: + + with open(rendered_template1, "r", encoding="utf-8") as f: + rend_temp_str1 = f.read().rstrip() + temp_yaml1 = yaml.safe_load(rend_temp_str1) + assert temp_yaml1["user"] == {"name": "John Doe", "favorite_food": "burritos"} + + with open(rendered_template2, "r", encoding="utf-8") as f: + rend_temp_str2 = f.read().rstrip() + temp_yaml2 = yaml.safe_load(rend_temp_str2) + assert temp_yaml2["user"] == {"name": "Jane Doe", "favorite_food": "tamales"} + + assert tb.cell_output_text(5) == template_str + assert ( + "INFO first" in tb.cell_output_text(7) + and "INFO food" in tb.cell_output_text(7) + and "INFO last" in tb.cell_output_text(7) + ) + assert tb.cell_output_text(9) == values_str + assert tb.cell_output_text(11) == rend_temp_str1 + assert tb.cell_output_text(13) == rend_temp_str2 + assert rend_temp_str1 in tb.cell_output_text(15) and rend_temp_str2 in tb.cell_output_text( + 15 + ) + + 
+@testbook("template.ipynb", execute=True) +def test_render_to_str(tb): + + rend_temp_str = "user:\n name: John Doe\n favorite_food: burritos" + assert tb.ref("result") == rend_temp_str + assert tb.cell_output_text(19) == rend_temp_str + + +def test_translate(): + + atparse_template = "fixtures/template/translate-template.yaml" + translated_template = "fixtures/template/translate-complete.yaml" + + if os.path.exists(translated_template): + os.remove(translated_template) + + with open(atparse_template, "r", encoding="utf-8") as f: + atparse_str = f.read().rstrip() + + with testbook("template.ipynb", execute=True) as tb: + + with open(translated_template, "r", encoding="utf-8") as f: + translated_str = f.read().rstrip() + + assert tb.cell_output_text(23) == atparse_str + assert tb.cell_output_text(25) == "True" + assert tb.cell_output_text(27) == translated_str