Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
b8raoult committed Nov 1, 2024
2 parents e3f97fc + 162d4da commit 95a0228
Show file tree
Hide file tree
Showing 11 changed files with 228 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/ci-hpc-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ build:
parallel: 64

pytest_cmd: |
python -m pytest -vv -m 'not notebook and not no_cache_init' --cov=. --cov-report=xml
python -m pytest -vv -m 'not notebook and not no_cache_init and not skip_on_hpc' --cov=. --cov-report=xml
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ Keep it human-readable, your future self will thank you!

## [0.4.2](https://github.com/ecmwf/anemoi-utils/compare/0.4.1...0.4.2) - 2024-10-25

### Added
- Add supporting_arrays to checkpoints
- Add factories registry
- Optional renaming of subcommands via `command` attribute [#34](https://github.com/ecmwf/anemoi-utils/pull/34)
- `skip_on_hpc` pytest marker for tests that should not be run on HPC [#36](https://github.com/ecmwf/anemoi-utils/pull/36)


## [0.4.1](https://github.com/ecmwf/anemoi-utils/compare/0.4.0...0.4.1) - 2024-10-23

### Fixed
Expand All @@ -32,6 +39,7 @@ Keep it human-readable, your future self will thank you!
### Added

- Add anemoi-transform link to documentation
- Add CONTRIBUTORS.md (#33)

## [0.3.17](https://github.com/ecmwf/anemoi-utils/compare/0.3.13...0.3.17) - 2024-10-01

Expand Down
13 changes: 13 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## How to Contribute

Please see the [contributing guide on Read the Docs](https://anemoi-training.readthedocs.io/en/latest/dev/contributing.html).


## Contributors

Thank you to all the wonderful people who have contributed to Anemoi. Contributions can come in many forms, including code, documentation, bug reports, feature suggestions, design, and more. A list of code-based contributors can be found [here](https://github.com/ecmwf/anemoi-utils/graphs/contributors).


## Contributing Organisations

Significant contributions have been made by the following organisations: [DWD](https://www.dwd.de/), [MET Norway](https://www.met.no/), [MeteoSwiss](https://www.meteoswiss.admin.ch/), [RMI](https://www.meteo.be/) & [ECMWF](https://www.ecmwf.int/).
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@

project = "Anemoi Utils"

author = "ECMWF"
author = "Anemoi contributors"

year = datetime.datetime.now().year
if year == 2024:
years = "2024"
else:
years = "2024-%s" % (year,)

copyright = "%s, ECMWF" % (years,)
copyright = "%s, Anemoi contributors" % (years,)

try:
from anemoi.utils._version import __version__
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,8 @@ scripts.anemoi-utils = "anemoi.utils.__main__:main"

[tool.setuptools_scm]
version_file = "src/anemoi/utils/_version.py"

[tool.pytest.ini_options]
markers = [
"skip_on_hpc: mark a test that should not be run on HPC",
]
4 changes: 3 additions & 1 deletion src/anemoi/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
Expand Down
83 changes: 74 additions & 9 deletions src/anemoi/utils/checkpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
DEFAULT_FOLDER = "anemoi-metadata"


def has_metadata(path: str, name: str = DEFAULT_NAME) -> bool:
def has_metadata(path: str, *, name: str = DEFAULT_NAME) -> bool:
"""Check if a checkpoint file has a metadata file
Parameters
Expand All @@ -49,13 +49,26 @@ def has_metadata(path: str, name: str = DEFAULT_NAME) -> bool:
return False


def load_metadata(path: str, name: str = DEFAULT_NAME) -> dict:
def metadata_root(path: str, *, name: str = DEFAULT_NAME) -> str:
    """Return the directory, inside the checkpoint archive, that holds the metadata file

    Parameters
    ----------
    path : str
        The path to the checkpoint file (a zip archive)
    name : str, optional
        The name of the metadata file in the zip archive

    Returns
    -------
    str
        The directory part of the first archive member whose base name is ``name``

    Raises
    ------
    ValueError
        If no member of the archive has ``name`` as its base name
    """
    with zipfile.ZipFile(path, "r") as f:
        for b in f.namelist():
            # Members are compared on their base name only, so the metadata
            # file is found whatever folder it was stored under.
            if os.path.basename(b) == name:
                return os.path.dirname(b)
    raise ValueError(f"Could not find '{name}' in {path}.")


def load_metadata(path: str, *, supporting_arrays=False, name: str = DEFAULT_NAME) -> dict:
"""Load metadata from a checkpoint file
Parameters
----------
path : str
The path to the checkpoint file
supporting_arrays: bool, optional
If True, the function will return a dictionary with the supporting arrays
name : str, optional
The name of the metadata file in the zip archive
Expand All @@ -79,12 +92,29 @@ def load_metadata(path: str, name: str = DEFAULT_NAME) -> dict:

if metadata is not None:
with zipfile.ZipFile(path, "r") as f:
return json.load(f.open(metadata, "r"))
metadata = json.load(f.open(metadata, "r"))
if supporting_arrays:
metadata["supporting_arrays"] = load_supporting_arrays(f, metadata.get("supporting_arrays", {}))
return metadata, supporting_arrays

return metadata
else:
raise ValueError(f"Could not find '{name}' in {path}.")


def save_metadata(path, metadata, name=DEFAULT_NAME, folder=DEFAULT_FOLDER) -> None:
def load_supporting_arrays(zipf, entries) -> dict:
    """Read the supporting arrays described by ``entries`` from an open zip archive

    Parameters
    ----------
    zipf : zipfile.ZipFile
        An archive object providing ``read(member_name)``
    entries : dict
        Maps each array name to a dictionary with the keys ``path`` (member
        name in the archive), ``dtype`` and ``shape``

    Returns
    -------
    dict
        Maps each array name to the NumPy array rebuilt from the raw bytes
    """
    import numpy as np

    def _read(description):
        # The member holds the raw bytes of the array; dtype and shape are
        # taken from the description to rebuild it.
        raw = zipf.read(description["path"])
        flat = np.frombuffer(raw, dtype=description["dtype"])
        return flat.reshape(description["shape"])

    return {array_name: _read(description) for array_name, description in entries.items()}


def save_metadata(path, metadata, *, supporting_arrays=None, name=DEFAULT_NAME, folder=DEFAULT_FOLDER) -> None:
"""Save metadata to a checkpoint file
Parameters
Expand All @@ -93,6 +123,8 @@ def save_metadata(path, metadata, name=DEFAULT_NAME, folder=DEFAULT_FOLDER) -> N
The path to the checkpoint file
metadata : JSON
A JSON serializable object
supporting_arrays: dict, optional
A dictionary of supporting NumPy arrays
name : str, optional
The name of the metadata file in the zip archive
folder : str, optional
Expand All @@ -118,19 +150,41 @@ def save_metadata(path, metadata, name=DEFAULT_NAME, folder=DEFAULT_FOLDER) -> N

directory = list(directories)[0]

LOG.info("Adding extra information to checkpoint %s", path)
LOG.info("Saving metadata to %s/%s/%s", directory, folder, name)

metadata = metadata.copy()
if supporting_arrays is not None:
metadata["supporting_arrays_paths"] = {
key: dict(path=f"{directory}/{folder}/{key}.numpy", shape=value.shape, dtype=str(value.dtype))
for key, value in supporting_arrays.items()
}
else:
metadata["supporting_arrays_paths"] = {}

zipf.writestr(
f"{directory}/{folder}/{name}",
json.dumps(metadata),
)

for name, entry in metadata["supporting_arrays_paths"].items():
value = supporting_arrays[name]
LOG.info(
"Saving supporting array `%s` to %s (shape=%s, dtype=%s)",
name,
entry["path"],
entry["shape"],
entry["dtype"],
)
zipf.writestr(entry["path"], value.tobytes())


def _edit_metadata(path, name, callback):
def _edit_metadata(path, name, callback, supporting_arrays=None):
new_path = f"{path}.anemoi-edit-{time.time()}-{os.getpid()}.tmp"

found = False

directory = None
with TemporaryDirectory() as temp_dir:
zipfile.ZipFile(path, "r").extractall(temp_dir)
total = 0
Expand All @@ -141,10 +195,21 @@ def _edit_metadata(path, name, callback):
if f == name:
found = True
callback(full)
directory = os.path.dirname(full)

if not found:
raise ValueError(f"Could not find '{name}' in {path}")

if supporting_arrays is not None:

for key, entry in supporting_arrays.items():
value = entry.tobytes()
fname = os.path.join(directory, f"{key}.numpy")
os.makedirs(os.path.dirname(fname), exist_ok=True)
with open(fname, "wb") as f:
f.write(value)
total += 1

with zipfile.ZipFile(new_path, "w", zipfile.ZIP_DEFLATED) as zipf:
with tqdm.tqdm(total=total, desc="Rebuilding checkpoint") as pbar:
for root, dirs, files in os.walk(temp_dir):
Expand All @@ -158,7 +223,7 @@ def _edit_metadata(path, name, callback):
LOG.info("Updated metadata in %s", path)


def replace_metadata(path, metadata, name=DEFAULT_NAME):
def replace_metadata(path, metadata, supporting_arrays=None, *, name=DEFAULT_NAME):

if not isinstance(metadata, dict):
raise ValueError(f"metadata must be a dict, got {type(metadata)}")
Expand All @@ -170,14 +235,14 @@ def callback(full):
with open(full, "w") as f:
json.dump(metadata, f)

_edit_metadata(path, name, callback)
return _edit_metadata(path, name, callback, supporting_arrays)


def remove_metadata(path, name=DEFAULT_NAME):
def remove_metadata(path, *, name=DEFAULT_NAME):
    """Remove the metadata file ``name`` from the checkpoint at ``path``

    Parameters
    ----------
    path : str
        The path to the checkpoint file
    name : str, optional
        The name of the metadata file in the zip archive

    Returns
    -------
    The value returned by ``_edit_metadata``
    """
    LOG.info("Removing metadata '%s' from %s", name, path)

    # _edit_metadata calls the callback with the full path of the matching
    # file, so os.remove can be handed over directly to delete it.
    return _edit_metadata(path, name, os.remove)
16 changes: 14 additions & 2 deletions src/anemoi/utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,20 @@ def register_commands(here, package, select, fail=None):
continue

obj = select(imported)
if obj is not None:
result[name] = obj
if obj is None:
continue

if hasattr(obj, "command"):
name = obj.command

if name in result:
msg = f"Duplicate command '{name}', please choose a different command name for {type(obj)}"
raise ValueError(msg)
if " " in name:
msg = f"Commands cannot contain spaces: '{name}' in {type(obj)}"
raise ValueError(msg)

result[name] = obj

for name, e in not_available.items():
if fail is None:
Expand Down
5 changes: 3 additions & 2 deletions src/anemoi/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def check_config_mode(name="settings.toml", secrets_name=None, secrets=None) ->
CHECKED[name] = True


def find(metadata, what, result=None):
def find(metadata, what, result=None, *, select: callable = None):
if result is None:
result = []

Expand All @@ -369,7 +369,8 @@ def find(metadata, what, result=None):

if isinstance(metadata, dict):
if what in metadata:
result.append(metadata[what])
if select is None or select(metadata[what]):
result.append(metadata[what])

for k, v in metadata.items():
find(v, what, result)
Expand Down
9 changes: 7 additions & 2 deletions src/anemoi/utils/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ def as_datetime_list(date, default_increment=1):
return list(_as_datetime_list(date, default_increment))


def frequency_to_timedelta(frequency) -> datetime.timedelta:
"""Convert a frequency to a timedelta object.
def as_timedelta(frequency) -> datetime.timedelta:
"""Convert anything to a timedelta object.
Parameters
----------
Expand Down Expand Up @@ -171,6 +171,11 @@ def frequency_to_timedelta(frequency) -> datetime.timedelta:
raise ValueError(f"Cannot convert frequency {frequency} to timedelta")


def frequency_to_timedelta(frequency) -> datetime.timedelta:
    """Convert a frequency to a timedelta object.

    This is a thin wrapper that forwards ``frequency`` to ``as_timedelta``
    and returns its result unchanged.
    """
    delta = as_timedelta(frequency)
    return delta


def frequency_to_string(frequency) -> str:
"""Convert a frequency (i.e. a datetime.timedelta) to a string.
Expand Down
Loading

0 comments on commit 95a0228

Please sign in to comment.