diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 6aa6be5..e3278fd 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,5 +16,7 @@ jobs:
           cache: true
           # auth-host: prefix.dev
           # auth-token: ${{ secrets.PREFIX_DEV_TOKEN }}
+      - run: pixi run --environment ${{ matrix.environment }} lint
       - run: pixi run --environment ${{ matrix.environment }} test
+
diff --git a/.gitignore b/.gitignore
index 7e1f777..9ecf252 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,3 +164,4 @@
 xarray_subset_grid/_version.py
 .pixi
 *.egg-info
+.ruff_cache/
\ No newline at end of file
diff --git a/examples/subset_file.py b/examples/subset_file.py
index 4be259a..6afd451 100644
--- a/examples/subset_file.py
+++ b/examples/subset_file.py
@@ -2,7 +2,6 @@
 import numpy as np
 import xarray as xr
 
-import xarray_subset_grid.accessor
 from xarray_subset_grid.grids import ugrid
 from xarray_subset_grid.utils import format_bytes
 from xarray_subset_grid.visualization.mpl_plotting import plot_ugrid
diff --git a/pixi.lock b/pixi.lock
index 34fef5c..e7408b9 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -27236,9 +27236,9 @@ packages:
   timestamp: 1715608167507
 - kind: pypi
   name: xarray-subset-grid
-  version: 0.1.dev41+g75a92e8
+  version: 0.1.dev42+g4e6b150
   path: .
-  sha256: aebb8a03415496daf25d8c2e6b9c7edf1a49f745ab8583af316ecd8ee95e6ab2
+  sha256: 4110a4f5cdfaf101cf97a447e0cc8909d0a023d3b111b0946ba1681d0b1051d5
   requires_dist:
   - numpy
   - xarray>=2023.10.0
diff --git a/profiling/STOFS_benchmarks.py b/profiling/STOFS_benchmarks.py
index d48226d..c41fd16 100644
--- a/profiling/STOFS_benchmarks.py
+++ b/profiling/STOFS_benchmarks.py
@@ -1,15 +1,11 @@
 import time
 
-import cf_xarray
 import fsspec
-import s3fs
 import shapely
 import thalassa
 import xarray as xr
 import xugrid as xu
 
-import xarray_subset_grid as xrsg
-
 bucket_name = 'noaa-gestofs-pds'
 key = '_para2/stofs_2d_glo.20230819/stofs_2d_glo.t00z.fields.cwl.nc'
 url = f"s3://{bucket_name}/{key}"
diff --git a/profiling/p_in_p_perf.py b/profiling/p_in_p_perf.py
index 3dd02bb..5bb7aa4 100644
--- a/profiling/p_in_p_perf.py
+++ b/profiling/p_in_p_perf.py
@@ -44,30 +44,32 @@ def ray_tracing_numpy(x, y, poly):
 
 import numpy as np
 
-# Using a 20 vertex polygon -- arbitrary decision as common max for 
+# Using a 20 vertex polygon -- arbitrary decision as common max for
 # bounding poly used for subsetting.
-test_poly = np.array([
-    [-69.0842494, 41.8576263],
-    [-69.3834133, 41.6994390],
-    [-69.4844079, 41.5818408],
-    [-69.7009389, 41.5498641],
-    [-70.0628678, 41.5884718],
-    [-70.3054548, 41.6810850],
-    [-70.6109682, 41.7607248],
-    [-70.8657576, 41.9553727],
-    [-71.1089099, 42.1369069],
-    [-71.1294295, 42.4274792],
-    [-70.8877302, 42.6500898],
-    [-70.7118900, 42.7635708],
-    [-70.4645152, 42.8363260],
-    [-70.1066827, 42.8113145],
-    [-69.9021696, 42.7796958],
-    [-69.7686684, 42.7210923],
-    [-69.4055325, 42.5535379],
-    [-69.1527168, 42.3072355],
-    [-68.9597074, 42.0243090],
-    [-68.9939291, 41.9264228],
-])
+test_poly = np.array(
+    [
+        [-69.0842494, 41.8576263],
+        [-69.3834133, 41.6994390],
+        [-69.4844079, 41.5818408],
+        [-69.7009389, 41.5498641],
+        [-70.0628678, 41.5884718],
+        [-70.3054548, 41.6810850],
+        [-70.6109682, 41.7607248],
+        [-70.8657576, 41.9553727],
+        [-71.1089099, 42.1369069],
+        [-71.1294295, 42.4274792],
+        [-70.8877302, 42.6500898],
+        [-70.7118900, 42.7635708],
+        [-70.4645152, 42.8363260],
+        [-70.1066827, 42.8113145],
+        [-69.9021696, 42.7796958],
+        [-69.7686684, 42.7210923],
+        [-69.4055325, 42.5535379],
+        [-69.1527168, 42.3072355],
+        [-68.9597074, 42.0243090],
+        [-68.9939291, 41.9264228],
+    ]
+)
 
 min_lon, min_lat = test_poly.min(axis=0)
 max_lon, max_lat = test_poly.max(axis=0)
@@ -88,8 +90,8 @@ def ray_tracing_numpy(x, y, poly):
 # 9.03 ms ± 303 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
 
-
 from geometry_utils import polygon_inside
+
 geom = polygon_inside(test_poly, test_points)
 
 # 1000 pts
 # In [11]: %timeit polygon_inside(test_poly, test_points)
@@ -101,7 +103,7 @@ def ray_tracing_numpy(x, y, poly):
 # Do they give the same answer?
 assert np.array_equal(rtn_inside, geom)
 
-from shapely import contains_xy, Polygon
+from shapely import Polygon, contains_xy
 
 sh_poly = Polygon(test_poly)
@@ -114,7 +116,4 @@ def ray_tracing_numpy(x, y, poly):
 
 # 54.7 ms ± 1.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
-
 assert np.array_equal(rtn_inside, shapely)
-
-
diff --git a/pyproject.toml b/pyproject.toml
index f5094ab..d485d32 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,8 +4,10 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "xarray_subset_grid"
-authors = [{ name = "Matthew Iannucci", email = "matt.iannucci@tetratech.com" },
-           { name = "Christopher H. Barker", email = "chris.barker@noaa.gov" }]
+authors = [
+    { name = "Matthew Iannucci", email = "matt.iannucci@tetratech.com" },
+    { name = "Christopher H. Barker", email = "chris.barker@noaa.gov" },
+]
 description = "Subset Xarray datasets in space"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -13,27 +15,27 @@ keywords = ["xarray"]
 license = { file = "LICENSE" }
 
 classifiers = [
-        "Development Status :: 5 - Production/Stable",
-        "Intended Audience :: Science/Research",
-        "Operating System :: OS Independent",
-        "License :: OSI Approved :: BSD License",
-        "Programming Language :: Python",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-        "Topic :: Scientific/Engineering",
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Science/Research",
+    "Operating System :: OS Independent",
+    "License :: OSI Approved :: BSD License",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering",
 ]
 
 dynamic = ["version"]
 
 dependencies = [
-        "numpy",
-        "xarray>=2023.10.0",
-        "cf_xarray",
+    "numpy",
+    "xarray>=2023.10.0",
+    "cf_xarray",
     "dask[complete]",
     "netcdf4",
-    ]
+]
 
 [project.optional-dependencies]
 dev = [
@@ -45,13 +47,7 @@ dev = [
     "sphinx",
     "sphinx-rtd-theme",
 ]
-examples = [
-    "fsspec",
-    "s3fs",
-    "zarr",
-    "matplotlib",
-    "h5netcdf"
-]
+examples = ["fsspec", "s3fs", "zarr", "matplotlib", "h5netcdf"]
 
 [project.urls]
 "Homepage" = "https://github.com/asascience-open/xarray-subset-grid"
@@ -64,12 +60,10 @@ write_to = "xarray_subset_grid/_version.py"
 
 [tool.ruff]
 builtins = ["ellipsis"]
-extend-exclude = [
-    "xarray_subset_grid/_version.py"
-]
+extend-exclude = ["xarray_subset_grid/_version.py"]
 target-version = "py310"
 # Use a longer line length.
-line-length = 95
+line-length = 100
 
 [tool.ruff.lint]
 ignore = [
@@ -77,12 +71,12 @@ ignore = [
     "E731",  # do not assign a lambda expression, use a def
 ]
 select = [
-    "F", # Pyflakes
-    "E", # Pycodestyle
+    "F",  # Pyflakes
+    "E",  # Pycodestyle
     "W",
     "TID", # flake8-tidy-imports (absolute imports)
-    "I", # isort
-    "UP", # Pyupgrade
+    "I",  # isort
+    "UP",  # Pyupgrade
 ]
 extend-safe-fixes = [
     "TID252", # absolute imports
@@ -104,7 +98,7 @@ default = { solve-group = "default" }
 dev = { features = ["dev"], solve-group = "default" }
 examples = { features = ["examples"], solve-group = "default" }
 all = { features = ["dev", "examples"], solve-group = "default" }
-test310 = ["dev", "py310"] # implicit: test310 = ["dev", "py310", "default"]
+test310 = ["dev", "py310"]  # implicit: test310 = ["dev", "py310", "default"]
 test311 = ["dev", "py311"]
 test312 = ["dev", "py312"]
@@ -134,7 +128,9 @@ pytest-cov = "*"
 sphinx = "*"
 sphinx-rtd-theme = "*"
 
+
 [tool.pixi.feature.dev.tasks]
+lint = "ruff check"
 test = "pytest tests/"
 
 [tool.pixi.feature.examples.dependencies]
diff --git a/tests/test_grids/test_ugrid.py b/tests/test_grids/test_ugrid.py
index 818d1ec..1ffd24d 100644
--- a/tests/test_grids/test_ugrid.py
+++ b/tests/test_grids/test_ugrid.py
@@ -2,10 +2,11 @@
 tests for ugrid code
 """
 
 from pathlib import Path
 
-import xarray as xr
-from xarray_subset_grid.grids import ugrid
 import pytest
+import xarray as xr
+
+from xarray_subset_grid.grids import ugrid
 
 EXAMPLE_DATA = Path(__file__).parent.parent.parent / "examples" / "example_data"
@@ -220,7 +221,6 @@
 # topology for TEST_FILE1
 grid_topology = {'node_coordinates': ('lon', 'lat'),
                  'face_node_connectivity': 'nv',
-                 'node_coordinates': ('lon', 'lat'),
                  'face_coordinates': ('lonc', 'latc'),
                  'face_face_connectivity': 'nbe'
                  }
diff --git a/xarray_subset_grid/accessor.py b/xarray_subset_grid/accessor.py
index ef6f87e..4f21ba6 100644
--- a/xarray_subset_grid/accessor.py
+++ b/xarray_subset_grid/accessor.py
@@ -1,7 +1,6 @@
-from typing import Optional
 import warnings
 
-#from typing import Optional, Union 
+#from typing import Optional, Union
 
 import numpy as np
 import xarray as xr
@@ -20,7 +19,7 @@ def register_grid_impl(grid_impl: Grid, priority: int = 0):
     _grid_impls.insert(priority, grid_impl)
 
 
-def grid_factory(ds: xr.Dataset) -> Optional[Grid]:
+def grid_factory(ds: xr.Dataset) -> Grid | None:
     """
     Get the grid implementation for the given dataset.
     :param ds: The dataset to get the grid implementation for
@@ -37,7 +36,7 @@ class GridDatasetAccessor:
     """Accessor for grid operations on datasets"""
 
     _ds: xr.Dataset
-    _grid: Optional[Grid]
+    _grid: Grid | None
 
     def __init__(self, ds: xr.Dataset):
         """
@@ -48,7 +47,7 @@ def __init__(self, ds: xr.Dataset):
         self._grid = grid_factory(ds)
 
     @property
-    def grid(self) -> Optional[Grid]:
+    def grid(self) -> Grid | None:
        """The recognized grid implementation for the given dataset
         :return: The grid implementation or None if no implementation is found
         """
@@ -100,7 +99,7 @@ def subset_vars(self, vars: list[str]) -> xr.Dataset:
         return self._ds
 
     def subset_polygon(self, polygon: list[tuple[float, float]] | np.ndarray
-                       ) -> Optional[xr.Dataset]:
+                       ) -> xr.Dataset | None:
         """
         Subset the dataset to the grid.
 
@@ -116,7 +115,7 @@
     def subset_bbox(
         self, bbox: tuple[float, float, float, float]
-    ) -> Optional[xr.Dataset]:
+    ) -> xr.Dataset | None:
         """Subset the dataset to the bounding box
 
         This call is forwarded to the grid implementation with the loaded dataset.
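The accessor.py hunks above (and the grid.py/sgrid.py hunks below) replace typing.Optional and typing.Union annotations with PEP 604 unions such as Grid | None, which line up with the requires-python = ">=3.10" floor in pyproject.toml. A minimal sketch of how a caller might handle the Grid | None return of grid_factory follows; the empty Dataset is a hypothetical stand-in, not an example taken from this patch.

# Sketch only: handling grid_factory's `Grid | None` return.
# The empty Dataset below is a stand-in that no grid implementation should recognize.
import xarray as xr

from xarray_subset_grid.accessor import grid_factory

ds = xr.Dataset()
grid = grid_factory(ds)  # returns a Grid implementation, or None if nothing matches
if grid is None:
    print("no grid implementation recognized for this dataset")
else:
    print(f"recognized grid implementation: {type(grid).__name__}")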
diff --git a/xarray_subset_grid/grid.py b/xarray_subset_grid/grid.py
index e05987f..0f66b2d 100644
--- a/xarray_subset_grid/grid.py
+++ b/xarray_subset_grid/grid.py
@@ -1,6 +1,5 @@
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
-from typing import Union
 
 import numpy as np
 import xarray as xr
@@ -55,7 +54,7 @@ def subset_vars(self, ds: xr.Dataset, vars: Iterable[str]) -> xr.Dataset:
 
     @abstractmethod
     def subset_polygon(
-        self, ds: xr.Dataset, polygon: Union[list[tuple[float, float]], np.ndarray]
+        self, ds: xr.Dataset, polygon: list[tuple[float, float]] | np.ndarray
     ) -> xr.Dataset:
         """Subset the dataset to the grid
         :param ds: The dataset to subset
diff --git a/xarray_subset_grid/grids/sgrid.py b/xarray_subset_grid/grids/sgrid.py
index 0b73c1d..daf9037 100644
--- a/xarray_subset_grid/grids/sgrid.py
+++ b/xarray_subset_grid/grids/sgrid.py
@@ -1,5 +1,3 @@
-from typing import Union
-
 import numpy as np
 import xarray as xr
 
@@ -18,7 +16,8 @@ def recognize(ds: xr.Dataset) -> bool:
         except KeyError:
             return False
 
-        # For now, if the dataset has a grid topology and not a mesh topology, we assume it's a SGRID
+        # For now, if the dataset has a grid topology and not a mesh topology,
+        # we assume it's a SGRID
         return len(_grid_topology_keys) > 0 and _grid_topology_keys[0] in ds
 
     @property
@@ -56,7 +55,7 @@ def data_vars(self, ds: xr.Dataset) -> set[str]:
         return {var for var in ds.data_vars if not set(ds[var].dims).isdisjoint(dims)}
 
     def subset_polygon(
-        self, ds: xr.Dataset, polygon: Union[list[tuple[float, float]], np.ndarray]
+        self, ds: xr.Dataset, polygon: list[tuple[float, float]] | np.ndarray
     ) -> xr.Dataset:
         """Subset the dataset to the grid
         :param ds: The dataset to subset
@@ -111,14 +110,10 @@ def subset_polygon(
             # so that we can use it to mask and drop via xr.where, which requires that
             # the mask and data have the same shape and both are DataArrays with matching
             # dimensions
-            ds_subset = ds.assign(
-                subset_mask=xr.DataArray(polygon_mask, dims=mask_dims)
-            )
+            ds_subset = ds.assign(subset_mask=xr.DataArray(polygon_mask, dims=mask_dims))
 
             # Now we can use the mask to subset the data
-            ds_subset = (
-                ds_subset[vars].where(ds_subset.subset_mask, drop=True).drop_encoding()
-            )
+            ds_subset = ds_subset[vars].where(ds_subset.subset_mask, drop=True).drop_encoding()
 
             # Add the subsetted dataset to the list for merging
             ds_out.append(ds_subset)
diff --git a/xarray_subset_grid/utils.py b/xarray_subset_grid/utils.py
index 73d545d..4cfbd6b 100644
--- a/xarray_subset_grid/utils.py
+++ b/xarray_subset_grid/utils.py
@@ -1,6 +1,8 @@
+import warnings
+
 import cf_xarray  # noqa
 import numpy as np
-import xarray as xr
+
 
 
 def normalize_polygon_x_coords(x, poly):
@@ -23,7 +25,7 @@ def normalize_polygon_x_coords(x, poly):
     x_min, x_max = x.min(), x.max()
 
     poly_x = poly[:, 0]
-    poly_x_min, poly_x_max = poly_x.min(), poly_x.max()
+    _poly_x_min, poly_x_max = poly_x.min(), poly_x.max()
 
     if x_max > 180 and poly_x_max < 0:
         poly_x[poly_x < 0] += 360
@@ -33,6 +35,7 @@
     poly[:, 0] = poly_x
     return poly
 
+
 def ray_tracing_numpy(x, y, poly):
     """Find vertices inside of the given polygon
 
@@ -51,9 +54,7 @@ def ray_tracing_numpy(x, y, poly):
     p1x, p1y = poly[0]
     for i in range(n + 1):
         p2x, p2y = poly[i % n]
-        idx = np.nonzero(
-            (y > min(p1y, p2y)) & (y <= max(p1y, p2y)) & (x <= max(p1x, p2x))
-        )[0]
+        idx = np.nonzero((y > min(p1y, p2y)) & (y <= max(p1y, p2y)) & (x <= max(p1x, p2x)))[0]
         if len(idx):
             if p1y != p2y:
                 xints = (y[idx] - p1y) * (p2x - p1x) / (p2y - p1y) + p1x
@@ -70,12 +71,17 @@ def ray_tracing_numpy(x, y, poly):
 # This is defined in ugrid.py
 # this placeholder for backwards compatibility for a brief period
 def assign_ugrid_topology(*args, **kwargs):
-    warnings.warn(DeprecationWarning, "The function `assign_grid_topology` has been moved to the "
-                  "`grids.ugrid` module. It will not be able to be called from "
-                  "the utils `module` in the future.")
+    warnings.warn(
+        DeprecationWarning,
+        "The function `assign_grid_topology` has been moved to the "
+        "`grids.ugrid` module. It will not be able to be called from "
+        "the utils `module` in the future.",
+    )
     from .grids.ugrid import assign_ugrid_topology
+
     return assign_ugrid_topology(*args, **kwargs)
 
+
 def format_bytes(num):
     """
     This function will convert bytes to MB.... GB... etc
@@ -84,10 +90,9 @@ def format_bytes(num):
     """
 
     # not much to it, but handy for demos, etc.
-    step_unit = 1024 #1024 bad the size
+    step_unit = 1024  # 1024 bad the size
 
-    for x in ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB']:
+    for x in ["bytes", "KB", "MB", "GB", "TB", "PB"]:
         if num < step_unit:
-            return "%3.1f %s" % (num, x)
+            return f"{num:3.1f} {x}"
         num /= step_unit
-
diff --git a/xarray_subset_grid/visualization/mpl_plotting.py b/xarray_subset_grid/visualization/mpl_plotting.py
index c277020..6afd9aa 100644
--- a/xarray_subset_grid/visualization/mpl_plotting.py
+++ b/xarray_subset_grid/visualization/mpl_plotting.py
@@ -4,14 +4,11 @@
 NOTE: this could probably be built on existing packages -- worth a look.
 """
 
+from matplotlib.collections import LineCollection
 from matplotlib.tri import Triangulation
-from matplotlib.collections import LineCollection
 
-def plot_ugrid(axes,
-               ds,
-               nodes=False,
-               node_numbers=False,
-               face_numbers=False):
+
+def plot_ugrid(axes, ds, nodes=False, node_numbers=False, face_numbers=False):
     """
     Plot a UGRID in the provided MPL axes
 
@@ -30,14 +27,14 @@
     """
     mesh_defs = ds[ds.cf.cf_roles["mesh_topology"][0]].attrs
 
-    lon_var, lat_var = mesh_defs['node_coordinates'].split()
-    nodes_lon, nodes_lat = (ds[n] for n in mesh_defs['node_coordinates'].split())
-    faces = ds[mesh_defs['face_node_connectivity']]
+    lon_var, lat_var = mesh_defs["node_coordinates"].split()
+    nodes_lon, nodes_lat = (ds[n] for n in mesh_defs["node_coordinates"].split())
+    faces = ds[mesh_defs["face_node_connectivity"]]
 
     if faces.shape[0] == 3:  # swap order for mpl triangulation
         faces = faces.T
 
-    start_index = faces.attrs.get('start_index')
+    start_index = faces.attrs.get("start_index")
     start_index = 0 if start_index is None else start_index
     faces = faces - start_index
@@ -46,38 +43,54 @@
     axes.triplot(mpl_tri)
 
     if face_numbers:
         try:
-            face_lon, face_lat = (ds[n] for n in mesh_defs['face_coordinates'].split())
+            face_lon, face_lat = (ds[n] for n in mesh_defs["face_coordinates"].split())
         except KeyError:
-            raise VAlueError('"face_coordinates" must be defined to plot the face numbers')
+            raise ValueError('"face_coordinates" must be defined to plot the face numbers')
         for i, point in enumerate(zip(face_lon, face_lat)):
-            axes.annotate(f"{i}", point,
-                          xytext=(0, 0),
-                          textcoords='offset points',
-                          horizontalalignment='center',
-                          verticalalignment='center',
-                          bbox = {'facecolor': 'white', 'alpha': 1.0, 'boxstyle': "round,pad=0.0", 'ec': 'white'},
-                          )
+            axes.annotate(
+                f"{i}",
+                point,
+                xytext=(0, 0),
+                textcoords="offset points",
+                horizontalalignment="center",
+                verticalalignment="center",
+                bbox={
+                    "facecolor": "white",
+                    "alpha": 1.0,
+                    "boxstyle": "round,pad=0.0",
+                    "ec": "white",
+                },
+            )
 
     # plot nodes
     if nodes:
-        axes.plot(nodes_lon, nodes_lat, 'o')
+        axes.plot(nodes_lon, nodes_lat, "o")
 
     # plot node numbers
     if node_numbers:
         for i, point in enumerate(zip(nodes_lon, nodes_lat)):
-            axes.annotate(f"{i}", point,
-                          xytext=(2, 2),
-                          textcoords='offset points',
-                          bbox = {'facecolor': 'white', 'alpha': 1.0, 'boxstyle': "round,pad=0.0", 'ec': 'white'},
-                          )
+            axes.annotate(
+                f"{i}",
+                point,
+                xytext=(2, 2),
+                textcoords="offset points",
+                bbox={
+                    "facecolor": "white",
+                    "alpha": 1.0,
+                    "boxstyle": "round,pad=0.0",
+                    "ec": "white",
+                },
+            )
 
     # boundaries -- if they are there.
-    if 'boundary_node_connectivity' in mesh_defs:
-        bounds = ds[mesh_defs['boundary_node_connectivity']]
+    if "boundary_node_connectivity" in mesh_defs:
+        bounds = ds[mesh_defs["boundary_node_connectivity"]]
         lines = []
         for bound in bounds.data:
-            line = ((nodes_lon[bound[0]], nodes_lat[bound[0]]),
-                    (nodes_lon[bound[1]], nodes_lat[bound[1]]))
+            line = (
+                (nodes_lon[bound[0]], nodes_lat[bound[0]]),
+                (nodes_lon[bound[1]], nodes_lat[bound[1]]),
+            )
             lines.append(line)
         lc = LineCollection(lines, linewidths=2, colors=(1, 0, 0, 1))
         axes.add_collection(lc)
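Both profiling/p_in_p_perf.py and xarray_subset_grid/utils.py carry a ray_tracing_numpy point-in-polygon routine, and the profiling script asserts that its answers match shapely's. A self-contained version of that cross-check, reduced to a sketch: the unit-square polygon and random points below are hypothetical stand-ins, not the 20-vertex benchmark polygon from the script.

# Sketch only: cross-checking ray_tracing_numpy against shapely's vectorized
# contains_xy, as profiling/p_in_p_perf.py does. Polygon and points are stand-ins.
import numpy as np
from shapely import Polygon, contains_xy

from xarray_subset_grid.utils import ray_tracing_numpy

poly = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
pts = np.random.default_rng(42).uniform(-0.5, 1.5, size=(1000, 2))

inside_ray = ray_tracing_numpy(pts[:, 0], pts[:, 1], poly)     # ray-casting test
inside_shp = contains_xy(Polygon(poly), pts[:, 0], pts[:, 1])  # shapely 2.x check
assert np.array_equal(inside_ray, inside_shp)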