From 0b33c36d1b1ebea1e0a79ea19b332a73a6741778 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 21 Dec 2024 00:24:08 +0100 Subject: [PATCH] MAINT: update ruff (config) + add mypy (#46) --- .pre-commit-config.yaml | 11 ++- benchmarks_IO/benchmarks_pyogrio.py | 9 +- .../benchmarks_dask_geopandas.py | 4 +- benchmarks_zonalstats/__init__.py | 0 .../benchmarks_exactextract.py | 6 +- benchmarks_zonalstats/benchmarks_geowombat.py | 4 +- .../benchmarks_pygeoprocessing.py | 4 +- benchmarks_zonalstats/benchmarks_pyjeo.py | 5 +- benchmarks_zonalstats/benchmarks_pyqgis.py | 4 +- .../benchmarks_rasterstats.py | 4 +- project.toml | 99 ++++++------------- reporter.py | 7 -- testdata.py | 7 +- 13 files changed, 64 insertions(+), 100 deletions(-) create mode 100644 benchmarks_zonalstats/__init__.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 94fd07a..d413f90 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,14 @@ ci: repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.1.13" + rev: "v0.8.2" hooks: + # Format the code - id: ruff-format - - id: ruff \ No newline at end of file + # Lint the code + - id: ruff + # args: [ --fix ] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: "v1.8.0" + hooks: + - id: mypy \ No newline at end of file diff --git a/benchmarks_IO/benchmarks_pyogrio.py b/benchmarks_IO/benchmarks_pyogrio.py index 6842570..deb3136 100644 --- a/benchmarks_IO/benchmarks_pyogrio.py +++ b/benchmarks_IO/benchmarks_pyogrio.py @@ -35,7 +35,7 @@ def _get_version() -> str: class set_env_variables(object): def __init__(self, env_variables_to_set: dict): - self.env_variables_backup = {} + self.env_variables_backup: dict[str, str] = {} self.env_variables_to_set = env_variables_to_set def __enter__(self): @@ -63,7 +63,10 @@ def __exit__(self, type, value, traceback): def write_dataframe(tmp_dir: Path) -> List[RunResult]: # Init results = [] - input_path, _, _ = testdata.TestFile.AGRIPRC_2018.get_file(tmp_dir) + ( + input_path, + _, + ) = testdata.TestFile.AGRIPRC_2018.get_file(tmp_dir) # Go! # Read input files @@ -84,7 +87,7 @@ def write_dataframe(tmp_dir: Path) -> List[RunResult]: sqlite_pragma_combinations_tmp = [] for lengths in range(0, len(sqlite_possible_pragmas) + 1): for subset in itertools.combinations(sqlite_possible_pragmas, lengths): - sqlite_pragma_combinations_tmp.append(subset) + sqlite_pragma_combinations_tmp.append(list(subset)) # Now additionally add some different values for the cache_size pragma sqlite_caches_sizes = { diff --git a/benchmarks_vector_ops/benchmarks_dask_geopandas.py b/benchmarks_vector_ops/benchmarks_dask_geopandas.py index c715052..88abf14 100644 --- a/benchmarks_vector_ops/benchmarks_dask_geopandas.py +++ b/benchmarks_vector_ops/benchmarks_dask_geopandas.py @@ -247,8 +247,8 @@ def dissolve_groupby(tmp_dir: Path) -> RunResult: def join_by_location_intersects(tmp_dir: Path) -> RunResult: # Init- - input1_path = testdata.TestFile.AGRIPRC_2018.get_file(tmp_dir) - input2_path = testdata.TestFile.AGRIPRC_2019.get_file(tmp_dir) + input1_path, _ = testdata.TestFile.AGRIPRC_2018.get_file(tmp_dir) + input2_path, _ = testdata.TestFile.AGRIPRC_2019.get_file(tmp_dir) ### Go! ### # Read input files diff --git a/benchmarks_zonalstats/__init__.py b/benchmarks_zonalstats/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks_zonalstats/benchmarks_exactextract.py b/benchmarks_zonalstats/benchmarks_exactextract.py index 19ed2c3..f1c8673 100644 --- a/benchmarks_zonalstats/benchmarks_exactextract.py +++ b/benchmarks_zonalstats/benchmarks_exactextract.py @@ -12,7 +12,7 @@ import rasterio from benchmarker import RunResult -from benchmarks_zonalstats import _common as common +from benchmarks_zonalstats import _common import testdata logger = logging.getLogger(__name__) @@ -33,7 +33,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]: raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir) # Prepare a sample of the parcels, otherwise to slow - nb_poly = common.nb_polygons_for_test + nb_poly = _common.nb_polygons_for_test vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly)) vector_tmp_path = tmp_dir / "vector_input.gpkg" vector_gdf.to_file(vector_tmp_path) @@ -97,7 +97,7 @@ def zonalstats_3bands(tmp_dir: Path) -> List[RunResult]: raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir) # Prepare a sample of the parcels, otherwise to slow - nb_poly = common.nb_polygons_for_test + nb_poly = _common.nb_polygons_for_test vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly)) vector_tmp_path = tmp_dir / "vector_input.gpkg" vector_gdf.to_file(vector_tmp_path) diff --git a/benchmarks_zonalstats/benchmarks_geowombat.py b/benchmarks_zonalstats/benchmarks_geowombat.py index 7fc3817..e32af66 100644 --- a/benchmarks_zonalstats/benchmarks_geowombat.py +++ b/benchmarks_zonalstats/benchmarks_geowombat.py @@ -11,7 +11,7 @@ import geowombat as gw from benchmarker import RunResult -from benchmarks_zonalstats import _common as common +from benchmarks_zonalstats import _common import testdata logger = logging.getLogger(__name__) @@ -32,7 +32,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]: raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir) # Prepare a sample of the parcels, otherwise to slow - nb_poly = common.nb_polygons_for_test + nb_poly = _common.nb_polygons_for_test vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly)) vector_tmp_path = tmp_dir / "vector_input.gpkg" vector_gdf.to_file(vector_tmp_path) diff --git a/benchmarks_zonalstats/benchmarks_pygeoprocessing.py b/benchmarks_zonalstats/benchmarks_pygeoprocessing.py index 2b4ebc7..4a07c0e 100644 --- a/benchmarks_zonalstats/benchmarks_pygeoprocessing.py +++ b/benchmarks_zonalstats/benchmarks_pygeoprocessing.py @@ -11,7 +11,7 @@ import pygeoprocessing.geoprocessing from benchmarker import RunResult -from benchmarks_zonalstats import _common as common +from benchmarks_zonalstats import _common import testdata logger = logging.getLogger(__name__) @@ -32,7 +32,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]: raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir) # Prepare a sample of the parcels, otherwise to slow - nb_poly = common.nb_polygons_for_test + nb_poly = _common.nb_polygons_for_test vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly)) vector_tmp_path = tmp_dir / "vector_input.gpkg" vector_gdf.to_file(vector_tmp_path) diff --git a/benchmarks_zonalstats/benchmarks_pyjeo.py b/benchmarks_zonalstats/benchmarks_pyjeo.py index dd57fc1..2494a27 100644 --- a/benchmarks_zonalstats/benchmarks_pyjeo.py +++ b/benchmarks_zonalstats/benchmarks_pyjeo.py @@ -1,6 +1,7 @@ """ Module to benchmark zonalstats. """ + import os from datetime import datetime import logging @@ -12,7 +13,7 @@ import pyjeo as pj from benchmarker import RunResult -from benchmarks_zonalstats import _common as common +from benchmarks_zonalstats import _common import testdata logger = logging.getLogger(__name__) @@ -37,7 +38,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]: raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir) # Prepare a sample of the parcels, otherwise to slow - nb_poly = common.nb_polygons_for_test + nb_poly = _common.nb_polygons_for_test vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly)) vector_tmp_path = tmp_dir / "vector_input.gpkg" vector_gdf.to_file(vector_tmp_path) diff --git a/benchmarks_zonalstats/benchmarks_pyqgis.py b/benchmarks_zonalstats/benchmarks_pyqgis.py index 822d2bf..4d2d9b2 100644 --- a/benchmarks_zonalstats/benchmarks_pyqgis.py +++ b/benchmarks_zonalstats/benchmarks_pyqgis.py @@ -13,7 +13,7 @@ import qgis.analysis # type: ignore from benchmarker import RunResult -from benchmarks_zonalstats import _common as common +from benchmarks_zonalstats import _common import testdata logger = logging.getLogger(__name__) @@ -34,7 +34,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]: raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir) # Prepare a sample of the parcels, otherwise to slow - nb_poly = common.nb_polygons_for_test + nb_poly = _common.nb_polygons_for_test vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly)) vector_tmp_path = tmp_dir / "vector_input.gpkg" vector_gdf.to_file(vector_tmp_path) diff --git a/benchmarks_zonalstats/benchmarks_rasterstats.py b/benchmarks_zonalstats/benchmarks_rasterstats.py index 71048d3..11be5ef 100644 --- a/benchmarks_zonalstats/benchmarks_rasterstats.py +++ b/benchmarks_zonalstats/benchmarks_rasterstats.py @@ -11,7 +11,7 @@ import rasterstats from benchmarker import RunResult -from benchmarks_zonalstats import _common as common +from benchmarks_zonalstats import _common import testdata logger = logging.getLogger(__name__) @@ -32,7 +32,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]: raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir) # Prepare a sample of the parcels, otherwise to slow - nb_poly = common.nb_polygons_for_test + nb_poly = _common.nb_polygons_for_test vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly)) vector_tmp_path = tmp_dir / "vector_input.gpkg" vector_gdf.to_file(vector_tmp_path) diff --git a/project.toml b/project.toml index 169e0fb..700058b 100644 --- a/project.toml +++ b/project.toml @@ -1,22 +1,23 @@ - [tool.ruff] line-length = 88 +target-version = "py39" +extend-exclude = ["docs/*", "local_ignore/*"] + +[tool.ruff.lint] select = [ # pyflakes "F", # pycodestyle "E", "W", - # flake8-2020 - "YTT", + # pyupgrade + "UP", # flake8-bugbear "B", - # flake8-quotes - "Q", # flake8-debugger "T10", - # flake8-gettext - "INT", + # flake8-simplify + # "SIM", # pylint "PLC", "PLE", @@ -24,54 +25,36 @@ select = [ "PLW", # misc lints "PIE", - # flake8-pyi - "PYI", - # tidy imports - "TID", # implicit string concatenation "ISC", # type-checking imports "TCH", # comprehensions "C4", - # pygrep-hooks - "PGH", # Ruff-specific rules "RUF", + # isort + "I", + # pydocstyle + "D", ] -target-version = "py38" -ignore = [ # space before : (needed for how black formats slicing) - # "E203", # not yet implemented + +ignore = [ + ### Intentionally disabled + # module level import not at top of file + "E402", # do not assign a lambda expression, use a def "E731", - # line break before binary operator - # "W503", # not yet implemented - # line break after binary operator - # "W504", # not yet implemented - # controversial + # mutable-argument-default "B006", - # controversial + # unused-loop-control-variable "B007", - # controversial - "B008", - # setattr is used to side-step mypy + # get-attr-with-constant "B009", - # getattr is used to side-step mypy - "B010", - # tests use assert False - "B011", - # tests use comparisons but not their returned value - "B015", - # false positives - "B019", - # Loop control variable overrides iterable it iterates - "B020", - # Function definition does not bind loop variable - "B023", - # Functions defined inside a loop must not use variables redefined in the loop - # "B301", # not yet implemented # Only works with python >=3.10 "B905", + # dict literals + "C408", # Too many arguments to function call "PLR0913", # Too many returns @@ -80,42 +63,20 @@ ignore = [ # space before : (needed for how black formats slicing) "PLR0912", # Too many statements "PLR0915", + # Magic number + "PLR2004", # Redefined loop name "PLW2901", # Global statements are discouraged "PLW0603", - # Docstrings should not be included in stubs - "PYI021", - # No builtin `eval()` allowed - "PGH001", # compare-to-empty-string "PLC1901", - # Use typing_extensions.TypeAlias for type aliases - # "PYI026", # not yet implemented - # Use "collections.abc.*" instead of "typing.*" (PEP 585 syntax) - # "PYI027", # not yet implemented - # while int | float can be shortened to float, the former is more explicit - # "PYI041", # not yet implemented - - # Additional checks that don't pass yet - # Useless statement - "B018", - # Within an except clause, raise exceptions with ... - "B904", - # Magic number - "PLR2004", - # Consider `elif` instead of `else` then `if` to remove indentation level - "PLR5501", - # ambiguous-unicode-character-string - "RUF001", - # ambiguous-unicode-character-docstring - "RUF002", - # ambiguous-unicode-character-comment - "RUF003", # collection-literal-concatenation "RUF005", - # pairwise-over-zipped (>=PY310 only) - "RUF007", - # explicit-f-string-type-conversion - "RUF010", ] + +[tool.ruff.lint.per-file-ignores] +"tests/*" = ["D"] + +[tool.ruff.lint.pydocstyle] +convention = "google" diff --git a/reporter.py b/reporter.py index 441a825..2e1fb0e 100644 --- a/reporter.py +++ b/reporter.py @@ -4,7 +4,6 @@ import ast import math -import os from pathlib import Path import shutil import tempfile @@ -119,7 +118,6 @@ def save_chart( yscale: Optional[Literal["linear", "log", "symlog", "logit"]] = None, y_value_formatter: Optional[str] = None, print_labels_on_points: bool = False, - open_output_file: bool = False, size: Tuple[float, float] = (8, 4), plot_kind: Literal[ "line", @@ -151,7 +149,6 @@ def save_chart( - {0:.2f} for a float with two decimals. Defaults to None. print_labels_on_points (bool, optional): _description_. Defaults to False. - open_output_file (bool, optional): _description_. Defaults to False. size (Tuple[float, float], optional): _description_. Defaults to (8, 4). plot_kind (str, optional): _description_. Defaults to "line". gridlines (str, optional): where to draw grid lines: @@ -268,10 +265,6 @@ def save_chart( plt.close(fig) - # Open if wanted - if open_output_file is True: - os.startfile(output_path) - if __name__ == "__main__": # results_dir = Path(__file__).resolve().parent / "results_vector_ops" diff --git a/testdata.py b/testdata.py index 8a07659..6a30767 100644 --- a/testdata.py +++ b/testdata.py @@ -254,7 +254,7 @@ def _create_complex_poly_points( ) nb_points_extra = int((nb_points - nb_points_created) / 2) nb_points_estimate += nb_points_extra - line_distance_estimate = _estimate_line_distance(nb_points_estimate) + line_distance_estimate = _estimate_line_distance(nb_points_estimate, width) elif nb_points_created > nb_points_max: # Too many points... decrease nb_points_estimate logger.info( @@ -351,7 +351,7 @@ def _create_complex_square_poly( height = width # Vertical lines - for x_offset in range(0, 0 + width, line_distance): + for x_offset in range(0, 0 + int(width), line_distance): lines.append( shapely.LineString( [(xmin + x_offset, ymin), (xmin + x_offset, ymin + height)] @@ -359,7 +359,7 @@ def _create_complex_square_poly( ) # Horizontal lines - for y_offset in range(0, 0 + height, line_distance): + for y_offset in range(0, 0 + int(height), line_distance): lines.append( shapely.LineString( [(xmin, ymin + y_offset), (xmin + width, ymin + y_offset)] @@ -479,7 +479,6 @@ def _determine_number_points(width: float): xmin=0, ymin=0, width=width, - height=width, line_distance=line_distance, max_segment_length=100, )