From 0b33c36d1b1ebea1e0a79ea19b332a73a6741778 Mon Sep 17 00:00:00 2001
From: Pieter Roggemans <pieter.roggemans@gmail.com>
Date: Sat, 21 Dec 2024 00:24:08 +0100
Subject: [PATCH] MAINT: update ruff (config) + add mypy (#46)

---
 .pre-commit-config.yaml                       | 11 ++-
 benchmarks_IO/benchmarks_pyogrio.py           |  9 +-
 .../benchmarks_dask_geopandas.py              |  4 +-
 benchmarks_zonalstats/__init__.py             |  0
 .../benchmarks_exactextract.py                |  6 +-
 benchmarks_zonalstats/benchmarks_geowombat.py |  4 +-
 .../benchmarks_pygeoprocessing.py             |  4 +-
 benchmarks_zonalstats/benchmarks_pyjeo.py     |  5 +-
 benchmarks_zonalstats/benchmarks_pyqgis.py    |  4 +-
 .../benchmarks_rasterstats.py                 |  4 +-
 project.toml                                  | 99 ++++++-------------
 reporter.py                                   |  7 --
 testdata.py                                   |  7 +-
 13 files changed, 64 insertions(+), 100 deletions(-)
 create mode 100644 benchmarks_zonalstats/__init__.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 94fd07a..d413f90 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,14 @@ ci:
 
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.1.13"
+    rev: "v0.8.2"
     hooks:
+      # Format the code
       - id: ruff-format
-      - id: ruff
\ No newline at end of file
+      # Lint the code
+      - id: ruff
+        # args: [ --fix ]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: "v1.8.0"
+    hooks:
+      - id: mypy
\ No newline at end of file
diff --git a/benchmarks_IO/benchmarks_pyogrio.py b/benchmarks_IO/benchmarks_pyogrio.py
index 6842570..deb3136 100644
--- a/benchmarks_IO/benchmarks_pyogrio.py
+++ b/benchmarks_IO/benchmarks_pyogrio.py
@@ -35,7 +35,7 @@ def _get_version() -> str:
 
 class set_env_variables(object):
     def __init__(self, env_variables_to_set: dict):
-        self.env_variables_backup = {}
+        self.env_variables_backup: dict[str, str] = {}
         self.env_variables_to_set = env_variables_to_set
 
     def __enter__(self):
@@ -63,7 +63,10 @@ def __exit__(self, type, value, traceback):
 def write_dataframe(tmp_dir: Path) -> List[RunResult]:
     # Init
     results = []
-    input_path, _, _ = testdata.TestFile.AGRIPRC_2018.get_file(tmp_dir)
+    (
+        input_path,
+        _,
+    ) = testdata.TestFile.AGRIPRC_2018.get_file(tmp_dir)
 
     # Go!
     # Read input files
@@ -84,7 +87,7 @@ def write_dataframe(tmp_dir: Path) -> List[RunResult]:
     sqlite_pragma_combinations_tmp = []
     for lengths in range(0, len(sqlite_possible_pragmas) + 1):
         for subset in itertools.combinations(sqlite_possible_pragmas, lengths):
-            sqlite_pragma_combinations_tmp.append(subset)
+            sqlite_pragma_combinations_tmp.append(list(subset))
 
     # Now additionally add some different values for the cache_size pragma
     sqlite_caches_sizes = {
diff --git a/benchmarks_vector_ops/benchmarks_dask_geopandas.py b/benchmarks_vector_ops/benchmarks_dask_geopandas.py
index c715052..88abf14 100644
--- a/benchmarks_vector_ops/benchmarks_dask_geopandas.py
+++ b/benchmarks_vector_ops/benchmarks_dask_geopandas.py
@@ -247,8 +247,8 @@ def dissolve_groupby(tmp_dir: Path) -> RunResult:
 
 def join_by_location_intersects(tmp_dir: Path) -> RunResult:
     # Init-
-    input1_path = testdata.TestFile.AGRIPRC_2018.get_file(tmp_dir)
-    input2_path = testdata.TestFile.AGRIPRC_2019.get_file(tmp_dir)
+    input1_path, _ = testdata.TestFile.AGRIPRC_2018.get_file(tmp_dir)
+    input2_path, _ = testdata.TestFile.AGRIPRC_2019.get_file(tmp_dir)
 
     ### Go! ###
     # Read input files
diff --git a/benchmarks_zonalstats/__init__.py b/benchmarks_zonalstats/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/benchmarks_zonalstats/benchmarks_exactextract.py b/benchmarks_zonalstats/benchmarks_exactextract.py
index 19ed2c3..f1c8673 100644
--- a/benchmarks_zonalstats/benchmarks_exactextract.py
+++ b/benchmarks_zonalstats/benchmarks_exactextract.py
@@ -12,7 +12,7 @@
 import rasterio
 
 from benchmarker import RunResult
-from benchmarks_zonalstats import _common as common
+from benchmarks_zonalstats import _common
 import testdata
 
 logger = logging.getLogger(__name__)
@@ -33,7 +33,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]:
     raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir)
 
     # Prepare a sample of the parcels, otherwise to slow
-    nb_poly = common.nb_polygons_for_test
+    nb_poly = _common.nb_polygons_for_test
     vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly))
     vector_tmp_path = tmp_dir / "vector_input.gpkg"
     vector_gdf.to_file(vector_tmp_path)
@@ -97,7 +97,7 @@ def zonalstats_3bands(tmp_dir: Path) -> List[RunResult]:
     raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir)
 
     # Prepare a sample of the parcels, otherwise to slow
-    nb_poly = common.nb_polygons_for_test
+    nb_poly = _common.nb_polygons_for_test
     vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly))
     vector_tmp_path = tmp_dir / "vector_input.gpkg"
     vector_gdf.to_file(vector_tmp_path)
diff --git a/benchmarks_zonalstats/benchmarks_geowombat.py b/benchmarks_zonalstats/benchmarks_geowombat.py
index 7fc3817..e32af66 100644
--- a/benchmarks_zonalstats/benchmarks_geowombat.py
+++ b/benchmarks_zonalstats/benchmarks_geowombat.py
@@ -11,7 +11,7 @@
 import geowombat as gw
 
 from benchmarker import RunResult
-from benchmarks_zonalstats import _common as common
+from benchmarks_zonalstats import _common
 import testdata
 
 logger = logging.getLogger(__name__)
@@ -32,7 +32,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]:
     raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir)
 
     # Prepare a sample of the parcels, otherwise to slow
-    nb_poly = common.nb_polygons_for_test
+    nb_poly = _common.nb_polygons_for_test
     vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly))
     vector_tmp_path = tmp_dir / "vector_input.gpkg"
     vector_gdf.to_file(vector_tmp_path)
diff --git a/benchmarks_zonalstats/benchmarks_pygeoprocessing.py b/benchmarks_zonalstats/benchmarks_pygeoprocessing.py
index 2b4ebc7..4a07c0e 100644
--- a/benchmarks_zonalstats/benchmarks_pygeoprocessing.py
+++ b/benchmarks_zonalstats/benchmarks_pygeoprocessing.py
@@ -11,7 +11,7 @@
 import pygeoprocessing.geoprocessing
 
 from benchmarker import RunResult
-from benchmarks_zonalstats import _common as common
+from benchmarks_zonalstats import _common
 import testdata
 
 logger = logging.getLogger(__name__)
@@ -32,7 +32,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]:
     raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir)
 
     # Prepare a sample of the parcels, otherwise to slow
-    nb_poly = common.nb_polygons_for_test
+    nb_poly = _common.nb_polygons_for_test
     vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly))
     vector_tmp_path = tmp_dir / "vector_input.gpkg"
     vector_gdf.to_file(vector_tmp_path)
diff --git a/benchmarks_zonalstats/benchmarks_pyjeo.py b/benchmarks_zonalstats/benchmarks_pyjeo.py
index dd57fc1..2494a27 100644
--- a/benchmarks_zonalstats/benchmarks_pyjeo.py
+++ b/benchmarks_zonalstats/benchmarks_pyjeo.py
@@ -1,6 +1,7 @@
 """
 Module to benchmark zonalstats.
 """
+
 import os
 from datetime import datetime
 import logging
@@ -12,7 +13,7 @@
 import pyjeo as pj
 
 from benchmarker import RunResult
-from benchmarks_zonalstats import _common as common
+from benchmarks_zonalstats import _common
 import testdata
 
 logger = logging.getLogger(__name__)
@@ -37,7 +38,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]:
     raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir)
 
     # Prepare a sample of the parcels, otherwise to slow
-    nb_poly = common.nb_polygons_for_test
+    nb_poly = _common.nb_polygons_for_test
     vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly))
     vector_tmp_path = tmp_dir / "vector_input.gpkg"
     vector_gdf.to_file(vector_tmp_path)
diff --git a/benchmarks_zonalstats/benchmarks_pyqgis.py b/benchmarks_zonalstats/benchmarks_pyqgis.py
index 822d2bf..4d2d9b2 100644
--- a/benchmarks_zonalstats/benchmarks_pyqgis.py
+++ b/benchmarks_zonalstats/benchmarks_pyqgis.py
@@ -13,7 +13,7 @@
 import qgis.analysis  # type: ignore
 
 from benchmarker import RunResult
-from benchmarks_zonalstats import _common as common
+from benchmarks_zonalstats import _common
 import testdata
 
 logger = logging.getLogger(__name__)
@@ -34,7 +34,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]:
     raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir)
 
     # Prepare a sample of the parcels, otherwise to slow
-    nb_poly = common.nb_polygons_for_test
+    nb_poly = _common.nb_polygons_for_test
     vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly))
     vector_tmp_path = tmp_dir / "vector_input.gpkg"
     vector_gdf.to_file(vector_tmp_path)
diff --git a/benchmarks_zonalstats/benchmarks_rasterstats.py b/benchmarks_zonalstats/benchmarks_rasterstats.py
index 71048d3..11be5ef 100644
--- a/benchmarks_zonalstats/benchmarks_rasterstats.py
+++ b/benchmarks_zonalstats/benchmarks_rasterstats.py
@@ -11,7 +11,7 @@
 import rasterstats
 
 from benchmarker import RunResult
-from benchmarks_zonalstats import _common as common
+from benchmarks_zonalstats import _common
 import testdata
 
 logger = logging.getLogger(__name__)
@@ -32,7 +32,7 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]:
     raster_path, _ = testdata.TestFile.S2_NDVI_2020.get_file(tmp_dir)
 
     # Prepare a sample of the parcels, otherwise to slow
-    nb_poly = common.nb_polygons_for_test
+    nb_poly = _common.nb_polygons_for_test
     vector_gdf = gpd.read_file(vector_path, rows=slice(0, nb_poly))
     vector_tmp_path = tmp_dir / "vector_input.gpkg"
     vector_gdf.to_file(vector_tmp_path)
diff --git a/project.toml b/project.toml
index 169e0fb..700058b 100644
--- a/project.toml
+++ b/project.toml
@@ -1,22 +1,23 @@
-
 [tool.ruff]
 line-length = 88
+target-version = "py39"
+extend-exclude = ["docs/*", "local_ignore/*"]
+
+[tool.ruff.lint]
 select = [
     # pyflakes
     "F",
     # pycodestyle
     "E",
     "W",
-    # flake8-2020
-    "YTT",
+    # pyupgrade
+    "UP",
     # flake8-bugbear
     "B",
-    # flake8-quotes
-    "Q",
     # flake8-debugger
     "T10",
-    # flake8-gettext
-    "INT",
+    # flake8-simplify
+    # "SIM",
     # pylint
     "PLC",
     "PLE",
@@ -24,54 +25,36 @@ select = [
     "PLW",
     # misc lints
     "PIE",
-    # flake8-pyi
-    "PYI",
-    # tidy imports
-    "TID",
     # implicit string concatenation
     "ISC",
     # type-checking imports
     "TCH",
     # comprehensions
     "C4",
-    # pygrep-hooks
-    "PGH",
     # Ruff-specific rules
     "RUF",
+    # isort
+    "I",
+    # pydocstyle
+    "D",
 ]
-target-version = "py38"
-ignore = [ # space before : (needed for how black formats slicing)
-    # "E203",  # not yet implemented
+
+ignore = [
+    ### Intentionally disabled
+    # module level import not at top of file
+    "E402",
     # do not assign a lambda expression, use a def
     "E731",
-    # line break before binary operator
-    # "W503",  # not yet implemented
-    # line break after binary operator
-    # "W504",  # not yet implemented
-    # controversial
+    # mutable-argument-default
     "B006",
-    # controversial
+    # unused-loop-control-variable
     "B007",
-    # controversial
-    "B008",
-    # setattr is used to side-step mypy
+    # get-attr-with-constant
     "B009",
-    # getattr is used to side-step mypy
-    "B010",
-    # tests use assert False
-    "B011",
-    # tests use comparisons but not their returned value
-    "B015",
-    # false positives
-    "B019",
-    # Loop control variable overrides iterable it iterates
-    "B020",
-    # Function definition does not bind loop variable
-    "B023",
-    # Functions defined inside a loop must not use variables redefined in the loop
-    # "B301",  # not yet implemented
     # Only works with python >=3.10
     "B905",
+    # unnecessary-collection-call (e.g. dict() instead of {})
+    "C408",
     # Too many arguments to function call
     "PLR0913",
     # Too many returns
@@ -80,42 +63,20 @@ ignore = [ # space before : (needed for how black formats slicing)
     "PLR0912",
     # Too many statements
     "PLR0915",
+    # Magic number
+    "PLR2004",
     # Redefined loop name
     "PLW2901",
     # Global statements are discouraged
     "PLW0603",
-    # Docstrings should not be included in stubs
-    "PYI021",
-    # No builtin `eval()` allowed
-    "PGH001",
     # compare-to-empty-string
     "PLC1901",
-    # Use typing_extensions.TypeAlias for type aliases
-    # "PYI026",  # not yet implemented
-    # Use "collections.abc.*" instead of "typing.*" (PEP 585 syntax)
-    # "PYI027",  # not yet implemented
-    # while int | float can be shortened to float, the former is more explicit
-    # "PYI041",  # not yet implemented
-
-    # Additional checks that don't pass yet
-    # Useless statement
-    "B018",
-    # Within an except clause, raise exceptions with ...
-    "B904",
-    # Magic number
-    "PLR2004",
-    # Consider `elif` instead of `else` then `if` to remove indentation level
-    "PLR5501",
-    # ambiguous-unicode-character-string
-    "RUF001",
-    # ambiguous-unicode-character-docstring
-    "RUF002",
-    # ambiguous-unicode-character-comment
-    "RUF003",
     # collection-literal-concatenation
     "RUF005",
-    # pairwise-over-zipped (>=PY310 only)
-    "RUF007",
-    # explicit-f-string-type-conversion
-    "RUF010",
 ]
+
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["D"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
diff --git a/reporter.py b/reporter.py
index 441a825..2e1fb0e 100644
--- a/reporter.py
+++ b/reporter.py
@@ -4,7 +4,6 @@
 
 import ast
 import math
-import os
 from pathlib import Path
 import shutil
 import tempfile
@@ -119,7 +118,6 @@ def save_chart(
     yscale: Optional[Literal["linear", "log", "symlog", "logit"]] = None,
     y_value_formatter: Optional[str] = None,
     print_labels_on_points: bool = False,
-    open_output_file: bool = False,
     size: Tuple[float, float] = (8, 4),
     plot_kind: Literal[
         "line",
@@ -151,7 +149,6 @@ def save_chart(
               - {0:.2f} for a float with two decimals.
             Defaults to None.
         print_labels_on_points (bool, optional): _description_. Defaults to False.
-        open_output_file (bool, optional): _description_. Defaults to False.
         size (Tuple[float, float], optional): _description_. Defaults to (8, 4).
         plot_kind (str, optional): _description_. Defaults to "line".
         gridlines (str, optional): where to draw grid lines:
@@ -268,10 +265,6 @@ def save_chart(
 
     plt.close(fig)
 
-    # Open if wanted
-    if open_output_file is True:
-        os.startfile(output_path)
-
 
 if __name__ == "__main__":
     # results_dir = Path(__file__).resolve().parent / "results_vector_ops"
diff --git a/testdata.py b/testdata.py
index 8a07659..6a30767 100644
--- a/testdata.py
+++ b/testdata.py
@@ -254,7 +254,7 @@ def _create_complex_poly_points(
             )
             nb_points_extra = int((nb_points - nb_points_created) / 2)
             nb_points_estimate += nb_points_extra
-            line_distance_estimate = _estimate_line_distance(nb_points_estimate)
+            line_distance_estimate = _estimate_line_distance(nb_points_estimate, width)
         elif nb_points_created > nb_points_max:
             # Too many points... decrease nb_points_estimate
             logger.info(
@@ -351,7 +351,7 @@ def _create_complex_square_poly(
     height = width
 
     # Vertical lines
-    for x_offset in range(0, 0 + width, line_distance):
+    for x_offset in range(0, 0 + int(width), line_distance):
         lines.append(
             shapely.LineString(
                 [(xmin + x_offset, ymin), (xmin + x_offset, ymin + height)]
@@ -359,7 +359,7 @@ def _create_complex_square_poly(
         )
 
     # Horizontal lines
-    for y_offset in range(0, 0 + height, line_distance):
+    for y_offset in range(0, 0 + int(height), line_distance):
         lines.append(
             shapely.LineString(
                 [(xmin, ymin + y_offset), (xmin + width, ymin + y_offset)]
@@ -479,7 +479,6 @@ def _determine_number_points(width: float):
             xmin=0,
             ymin=0,
             width=width,
-            height=width,
             line_distance=line_distance,
             max_segment_length=100,
         )