Ref/with flake8 (#81)
* try to fix flake8 error

* reformat with black

* reformat with black

* rerun isort

* fix pylint issue
pmayd authored Mar 10, 2024
1 parent 40dd43c commit 85ccd1c
Showing 17 changed files with 54 additions and 155 deletions.
3 changes: 1 addition & 2 deletions .flake8
@@ -5,8 +5,7 @@ exclude = ".git,__pycache__,docs,build,dist,.venv,.github"
filename = "*py"
ignore = "E4,E501"
max-complexity = 10
-max-line-length = 80
-require-plugins = "flake8-docstrings"
+max-line-length = 120
show-source = true
statistics = true
tee = true
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", "--filter-files", "--line-length", "80"]
args: ["--profile", "black", "--filter-files", "--line-length", "120"]
- repo: https://github.com/psf/black
rev: 23.10.0
hooks:
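The hook change above only bumps isort's target line length to match black. A minimal sketch of the same settings applied through isort's Python API, assuming isort 5.x is installed (the input snippet is made up):

```python
import isort

# Same options the pre-commit hook passes on the command line:
# black-compatible wrapping style and a 120-character limit.
messy = "from os import sep, path\nimport sys\n"
print(isort.code(messy, profile="black", line_length=120))
```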
6 changes: 1 addition & 5 deletions docs/source/conf.py
@@ -20,11 +20,7 @@
"Marco Hübner <[email protected]>",
]
maintainers = ["Michael Aydinbas <[email protected]>"]
-release = (
-subprocess.check_output(["poetry", "version"], text=True)
-.strip()
-.split()[-1]
-)
+release = subprocess.check_output(["poetry", "version"], text=True).strip().split()[-1]
version = release


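The conf.py change is a pure black reflow; the logic still shells out to Poetry for the release string. A minimal sketch of what the one-liner does, assuming `poetry version` prints the package name followed by the version (e.g. `pystatis 0.1.4`):

```python
import subprocess

# "pystatis 0.1.4\n" -> strip the newline, split on whitespace,
# keep the last token: the bare version number.
output = subprocess.check_output(["poetry", "version"], text=True)
release = output.strip().split()[-1]
```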
26 changes: 8 additions & 18 deletions nb/presentation.py
@@ -213,15 +213,13 @@
# ### set proper column types

# %%
students.data["Kreise und kreisfreie Städte_Code"] = students.data[
"Kreise und kreisfreie Städte_Code"
].astype(str)
students.data["Kreise und kreisfreie Städte_Code"] = students.data["Kreise und kreisfreie Städte_Code"].astype(str)
students.data["Kreise und kreisfreie Städte_Code"]

# %%
students.data["Kreise und kreisfreie Städte_Code"] = students.data[
"Kreise und kreisfreie Städte_Code"
].apply(lambda x: "0" + x if len(x) <= 1 else x)
students.data["Kreise und kreisfreie Städte_Code"] = students.data["Kreise und kreisfreie Städte_Code"].apply(
lambda x: "0" + x if len(x) <= 1 else x
)
students.data["Kreise und kreisfreie Städte_Code"]

# %% [markdown]
@@ -236,8 +234,7 @@
# %%
ratio_international = (
students.data[
-(students.data.Geschlecht == "Insgesamt")
-& (students.data["Fächergruppe (mit Insgesamt)"] == "Insgesamt")
+(students.data.Geschlecht == "Insgesamt") & (students.data["Fächergruppe (mit Insgesamt)"] == "Insgesamt")
]
.groupby(
by=[
@@ -251,10 +248,7 @@
ratio_international.rename("ratio_international", inplace=True)

ratio_international = pd.DataFrame(ratio_international)
ratio_international["year"] = [
int(semester[3:7])
for semester in ratio_international.index.get_level_values(2)
]
ratio_international["year"] = [int(semester[3:7]) for semester in ratio_international.index.get_level_values(2)]

ratio_international

@@ -277,12 +271,8 @@
" Berlin",
]:
plt.plot(
-ratio_international[
-ratio_international.index.get_level_values(0) == region
-].year,
-ratio_international[
-ratio_international.index.get_level_values(0) == region
-].ratio_international,
+ratio_international[ratio_international.index.get_level_values(0) == region].year,
+ratio_international[ratio_international.index.get_level_values(0) == region].ratio_international,
label=region,
)
plt.legend()
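The second hunk in this notebook pads one-character district codes with a leading zero. A self-contained sketch with made-up codes; for inputs like these, `str.zfill(2)` is an equivalent spelling:

```python
import pandas as pd

codes = pd.Series(["1", "02", "11"])  # made-up district codes
# Prepend "0" to one-character codes, as the notebook's lambda does.
padded = codes.apply(lambda x: "0" + x if len(x) <= 1 else x)
assert padded.equals(codes.str.zfill(2))  # pad to width 2
```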
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -64,7 +64,7 @@ sphinx-rtd-theme = "^2.0.0"
urllib3 = {extras = ["appengine"], version = "^2.2.1"}

[tool.black]
-line-length = 80
+line-length = 120
target-version = ['py310']

[build-system]
@@ -77,4 +77,4 @@ log_cli = false

[tool.isort]
profile = "black"
-line_length = 80
+line_length = 120
4 changes: 1 addition & 3 deletions src/pystatis/cache.py
@@ -177,9 +177,7 @@ def clear_cache(name: Optional[str] = None) -> None:

# remove specified file (directory) from the data cache
# or clear complete cache (remove childs, preserve base)
-file_paths = (
-[cache_dir / name] if name is not None else list(cache_dir.iterdir())
-)
+file_paths = [cache_dir / name] if name is not None else list(cache_dir.iterdir())

for file_path in file_paths:
# delete if file or symlink, otherwise remove complete tree
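The comment kept as context here ("delete if file or symlink, otherwise remove complete tree") describes the deletion step that follows the hunk. A hedged sketch of that pattern, not the verbatim library code:

```python
import shutil
from pathlib import Path

def remove_entry(path: Path) -> None:
    """Unlink plain files and symlinks; remove directories recursively."""
    if path.is_file() or path.is_symlink():
        path.unlink()
    else:
        shutil.rmtree(path)
```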
7 changes: 5 additions & 2 deletions src/pystatis/config.py
@@ -18,6 +18,7 @@
Subsequent calls to other `pystatis` functions will throw an error
until the user has filled in the credentials.
"""

import logging
import os
import re
@@ -104,8 +105,10 @@ def load_config(config_file: Path | None = None) -> ConfigParser:

if not successful_reads:
logger.critical(
"Error while loading the config file. Could not find %s. "
"Please make sure to run init_config() first. ",
(
"Error while loading the config file. Could not find %s. "
"Please make sure to run init_config() first. "
),
config_file,
)

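Black only wraps the implicitly concatenated message in parentheses here; the `%s` placeholder survives because `logging` defers string interpolation until a record is actually emitted, which is also what pylint's logging checks expect. A standalone sketch of the same call shape (the path argument is hypothetical):

```python
import logging

logger = logging.getLogger(__name__)

# Lazy %-formatting: the argument is interpolated only if the
# record passes the logger's level check.
logger.critical(
    "Error while loading the config file. Could not find %s. "
    "Please make sure to run init_config() first. ",
    "~/.pystatis/config.ini",  # hypothetical config path
)
```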
4 changes: 1 addition & 3 deletions src/pystatis/find.py
@@ -31,9 +31,7 @@ class Find:

# pylint: disable=too-many-instance-attributes

-def __init__(
-self, query: str, db_name: str, top_n_preview: int = 5
-) -> None:
+def __init__(self, query: str, db_name: str, top_n_preview: int = 5) -> None:
"""Method for retrieving data from find endpoint.
Args:
39 changes: 9 additions & 30 deletions src/pystatis/http_helper.py
@@ -8,12 +8,7 @@
import requests

from pystatis import config, db
-from pystatis.cache import (
-cache_data,
-hit_in_cash,
-normalize_name,
-read_from_cache,
-)
+from pystatis.cache import cache_data, hit_in_cash, normalize_name, read_from_cache
from pystatis.exception import DestatisStatusError, PystatisConfigError

logger = logging.getLogger(__name__)
@@ -54,9 +49,7 @@ def load_data(
data = read_from_cache(cache_dir, name, params)
else:
response = get_data_from_endpoint(endpoint, method, params, db_name)
-content_type = response.headers.get(
-"Content-Type", "text/csv"
-).split("/")[-1]
+content_type = response.headers.get("Content-Type", "text/csv").split("/")[-1]
data = response.content

# status code 98 means that the table is too big
@@ -85,9 +78,7 @@ def load_data(
return data


-def get_data_from_endpoint(
-endpoint: str, method: str, params: dict, db_name: str | None = None
-) -> requests.Response:
+def get_data_from_endpoint(endpoint: str, method: str, params: dict, db_name: str | None = None) -> requests.Response:
"""
Wrapper method which constructs a url for querying data from Destatis and
sends a GET request.
@@ -171,9 +162,7 @@ def start_job(endpoint: str, method: str, params: dict) -> requests.Response:
params["job"] = "true"

# starting a job
-response = get_data_from_endpoint(
-endpoint=endpoint, method=method, params=params
-)
+response = get_data_from_endpoint(endpoint=endpoint, method=method, params=params)

return response

@@ -221,9 +210,7 @@ def get_data_from_resultfile(job_id: str, db_name: str | None = None) -> bytes:
time_ = time.perf_counter()

while (time.perf_counter() - time_) < JOB_TIMEOUT:
-response = get_data_from_endpoint(
-endpoint="catalogue", method="jobs", params=params, db_name=db_name
-)
+response = get_data_from_endpoint(endpoint="catalogue", method="jobs", params=params, db_name=db_name)

jobs = response.json().get("List")
if len(jobs) > 0 and jobs[0].get("State") == "Fertig":
@@ -240,9 +227,7 @@ def get_data_from_resultfile(job_id: str, db_name: str | None = None) -> bytes:
"compress": "false",
"format": "ffcsv",
}
-response = get_data_from_endpoint(
-endpoint="data", method="resultfile", params=params, db_name=db_name
-)
+response = get_data_from_endpoint(endpoint="data", method="resultfile", params=params, db_name=db_name)
assert isinstance(response.content, bytes) # nosec assert_used
return response.content

@@ -266,9 +251,7 @@ def _check_invalid_status_code(response: requests.Response) -> None:
content = body.get("Content")
code = body.get("Code")
logger.error("Error Code: %s. Content: %s.", code, content)
-raise requests.exceptions.HTTPError(
-f"The server returned a {response.status_code} status code."
-)
+raise requests.exceptions.HTTPError(f"The server returned a {response.status_code} status code.")


def _check_invalid_destatis_status_code(response: requests.Response) -> None:
@@ -332,13 +315,9 @@ def _check_destatis_status(destatis_status: dict) -> None:
raise DestatisStatusError(destatis_status_content)

# output warnings to user
-elif (destatis_status_code == 22) or (
-destatis_status_type in warning_en_de
-):
+elif (destatis_status_code == 22) or (destatis_status_type in warning_en_de):
logger.warning(destatis_status_content)

# output information to user
elif destatis_status_type.lower() == "information":
-logger.info(
-"Code %d: %s", destatis_status_code, destatis_status_content
-)
+logger.info("Code %d: %s", destatis_status_code, destatis_status_content)
8 changes: 2 additions & 6 deletions src/pystatis/profile.py
@@ -26,9 +26,7 @@ def change_password(db_name: str, new_password: str) -> str:
}

# change remote password
-response_text = load_data(
-endpoint="profile", method="password", params=params, db_name=db_name
-)
+response_text = load_data(endpoint="profile", method="password", params=params, db_name=db_name)
# change local password
db.set_db_pw(db_name, new_password)

@@ -52,8 +50,6 @@ def remove_result(name: str, area: str = "all") -> str:
params = {"name": name, "area": area, "language": "de"}

# remove 'Ergebnistabelle' with previously defined parameters
-response_text = load_data(
-endpoint="profile", method="removeresult", params=params
-)
+response_text = load_data(endpoint="profile", method="removeresult", params=params)

return cast(str, response_text)
24 changes: 5 additions & 19 deletions src/pystatis/results.py
@@ -22,9 +22,7 @@ class Results:
get_metadata(): Gets metadata based on the index of the object.
"""

-def __init__(
-self, result: pd.DataFrame, category: str, db_name: str
-) -> None:
+def __init__(self, result: pd.DataFrame, category: str, db_name: str) -> None:
"""
Class that contains the results of a find query.
@@ -93,17 +91,10 @@ def get_metadata(self, row_numbers: list) -> None:
structure_dict["Head"]["Content"],
f"{'-' * 20}",
"Columns:",
"\n".join(
[
col["Content"]
for col in structure_dict["Columns"]
]
),
"\n".join([col["Content"] for col in structure_dict["Columns"]]),
f"{'-' * 20}",
"Rows:",
"\n".join(
[row["Content"] for row in structure_dict["Rows"]]
),
"\n".join([row["Content"] for row in structure_dict["Rows"]]),
f"{'-' * 40}",
]
)
@@ -118,9 +109,7 @@ def get_metadata(self, row_numbers: list) -> None:
response["Object"]["Content"],
f"{'-' * 20}",
"Content:",
"\n".join(
[content["Content"] for content in axis_dict]
),
"\n".join([content["Content"] for content in axis_dict]),
f"{'-' * 40}",
]
)
@@ -136,10 +125,7 @@ def get_metadata(self, row_numbers: list) -> None:
f"{'-' * 20}",
"Content:",
"\n".join(
-[
-f"{structure_dict[content]} {content}"
-for content in ["Cubes", "Variables", "Updated"]
-]
+[f"{structure_dict[content]} {content}" for content in ["Cubes", "Variables", "Updated"]]
),
f"{'-' * 40}",
]
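All four hunks in `get_metadata` collapse the same idiom: joining the `Content` field of a list of metadata dicts into one newline-separated block. A tiny sketch with made-up metadata:

```python
# Destatis metadata arrives as lists of dicts carrying a "Content" key.
columns = [{"Content": "Jahr"}, {"Content": "Studierende"}]
print("\n".join(col["Content"] for col in columns))
# Jahr
# Studierende
```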
20 changes: 5 additions & 15 deletions src/pystatis/table.py
@@ -81,22 +81,16 @@ def get_data(
"format": "ffcsv",
}

-raw_data_bytes = load_data(
-endpoint="data", method="tablefile", params=params
-)
+raw_data_bytes = load_data(endpoint="data", method="tablefile", params=params)
assert isinstance(raw_data_bytes, bytes) # nosec assert_used
raw_data_str = raw_data_bytes.decode("utf-8-sig")

self.raw_data = raw_data_str
data_buffer = StringIO(raw_data_str)
-self.data = pd.read_csv(
-data_buffer, sep=";", na_values=["...", ".", "-", "/", "x"]
-)
+self.data = pd.read_csv(data_buffer, sep=";", na_values=["...", ".", "-", "/", "x"])

if prettify:
-self.data = self.prettify_table(
-self.data, db.identify_db(self.name)[0]
-)
+self.data = self.prettify_table(self.data, db.identify_db(self.name)[0])

metadata = load_data(endpoint="metadata", method="table", params=params)
metadata = json.loads(metadata)
@@ -158,13 +152,9 @@ def parse_zensus_table(data: pd.DataFrame) -> pd.DataFrame:
# Extracts new column names from first values of the Merkmal_Label columns
# and assigns these to the relevant attribute columns (Auspraegung_Label)
attributes = data.filter(like="variable_attribute_label")
-attributes.columns = (
-data.filter(regex=r"\d+_variable_label").iloc[0].tolist()
-)
+attributes.columns = data.filter(regex=r"\d+_variable_label").iloc[0].tolist()

-values = pd.DataFrame(
-{data["value_variable_label"].iloc[0]: data["value"]}
-)
+values = pd.DataFrame({data["value_variable_label"].iloc[0]: data["value"]})

pretty_data = pd.concat([time, attributes, values], axis=1)
return pretty_data
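Worth noting in the `read_csv` hunk: the ffcsv export marks missing values with several placeholder symbols, and `na_values` maps them all to `NaN` in a single pass. A self-contained sketch with made-up sample data:

```python
from io import StringIO

import pandas as pd

raw = "year;value\n2020;42\n2021;...\n2022;-\n"  # made-up ffcsv-like data
df = pd.read_csv(StringIO(raw), sep=";", na_values=["...", ".", "-", "/", "x"])
print(df["value"].isna().tolist())  # [False, True, True]
```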
9 changes: 1 addition & 8 deletions tests/test_cache.py
@@ -5,14 +5,7 @@
import pytest

from pystatis import config
-from pystatis.cache import (
-_build_file_path,
-cache_data,
-clear_cache,
-hit_in_cash,
-normalize_name,
-read_from_cache,
-)
+from pystatis.cache import _build_file_path, cache_data, clear_cache, hit_in_cash, normalize_name, read_from_cache


@pytest.fixture()