Skip to content

Commit

Permalink
fixed lints except tests
Browse files Browse the repository at this point in the history
  • Loading branch information
LeonardoCastro committed Apr 9, 2024
1 parent 34722f9 commit aaf6111
Show file tree
Hide file tree
Showing 20 changed files with 318 additions and 270 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
exclude = .git,venv
max-line-length = 100
extend-ignore = E203
per-file-ignores = __init__.py:F401
per-file-ignores = __init__.py:F401
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,4 @@ cython_debug/
.vscode/settings.json

# pdm
.pdm-python
.pdm-python
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ repos:
entry: pytest
language: system
pass_filenames: false
always_run: true
always_run: true
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ $ pip install -e .

## User guide

The cleaning and pre-processing has been wrapped in the function `initialise()`.
The cleaning and pre-processing has been wrapped in the function `initialise()`.

```python
from rama import initialise
Expand All @@ -64,8 +64,8 @@ from rama import initialise_humans
graph, connected_components, graphs_with_humans = initialise_humans(path, psc_filename, companies_filename, string_nature)
```

** Coming soon **
A collection of notebooks can be found in `notebooks/` with a series of quick and simple tutorial on how to analyse the processed data.
** Coming soon **
A collection of notebooks can be found in `notebooks/`, with a series of quick and simple tutorials on how to analyse the processed data.

## Contact

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ dev = [
"pyroma>=4.2",
"pydocstyle>=6.3.0",
"grip>=4.6.2",
]
]
4 changes: 2 additions & 2 deletions src/rama/analysing/differential_evolution.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import random
import networkx as nx

import numpy as np
from tqdm import tqdm

from rama.src.rama.analysing.transfer_money import loss_function
from rama.analysing.transfer_money import loss_function


def make_profit_dict(profit_distribution, nodes_with_profit, nodes_without_profit):
Expand Down
42 changes: 20 additions & 22 deletions src/rama/analysing/swapping.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,60 @@
import random
from collections import deque

import networkx as nx
import numpy as np


#### Restrictions ####
# Restrictions


# First restriction
def no_cycles(subgraph: nx.DiGraph) -> bool:
    """Check that the graph is acyclic when viewed as undirected.

    The directed graph is projected to an undirected one and its cycle
    basis is computed; an empty basis means no cycles exist.
    """
    # nx.cycle_basis returns a list of cycles; an empty list is falsy.
    return not nx.cycle_basis(subgraph.to_undirected())


# Second restriction
def connected(subgraph: nx.DiGraph) -> bool:
    """Check that the graph forms a single weakly connected component."""
    components = list(nx.weakly_connected_components(subgraph))
    return len(components) == 1


# Third restriction
def no_more_than_two_per(subgraph: nx.DiGraph, limit: int = 2) -> bool:
    """Check that no human node owns more than `limit` incoming links.

    Inspects the in-degree of every node flagged as human; fails as soon
    as one exceeds the limit.
    """
    for node in subgraph.nodes:
        if subgraph.nodes[node]["human"] and subgraph.in_degree(node) > limit:
            return False
    return True


# Fourth restriction
def only_human_roots(subgraph: nx.DiGraph) -> bool:
    """Check that every root node (out-degree zero) is flagged as human."""
    # Collect the "human" flag of each root, then require the flags to
    # sum to their count (i.e. every flag is truthy/1).
    root_flags = [
        subgraph.nodes[node]["human"]
        for node in subgraph.nodes
        if subgraph.out_degree(node) == 0
    ]
    return sum(root_flags) == len(root_flags)


# Fifth restriction
def no_slavery(subgraph: nx.DiGraph) -> bool:
    """Check that no human node has outgoing ownership edges.

    Humans must be pure sinks: the combined out-degree over all human
    nodes has to be zero, so a human can never own shares of anything.
    """
    total_out_degree = 0
    for node in subgraph.nodes:
        if subgraph.nodes[node]["human"]:
            total_out_degree += subgraph.out_degree(node)
    return total_out_degree == 0


#### Alterations ####
# Alterations


def one_swap(subgraph, change="origin"):
def one_swap(subgraph: nx.DiGraph, change: str = "origin") -> nx.DiGraph:
"""
Function to swap an edge in a given graph.
By default, we are only changing the source of a link.
Expand Down Expand Up @@ -97,7 +92,7 @@ def one_swap(subgraph, change="origin"):
return subgraph_copy


def check_if_subgraph_passes(subgraph, checks):
def check_if_subgraph_passes(subgraph: nx.DiGraph, checks: deque) -> bool:
"""Function to check if a subgraph passes the given restrictions"""
passed = []
for func in checks:
Expand All @@ -108,7 +103,9 @@ def check_if_subgraph_passes(subgraph, checks):
return sum_ == len(checks)


def get_swapped_subgraph(subgraph, checks, n_tries=100, change="random"):
def get_swapped_subgraph(
subgraph: nx.DiGraph, checks: deque, n_tries: int = 100, change: str = "random"
) -> nx.DiGraph | None:
"""Function that returns a subgraph with an edge swap passing all the checks"""
n_try = 0
passing = False
Expand All @@ -120,3 +117,4 @@ def get_swapped_subgraph(subgraph, checks, n_tries=100, change="random"):
return None
if passing:
return swapped_subgraph
return None
21 changes: 6 additions & 15 deletions src/rama/analysing/transfer_money.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import networkx as nx
import numpy as np


dictionary_taxes = dict(
zip(
Expand Down Expand Up @@ -72,9 +72,7 @@ def give_dividends(graph, node, profits):
wealth = profits[node]

if len(in_hood) != 0:
in_weights = sum(
graph.edges[(neighbour, node)]["weight"] for neighbour in in_hood
)
in_weights = sum(graph.edges[(neighbour, node)]["weight"] for neighbour in in_hood)
else:
in_weights = 0

Expand All @@ -84,12 +82,9 @@ def give_dividends(graph, node, profits):
def recursive_wrapper(graph, profits):
"""Recursive wrapper"""
dummy_dict = {
node: get_dividend_from_neighbours(graph, node, profits)
for node in graph.nodes()
}
return_dict = {
node: give_dividends(graph, node, dummy_dict) for node in graph.nodes()
node: get_dividend_from_neighbours(graph, node, profits) for node in graph.nodes()
}
return_dict = {node: give_dividends(graph, node, dummy_dict) for node in graph.nodes()}
return return_dict


Expand All @@ -99,9 +94,7 @@ def theoretical_wealth(graph, node, profits):

for node2 in list(nx.descendants(graph, node)):
path = nx.shortest_path(graph, node, node2)
local_wealth = compose_function(
taxes, len(path) - 1, profits[node2], human=False
)
local_wealth = compose_function(taxes, len(path) - 1, profits[node2], human=False)
for i in range(len(path) - 1):
local_wealth *= graph.edges[(path[i], path[i + 1])]["weight"]
wealth += local_wealth
Expand All @@ -114,7 +107,5 @@ def theoretical_wealth(graph, node, profits):

def theoretical_wrapper(graph, profits):
"""Theoretical wrapper"""
return_dict = {
node: theoretical_wealth(graph, node, profits) for node in graph.nodes()
}
return_dict = {node: theoretical_wealth(graph, node, profits) for node in graph.nodes()}
return return_dict
4 changes: 2 additions & 2 deletions src/rama/processing/cleaning.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Sequence
import pandas as pd

import pandas as pd

from rama.src.rama.processing.helper_functions import fill_company_number
from rama.processing.helper_functions import fill_company_number


def clean_psc(
Expand Down
26 changes: 15 additions & 11 deletions src/rama/processing/helper_functions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Sequence
import os
import pandas as pd
from typing import Sequence

import numpy as np
from tqdm import tqdm
import pandas as pd


def check_dir_exists(path: str) -> None:
Expand All @@ -22,7 +22,7 @@ def check_dir_exists(path: str) -> None:
def get_mutual_company_numbers(psc: pd.DataFrame, companies: pd.DataFrame) -> np.ndarray:
"""Function to match company numbers from PSCs and companies"""
company_numbers_psc = psc.company_number.values
if "CompanyNumer" in companies.columns:
if "CompanyNumber" in companies.columns:
mutual_company_numbers = companies.loc[
companies.CompanyNumber.isin(company_numbers_psc)
].CompanyNumber.values
Expand Down Expand Up @@ -120,6 +120,10 @@ def get_company_company_link(
psc_companies = psc_companies.dropna(subset=["company_name"])

# Fill owned companies that have already been indexed as owners of other companies
if "company_number" not in companies.columns:
companies = companies.rename(columns={"CompanyNumber": "company_number"})
if "company_name" not in companies.columns:
companies = companies.rename(columns={"CompanyName": "company_name"})
names_owned = companies.loc[
companies.company_number.isin(psc_companies.company_number),
["company_number", "company_name"],
Expand All @@ -131,7 +135,7 @@ def get_company_company_link(
.reset_index()
)

### 1 - Index Owners already indexed
# 1 - Index Owners already indexed
already_indexed_owners = small_firstlink[
small_firstlink.company_name.isin(psc_companies.company_name.unique())
][["company_name", "idx_company"]].drop_duplicates()
Expand All @@ -142,7 +146,7 @@ def get_company_company_link(
.reset_index()
)

### 2 - Index Owners not seen before
# 2 - Index Owners not seen before

# do not take into account those that appear in company_names_2
idxs_nan = psc_companies.idx_company.isna()
Expand All @@ -167,8 +171,8 @@ def get_company_company_link(
psc_companies.groupby("company_name")["idx_company"].transform("first"), inplace=True
)

#### Second companies
### 1 - Index Owneds already indexed by company number
# Second companies
# 1 - Index Owneds already indexed by company number
already_indexed_owneds_number = small_firstlink[
small_firstlink.company_number.isin(psc_companies.company_number.unique())
][["company_number", "idx_company"]].drop_duplicates()
Expand All @@ -180,7 +184,7 @@ def get_company_company_link(
.reset_index()
)

### 2 - Index Owneds already indexed in column 1 by name
# 2 - Index Owneds already indexed in column 1 by name
# Fill owned companies that have already been indexed as owners of other companies
unique_names_owned = names_owned.company_name.unique()
min_ = max_
Expand Down Expand Up @@ -218,8 +222,8 @@ def get_list_unique_natures_of_control(psc: pd.DataFrame) -> list:
list_unique_natures = []
for _, list_str in enumerate(natures):
if list_str != "":
l = eval(list_str)
for element in l:
eval_list = eval(list_str)
for element in eval_list:
list_unique_natures.append(element)

list_unique_natures = np.unique(np.array(list_unique_natures))
Expand Down
19 changes: 10 additions & 9 deletions src/rama/processing/initialise_db.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from typing import Sequence
import pandas as pd

import networkx as nx
import pandas as pd

from rama.src.rama.processing.load_database_pipeline import process_database, get_graph
from rama.src.rama.processing.node_attributes import set_attributes
from rama.src.rama.processing.study_graphs import get_dict_cluster, classify_cluster
from rama.processing.load_database_pipeline import get_graph, process_database
from rama.processing.node_attributes import set_attributes
from rama.processing.study_graphs import classify_cluster, get_dict_cluster


def initialise(
Expand All @@ -31,18 +32,18 @@ def initialise(
merged_firstlink = list_dfs[1]
psc_companies = list_dfs[2]

### Get graph
# Get graph
graph = get_graph(edge_list)

### Set attributes
# Set attributes
set_attributes(graph, merged_firstlink, psc_companies, companies)

### Connected components
# Connected components
connected_components = list(
sorted(nx.weakly_connected_components(graph), key=len, reverse=True)
)

### Set attributes to connected components
# Set attributes to connected components
dict_cluster = {}
for number_of_cluster, set_nodes in enumerate(connected_components):
dict_cluster_unclassified = get_dict_cluster(graph, list(set_nodes))
Expand All @@ -61,7 +62,7 @@ def initialise_humans(
graph, connected_components, dict_cluster = initialise(
path, psc_filenames, companies_filenames, string_ownership
)
### get indices where there are humans
# get indices where there are humans
graphs_with_humans = [
i
for i in range(len(connected_components))
Expand Down
Loading

0 comments on commit aaf6111

Please sign in to comment.