Skip to content

Commit

Permalink
fixed lints except tests
Browse files Browse the repository at this point in the history
  • Loading branch information
LeonardoCastro committed Apr 9, 2024
1 parent 34722f9 commit aaf6111
Show file tree
Hide file tree
Showing 20 changed files with 318 additions and 270 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
exclude = .git,venv
max-line-length = 100
extend-ignore = E203
per-file-ignores = __init__.py:F401
per-file-ignores = __init__.py:F401
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,4 @@ cython_debug/
.vscode/settings.json

# pdm
.pdm-python
.pdm-python
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ repos:
entry: pytest
language: system
pass_filenames: false
always_run: true
always_run: true
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ $ pip install -e .

## User guide

The cleaning and pre-processing has been wrapped in the function `initialise()`.
The cleaning and pre-processing has been wrapped in the function `initialise()`.

```python
from rama import initialise
Expand All @@ -64,8 +64,8 @@ from rama import initialise_humans
graph, connected_components, graphs_with_humans = initialise_humans(path, psc_filename, companies_filename, string_nature)
```

** Coming soon **
A collection of notebooks can be found in `notebooks/` with a series of quick and simple tutorial on how to analyse the processed data.
** Coming soon **
A collection of notebooks can be found in `notebooks/`, with a series of quick and simple tutorials on how to analyse the processed data.

## Contact

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ dev = [
"pyroma>=4.2",
"pydocstyle>=6.3.0",
"grip>=4.6.2",
]
]
4 changes: 2 additions & 2 deletions src/rama/analysing/differential_evolution.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import random
import networkx as nx

import numpy as np
from tqdm import tqdm

from rama.src.rama.analysing.transfer_money import loss_function
from rama.analysing.transfer_money import loss_function


def make_profit_dict(profit_distribution, nodes_with_profit, nodes_without_profit):
Expand Down
42 changes: 20 additions & 22 deletions src/rama/analysing/swapping.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,60 @@
import random
from collections import deque

import networkx as nx
import numpy as np


#### Restrictions ####
# Restrictions


# First restriction
def no_cycles(subgraph: nx.DiGraph) -> bool:
    """Check that the graph is acyclic when viewed as undirected.

    The directed graph is projected to an undirected one and its cycle
    basis is computed; an empty basis means no cycles exist.
    """
    # nx.cycle_basis returns a list of cycles; an empty list is falsy.
    return not nx.cycle_basis(subgraph.to_undirected())


# Second restriction
def connected(subgraph: nx.DiGraph) -> bool:
    """Check that the graph forms a single weakly connected component."""
    components = list(nx.weakly_connected_components(subgraph))
    return len(components) == 1


# Third restriction
def no_more_than_two_per(subgraph: nx.DiGraph, limit: int = 2) -> bool:
    """Check that no human node owns more than `limit` incoming links.

    Inspects the in-degree of every node flagged as human; fails as soon
    as one exceeds the limit.
    """
    for node in subgraph.nodes:
        if subgraph.nodes[node]["human"] and subgraph.in_degree(node) > limit:
            return False
    return True


# Fourth restriction
def only_human_roots(subgraph: nx.DiGraph) -> bool:
    """Check that every root node (out-degree zero) is flagged as human."""
    # Collect the "human" flag of each root, then require the flags to
    # sum to their count (i.e. every flag is truthy/1).
    root_flags = [
        subgraph.nodes[node]["human"]
        for node in subgraph.nodes
        if subgraph.out_degree(node) == 0
    ]
    return sum(root_flags) == len(root_flags)


# Fifth restriction
def no_slavery(subgraph: nx.DiGraph) -> bool:
    """Check that no human node has outgoing ownership edges.

    Humans must be pure sinks: the combined out-degree over all human
    nodes has to be zero, so a human can never own shares of anything.
    """
    total_out_degree = 0
    for node in subgraph.nodes:
        if subgraph.nodes[node]["human"]:
            total_out_degree += subgraph.out_degree(node)
    return total_out_degree == 0


#### Alterations ####
# Alterations


def one_swap(subgraph, change="origin"):
def one_swap(subgraph: nx.DiGraph, change: str = "origin") -> nx.DiGraph:
"""
Function to swap an edge in a given graph.
By default, we are only changing the source of a link.
Expand Down Expand Up @@ -97,7 +92,7 @@ def one_swap(subgraph, change="origin"):
return subgraph_copy


def check_if_subgraph_passes(subgraph, checks):
def check_if_subgraph_passes(subgraph: nx.DiGraph, checks: deque) -> bool:
"""Function to check if a subgraph passes the given restrictions"""
passed = []
for func in checks:
Expand All @@ -108,7 +103,9 @@ def check_if_subgraph_passes(subgraph, checks):
return sum_ == len(checks)


def get_swapped_subgraph(subgraph, checks, n_tries=100, change="random"):
def get_swapped_subgraph(
subgraph: nx.DiGraph, checks: deque, n_tries: int = 100, change: str = "random"
) -> nx.DiGraph | None:
"""Function that returns a subgraph with an edge swap passing all the checks"""
n_try = 0
passing = False
Expand All @@ -120,3 +117,4 @@ def get_swapped_subgraph(subgraph, checks, n_tries=100, change="random"):
return None
if passing:
return swapped_subgraph
return None
21 changes: 6 additions & 15 deletions src/rama/analysing/transfer_money.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import networkx as nx
import numpy as np


dictionary_taxes = dict(
zip(
Expand Down Expand Up @@ -72,9 +72,7 @@ def give_dividends(graph, node, profits):
wealth = profits[node]

if len(in_hood) != 0:
in_weights = sum(
graph.edges[(neighbour, node)]["weight"] for neighbour in in_hood
)
in_weights = sum(graph.edges[(neighbour, node)]["weight"] for neighbour in in_hood)
else:
in_weights = 0

Expand All @@ -84,12 +82,9 @@ def give_dividends(graph, node, profits):
def recursive_wrapper(graph, profits):
"""Recursive wrapper"""
dummy_dict = {
node: get_dividend_from_neighbours(graph, node, profits)
for node in graph.nodes()
}
return_dict = {
node: give_dividends(graph, node, dummy_dict) for node in graph.nodes()
node: get_dividend_from_neighbours(graph, node, profits) for node in graph.nodes()
}
return_dict = {node: give_dividends(graph, node, dummy_dict) for node in graph.nodes()}
return return_dict


Expand All @@ -99,9 +94,7 @@ def theoretical_wealth(graph, node, profits):

for node2 in list(nx.descendants(graph, node)):
path = nx.shortest_path(graph, node, node2)
local_wealth = compose_function(
taxes, len(path) - 1, profits[node2], human=False
)
local_wealth = compose_function(taxes, len(path) - 1, profits[node2], human=False)
for i in range(len(path) - 1):
local_wealth *= graph.edges[(path[i], path[i + 1])]["weight"]
wealth += local_wealth
Expand All @@ -114,7 +107,5 @@ def theoretical_wealth(graph, node, profits):

def theoretical_wrapper(graph, profits):
"""Theoretical wrapper"""
return_dict = {
node: theoretical_wealth(graph, node, profits) for node in graph.nodes()
}
return_dict = {node: theoretical_wealth(graph, node, profits) for node in graph.nodes()}
return return_dict
4 changes: 2 additions & 2 deletions src/rama/processing/cleaning.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Sequence
import pandas as pd

import pandas as pd

from rama.src.rama.processing.helper_functions import fill_company_number
from rama.processing.helper_functions import fill_company_number


def clean_psc(
Expand Down
26 changes: 15 additions & 11 deletions src/rama/processing/helper_functions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Sequence
import os
import pandas as pd
from typing import Sequence

import numpy as np
from tqdm import tqdm
import pandas as pd


def check_dir_exists(path: str) -> None:
Expand All @@ -22,7 +22,7 @@ def check_dir_exists(path: str) -> None:
def get_mutual_company_numbers(psc: pd.DataFrame, companies: pd.DataFrame) -> np.ndarray:
"""Function to match company numbers from PSCs and companies"""
company_numbers_psc = psc.company_number.values
if "CompanyNumer" in companies.columns:
if "CompanyNumber" in companies.columns:
mutual_company_numbers = companies.loc[
companies.CompanyNumber.isin(company_numbers_psc)
].CompanyNumber.values
Expand Down Expand Up @@ -120,6 +120,10 @@ def get_company_company_link(
psc_companies = psc_companies.dropna(subset=["company_name"])

# Fill owned companies that have already been indexed as owners of other companies
if "company_number" not in companies.columns:
companies = companies.rename(columns={"CompanyNumber": "company_number"})
if "company_name" not in companies.columns:
companies = companies.rename(columns={"CompanyName": "company_name"})
names_owned = companies.loc[
companies.company_number.isin(psc_companies.company_number),
["company_number", "company_name"],
Expand All @@ -131,7 +135,7 @@ def get_company_company_link(
.reset_index()
)

### 1 - Index Owners already indexed
# 1 - Index Owners already indexed
already_indexed_owners = small_firstlink[
small_firstlink.company_name.isin(psc_companies.company_name.unique())
][["company_name", "idx_company"]].drop_duplicates()
Expand All @@ -142,7 +146,7 @@ def get_company_company_link(
.reset_index()
)

### 2 - Index Owners not seen before
# 2 - Index Owners not seen before

# do not take into account those that appear in company_names_2
idxs_nan = psc_companies.idx_company.isna()
Expand All @@ -167,8 +171,8 @@ def get_company_company_link(
psc_companies.groupby("company_name")["idx_company"].transform("first"), inplace=True
)

#### Second companies
### 1 - Index Owneds already indexed by company number
# Second companies
# 1 - Index Owneds already indexed by company number
already_indexed_owneds_number = small_firstlink[
small_firstlink.company_number.isin(psc_companies.company_number.unique())
][["company_number", "idx_company"]].drop_duplicates()
Expand All @@ -180,7 +184,7 @@ def get_company_company_link(
.reset_index()
)

### 2 - Index Owneds already indexed in column 1 by name
# 2 - Index Owneds already indexed in column 1 by name
# Fill owned companies that have already been indexed as owners of other companies
unique_names_owned = names_owned.company_name.unique()
min_ = max_
Expand Down Expand Up @@ -218,8 +222,8 @@ def get_list_unique_natures_of_control(psc: pd.DataFrame) -> list:
list_unique_natures = []
for _, list_str in enumerate(natures):
if list_str != "":
l = eval(list_str)
for element in l:
eval_list = eval(list_str)
for element in eval_list:
list_unique_natures.append(element)

list_unique_natures = np.unique(np.array(list_unique_natures))
Expand Down
19 changes: 10 additions & 9 deletions src/rama/processing/initialise_db.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from typing import Sequence
import pandas as pd

import networkx as nx
import pandas as pd

from rama.src.rama.processing.load_database_pipeline import process_database, get_graph
from rama.src.rama.processing.node_attributes import set_attributes
from rama.src.rama.processing.study_graphs import get_dict_cluster, classify_cluster
from rama.processing.load_database_pipeline import get_graph, process_database
from rama.processing.node_attributes import set_attributes
from rama.processing.study_graphs import classify_cluster, get_dict_cluster


def initialise(
Expand All @@ -31,18 +32,18 @@ def initialise(
merged_firstlink = list_dfs[1]
psc_companies = list_dfs[2]

### Get graph
# Get graph
graph = get_graph(edge_list)

### Set attributes
# Set attributes
set_attributes(graph, merged_firstlink, psc_companies, companies)

### Connected components
# Connected components
connected_components = list(
sorted(nx.weakly_connected_components(graph), key=len, reverse=True)
)

### Set attributes to connected components
# Set attributes to connected components
dict_cluster = {}
for number_of_cluster, set_nodes in enumerate(connected_components):
dict_cluster_unclassified = get_dict_cluster(graph, list(set_nodes))
Expand All @@ -61,7 +62,7 @@ def initialise_humans(
graph, connected_components, dict_cluster = initialise(
path, psc_filenames, companies_filenames, string_ownership
)
### get indices where there are humans
# get indices where there are humans
graphs_with_humans = [
i
for i in range(len(connected_components))
Expand Down
Loading

0 comments on commit aaf6111

Please sign in to comment.