forked from epfl-ada-2018/Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocessing_indicators.py
106 lines (79 loc) · 4.97 KB
/
processing_indicators.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import seaborn as sns
# This function reads all the data needed for plotting the indicator data
def read_eveything(indicator_path, indicator=True):
    """Load party, election-year and indicator data and align them per country.

    Besides the indicator file, three CSV files are read relative to the
    current working directory: ``data/country_party_dataset.csv``,
    ``data/positions_scale.csv`` and ``data/election_years.csv``.

    Args:
        indicator_path: Path of the indicator CSV. It must contain a
            ``Country Name`` column and the year columns ``2008`` .. ``2017``.
        indicator: If true, the file is parsed with ``header=2`` (column names
            on the third parsed line); otherwise the first line is the header.

    Returns:
        A tuple ``(elections_indicator, parties)`` of DataFrames.

        ``elections_indicator`` holds the rows of ``election_years.csv`` that
        have an indicator value for both election years, extended with
        ``value_previous``, ``value_last`` and ``difference_value``
        (last minus previous).

        ``parties`` holds one row per ``Country`` with the numeric party
        columns summed, including the ``weighted_*`` columns and
        ``difference_seats`` / ``difference_votes`` (last minus previous).
    """
    # --- parties: shares weighted by the numeric scale of the party position ---
    parties = pd.read_csv("data/country_party_dataset.csv", index_col=0)
    positions = pd.read_csv("data/positions_scale.csv")
    parties = pd.merge(parties, positions, left_on=["Political_position"], right_on=["Position"])
    for target, source in (
        ("weighted_seats_last", "Seats %_last"),
        ("weighted_seats_previous", "Seats %_previous"),
        ("weighted_votes_last", "Votes %_last"),
        ("weighted_votes_previous", "Votes %_previous"),
    ):
        parties[target] = (parties[source] / 100) * parties["Scale"]
    # numeric_only is explicit: its default differs between pandas versions,
    # and without it recent versions concatenate the text columns per country.
    parties = parties.groupby("Country").sum(numeric_only=True)
    parties["difference_seats"] = parties["weighted_seats_last"] - parties["weighted_seats_previous"]
    parties["difference_votes"] = parties["weighted_votes_last"] - parties["weighted_votes_previous"]
    parties = parties.reset_index()

    # --- election years ---
    election_years = pd.read_csv('data/election_years.csv')
    # The indicators don't contain data for 2018, so 2018 elections use 2017.
    for year_column in ("prev_el_year", "last_el_year"):
        election_years[year_column] = election_years[year_column].replace(2018, 2017)

    # --- indicator values in long format ---
    # A separate local is used so the boolean ``indicator`` flag is not
    # rebound to a DataFrame.
    indicator_df = pd.read_csv(indicator_path, header=2 if indicator else "infer", sep=',')
    # NOTE: 2007 is filled with the 2008 values (overwriting a 2007 column if
    # the file has one); per the original author the indicators lack 2007 data.
    indicator_df["2007"] = indicator_df["2008"]
    indicator_df["Country Name"] = indicator_df["Country Name"].replace("Slovak Republic", "Slovakia")

    # countries and years of interest
    countries = ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark',
                 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy',
                 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal',
                 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'United Kingdom', 'Norway',
                 'Iceland', 'Switzerland']
    years = [str(year) for year in range(2007, 2018)]
    of_interest = indicator_df["Country Name"].isin(countries)
    selected = indicator_df.loc[of_interest, ["Country Name"] + years]
    long_values = pd.melt(selected, id_vars=['Country Name'])
    long_values["variable"] = long_values["variable"].astype(int)
    long_values["value"] = long_values["value"].astype(float)

    # --- attach the indicator value of the previous and of the last election ---
    # Renaming the key columns up front lets both merges join on shared names,
    # so no suffixed helper columns have to be dropped afterwards.
    previous_values = long_values.rename(
        columns={"Country Name": "Country", "variable": "prev_el_year", "value": "value_previous"})
    last_values = long_values.rename(
        columns={"Country Name": "Country", "variable": "last_el_year", "value": "value_last"})
    elections_indicator = election_years.merge(previous_values, on=["Country", "prev_el_year"])
    elections_indicator = elections_indicator.merge(last_values, on=["Country", "last_el_year"])
    elections_indicator["difference_value"] = elections_indicator['value_last'] - elections_indicator['value_previous']
    return elections_indicator, parties
# This function plots a regression plot for visualizing the indicator data
def plot_everything(elections_indicator, parties, column_x, column_y, hide_germany=False, only_right=False, params=None):
    """Draw a regression plot of two columns and print their correlation.

    The two frames are merged on the columns they have in common, a seaborn
    regression plot of ``column_y`` against ``column_x`` is shown, and the
    Spearman correlation matrix of the two columns plus the number of
    complete rows is printed.

    Args:
        elections_indicator: DataFrame to merge with ``parties``.
        parties: DataFrame to merge with ``elections_indicator``.
        column_x: Name of the merged column used for the x axis.
        column_y: Name of the merged column used for the y axis.
        hide_germany: If true, rows whose ``Country`` is ``'Germany'`` are
            left out.
        only_right: If true, only rows with ``column_x > 0`` are kept.
        params: Optional dict of plot options. Recognised keys are
            ``"x_label"``, ``"y_label"``, ``"title"`` and ``"save_2"``
            (file path the figure is saved to before it is shown).
    """
    # ``None`` replaces the former shared mutable default ``{}``.
    options = {} if params is None else params

    # matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*"
    # and later releases removed the old names; older releases only know the
    # old name, so fall back to it when the new one is not available.
    try:
        plt.style.use('seaborn-v0_8-poster')
    except OSError:
        plt.style.use('seaborn-poster')

    merged = pd.merge(elections_indicator, parties)
    if hide_germany:
        merged = merged[merged['Country'] != 'Germany']
    if only_right:
        merged = merged[merged[column_x] > 0]

    plt.subplots(figsize=(10, 6))
    sns.regplot(x=column_x, y=column_y, data=merged)
    for key, apply_option in (("x_label", plt.xlabel), ("y_label", plt.ylabel), ("title", plt.title)):
        if key in options:
            apply_option(options[key])
    # Save before show(): showing may leave nothing on the figure to save.
    if "save_2" in options:
        plt.savefig(options["save_2"], bbox_inches="tight", dpi=200)
    plt.show()

    plotted = merged[[column_x, column_y]]
    print(plotted.corr('spearman'))
    print("\nData for %s countries" % plotted.dropna().shape[0])