-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmultipleLinearRegression.py
82 lines (58 loc) · 2.41 KB
/
multipleLinearRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
" Created by Ecem Balıkçı on 11/21/2020 at 1:56 PM (Contact: [email protected]) "
import numpy as np
import csv
import matplotlib.pyplot as plt
# Load the player table from team_big.csv. Columns 4, 6, 7 and 8 of each data
# row feed age_list, exp_list, pow_list and salary_list respectively.
# newline="" is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly.
with open("team_big.csv", newline="") as f:
    csv_list = list(csv.reader(f))

# Drop the header row (and any later repeat of it) and ignore blank lines,
# which csv.reader yields as empty lists and which would otherwise fail on
# indexing.
data_rows = [row for row in csv_list[1:] if row and row != csv_list[0]]

# Build each column in a single pass: np.append inside a loop copies the whole
# array on every row. dtype=float keeps the float64 arrays that appending to
# an empty np.array([]) produced.
age_list = np.array([int(row[4]) for row in data_rows], dtype=float)
exp_list = np.array([int(row[6]) for row in data_rows], dtype=float)
pow_list = np.array([float(row[7]) for row in data_rows], dtype=float)
salary_list = np.array([int(row[8]) for row in data_rows], dtype=float)
one_list = np.ones(len(data_rows))  # constant column for the intercept term
# Design matrix: the column of ones (intercept) followed by the age,
# experience and pow columns, one row per player.
X = np.column_stack((one_list, age_list, exp_list, pow_list))

# Ordinary least squares via the normal equations:
#     beta = (X^T X)^-1 X^T y
xt = X.T
xtx = xt.dot(X)
xty = xt.dot(salary_list)
xtx_inv = np.linalg.inv(xtx)
beta = xtx_inv.dot(xty)
# Mathematically equivalent one-liner:
#     np.linalg.inv(X.T.dot(X)).dot(X.T).dot(salary_list)

y_est = np.dot(X, beta)  # fitted salaries: y_hat = X beta
u = np.abs(salary_list - y_est)  # absolute residuals: |u_hat| = |y - y_hat|
def simlin_plot(x, y):
    """Show a scatter plot of y against x, labelled as a residual error plot.

    x is drawn on the horizontal axis (labelled as the estimated salaries)
    and y on the vertical axis (labelled as the errors). The call ends with
    plt.show(), so the figure is displayed before the function returns.
    """
    # Title and axis labels are set on the current axes; the points are then
    # drawn on those same axes in cyan ("c").
    plt.title("Residual Error Plot")
    plt.xlabel("Estimated Salary Values(y_est)")
    plt.ylabel("Errors(u)")
    plt.scatter(x, y, c="c")
    plt.show()
# Plot the absolute residuals (u) against the fitted salaries (y_est).
simlin_plot(y_est, u)
def calculateRsquare(a, b):
    """Compute, print and return the coefficient of determination (R^2).

    R^2 = 1 - RSS / TSS, where RSS is the sum of squared differences between
    the observed and predicted values and TSS is the sum of squared
    deviations of the observed values from their mean.

    Args:
        a: Observed values (e.g. the salary column); any 1-D sequence or array.
        b: Predicted values for the same observations, same length as ``a``.

    Returns:
        The R^2 score as a NumPy float. If every value in ``a`` is identical,
        TSS is zero and the result is non-finite (NumPy emits a warning).

    Raises:
        ValueError: If ``a`` is empty or ``a`` and ``b`` differ in shape.
    """
    observed = np.asarray(a, dtype=float)
    predicted = np.asarray(b, dtype=float)
    if observed.size == 0:
        raise ValueError("calculateRsquare needs at least one observation")
    if observed.shape != predicted.shape:
        raise ValueError("observed and predicted values must have the same shape")

    # Work only from the arguments: the sums cover exactly the values passed
    # in, not any module-level array.
    rss = np.sum(np.square(observed - predicted))
    tss = np.sum(np.square(observed - observed.mean()))
    rsquarescore = 1 - (rss / tss)
    print("r^2 score: ", rsquarescore)
    return rsquarescore
# R^2 of the baseline fit.
print("Showing original results:")
r1 = calculateRsquare(salary_list, y_est)

# Refit after appending one extra regressor of random integers drawn from
# [-1000, 1000), so the two R^2 scores can be compared. X is rebound to the
# widened design matrix.
random_list = np.random.randint(-1000, 1000, size=len(salary_list))
X = np.column_stack((X, random_list))

# Same normal-equation solve as before, on the widened matrix.
xt_2 = X.T
xtx_2 = xt_2.dot(X)
xty_2 = xt_2.dot(salary_list)
xtx_inv_2 = np.linalg.inv(xtx_2)
beta_2 = xtx_inv_2.dot(xty_2)

y_est_2 = np.dot(X, beta_2)  # fitted salaries with the random column included
u_2 = np.abs(salary_list - y_est_2)  # absolute residuals of the second fit

print("Showing results with an added random column:")
r2 = calculateRsquare(salary_list, y_est_2)