-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyse_stock.py
158 lines (120 loc) · 4.8 KB
/
analyse_stock.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# Code only barely modified from https://www.kaggle.com/raoulma/ny-stock-price-prediction-rnn-lstm-gru/data?select=prices-split-adjusted.csv
import numpy as np
import pandas as pd
import math
import sklearn
import sklearn.preprocessing
import datetime
import os
import matplotlib.pyplot as plt
def main(stock: "The training data file",
         show_plots: ("Whether or not to display the plots to the users", 'flag', 's') = False,
         only_100_days: ("Only use 100 days worth of stock info for most of the figures", 'flag', '100') = False):
    """Plot and save exploratory figures for one stock's daily data.

    Loads ``./data/daily_<stock>.csv`` (first column as index, sorted
    ascending), prints a short summary, and writes PNG figures into
    ``figures/``: raw prices, volume, min-max normalized prices, per-column
    histograms, moving averages of the close, daily return, and the
    daily-return histogram.

    The parameter annotations are plac-style argument descriptors and are
    kept unchanged so the command-line interface stays the same.

    Args:
        stock: Name inserted into the input file path, the figure file
            names and the figure titles.
        show_plots: When true, each figure is also displayed with
            ``plt.show()`` after it has been saved.
        only_100_days: When true, every figure except the per-column
            histograms uses only the first 100 rows of the sorted data.
    """
    stock_filename = "./data/daily_" + stock + ".csv"
    figures_dir = "figures/"
    # exist_ok avoids the check-then-create race of os.path.exists + mkdir.
    os.makedirs(figures_dir, exist_ok=True)

    def finish_figure(suffix):
        """Save the current figure as <stock><suffix>.png, show it if asked, close it."""
        plt.savefig(os.path.join(figures_dir, stock + suffix + ".png"))
        if show_plots:
            plt.show()
        # Close rather than clf() so finished figures do not stay open.
        plt.close()

    # function for min-max normalization of stock
    def normalize_data(frame):
        """Min-max scale open/high/low/close of ``frame`` in place and return it.

        Each column is fitted and scaled independently of the others.
        """
        min_max_scaler = sklearn.preprocessing.MinMaxScaler()
        for column in ('open', 'high', 'low', 'close'):
            scaled = min_max_scaler.fit_transform(
                frame[column].values.reshape(-1, 1))
            # fit_transform returns an (n, 1) array; flatten it for the column.
            frame[column] = scaled.ravel()
        return frame

    df = pd.read_csv(stock_filename, index_col=0)
    df = df.sort_index(axis=0, ascending=True)

    # DataFrame.info() prints its own report and returns None, so wrapping
    # it in print() only added a stray "None" line.
    df.info()
    print(df.head())
    print(df.describe())

    # Fixed: the default used to be assigned to a misspelled name, so
    # curr_dataframe was undefined unless only_100_days was set.
    curr_dataframe = df.head(100) if only_100_days else df

    # Display figures regarding the opening, closing, low, and high per day
    plt.figure(figsize=(10, 5))
    plt.plot(curr_dataframe.open.values, color='red', label='Open')
    plt.plot(curr_dataframe.close.values, color='green', label='Close')
    plt.plot(curr_dataframe.low.values, color='blue', label='Low')
    plt.plot(curr_dataframe.high.values, color='black', label='High')
    plt.title(stock + ' stock price')
    plt.xlabel('Time [days]')
    plt.ylabel('Price')
    plt.legend(loc='best')
    finish_figure("_changes")

    # Display figures regarding the volume
    # (explicit figure keeps the same size the reused, cleared figure had)
    plt.figure(figsize=(10, 5))
    plt.plot(curr_dataframe.volume.values, color='black', label='Volume')
    plt.title(stock + ' stock volume')
    plt.xlabel('Time [days]')
    plt.ylabel('Volume')
    plt.legend(loc='best')
    finish_figure("_volume")

    # Copy the stock without the volume column. drop(columns=...) returns a
    # new frame; the positional-axis form drop([...], 1) is rejected by
    # pandas >= 2.0.
    df_stock = curr_dataframe.drop(columns=['volume'])
    cols = list(df_stock.columns.values)
    print('df_stock.columns.values = ', cols)

    # normalize stock (on a copy, since normalize_data mutates its argument)
    df_stock_norm = normalize_data(df_stock.copy())

    # Display figures regarding the normalized prices
    plt.figure(figsize=(10, 5))
    plt.plot(df_stock_norm.open.values, color='red', label='open')
    # Fixed: the close series was labelled 'low' in the legend.
    plt.plot(df_stock_norm.close.values, color='green', label='close')
    plt.plot(df_stock_norm.low.values, color='blue', label='low')
    plt.plot(df_stock_norm.high.values, color='black', label='high')
    plt.title(stock)
    plt.xlabel('Time [days]')
    plt.ylabel('Normalized price/volume')
    plt.legend(loc='best')
    finish_figure("_normalized")

    # Display histograms of the stock information using all of the data
    df.hist(figsize=(12, 12))
    # suptitle titles the whole grid; plt.title would overwrite the title of
    # whichever subplot happens to be current.
    plt.suptitle(stock + ' histograms')
    finish_figure("_histograms")

    # Display figures regarding the moving average
    ma_day = [10, 20, 50]
    df_stock_ma = curr_dataframe.copy()
    ma_cols = ['close']
    for ma in ma_day:
        column_name = f"MA for {ma} days"
        ma_cols.append(column_name)
        df_stock_ma[column_name] = df_stock_ma['close'].rolling(ma).mean()

    # DataFrame.plot opens its own figure, so the size is passed to it
    # directly instead of creating an empty figure beforehand.
    df_stock_ma[ma_cols].plot(figsize=(10, 5))
    plt.title(stock + ' moving averages')
    finish_figure("_moving_average")

    # Daily return: percentage change of the close from one row to the next
    plt.figure(figsize=(10, 5))
    df_stock_ma['Daily Return'] = df_stock_ma['close'].pct_change()
    df_stock_ma['Daily Return'].plot(legend=True, linestyle='--', marker='o')
    plt.title(stock + ' daily return')
    finish_figure("_daily_return")

    # Fixed: a stray df.hist() call used to precede this, so the
    # daily-return histogram was drawn into one subplot of that grid.
    # It now gets a figure of its own.
    plt.figure(figsize=(10, 5))
    df_stock_ma['Daily Return'].hist()
    plt.title(stock + ' daily return histogram')
    finish_figure("_daily_return_histogram")
if __name__ == "__main__":
    # Script entry point: plac turns main's annotated parameters into
    # command-line arguments and then invokes it.
    from plac import call as run_cli

    run_cli(main)