from DataHandling import DataHandling
from TIndicators import TIndicators
from LSTMNetworks import LSTMNetworks1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# ------------------------------------Basic Setup-------------------------------------------------- #
# Read the full intraday dataset.
# Provide the correct path to your OHLC stock-price CSV.
df_intraday_full = pd.read_csv('datasetname.csv', converters={'Time': lambda x: str(x)})
# Set up global variables; adjust these values as required.
batch_size = 30
timesteps_e = 60
timesteps_d = 15
features = 16
from_index = 50
to_index = 54125
test_train_split_index = 54000
# Set up global objects for custom classes
lstm1 = LSTMNetworks1(timesteps=timesteps_e, features=features, batch_size=batch_size)
dh = DataHandling(timesteps_e, timesteps_d, features)
ti = TIndicators()
# Slice the dataframe to required length
df_intraday = df_intraday_full[:to_index]
# Store stock price related information
time = df_intraday['Time']
open_p = df_intraday['Open']
high_p = df_intraday['High']
low_p = df_intraday['Low']
close_p = df_intraday['Close']
volume = df_intraday['Volume']
# ------------------------------------Setting up the data--------------------------------------------- #
# Stock-price information.
# Prices are sliced from (from_index - 1), presumably so one extra row is available
# for the log-return computation below; time and volume start at from_index.
time_req = dh.slice_from(time, from_index, 1)
open_p_req = dh.slice_from(open_p, from_index - 1, 1)
high_p_req = dh.slice_from(high_p, from_index - 1, 1)
low_p_req = dh.slice_from(low_p, from_index - 1, 1)
close_p_req = dh.slice_from(close_p, from_index - 1, 1)
volume_req = dh.slice_from(volume, from_index, 1)
#
# Compute log returns of the stock prices; the leading element (which has no
# previous price) is dropped via slice_from(..., 1, 1).
open_p_req_lr = dh.slice_from(dh.log_returns(open_p_req), 1, 1)
high_p_req_lr = dh.slice_from(dh.log_returns(high_p_req), 1, 1)
low_p_req_lr = dh.slice_from(dh.log_returns(low_p_req), 1, 1)
close_p_req_lr = dh.slice_from(dh.log_returns(close_p_req), 1, 1)
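# DataHandling.log_returns is the project's own routine and is not shown here; as
# a hedged, illustrative sketch (an assumption, not this repo's implementation),
# a log return is r_t = ln(P_t / P_{t-1}), e.g.:
def _log_returns_sketch(prices):
    """Hypothetical NumPy log-return helper (illustrative only, unused below)."""
    p = np.asarray(prices, dtype=float).ravel()
    return np.log(p[1:] / p[:-1]).reshape(-1, 1)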
# Momentum Indicators
so, ss = ti.stochastic_oscillator(high=high_p, low=low_p,
close=close_p, periods=15, ma_periods=3)
so_req = dh.slice_from(so, from_index, 1)
roc_req = dh.slice_from(ti.roc(close_p, 15), from_index, 1)
rsi_req = dh.slice_from(ti.rsi(close_p, periods=15), from_index, 1)
kama_req = dh.slice_from(ti.kama(close_p, periods=15), from_index, 1)
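# TIndicators hides the indicator math; as a hedged sketch of the standard
# stochastic-oscillator formula assumed above (not necessarily this repo's code):
# %K = 100 * (C - lowest_low_n) / (highest_high_n - lowest_low_n), %D = SMA(%K, m).
def _stochastic_oscillator_sketch(high, low, close, periods=15, ma_periods=3):
    """Hypothetical pandas %K/%D computation (illustrative only, unused below)."""
    lowest_low = low.rolling(periods).min()
    highest_high = high.rolling(periods).max()
    k = 100 * (close - lowest_low) / (highest_high - lowest_low)
    d = k.rolling(ma_periods).mean()
    return k, d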
# Trend Indicators
macd_req = dh.slice_from(ti.macd(close_p), from_index, 1)
psar_req = dh.slice_from(ti.psar(high_p, low_p, close_p), from_index, 1)
# NOTE: this line currently recomputes ti.psar, so vortex_req duplicates psar_req;
# it most likely should call the Vortex indicator method of TIndicators instead.
vortex_req = dh.slice_from(ti.psar(high_p, low_p, close_p), from_index, 1)
cci_req = dh.slice_from(ti.cci(high_p, low_p, close_p), from_index, 1)
adx_req = dh.slice_from(ti.adx(high_p, low_p, close_p, 15), from_index, 1)
# Volume Indicators
acci_req = dh.slice_from(ti.acc_dist_index(high_p, low_p, close_p, volume), from_index, 1)
cmf_req = dh.slice_from(ti.chaikin_money_flow(high_p, low_p, close_p, volume), from_index, 1)
vpt_req = dh.slice_from(ti.volume_price_trend(close_p, volume), from_index, 1)
fi_req = dh.slice_from(ti.force_index(close_p, volume, 15), from_index, 1)
# Volatility Indicators
atr_req = dh.slice_from(ti.atr(high_p, low_p, close_p, 15), from_index, 1)
bbs_req = dh.slice_from(ti.bollinger_bands(close_p), from_index, 6)
# Creating single numpy array for each category
stock_data = np.concatenate([open_p_req_lr, high_p_req_lr, low_p_req_lr, close_p_req_lr], axis=1)
momentum_indicators = np.concatenate([rsi_req, so_req, roc_req, kama_req], axis=1)
trend_indicators = np.concatenate([macd_req, psar_req, vortex_req, cci_req, adx_req], axis=1)
volume_indicators = np.concatenate([acci_req, cmf_req, vpt_req, fi_req], axis=1)
volatility_indicators = np.concatenate([atr_req, bbs_req], axis=1)
# Performing Min-Max Normalization.
stock_data_n = dh.time_normalize(stock_data, for_layer='e')
volume_n = dh.time_normalize(volume_req, for_layer='e')
momentum_indicators_n = dh.time_normalize(momentum_indicators, for_layer='e')
trend_indicators_n = dh.time_normalize(trend_indicators, for_layer='e')
volume_indicators_n = dh.time_normalize(volume_indicators, for_layer='e')
volatility_indicators_n = dh.time_normalize(volatility_indicators, for_layer='e')
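# DataHandling.time_normalize is project-specific; the sketch below only shows the
# generic column-wise min-max rule x' = (x - min) / (max - min) and is an
# assumption about the idea, not the actual (possibly windowed) implementation.
def _min_max_sketch(arr):
    """Hypothetical column-wise min-max scaler (illustrative only, unused below)."""
    a = np.asarray(arr, dtype=float)
    a_min, a_max = a.min(axis=0), a.max(axis=0)
    return (a - a_min) / (a_max - a_min + 1e-12)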
# Apply dimensionality reduction via PCA
stock_data_pca = dh.pca(stock_data_n, 3)
momentum_indicators_pca = dh.pca(momentum_indicators_n, 3)
trend_indicators_pca = dh.pca(trend_indicators_n, 4)
volume_indicators_pca = dh.pca(volume_indicators_n, 3)
volatility_indicators_pca = dh.pca(volatility_indicators_n, 5)
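# DataHandling.pca wraps the decomposition; an equivalent reduction could be done
# with scikit-learn (kept commented out so no new dependency is introduced; this is
# an illustrative assumption, not the project's implementation):
# from sklearn.decomposition import PCA
# momentum_alt = PCA(n_components=3).fit_transform(momentum_indicators_n)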
# Form the final feature set: volume plus the four PCA-reduced indicator groups
# (1 + 3 + 4 + 3 + 5 = 16 columns, matching `features`). Note that stock_data_pca
# is computed above but not included here.
data_req = np.concatenate([volume_n, momentum_indicators_pca, trend_indicators_pca,
                           volume_indicators_pca, volatility_indicators_pca], axis=1)
data_req_df = pd.DataFrame(data_req)
stock_data_req = np.concatenate([open_p_req_lr, high_p_req_lr, low_p_req_lr, close_p_req_lr], axis=1)
# Despite the "_n_" in its name, stock_data_n_dec holds the un-normalized
# log-return OHLC values; it feeds the decoder inputs and targets below.
stock_data_n_dec = stock_data_req
stock_data_n_dec_pd = pd.DataFrame(stock_data_n_dec)
# --------------------------- Setting up Training Data -------------------------------------#
# Set up training X
x_train_raw = data_req[:test_train_split_index]
x_train = dh.create_x(x_train_raw)
# Set up training Y
y_raw = stock_data_n_dec
y_raw_n = y_raw[:test_train_split_index]
y_dec_train = dh.create_y(y_raw_n, dim=4)
y_final_n = y_raw[timesteps_d: test_train_split_index + timesteps_d]
y_final = dh.create_y(y_final_n, dim=4)
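# DataHandling.create_x / create_y are project-specific; presumably they window
# the 2-D arrays into the (samples, timesteps, features) shape Keras LSTMs expect.
# A generic sliding-window sketch of that idea (an assumption, unused below):
def _sliding_windows_sketch(arr, timesteps):
    """Hypothetical 3-D windowing helper (illustrative only)."""
    a = np.asarray(arr, dtype=float)
    return np.stack([a[i:i + timesteps] for i in range(a.shape[0] - timesteps + 1)], axis=0)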
# --------------------------- Setting up Testing Data -------------------------------------#
# Set up Test Data X
look_back_e = (timesteps_e * batch_size) - timesteps_e
x_test_raw = data_req[(test_train_split_index - look_back_e): -timesteps_d]
# print(x_test_raw.shape)
x_test = dh.create_x(x_test_raw)
look_back_d = (timesteps_e * batch_size) - timesteps_e
y_dec_test_raw = stock_data_n_dec[(test_train_split_index - look_back_d): -timesteps_d]
y_dec_test = dh.create_y(y_dec_test_raw, dim=4)
# # Set up True results for Test X (next 15 window).
# df_test = df_intraday_full.iloc[to_index: to_index+timesteps_e]
# time_test_y = df_test['Time']
# open_p_test_y = df_test['Open']
# high_p_test_y = df_test['High']
# low_p_test_y = df_test['Low']
# close_p_test_y = df_test['Close']
# real_oc = pd.concat([open_p_test_y, high_p_test_y, low_p_test_y, close_p_test_y], axis=1)
# --------------------------------------------LSTM Network Model--------------------------------------- #
# Train the model
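# LSTMNetworks1.fit receives both a decoder sequence (y_dec_train) and a target
# (y_final), which suggests an encoder-decoder model trained with teacher forcing;
# this reading is inferred from the call signature, not confirmed by the class itself.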
lstm1.fit(x_train, y_dec_train, y_final)
# Save the trained model
lstm1.save_trained_model('test-x.h5')
# Or load a previously saved model instead of retraining:
# lstm1.load_model('test-2 .h5')
# Predict with the model
pred = lstm1.predict(x_test, y_dec_test)
# Reformat the predicted data values from 3D array to 2D.
open_pred = dh.slice_from(pred[:, 0], 0, 1)
high_pred = dh.slice_from(pred[:, 1], 0, 1)
low_pred = dh.slice_from(pred[:, 2], 0, 1)
close_pred = dh.slice_from(pred[:, 3], 0, 1)
pred_oc_lr = np.concatenate([open_pred, high_pred, low_pred, close_pred], axis=1)
# Seed the inverse log-return transform with the last `timesteps_d` real OHLC prices.
dh.set_seeds(open_p_req[-timesteps_d:], high_p_req[-timesteps_d:], low_p_req[-timesteps_d:], close_p_req[-timesteps_d:])
pred_final_df = pd.DataFrame(dh.inverse_log_returns(pred_oc_lr))
pred_final_df.columns = ['Open', 'High', 'Low', 'Close']
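# DataHandling.inverse_log_returns is not reproduced here; conceptually it should
# invert r_t = ln(P_t / P_{t-1}) as P_t = P_{t-1} * exp(r_t), growing each series
# from the seeded real prices. A hedged single-column sketch (an assumption):
def _inverse_log_returns_sketch(log_rets, seed_price):
    """Hypothetical price reconstruction from log returns (illustrative only)."""
    prices = [float(seed_price)]
    for r in np.asarray(log_rets, dtype=float).ravel():
        prices.append(prices[-1] * np.exp(r))
    return np.array(prices[1:])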
print(pred_final_df[:5])
real_vals = np.concatenate([open_p_req[-timesteps_d:], high_p_req[-timesteps_d:], low_p_req[-timesteps_d:],
close_p_req[-timesteps_d:]], axis=1)
real_final_df = pd.DataFrame(real_vals)
real_final_df.columns = ['Open', 'High', 'Low', 'Close']
# --------------------------------------------Check the model--------------------------------------- #
print(real_final_df[:5])
time_slice = time[-timesteps_d:][:5]
# print(time_slice)
plt.clf()
plt.plot(time_slice, pred_final_df['Close'][:5], label="Predicted Closing Value")
plt.plot(time_slice, real_final_df['Close'][:5], label="Real Closing Value")
plt.xlabel('Time')
plt.ylabel('Closing Prices')
plt.title('Comparison between predicted and real prices')
plt.legend()
plt.show()
plt.clf()
length = range(close_p_req_lr.shape[0])
plt.plot(length, close_p_req_lr, label='Log Returned Close Values')
plt.title('Closing Prices - Log Returned')
plt.xlabel('No. of observations')
plt.ylabel('Close Prices - LR')
plt.show()