-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
123 lines (92 loc) · 4.15 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import logging
# Configure logging once at import time: INFO level, and every record carries
# the timestamp, level, logger name, thread name and source line number.
logging.basicConfig(level = logging.INFO, format=
'%(asctime)s:%(levelname)s:%(name)s:%(threadName)s:line %(lineno)d: %(message)s')
logger = logging.getLogger(__name__)
# Libraries
import sys
import json
import pandas as pd
# import numpy as np
import matplotlib
# The backend is selected before pyplot is imported; 'Agg' renders to files
# and needs no display.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Source
import util
# Identifier recorded with every experiment and used (first 6 characters) as the
# archive directory suffix in run_experiments; currently a fixed placeholder.
commit_hash = 'dev' #util.save_code()
from util.archiver import get_archiver
import config as c
# ML Libraries
# from keras.models import Model
from torch.nn import Module
# ML Source
from datasets import load
import models
import train
# Global Variables
# All five are None until load_data() rebinds them; run_experiments() reads
# embeddings_matrix, training_set and validation_set.
embeddings_matrix = None
indexer = None
training_set = None
validation_set = None
testing_set = None
@util.idempotent
def load_data(dataset, data_path, embeddings_path):
    '''
    Loads a dataset together with its embeddings into the module-level globals

    dataset, data_path, embeddings_path : forwarded unchanged to load.get_data

    Rebinds training_set, validation_set, testing_set, embeddings_matrix and indexer
    Returns nothing
    NOTE(review): presumably util.idempotent turns repeated calls into no-ops - confirm in util
    '''
    global embeddings_matrix, indexer, training_set, validation_set, testing_set
    # load.get_data must yield exactly these five items, in this order
    (training_set, validation_set, testing_set,
     embeddings_matrix, indexer) = load.get_data(dataset, data_path, embeddings_path)
def save_model(hyperparams, model, get_filename):
    '''
    Writes the description of a model to disk as three files

    hyperparams : dict of hyper parameters, dumped to hyperparameters.json
    model : pytorch Module, serialised to model.json and summarised in summary.txt
    get_filename : a function/or lambda that takes in a filename and returns saveable path

    Raises whatever util.assert_type raises on a type mismatch and
    AssertionError when get_filename is not callable
    '''
    util.assert_type(hyperparams, dict)
    util.assert_type(model, Module)
    assert callable(get_filename), 'takes in a filename and retuns saveable path'

    with open(get_filename('hyperparameters.json'), 'w') as f:
        json.dump(hyperparams, f, sort_keys=True, indent=2)

    # NOTE(review): torch.nn.Module does not define to_json itself; this relies
    # on the concrete model class providing it - confirm against models.Net
    with open(get_filename('model.json'), 'w') as f:
        f.write(model.to_json(indent=2))

    # stdout is pointed at the summary file while the summary is produced, so
    # anything printed along the way lands in the file as well. The finally
    # clause guarantees stdout is restored even when writing fails; otherwise
    # the process would be left with sys.stdout bound to a closed file.
    saved_stdout = sys.stdout
    with open(get_filename('summary.txt'), 'w') as summary_file:
        sys.stdout = summary_file
        try:
            if isinstance(model, Module):
                summary_file.write(str(model))
        finally:
            sys.stdout = saved_stdout
    return
def save_history(history, dirpath):
    '''
    Persists the outcome of a training run inside dirpath

    history : object exposing `params` and `history`, in the manner of the keras history object
    dirpath : directory path (string), files are written directly below it

    Writes training.json (history.params) and history.csv (history.history as a
    dataframe, redundant with the csv logger output), then plots every metric in
    c.metrics plus 'loss' through util.plot_metric
    '''
    params_path = dirpath + '/training.json'
    with open(params_path, 'w') as params_file:
        json.dump(history.params, params_file, indent=2)

    history_frame = pd.DataFrame.from_dict(history.history)
    history_path = dirpath + '/history.csv'
    history_frame.to_csv(history_path)

    # index of the maximum of the monitored column, handed to every plot
    monitored = history_frame.loc[:, c.monitor]
    best_index = monitored.argmax()

    metric_names = c.metrics + ['loss']
    for metric_name in metric_names:
        util.plot_metric(history_frame, metric_name, best_index, dirpath)
# main
def run_experiments(finetune, kernel_sizes, filters, lr, pooling, weight_decay, other_params):
    '''
    Builds one network, trains it and archives the model description and results

    finetune, kernel_sizes, filters, lr, pooling, weight_decay : forwarded to models.Net
    other_params : dict of extra values merged with the network hyperparameters through
                   util.fill_dict; it is modified in place to carry 'commit_hash'

    Reads the module globals embeddings_matrix, training_set and validation_set,
    so load_data has to be called first
    Checkpoints go to an archive under data/models, everything else under data/results
    '''
    other_params['commit_hash'] = commit_hash
    vocab_size, dimensions = embeddings_matrix.shape

    net = models.Net(
        dimensions=dimensions,
        finetune=finetune,
        vocab_size=vocab_size,
        kernel_sizes=kernel_sizes,
        filters=filters,
        dropout_rate=0.5,
        pooling=pooling,
        lr=lr,
        weight_decay=weight_decay,
        embeddings_matrix=embeddings_matrix,
    )

    hyperparams = util.fill_dict(net.hyperparameters, other_params)
    hyperparams_text = json.dumps(hyperparams, sort_keys=True, indent=None)
    logger.info('experiment with hyperparameters: {}'.format(hyperparams_text))

    # both archives share the same suffix derived from the commit hash
    archive_suffix = "_" + commit_hash[:6]
    with get_archiver(datadir='data/models', suffix=archive_suffix) as model_archive:
        with get_archiver(datadir='data/results', suffix=archive_suffix) as result_archive:
            save_model(hyperparams, net, result_archive.getFilePath)

            early_stopping = train.EarlyStopping(c.monitor, c.patience, c.monitor_objective)
            model_checkpoint = train.ModelCheckpoint(model_archive.getFilePath('checkpoint'))
            csv_logger = train.CSVLogger(result_archive.getFilePath('logger.csv'))
            training_callbacks = [early_stopping, model_checkpoint, csv_logger]

            # the optimizer settings are taken from the network's own hyperparameters
            net_params = net.hyperparameters
            adam_config = train.AdamConfig(
                lr=net_params['lr'],
                beta_1=net_params['beta_1'],
                beta_2=net_params['beta_2'],
                epsilon=net_params['epsilon'],
                weight_decay=net_params['weight_decay'],
            )

            history = train.fit(
                net, training_set, validation_set,
                batch_size=c.batch_size,
                epochs=c.epochs,
                validation_split=0.2,
                callbacks=training_callbacks,
                optimizer=adam_config,
            )

            save_history(history, result_archive.getDirPath())