-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun.py
75 lines (55 loc) · 2.37 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split
import pandas as pd
import time
import datetime
import random
import numpy as np
import csv
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import RandomSampler, DataLoader, Subset
from torch.utils.data import TensorDataset, random_split, SequentialSampler
from tqdm import tqdm
from preprocessing_bert import *
from helpers_bert import *
from models_bert import *
from train_bert import *
from helpers import *
def main(use_custom_model=False):
    '''
    Load a trained BERT model from disk, run it on the test set and write
    the predictions to a submission csv file.

    Parameters
    ----------
    use_custom_model : bool, optional
        If False (default), load the checkpoint at path_model_1 with
        model_name 'BertForSequenceClassification' (our best submission).
        If True, load the checkpoint at path_model_2 with
        model_name 'BertWithCustomClassifier' (our second best submission).

    Returns
    -------
    None. The result is the csv file written at path_submission.
    '''
    ### PATHS
    PATH_DATA = './data/'
    # replace these with the paths where you saved the model files, ideally in
    # a subdirectory of where you placed run.py
    # model_1 is our overall best submission using BertForSequenceClassification
    # corresponds to submission #169220 on ai-crowd
    path_model_1 = PATH_DATA + 'models/BERT/best_submission_bert.pkl'
    # model_2 is our second best submission using a modified BertForSequenceClassification
    path_model_2 = PATH_DATA + 'models/BERT/best_submission_bert_custom.pkl'
    path_test_data = PATH_DATA + 'twitter-datasets/test_data.txt'
    path_submission = PATH_DATA + 'submissions/output_run_py.csv'
    ###

    # progress bars
    tqdm.pandas()

    # setup the device : gpu if available otherwise cpu
    device = gpu_cpu_setup()

    # pick the checkpoint and the matching model class name together so that
    # the two can never get out of sync
    if use_custom_model:
        path_model, model_name = path_model_2, 'BertWithCustomClassifier'
    else:
        path_model, model_name = path_model_1, 'BertForSequenceClassification'

    # load the model with the trained parameters from disk
    model = load_model_disk(device, path_model, model_name=model_name)

    # load the test data, tokenize it and put into a dataloader
    test_dataloader = load_test_data(path_test_data, max_len=140)

    # make the prediction on the test data using the loaded model
    y_pred, ids = make_prediction(model, test_dataloader, device)

    # create the submission csv file
    create_csv_submission(ids, y_pred, path_submission)
# Script entry point: run the prediction pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()