main.py
import gc
import os
import warnings

import pandas as pd
import torch

from finetune import finetune
from utils import load_model_and_tokenizer as load
from utils import predict, evaluate, load_validation_data

warnings.filterwarnings('ignore')
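
# NOTE: the helpers imported from utils are assumed to have the following
# shapes, inferred from their call sites in main() below (not verified
# against utils.py itself):
#   load(prompt_type, finetuned: bool)      -> (model, tokenizer)
#   load_validation_data(sample_size: int)  -> (X, y_true)
#   predict(X, model, tokenizer, prompt_type) -> y_pred
#   evaluate(y_true, y_pred) -> (accuracy, precision, recall, f1, conf_matrix)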

def main(test_times=10):
    prompt_types = ['good_prompt', 'bad_prompt', 'no_prompt']

    # Fine-tune one model per prompt type, creating the output directory
    # for each model if it does not already exist.
    for prompt_type in prompt_types:
        if not os.path.exists(f"./finetuned_models/{prompt_type}"):
            os.makedirs(f"./finetuned_models/{prompt_type}")
        finetune(prompt_type)
        gc.collect()
        torch.cuda.empty_cache()

    if not os.path.exists("./results"):
        os.makedirs("./results")

    # Evaluate the fine-tuned models: run `test_times` rounds of 100 sampled
    # validation examples each and record the metrics for every round.
    for prompt_type in prompt_types:
        df = pd.DataFrame(columns=["Test", "Accuracy", "Precision", "Recall", "F1 Score"])
        model, tokenizer = load(prompt_type, True)
        for i in range(test_times):
            X, y_true = load_validation_data(sample_size=100)
            y_pred = predict(X, model, tokenizer, prompt_type)
            accuracy, precision, recall, f1score, conf_matrix = evaluate(y_true, y_pred)
            print(f"Test {i+1}: Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1score}")
            df.loc[len(df)] = {"Test": i+1, "Accuracy": accuracy, "Precision": precision, "Recall": recall, "F1 Score": f1score}
        df.to_csv(f"./results/{prompt_type}_finetuned.csv", index=False)
        del model
        del tokenizer
        gc.collect()
        torch.cuda.empty_cache()

    # Repeat the evaluation with the base (not fine-tuned) models for comparison.
    for prompt_type in prompt_types:
        df = pd.DataFrame(columns=["Test", "Accuracy", "Precision", "Recall", "F1 Score"])
        model, tokenizer = load(prompt_type, False)
        for i in range(test_times):
            X, y_true = load_validation_data(sample_size=100)
            y_pred = predict(X, model, tokenizer, prompt_type)
            accuracy, precision, recall, f1score, conf_matrix = evaluate(y_true, y_pred)
            print(f"Test {i+1}: Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1score}")
            df.loc[len(df)] = {"Test": i+1, "Accuracy": accuracy, "Precision": precision, "Recall": recall, "F1 Score": f1score}
        df.to_csv(f"./results/{prompt_type}_unfinetuned.csv", index=False)
        del model
        del tokenizer
        gc.collect()
        torch.cuda.empty_cache()


if __name__ == "__main__":
    main()
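
# Example invocation (a sketch; output paths are the ones used above):
#   $ python main.py
# Each run writes ./results/<prompt_type>_finetuned.csv and
# ./results/<prompt_type>_unfinetuned.csv, with one row of metrics per test round.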