main_llama.py
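"""Evaluate Meta Llama models on tabular prediction datasets via the Replicate API.

For every combination of dataset, model, in-context example count, feature count,
and prompt configuration, the script builds a Llama 3 chat prompt, queries the model
through Replicate, and appends the raw and processed responses to a CSV results file.
"""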
import argparse
import os
import time
import pandas as pd
import replicate
from utils import create_file_name, create_IO_example, get_additional_instruction, create_explanation, process_response
from reader import read_dataset
def parse_arguments() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="LLM Dataset Evaluation Script for Meta Models.")
    parser.add_argument("--datasets", nargs="+", default=["Insurance_Cost", "Admission_Chance", "Used_Car_Prices"], help="List of datasets to evaluate.")
    parser.add_argument("--models", nargs="+", default=["meta/meta-llama-3-70b-instruct"], help="List of Replicate models to use.")
    parser.add_argument("--in-context-numbers", nargs="+", type=int, default=[0, 10, 30, 100], help="List of in-context example numbers to use.")
    parser.add_argument("--feature-nums", nargs="+", type=int, default=[1, 2, 3], help="List of feature numbers to use.")
    parser.add_argument("--configs", nargs="+", default=["Named_Features", "Anonymized_Features", "Randomized_Ground_Truth", "Reasoning"], choices=["Named_Features", "Anonymized_Features", "Randomized_Ground_Truth", "Reasoning", "Missing_Inputs", "Missing_Inputs_and_Anonymized_Features"], help="List of prompt configurations to use.")
    parser.add_argument("--api-key-token", required=True, help="Replicate AI API token.")
    parser.add_argument("--test-sample-num", type=int, default=300, help="Number of test samples to evaluate.")
    parser.add_argument("--max-retries", type=int, default=10, help="Number of retries before skipping an instance.")
    # --max-failures is referenced in evaluate_dataset() but was missing from the parser; the default value is an assumption.
    parser.add_argument("--max-failures", type=int, default=100, help="Total number of failed API calls tolerated before aborting a run.")
    parser.add_argument("--output-folder", type=str, default="LLM_Results", help="Name of the folder in which outputs are saved.")
    parser.add_argument("--testing-sampling", type=int, default=0, help="An identifier assigned to the output files to distinguish sampling runs.")
    return parser.parse_args()
def run_replicate_model(model_name: str, prompt: str, max_tokens: int) -> str:
    response = replicate.run(
        model_name,
        input={
            "top_p": 0.99,
            "prompt": prompt,
            "max_tokens": max_tokens,
            "min_tokens": 1,
            "temperature": 0.1,
            "prompt_template": "{prompt}"
        },
    )
    return "".join(response)
def evaluate_dataset(args: argparse.Namespace, dataset: str, model_name: str, in_context: int, feature_num: int, config: str) -> None:
    file_name = create_file_name(args.output_folder, dataset, model_name, in_context, feature_num, config, testing_sampling=args.testing_sampling)
    print(f"Processing: {file_name}")
    names, x_incontext, x_test, y_incontext, y_test = read_dataset(dataset, config)
    existing_df = pd.read_csv(file_name) if os.path.exists(file_name) else pd.DataFrame()
    additional_instruction = get_additional_instruction(dataset, names[-1]) if in_context == 0 else ""
    explanation = create_explanation(names[-1], additional_instruction, ("Reasoning" in config))
    # Build the Llama 3 chat prompt: a system message followed by in-context user/assistant example pairs.
    prompt_messages = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{explanation}<|eot_id|>"
    for x, y in zip(x_incontext[:in_context], y_incontext[:in_context]):
        ex_context, ex_output = create_IO_example(dataset, x, y, feature_num, names, config)
        prompt_messages += f"<|start_header_id|>user<|end_header_id|>\n\n{ex_context}<|eot_id|>"
        prompt_messages += f"<|start_header_id|>assistant<|end_header_id|>\n\n{ex_output}<|eot_id|>"
    failures = 0
    for num, (x, y) in enumerate(zip(x_test[:args.test_sample_num], y_test[:args.test_sample_num])):
        print(num, end=" ")
        # Skip test samples that already have a row in the existing output file.
        if num < len(existing_df):
            continue
        ex_context, _ = create_IO_example(dataset, x, y, feature_num, names, config)
        cur_prompt_messages = prompt_messages + f"<|start_header_id|>user<|end_header_id|>\n\n{ex_context}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        response_text = ""  # Ensure response_text is defined even if every attempt raises before assignment.
        for _ in range(args.max_retries):
            try:
                # Reasoning prompts need room for an explanation; otherwise a short numeric answer suffices.
                if "Reasoning" in config:
                    response_text = run_replicate_model(model_name, cur_prompt_messages, 1000)
                else:
                    response_text = run_replicate_model(model_name, cur_prompt_messages, 6)
                processed_text = process_response(response_text, "Reasoning" in config)
                df = pd.DataFrame([{"raw_text": response_text, "processed_response": processed_text}])
                df.to_csv(file_name, mode='a', header=not os.path.exists(file_name), index=False)
                break
            except Exception as e:
                print(f"Error processing response: {e}")
                failures += 1
                if failures > args.max_failures:
                    print(f"Exceeded maximum number of failures ({args.max_failures}). Exiting.")
                    return
                time.sleep(20)
        else:
            # All retries failed: record the raw text (if any) and -1.0 as the processed response.
            print(f"Failed to process response after {args.max_retries} attempts. Skipping and recording -1 instead.")
            df = pd.DataFrame([{"raw_text": response_text, "processed_response": -1.0}])
            df.to_csv(file_name, mode='a', header=not os.path.exists(file_name), index=False)
        time.sleep(1)
def main():
    args = parse_arguments()
    os.environ["REPLICATE_API_TOKEN"] = args.api_key_token
    # Iterate through all combinations of datasets, models, in-context examples, features, and configurations.
    for dataset in args.datasets:
        for model_name in args.models:
            for in_context in args.in_context_numbers:
                for feature_num in args.feature_nums:
                    for config in args.configs:
                        # Skip certain combinations based on experimental constraints.
                        if (in_context == 0 and not (("Named_Features" in config) or ("Reasoning" in config))) or \
                           (config == "Reasoning" and in_context > 0) or \
                           (dataset == "Admission_Chance" and in_context > 101) or \
                           (feature_num == 4 and dataset != "Used_Car_Prices"):
                            continue
                        evaluate_dataset(args, dataset, model_name, in_context, feature_num, config)
if __name__ == "__main__":
    main()
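# Example invocation (illustrative only; the token value is a placeholder and all
# other arguments fall back to the defaults defined in parse_arguments()):
#   python main_llama.py --api-key-token <REPLICATE_API_TOKEN> \
#       --datasets Insurance_Cost --models meta/meta-llama-3-70b-instruct \
#       --in-context-numbers 0 10 --feature-nums 1 2 --configs Named_Features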