-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaggregare_raw_outputs.py
89 lines (76 loc) · 3.34 KB
/
aggregare_raw_outputs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import json
import csv
import subprocess
# Load the configuration from the .sh file
def load_config(file_path):
    """Source a shell configuration file and return the resulting environment.

    The file is sourced in a fresh ``bash`` process and the output of ``env``
    is parsed, so only variables that end up in the process environment
    (e.g. exported ones) are visible in the result.

    Args:
        file_path: Path to the shell script to source.

    Returns:
        A dict mapping each environment variable name to its value, with
        surrounding whitespace stripped from both.

    Raises:
        subprocess.CalledProcessError: If bash fails to source the file
            (for example because it does not exist).
    """
    # The path is handed to bash as the positional parameter $1 instead of
    # being interpolated into the command string, so paths containing spaces
    # or shell metacharacters are handled safely and no outer shell is needed.
    result = subprocess.run(
        ["bash", "-c", 'source "$1" && env', "bash", file_path],
        stdout=subprocess.PIPE,
        check=True,
    )
    env_vars = {}
    # errors='replace' keeps a stray non-UTF-8 byte in an unrelated variable
    # from aborting the whole configuration load.
    for line in result.stdout.decode('utf-8', errors='replace').split("\n"):
        key, separator, value = line.partition("=")
        if not separator:
            # Blank lines and continuation lines of multi-line values carry
            # no NAME=VALUE pair; skip them rather than record bogus keys.
            continue
        env_vars[key.strip()] = value.strip()
    return env_vars
# Load the configuration variables
config_file = './analysis_configuration.sh'
config_vars = load_config(config_file)

# Fail fast with a readable message when a variable needed to build the
# input/output paths is absent, instead of a TypeError from os.path.join or
# silently using folder names such as "None-None".
_required_keys = (
    'model_name',
    'annotation_results_folder',
    'dataset',
    'clip',
    'dimension',
    'prompt_version',
    'test_number',
)
_missing_keys = [key for key in _required_keys if key not in config_vars]
if _missing_keys:
    raise SystemExit(
        f"Missing configuration variables in {config_file}: {', '.join(_missing_keys)}"
    )

# Path variables based on configuration
json_subfolder = config_vars.get('model_name')
annotation_results_folder = config_vars.get('annotation_results_folder')
dataset = config_vars.get('dataset')
clip = config_vars.get('clip')
dimension = config_vars.get('dimension')
criteria_group = config_vars.get('criteria_category')
criteria = config_vars.get('criteria')
version = config_vars.get('prompt_version')
test_number = config_vars.get('test_number')
model_name_with_test_number = f"{json_subfolder}-{test_number}"

# Paths to JSON and CSV folders
json_parent_folder = os.path.join(annotation_results_folder, 'model_raw_output', dataset, dimension, version, model_name_with_test_number, clip)
csv_folder = os.path.join(annotation_results_folder, 'extracted_annotation', dimension, version, model_name_with_test_number)
csv_filename = f'Evaluation_of_{dimension}_with_prompt_version_{version}_test_number_{test_number}_model_{json_subfolder}.csv'


def _load_json_record(json_path, json_id):
    """Return the JSON object stored at json_path, or {} if it is unusable.

    An undecodable (e.g. empty) file or a top-level value that is not an
    object is reported on stdout and treated as an empty record, so the file
    still gets a row containing only its ID and clip.
    """
    with open(json_path, 'r', encoding='utf-8') as jsonfile:
        try:
            data = json.load(jsonfile)
        except json.JSONDecodeError:
            print(f"Empty JSON file: {json_id}.json")
            return {}
    if not isinstance(data, dict):
        print(f"Unexpected non-object JSON content: {json_id}.json")
        return {}
    return data


# List all JSON files in the folder (sorted so the row order is reproducible)
json_file_names = sorted(
    name for name in os.listdir(json_parent_folder) if name.endswith(".json")
)
json_files = [
    {
        "ID": os.path.splitext(name)[0],  # Save the name of the JSON file as the ID
        "clip": clip,  # Save the current clip variable
    }
    for name in json_file_names
]

# Check if there are any JSON files
if json_files:
    # Create the CSV file
    os.makedirs(csv_folder, exist_ok=True)
    csv_file_path = os.path.join(csv_folder, csv_filename)

    # Parse every file exactly once, collecting the rows and the column names
    # together. A list (plus a set for membership tests) keeps the header
    # order deterministic: ID, clip, then JSON keys in first-seen order.
    all_fieldnames = ["ID", "clip"]
    seen_fieldnames = set(all_fieldnames)
    rows = []
    for name, json_data in zip(json_file_names, json_files):
        # Open by the real file name rather than rebuilding it from the ID.
        data = _load_json_record(os.path.join(json_parent_folder, name), json_data["ID"])
        for key in data:
            if key not in seen_fieldnames:
                seen_fieldnames.add(key)
                all_fieldnames.append(key)
        # ID and clip are applied last so they override same-named JSON keys.
        rows.append({**data, **json_data})

    # Open CSV file for writing
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=all_fieldnames, extrasaction='ignore')
        # Write header
        writer.writeheader()
        # Write one row per JSON file; missing columns are left empty
        writer.writerows(rows)
else:
    print("No JSON files found.")