-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconvert_results.py
52 lines (41 loc) · 1.93 KB
/
convert_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import csv
import sys
import os
import pandas as pd
def main(PATH: str):
df = pd.read_csv(os.path.join(PATH, "test_pred.csv"))
indices = pd.read_csv(os.path.join(PATH, "test_indices.txt"), header=None, names=["index"])
df["problem_id"] = indices["index"].apply(lambda text: int(text))
data = df.to_dict('records')
converted_data = []
current_index = int(data[0]["problem_id"])
current_data_point = data[0]
for row in data[1:]:
index = int(row["problem_id"])
if index != current_index:
converted_data.append(current_data_point)
current_data_point = row
current_index = index
else:
current_data_point["Question"] += "\n" + row["Question"]
current_data_point["Ground Truth"] += row["Ground Truth"]
new_questions = row["Prediction"].split("\n")
existing_questions = current_data_point["Prediction"].split("\n")
for new_question in new_questions:
if new_question not in existing_questions:
existing_questions.append(new_question)
existing_questions = [question for question in existing_questions if len(question) > 0]
current_data_point["Prediction"] = "\n".join(existing_questions)
converted_data.append(current_data_point)
PATH_CONVERTED_RESULTS = f"{os.path.join(PATH)}/converted_test_pred.csv"
with open(PATH_CONVERTED_RESULTS, "w") as pred_file:
write = csv.writer(pred_file)
write.writerow(["Question", "Ground Truth", "Prediction"])
for row in converted_data:
write.writerow([row["Question"], row["Ground Truth"], row["Prediction"]])
print(f"File saved at :{PATH_CONVERTED_RESULTS}")
return PATH_CONVERTED_RESULTS
if __name__ == '__main__':
# path to predictions and newline delimited file where each row refers to the index of algebra story problem
PATH = sys.argv[1]
main(PATH)