-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun_all.sh
46 lines (39 loc) · 3.35 KB
/
run_all.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# End-to-end evaluation driver: check the environment, generate model
# responses for suite_v1, then grade and summarise suite_v1 and the
# suite_v2 parts for both gpt-4 and gpt-3.5-turbo.
#
# Stop at the first failing command (-e) and on any unset variable (-u).
# Without this, a failed environment check was ignored and the script went on
# to call the generation/grading steps, and a failed grader run was followed
# by a statistics step reading a missing or stale results file.
set -eu

# First check the runtime env
python env_check.py

# Generate responses with 10 trials using GPT4 and GPT3.5, saved to responses/
python openai_caller.py suite_v1.yaml gpt-4 --n 10
python openai_caller.py suite_v1.yaml gpt-3.5-turbo --n 10
# Compute scores of best@10, saved to results/
python grader_main.py suite_v1.yaml responses/gpt-4_0.2_0.9_10
python grader_main.py suite_v1.yaml responses/gpt-3.5-turbo_0.2_0.9_10
# Compute result statistics
python print_result_stat.py results/suite_v1_gpt-4_0.2_0.9_10.yaml results/suite_v1_gpt-4_0.2_0.9_10_stats_table.txt --model_name gpt4
python print_result_stat.py results/suite_v1_gpt-3.5-turbo_0.2_0.9_10.yaml results/suite_v1_gpt-3.5-turbo_0.2_0.9_10_stats_table.txt --model_name gpt3.5

# suite_v2 parts. The part order (0, 3, 4, 2) is kept exactly as it was when
# each part had its own copy of these commands.
for part in 0 3 4 2; do
  suite="suite_v2_part_${part}"

  # Running part ${part}
  # Response generation is left disabled for the suite_v2 parts; the grader is
  # pointed at response directories under responses/ that must already exist.
  # python openai_caller.py ${suite}.yaml gpt-4 --n 10
  # python openai_caller.py ${suite}.yaml gpt-3.5-turbo --n 10
  python grader_main.py "${suite}.yaml" "responses/gpt-4_0.2_0.9_10_${suite}"
  python grader_main.py "${suite}.yaml" "responses/gpt-3.5-turbo_0.2_0.9_10_${suite}"
  # Note: the results YAML name carries the suite name twice (prefix and
  # suffix), while the stats table name carries it only as a prefix.
  python print_result_stat.py "results/${suite}_gpt-4_0.2_0.9_10_${suite}.yaml" "results/${suite}_gpt-4_0.2_0.9_10_stats_table.txt" --model_name gpt4
  python print_result_stat.py "results/${suite}_gpt-3.5-turbo_0.2_0.9_10_${suite}.yaml" "results/${suite}_gpt-3.5-turbo_0.2_0.9_10_stats_table.txt" --model_name gpt3.5
done