
Commit

logprob evaluation v2
pacman000 committed Nov 22, 2024
1 parent 5083393 commit 6108788
Showing 3 changed files with 42 additions and 34 deletions.
src/abstractions/model.py (2 changes: 1 addition & 1 deletion)
@@ -64,7 +64,7 @@ def inference_standalone(
if hasattr(Model, "always_force_rewrite")
else False
),
max_batch_size=262144,
max_batch_size=4096,
map_key_fields=True,
)
print("Job finished.")
src/evaluation/test_eval_01.py (12 changes: 7 additions & 5 deletions)
@@ -1,5 +1,5 @@
import os, json
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

from ..abstractions import Model
from .utils import generate_alpaca, _collect
@@ -24,19 +24,21 @@
"8B-C020-instruct",
"8B-C021-instruct",
]
set_model = set_model[:2]
vec = []
for m in set_model:
#boi = Model(m)
#v = boi.evaluate(method="fast", logprobs = True)
boi = Model(m)
v = boi.evaluate(method="fast", logprobs = True)
'''
with open("output/datasets/evaluation_output_mc_" + m + ".json", 'r') as f:
d = json.load(f)
raw = _collect(d)
with open('output/evaluation_results/' + m + '_single/' + m + '_raw.json', 'w') as f:
json.dump(raw, f)
v = qt.calculate_model('output/evaluation_results/' + m + '_single/', m)

'''
vec.append(v)
test_name = "8b_all_fixed"
test_name = "logprob_test"
with open("output/evaluation_results/" + test_name + ".json", "w") as f:
lst = [list(boi) for boi in vec]
json.dump(lst, f)
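The loop above writes one entry per evaluated model into output/evaluation_results/logprob_test.json. A minimal inspection sketch, not part of the commit, assuming the file exists and that each entry is the list form of whatever Model.evaluate(method="fast", logprobs=True) returned (the structure of those vectors is not shown in this diff):

import json

# Load the combined results and print one line per model, in the same order
# as the evaluation loop above wrote them.
with open("output/evaluation_results/logprob_test.json") as f:
    results = json.load(f)

for i, per_model in enumerate(results):
    print(f"model {i}: {per_model}")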
src/evaluation/utils.py (62 changes: 34 additions & 28 deletions)
@@ -252,6 +252,32 @@ def semantic_matching(item, mapping, four=False, verbal=False):
return "invalid"

def _collect(output_data):
output = {}
for entry in output_data:
s_id = entry["scenario_id"]
q_type = entry["question_type"]
mapping = entry["mapping"]
logprob = entry["logprob"]
if not s_id in output.keys():
if q_type.startswith("4c") or q_type.startswith("repeat2"):
output[s_id] = {
"4c_fav": [0, 0, 0, 0, 0],
"repeat2_fav": [0, 0, 0, 0, 0]
}
else:
output[s_id] = {
"ab": [0, 0, 0],
"compare": [0, 0, 0],
"repeat": [0, 0, 0],
}
for i, x in enumerate(logprob):
output[s_id][q_type][mapping[i] - 1] = x
output[s_id][q_type][-1] += x
return output



def __collect(output_data):
"""
Sub-function called by 'collect' to perform logprob-based answer collection.
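The new _collect helper above groups per-option logprobs by scenario and question type, keeping a running sum in the last slot of each list; a given scenario_id is initialized for either the ab/compare/repeat family or the 4c/repeat2 family depending on its first entry. A minimal usage sketch, not part of the commit, with invented scenario ids, mappings, and logprob values (only the key names come from the code above):

from src.evaluation.utils import _collect  # assumes the repo root is on sys.path

sample = [
    # mapping[i] is the 1-based option slot where logprob[i] is stored
    {"scenario_id": "s1", "question_type": "ab", "mapping": [2, 1], "logprob": [-0.7, -1.9]},
    {"scenario_id": "s1", "question_type": "repeat", "mapping": [1, 2], "logprob": [-1.2, -0.4]},
    {"scenario_id": "s2", "question_type": "4c_fav", "mapping": [3, 1, 4, 2], "logprob": [-2.0, -0.5, -3.1, -1.2]},
]

out = _collect(sample)
# out["s1"]["ab"]     -> [-1.9, -0.7, -2.6]              (two option slots + sum)
# out["s1"]["repeat"] -> [-1.2, -0.4, -1.6]
# out["s2"]["4c_fav"] -> [-0.5, -1.2, -2.0, -3.1, -6.8]  (four option slots + sum)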
@@ -497,25 +523,13 @@ def generate_alpaca(source: str, dir: str, rearrange = True, logprobs = False):
.format(boi["context"], rearranged_actions[0], rearranged_actions[1])
.strip()
)
mapping_action = [('[A]', rearranged_actions[0], 'yes'), ('[B]', rearranged_actions[1], 'no')]
if logprobs:
boi_ab["predict_id"] = -1
boi_repeat["predict_id"] = -1
boi_compare["predict_id"] = -1
boi_ab["predict"] = [mapping_action[i-1][0] for i in mapping]
boi_repeat["predict"] = [mapping_action[i-1][1] for i in mapping]
boi_compare["predict"] = [mapping_action[i-1][2] for i in mapping]
cut += 1
output_list_dic.extend([boi_ab, boi_compare, boi_repeat])
if logprobs:
predicts = [('[A]', rearranged_actions[0], 'yes'), ('[B]', rearranged_actions[1], 'no')]
for i, x in enumerate(predicts):
boi_ab_ = copy.deepcopy(boi_ab)
boi_ab_['predict'] = x[0]
boi_ab_["predict_id"] = mapping[i]
boi_repeat_ = copy.deepcopy(boi_repeat)
boi_repeat_['predict'] = x[1]
boi_repeat_["predict_id"] = mapping[i]
boi_compare_ = copy.deepcopy(boi_compare)
boi_compare_['predict'] = x[2]
boi_compare_["predict_id"] = mapping[i]
output_list_dic.extend([boi_ab_, boi_repeat_, boi_compare_])
try:
with open(
os.path.join("src", "evaluation", "assets", "input_alpaca.json"), "r"
@@ -614,20 +628,12 @@ def generate_alpaca(source: str, dir: str, rearrange = True, logprobs = False):
.strip()
)
cut += 1
mapping_action = [('[A]', rearranged_actions[0]), ('[B]', rearranged_actions[1]), ('[C]', rearranged_actions[2]), ('[D]', rearranged_actions[3])]
if logprobs:
boi_ab_f["predict_id"] = -1
boi_rp_f["predict_id"] = -1
boi_ab_f["predict"] = [mapping_action[i-1][0] for i in mapping]
boi_rp_f["predict"] = [mapping_action[i-1][1] for i in mapping]
output_list_dic.extend([boi_ab_f, boi_rp_f])
if logprobs:
predicts = [('[A]', rearranged_actions[0]), ('[B]', rearranged_actions[1]), ('[C]', rearranged_actions[2]), ('[D]', rearranged_actions[3])]
for i, x in enumerate(predicts):
boi_ab_f_ = copy.deepcopy(boi_ab_f)
boi_ab_f_['predict'] = x[0]
boi_ab_f_["predict_id"] = mapping[i]
boi_rp_f_ = copy.deepcopy(boi_rp_f)
boi_rp_f_['predict'] = x[1]
boi_rp_f_["predict_id"] = mapping[i]
output_list_dic.extend([boi_ab_f_, boi_rp_f_])


with open(
os.path.join("src", "evaluation", "assets", "input_alpaca.json"), "r"
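In the logprobs branch of both generate_alpaca hunks, a single record now carries every candidate completion: predict becomes a list ordered by mapping, with predict_id = -1 marking it as a multi-candidate logprob query, in place of the per-candidate deep-copied records that appear alongside them in the diff. A minimal sketch of the reordering, not part of the commit, with an invented mapping and action pair:

# Invented values, for illustration only.
mapping = [2, 1]  # the second tuple is listed first, then the first
rearranged_actions = ["action_one", "action_two"]
mapping_action = [("[A]", rearranged_actions[0], "yes"),
                  ("[B]", rearranged_actions[1], "no")]

# The same comprehensions as in the -497/+523 hunk above:
predict_ab      = [mapping_action[i - 1][0] for i in mapping]  # ["[B]", "[A]"]
predict_repeat  = [mapping_action[i - 1][1] for i in mapping]  # ["action_two", "action_one"]
predict_compare = [mapping_action[i - 1][2] for i in mapping]  # ["no", "yes"]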
