evaluate_all_tasks.py
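"""Evaluate a Hugging Face model on all SciBERT benchmark tasks and write the scores to a CSV.

Example invocation (a sketch, not verbatim from the repository: the flag names
are generated by HfArgumentParser from the CustomArgumentsGeneral fields below
plus the standard Hugging Face TrainingArguments flags, of which --output_dir
is typically required; the model/tokenizer names and paths are illustrative):

    python evaluate_all_tasks.py \
        --output_dir ./eval_output \
        --hf_token <your_hf_token> \
        --model allenai/scibert_scivocab_uncased \
        --tokenizer allenai/scibert_scivocab_uncased \
        --data ./scibert/data \
        --max_length 512
"""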
from dataclasses import dataclass, field
import os

import pandas as pd
import torch
from transformers import HfArgumentParser, TrainingArguments

from model_eval import ModelEval


@dataclass
class CustomArgumentsGeneral:
    hf_token: str = field(
        metadata={"help": "Your Hugging Face token, needed to access a gated (closed) model."}
    )
    model: str = field(
        metadata={"help": "The model name on the Hugging Face Hub."}
    )
    tokenizer: str = field(
        metadata={"help": "The tokenizer name on the Hugging Face Hub."}
    )
    data: str = field(
        metadata={"help": "Path to a directory containing all dataset folders, laid out like 'scibert/data' from the SciBERT repository."}
    )
    max_length: int = field(
        metadata={"help": "Maximum sequence length to use when tokenizing."}
    )


def main():
    # Parse the standard Hugging Face TrainingArguments together with the
    # custom arguments defined above.
    parser = HfArgumentParser((TrainingArguments, CustomArgumentsGeneral))
    training_args, custom_args = parser.parse_args_into_dataclasses()

    hf_token = custom_args.hf_token
    model_name = custom_args.model
    tokenizer_name = custom_args.tokenizer
    data_path = custom_args.data
    max_length = custom_args.max_length
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Map each task to the dataset directories it is evaluated on.
    tasks_datasets_dict = {
        "ner": [
            data_path + "/ner/bc5cdr",
            data_path + "/ner/JNLPBA",
            data_path + "/ner/NCBI-disease",
            data_path + "/ner/sciie",
        ],
        "pico": [
            data_path + "/pico/ebmnlp",
        ],
        "rel": [
            data_path + "/text_classification/chemprot",
            data_path + "/text_classification/sciie-relation-extraction",
        ],
        "cls": [
            data_path + "/text_classification/citation_intent",
            data_path + "/text_classification/mag",
            data_path + "/text_classification/sci-cite",
        ],
        "dep": [
            data_path + "/parsing/genia",
        ],
    }

    # Accumulators for the results table.
    tasks = []
    datasets = []
    metrics1 = []
    scores1 = []
    metrics2 = []
    scores2 = []

    for task, dataset_paths in tasks_datasets_dict.items():
        print("Evaluating " + task + "...")
        for dataset_path in dataset_paths:
            print("Using " + dataset_path + "...")
            model_eval = ModelEval(
                task=task,
                model_name=model_name,
                tokenizer_name=tokenizer_name,
                data_path=dataset_path,
                device=device,
                hf_args=training_args,
                hf_token=hf_token,
                max_length=max_length,
            )
            score1, score2 = model_eval.evaluate_model()
            # The dataset paths are built with "/", so take the last path
            # component directly instead of splitting on os.path.sep (which
            # would fail on Windows).
            dataset_name = dataset_path.rstrip("/").split("/")[-1]
            # Dependency parsing reports UAS/LAS; all other tasks report F1.
            if task == "dep":
                metric1, metric2 = "uas", "las"
            else:
                metric1, metric2 = "micro F1", "macro F1"
            tasks.append(task)
            datasets.append(dataset_name)
            metrics1.append(metric1)
            scores1.append(score1)
            metrics2.append(metric2)
            scores2.append(score2)

    # Collect the scores into a DataFrame and save it as "<model>.csv" in the
    # current working directory.
    df_dict = {
        "task": tasks,
        "dataset": datasets,
        "metric 1": metrics1,
        "score 1": scores1,
        "metric 2": metrics2,
        "score 2": scores2,
    }
    results_df = pd.DataFrame(df_dict)
    model = model_name.split("/")[-1]
    results_df.to_csv(os.path.join(os.getcwd(), model + ".csv"))


if __name__ == "__main__":
    main()