fewshot.py
import openai
import pandas as pd
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
CATEGORY_DEFINITIONS = {
    'aspersion': 'Aspersion is the use of disrespectful attacks or damaging statements targeted at ideas, plans, or policies.',
    'personal_attack': 'Personal attack is the use of specific damaging or derogatory remarks towards participants in the conversation.',
    'third_party_attack': 'Third party attack is the use of specific damaging or derogatory remarks towards others, such as a group of people, a branch of the government, or a political party.',
    'stereotype': 'Stereotype is the use of neutral or negative generalizations or labels, or the imposition of discrimination upon certain groups.',
    'vulgarity': 'Vulgarity is the use of vulgar language, or abbreviations of such, with toxic intentions towards the discussion or fellow discussants.',
    'civility': 'Incivility is behavior that shows disrespect towards individuals, groups, political communities, or topics of discussion.'
}
def compute_metrics(y_pred, y_true):
    '''compute weighted F1, ROC-AUC, and accuracy for binary predictions'''
    f1 = f1_score(y_true, y_pred, average='weighted')
    try:
        roc = roc_auc_score(y_true, y_pred)
    except ValueError:
        # roc_auc_score raises when y_true contains only one class
        roc = 0
    acc = accuracy_score(y_true, y_pred)
    return f1, roc, acc
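# Illustrative usage (labels hypothetical): both arguments are 0/1 lists,
#   f1, roc, acc = compute_metrics([1, 0, 0, 1], [1, 1, 0, 1])
# roc falls back to 0 whenever y_true contains only a single class.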
def construct_category_header(category, k, examples_df):
    '''
    Construct the few-shot header for a given category, to be used in prompts.

    Parameters
    ----------
    category : str
    k : int
        Number of examples for each class (examples_df is expected to hold
        k positive and k negative examples).
    examples_df : pandas.DataFrame
        Contains a 'comment' column and a 1/0 label column named after category.

    Returns
    -------
    header : str
    '''
    defn = CATEGORY_DEFINITIONS[category]
    header = f"The following is a list of comments and True/False labels of whether they contain {category}.\n\n"
    header = ' '.join((defn, header))
    # randomize example order
    examples_df = examples_df.sample(frac=1)
    # replace newlines with spaces so each comment stays on a single prompt line
    comments = examples_df['comment'].apply(lambda x: x.replace('\n', ' ')).tolist()
    labels_str = examples_df[category].apply(lambda x: 'False' if x == 0 else 'True').tolist()
    examples_lst = list(zip(comments, labels_str))
    examples = [f"Comment: {comment}\nHas_{category}:{label}\n" for (comment, label) in examples_lst]
    examples_str = '###\n'.join(examples)
    header = ''.join((header, examples_str))
    return header
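# For reference, the header built above has this shape (comments hypothetical):
#   <category definition> The following is a list of comments and True/False
#   labels of whether they contain aspersion.
#
#   Comment: <first example comment>
#   Has_aspersion:True
#   ###
#   Comment: <second example comment>
#   Has_aspersion:False
#   ...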
def construct_prompt(header, query, category):
    '''
    Construct the full prompt for GPT-3.

    Parameters
    ----------
    header : str
        Header for a fixed category (definition plus few-shot examples).
    query : str
        Test comment to classify.
    category : str

    Returns
    -------
    prompt : str
    '''
    query = query.replace('\n', ' ')  # keep the query on a single line
    query_str = f'Comment: {query}\nHas_{category}:'
    query_str = f'###\n{query_str}'
    prompt = ''.join((header, query_str))
    return prompt
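# A full prompt is the header followed by the unanswered query, e.g.:
#   ...header with k positive and k negative examples...
#   ###
#   Comment: <test comment>
#   Has_aspersion:
# The model is expected to complete the last line with 'True' or 'False'.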
openai.api_key = ''  # set your OpenAI API key here
# Define the model to be used
COMPLETIONS_MODEL = "text-davinci-003"
# Define completion parameters
COMPLETIONS_API_PARAMS = {
    # We use a temperature of 0.0 because it gives the most predictable, deterministic answer.
    "temperature": 0.0,
    # the completion only needs to spell out 'True' or 'False', so a few tokens suffice
    "max_tokens": 5,
    "model": COMPLETIONS_MODEL,
}
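# Optional sketch: wrap Completion.create with simple exponential backoff so a
# long run survives transient rate limits (openai.error.RateLimitError is part
# of the legacy openai<1.0 SDK that Completion.create itself requires). The
# batched loop below could call this instead of openai.Completion.create.
import time

def complete_with_retry(prompt_lst, max_retries=3):
    for attempt in range(max_retries):
        try:
            return openai.Completion.create(prompt=prompt_lst, **COMPLETIONS_API_PARAMS)
        except openai.error.RateLimitError:
            time.sleep(2 ** attempt)  # back off: 1s, 2s, 4s, ...
    raise RuntimeError('OpenAI API request failed after retries')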
# read in data
file_path = './data/labeled/final_annotated_data_incivility_3030_processed.pickle'
annotated_df = pd.read_pickle(file_path)
# extract examples without any incivility, from which we'll sample negative examples
def no_incivility(row):
    return row['aspersion'] == 0 and row['namecalling'] == 0 \
        and row['stereotype'] == 0 and row['vulgarity'] == 0 \
        and row['other'] == 0 and row['human_incivility'] == 0
pure_negatives = annotated_df[annotated_df.apply(no_incivility, axis=1)]
k = 5  # number of examples for each class (pos/neg)
seed = 42
batchsize = 10
predictions = dict()
performances = dict()
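# Sanity check (optional): enough fully-negative rows must exist to sample k of them.
assert len(pure_negatives) >= k, 'not enough pure-negative examples for few-shot sampling'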
for category in CATEGORY_DEFINITIONS.keys():
# sample training examples
neg_examples = pure_negatives.sample(n=k, random_state=seed)
pos_df = annotated_df[annotated_df[category]!=0]
pos_examples = pos_df.sample(n=k, random_state=seed)
examples_df = pd.concat([pos_examples, neg_examples])
training_ids = examples_df['id'].tolist()
    test_df = annotated_df[~annotated_df['id'].isin(training_ids)]
# generate header
cat_header = construct_category_header(category, k, examples_df)
test_comments = test_df['comment'].tolist()
    # make predictions with GPT-3, batching prompts to reduce API calls
y_pred_cat_raw = []
for i in range(0, len(test_comments), batchsize):
query_lst = test_comments[i:i+batchsize]
        prompt_lst = [construct_prompt(cat_header, q, category) for q in query_lst]
response = openai.Completion.create(
prompt=prompt_lst,
**COMPLETIONS_API_PARAMS
)
        # batched completions return one choice per prompt; sort by 'index' to preserve prompt order
        response_lst = sorted(response['choices'], key=lambda c: c['index'])
        # strip whitespace so the generated label compares cleanly against 'True'/'False'
        y_pred = [obj['text'].strip() for obj in response_lst]
        y_pred_cat_raw.extend(y_pred)
# save predictions
predictions[category] = y_pred_cat_raw
# convert generated string to int
y_pred_cat_int = [1 if x=='True' else 0 for x in y_pred_cat_raw]
y_true_cat = test_df[category].tolist()
# compute performance metrics
f1, auc, acc = compute_metrics(y_pred_cat_int, y_true_cat)
performances[category] = {'f1': f1, 'auc': auc, 'acc': acc}
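# Print a simple per-category summary of the collected metrics.
for cat, m in performances.items():
    print(f"{cat}: f1={m['f1']:.3f}, auc={m['auc']:.3f}, acc={m['acc']:.3f}")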