-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2.avg_COH_count.py
208 lines (155 loc) · 7.53 KB
/
2.avg_COH_count.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# run to produce graph of average per-document count for each coherence relation type
# (compares grover and human)
import csv
import re
import os
import sys
import scipy
import scipy.stats as st
from scipy.stats import pearsonr
import statistics
from statistics import mean
import time
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import copy
from clean_up_SE_coh import simplify_all_SE_types
from clean_up_SE_coh import clean_up_coh_rels
from extract_annotations import fill_in_human_grover, fill_in_containers
### determine path to the annotations
file_path = os.path.abspath(__file__)
path = os.path.dirname(file_path)+"/"

### Extract and print out document-annotator assignments
# NOTE(review): `regex` is not referenced in the visible part of this script;
# it is kept because other code may rely on the module-level name.
regex = re.compile('[^a-zA-Z]')

# Maps each annotator id to the list of document indices assigned to them.
annotators = {"0":[],"2":[],"1":[]}

# NOTE(review): the CSV is opened relative to the current working directory,
# not relative to `path` -- confirm that this is intended.
# newline='' is what the csv module requires so that quoted fields containing
# line breaks are parsed correctly.
with open('1.info.csv', 'r', newline='') as f:
    for idx, row in enumerate(csv.reader(f)):
        # Skip the first row, rows with fewer than two cells, and any row
        # that contains a "file name" cell.
        if idx == 0 or len(row) <= 1 or "file name" in row:
            continue
        # Keep only the digits of the first cell; they form the document index.
        index = re.sub('[^0-9]', '', row[0])
        # The second cell is a comma-separated list of annotator ids; split it
        # once per row instead of once per annotator key.
        assigned = [i.strip() for i in row[1].split(",")]
        for key in annotators:
            if key.lower() in assigned:
                annotators[key].append(int(index))

print("***")
print("per annotator count")
print("***")
for annotator in annotators:
    print(annotator)
    print(len(annotators[annotator]))
### EXTRACT THE HUMAN, GROVER, OR DAVINCI SOURCE OF EACH DOCUMENT
# Lists for keeping track of human and AI generations.
# NOTE(review): presumably fill_in_human_grover fills these lists in place
# (its return value is not used here) -- confirm against extract_annotations.
h_docs, g_docs, d_docs = [], [], []
fill_in_human_grover(h_docs, g_docs, d_docs)

### Extract Situation Entities, Coherence Relations and Document-level ratings
# from each annotated document

# One container per (source, annotation kind) pair; every container starts as
# a fresh dict holding an empty dict for each annotator id "0", "2" and "1".
(
    G_SE_container, G_Coh_container, G_Doc_container,
    H_SE_container, H_Coh_container, H_Doc_container,
    D_SE_container, D_Coh_container, D_Doc_container,
) = ({annotator_id: {} for annotator_id in ("0", "2", "1")} for _ in range(9))

# Bookkeeping lists handed to fill_in_containers
# (to prevent double-counting of shared documents).
SE_accounted_for = []
Coh_accounted_for = []

doc_counter = 0
doc_counter = fill_in_containers(
    h_docs, g_docs, d_docs,
    G_SE_container, G_Coh_container, G_Doc_container,
    H_SE_container, H_Coh_container, H_Doc_container,
    D_SE_container, D_Coh_container, D_Doc_container,
    SE_accounted_for, Coh_accounted_for, doc_counter,
)

# Coherence relation type codes, in the order they are plotted.
Coh_types = [
    'elab', 'temp', 've', 'ce', 'same', 'contr',
    'sim', 'attr', 'examp', 'cond', 'deg', 'gen',
]
def annotator_tag(Doc_container):
    """Return a re-keyed copy of *Doc_container* with annotator-tagged doc ids.

    *Doc_container* maps an annotator id to a dict keyed by document id.
    In the result every inner key becomes ``str(doc_id) + str(annotator)``;
    the values are carried over unchanged and the input is not modified.
    """
    return {
        annotator: {
            str(doc_id) + str(annotator): value
            for doc_id, value in per_doc.items()
        }
        for annotator, per_doc in Doc_container.items()
    }
# Re-key the human and Grover document/coherence containers so that every
# document id carries its annotator suffix.
H_Doc_container, G_Doc_container = (
    annotator_tag(H_Doc_container),
    annotator_tag(G_Doc_container),
)
H_Coh_container, G_Coh_container = (
    annotator_tag(H_Coh_container),
    annotator_tag(G_Coh_container),
)
########################################################
### AVERAGE PER-DOCUMENT COUNT OF EACH COH BETWEEN GROVER AND HUMAN DOCS
def Coh_counts(Doc_container, Coh_container, type):
    """Count, per document, the coherence relations of one type.

    Called by avg_count_calculator.

    Parameters:
        Doc_container: dict mapping annotator id -> dict keyed by document id.
            Only the keys are used; they determine which documents are counted.
        Coh_container: dict mapping annotator id -> dict mapping document id
            -> iterable of relation records. Element 4 of each record is
            compared against *type*.
        type: the relation type to count. The name shadows the builtin but is
            kept so existing keyword callers keep working.

    Returns:
        dict mapping document id -> ``{type: count}``. Every document listed
        in *Doc_container* gets an entry; the count is 0 when the document has
        no relation of that type or no entry in *Coh_container* at all.
    """
    quality_dict = {}
    for annotator, docs in Doc_container.items():
        # Look documents up directly instead of scanning every Coh id for a
        # match with doc_id.
        coh_by_doc = Coh_container.get(annotator, {})
        for doc_id in docs:
            # A document without coherence annotations counts as zero rather
            # than raising KeyError or being dropped from the result.
            relations = coh_by_doc.get(doc_id, ())
            matches = sum(1 for relation in relations if relation[4] == type)
            per_doc = quality_dict.setdefault(doc_id, {})
            # Accumulate so a doc id seen under several annotators adds up.
            per_doc[type] = per_doc.get(type, 0) + matches
    return quality_dict
def avg_count_calculator(Doc_container, Coh_container):
    """Compute the mean and standard error of per-document counts per COH type.

    For each type in the module-level ``Coh_types`` list, the per-document
    counts from ``Coh_counts`` are flattened into one list, then their mean
    and ``scipy.stats.sem`` are rounded to two decimals.

    Returns:
        dict with two lists aligned with ``Coh_types``:
        ``'Average number'`` (the means) and ``'Standard errors'``.
        Both lists feed the bar graph built below.
    """
    averages = []
    standard_errors = []
    for relation_type in Coh_types:
        per_doc = Coh_counts(Doc_container, Coh_container, relation_type)
        # Flatten {doc_id: {type: count}} into a plain list of counts.
        counts = [
            count
            for by_type in per_doc.values()
            for count in by_type.values()
        ]
        averages.append(round(mean(counts), 2))
        standard_errors.append(round(scipy.stats.sem(counts), 2))
    return {
        'Average number': averages,
        'Standard errors': standard_errors,
    }
# Compute the per-type statistics for both document sources.
human_data = avg_count_calculator(H_Doc_container, H_Coh_container)
grover_data = avg_count_calculator(G_Doc_container, G_Coh_container)

human_means = human_data['Average number']
human_sem = human_data['Standard errors']
grover_means = grover_data['Average number']
grover_sem = grover_data['Standard errors']

# x locations for the groups and the width of the bars
ind = np.arange(len(human_means))
width = 0.25

# Two bars per coherence type: human shifted left of the tick, Grover right,
# each with its standard error as the error bar.
fig, ax = plt.subplots()
rects1 = ax.bar(
    ind - width/2, human_means, width,
    yerr=human_sem, label='Human',
)
rects2 = ax.bar(
    ind + width/2, grover_means, width,
    yerr=grover_sem, label='Grover',
)

# Axis label, title, custom x-axis tick labels and legend.
ax.set_ylabel('Average Per-Document Count')
ax.set_title('Average Per-Document Count for Each Coherence Relation Type')
ax.set_xticks(ind)
plt.xticks(rotation=45)
ax.set_xticklabels([
    'elab',
    'temporal',
    'violated\nexpectation',
    'cause-effect',
    'same',
    'contrast',
    'similarity',
    'attribution',
    'example',
    'condition',
    'degenerate',
    'generalization',
])
ax.legend()
def autolabel(rects, xpos='center'):
    """Write each bar's height as a text label just above the bar.

    Parameters:
        rects: iterable of bar objects providing ``get_height``, ``get_x``
            and ``get_width`` (here, the containers returned by ``ax.bar``).
        xpos: which side of the bar's center the text sits on; one of
            ``'center'``, ``'right'`` or ``'left'``.

    The labels are drawn on the module-level ``ax``.
    """
    # Text placed to the right of the center is left-aligned, and vice versa.
    alignments = {'center': 'center', 'right': 'left', 'left': 'right'}
    directions = {'center': 0, 'right': 1, 'left': -1}
    for bar in rects:
        bar_height = bar.get_height()
        bar_mid_x = bar.get_x() + bar.get_width() / 2
        ax.annotate(
            '{}'.format(bar_height),
            xy=(bar_mid_x, bar_height),
            # 3 points of horizontal offset (signed by xpos), 3 points upward
            xytext=(directions[xpos] * 3, 3),
            textcoords="offset points",
            ha=alignments[xpos],
            va='bottom',
        )
# Label the human bars on their left side and the Grover bars on their right,
# then lay out and display the figure.
for bars, side in ((rects1, "left"), (rects2, "right")):
    autolabel(bars, side)
fig.tight_layout()
plt.show()
########################################################