-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplots_lemmas.py
60 lines (47 loc) · 2.25 KB
/
plots_lemmas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#! /usr/bin/env python3
import matplotlib.pyplot as plt
from collections import Counter
def extract_and_plot(
    plm,
    all_or_first,
    *,
    data_dir='/home/federica/vallex-pokus/predicting_frames/sallust-bert-GH',
    out_dir='./plots',
):
    """Count the candidates listed for one model and save a bar chart of them.

    Reads ``{data_dir}/{plm.lower()}_constrained_candidate_senses.tsv``,
    skips its first line, and takes the 7th tab-separated field of every
    remaining line. That field is split on ``;`` into candidate strings
    (presumably verb lemmas -- confirm against the TSV producer).

    Args:
        plm: Model name. Lower-cased to build the input and output file
            names; used verbatim as the plot title.
        all_or_first: ``'all'`` counts every candidate on each line and plots
            the 40 most frequent. ``'first'`` counts only the first candidate
            on each line, takes the 20 most frequent and then keeps only
            those seen more than 4 times.
        data_dir: Directory containing the input TSV.
        out_dir: Existing directory the PNG is written to, as
            ``{plm.lower()}_{all_or_first}_lemmas.png``.

    Raises:
        ValueError: If ``all_or_first`` is neither ``'all'`` nor ``'first'``.
        OSError: If the input file cannot be opened.
        IndexError: If a non-empty data line has fewer than 7 fields.
    """
    # Resolve the per-mode settings before any I/O so that an unknown mode
    # (or a file without data rows) can never leave them undefined.
    if all_or_first == 'all':
        how_many, extra, figsize = 40, 8, (20, 12)
    elif all_or_first == 'first':
        how_many, extra, figsize = 20, 0.25, (12, 15)
    else:
        raise ValueError(
            f"all_or_first must be 'all' or 'first', got {all_or_first!r}"
        )
    take_all = all_or_first == 'all'

    data = []
    tsv_path = f'{data_dir}/{plm.lower()}_constrained_candidate_senses.tsv'
    with open(tsv_path, 'r', encoding='utf8') as f:
        # Skip the first line (presumably a header); the default keeps a
        # completely empty file from raising StopIteration.
        next(f, None)
        for line in f:
            if not line.strip('\r\n'):
                # An empty (e.g. trailing) line carries no 7th field.
                continue
            verbs = [v.strip('\n') for v in line.split('\t')[6].split(';')]
            if take_all:
                data.extend(verbs)
            else:
                data.append(verbs[0])

    # most_common() yields (item, count) pairs by decreasing count; dict()
    # keeps that order for plotting.
    counted_data = dict(Counter(data).most_common(how_many))
    if not take_all:
        counted_data = {k: v for k, v in counted_data.items() if v > 4}

    # PLOT
    fig = plt.figure(figsize=figsize)
    try:
        if take_all:
            plt.subplots_adjust(bottom=0.25)
        else:
            # Wider left margin leaves room for the y-axis labels.
            plt.subplots_adjust(left=0.3, right=0.9)
        plt.barh(list(counted_data.keys()), list(counted_data.values()))
        plt.yticks(rotation='horizontal', fontsize=20)
        # Write each count just past the end of its bar; `extra` is the
        # horizontal offset in data units.
        for key, value in counted_data.items():
            plt.text(value + extra, key, str(value), ha='left', va='center',
                     rotation='horizontal', fontsize=15)
        # Title comes from the argument, not from a global set by the caller.
        plt.title(plm)
        plt.tight_layout()
        plt.savefig(f'{out_dir}/{plm.lower()}_{all_or_first}_lemmas.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
if __name__ == "__main__":
    # Script entry point: for every model, produce the 'all' plot first and
    # then the 'first' plot.
    # NOTE(review): `p` and `plms` are module-level names when run as a
    # script; keep `p` stable unless every reader of the global `p` in this
    # file has been checked.
    plms = ['mBERT', 'Latin-BERT', 'PhilBERTa', 'PhilTa']
    for p in plms:
        for mode in ('all', 'first'):
            extract_and_plot(p, all_or_first=mode)