-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval.py
77 lines (59 loc) · 1.9 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import sys
from sklearn.metrics.cluster import adjusted_mutual_info_score, adjusted_rand_score, homogeneity_score, completeness_score
from sklearn.metrics import jaccard_similarity_score
import math
from collections import Counter
import pandas as pd
import numpy as np
def eta(data, unit='natural'):
    """Compute the entropy of the empirical distribution of items in ``data``.

    Args:
        data: Sized collection of hashable items (e.g. a list of labels).
        unit: Name of the logarithm base to use: 'shannon' (base 2),
            'natural' (base e) or 'hartley' (base 10).

    Returns:
        A pair ``(entropy, counts)`` where ``counts`` is a ``Counter``
        mapping each distinct item to its number of occurrences.
        For empty ``data`` the sentinel pair ``(0, -1)`` is returned.

    Raises:
        KeyError: if ``data`` is non-empty and ``unit`` is not one of the
            names listed above.
    """
    log_bases = {
        'shannon': 2.,
        'natural': math.exp(1),
        'hartley': 10.,
    }

    total = len(data)
    if total < 1:
        # Callers detect "no data" by comparing the second element to -1.
        return 0, -1

    counts = Counter(data)
    # Looked up only after the empty check so that empty input never raises
    # for an unknown unit.
    log_base = log_bases[unit]

    ent = 0
    for occurrences in counts.values():
        # Every stored count is >= 1, so p is strictly positive and the
        # logarithm is always defined.
        p = float(occurrences) / total
        ent -= p * math.log(p, log_base)
    return ent, counts
if __name__ == '__main__':
    # Usage: <script> <groundtruth_file> <predict_file>
    # Each file holds one label per line; line i of both files refers to the
    # same sample.
    if len(sys.argv) < 3:
        sys.exit("usage: {} <groundtruth_file> <predict_file>".format(sys.argv[0]))

    groundtruth = sys.argv[1]
    predict = sys.argv[2]
    print("predict: ", predict)
    print("groundtruth: ", groundtruth)

    # Trailing whitespace (including the newline) is stripped from each label.
    with open(predict) as f:
        content = [line.rstrip() for line in f]
    with open(groundtruth) as f:
        content1 = [line.rstrip() for line in f]

    if len(content1) == len(content):
        # All metrics are called as metric(groundtruth_labels, predicted_labels).
        # The first two values printed are the *adjusted* variants.
        print("Mutual info score: ", adjusted_mutual_info_score(content1, content))
        print("Rand Index: ", adjusted_rand_score(content1, content))
        print("Homogeneity Score: ", homogeneity_score(content1, content))
        print("Completeness Score: ", completeness_score(content1, content))
        # NOTE(review): jaccard_similarity_score appears to have been
        # deprecated in scikit-learn 0.21 and removed in 0.23; on newer
        # versions the file-level import fails. Confirm the target version
        # before migrating to jaccard_score / accuracy_score.
        print("Jaccard Similarity Score: ", jaccard_similarity_score(content1, content))

        # Collect the ground-truth labels of every predicted cluster in one
        # pass. Dict insertion order makes the report order deterministic
        # (first appearance of each predicted label in the file).
        clusters = {}
        for truth_label, predicted_label in zip(content1, content):
            clusters.setdefault(predicted_label, []).append(truth_label)
        print(len(clusters))

        en_arr = []
        for members in clusters.values():
            # Each cluster holds at least one member, so eta() never returns
            # its empty-input sentinel here and the second value is a Counter.
            en, label_counts = eta(members, "shannon")
            print(en)
            en_arr.append(en)
            for key, value in label_counts.items():
                print("\t", key, value)

        # Summary of the per-cluster entropies (base 2).
        print(np.average(en_arr))
        print(np.median(en_arr))
    else:
        print("Lengths of two groups are not equal!!!")