-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathOverlap_analysis.py
70 lines (48 loc) · 2.36 KB
/
Overlap_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import numpy as np
import collections
from sklearn.metrics import cohen_kappa_score
def calculate_overlap_labels(list1, list2):
    """Return the fraction of positions at which two label sequences agree.

    Labels are compared position by position over the common prefix of the
    two sequences. The number of matches is divided by the length of the
    longer sequence, so unmatched trailing positions count as disagreements.

    Args:
        list1: First sequence of labels.
        list2: Second sequence of labels.

    Returns:
        A float between 0.0 and 1.0; 0.0 when both sequences are empty.
    """
    longest = max(len(list1), len(list2))
    if longest == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0
    # zip stops at the shorter sequence, so a shorter list2 no longer raises
    # IndexError, while the denominator still penalises the length mismatch.
    matches = sum(1 for left, right in zip(list1, list2) if left == right)
    return matches / float(longest)
def calculate_overlap(list1, list2):
    """Return the overlap ratio between the distinct items of two collections.

    The number of distinct items present in both inputs is divided by the
    size of the larger of the two de-duplicated inputs.

    Args:
        list1: First collection of hashable items.
        list2: Second collection of hashable items.

    Returns:
        A float between 0.0 and 1.0; 0.0 when both inputs are empty.
    """
    # Build each set once and reuse it for both the intersection and the size.
    unique1 = set(list1)
    unique2 = set(list2)
    larger = max(len(unique1), len(unique2))
    if larger == 0:
        # Both inputs are empty; avoid dividing by zero.
        return 0.0
    return len(unique2 & unique1) / float(larger)
def calculate_alignment_overlap(list1, list2):
    """Return the fraction of positions in list1 whose value equals list2's.

    Args:
        list1: Reference sequence; its length is the denominator.
        list2: Sequence compared against list1 position by position.

    Returns:
        Matches divided by len(list1) as a float; 0.0 when list1 is empty.

    Raises:
        IndexError: If list2 is shorter than list1.
    """
    total = len(list1)
    if total == 0:
        # No positions to align; avoid dividing by zero.
        return 0.0
    # list2 is indexed on purpose: a too-short list2 still fails loudly
    # instead of being silently truncated.
    matches = sum(1 for pos, value in enumerate(list1) if value == list2[pos])
    return matches / float(total)
def calculate_kappa(list1, list2):
    """Return the Cohen's kappa score computed by scikit-learn.

    Both arguments are forwarded unchanged, in the same order, to
    ``sklearn.metrics.cohen_kappa_score``.
    """
    kappa = cohen_kappa_score(list1, list2)
    return kappa
def calculate_all_overlap(list1, list2, list3):
    """Count the distinct items that occur in all three collections."""
    in_first_two = set(list2) & set(list1)
    in_all_three = set(list3) & in_first_two
    return len(in_all_three)
def get_union(list1, list2):
    """Return a list of the distinct items found in either collection.

    The order of the returned list follows set iteration order and should
    not be relied upon.
    """
    combined = set(list2) | set(list1)
    return list(combined)
def calculate_alignment_union(list1, list2):
    """Return a 0/1 list marking positions where the two values sum to >= 1.

    One entry is produced per position of list1; list2 is read at the same
    index.
    """
    flags = []
    for pos, left in enumerate(list1):
        combined = left + list2[pos]
        if combined >= 1:
            flags.append(1)
        else:
            flags.append(0)
    return flags
def calculate_alignment_union_soft(list1, list2):
    """Return the position-wise sum of list1 and list2.

    One entry is produced per position of list1; list2 is read at the same
    index. Unlike a thresholded union, the raw sums are returned.
    """
    sums = []
    for pos, left in enumerate(list1):
        sums.append(left + list2[pos])
    return sums
def get_intersection(list1, list2):
    """Return a list of the distinct items present in both collections.

    The order of the returned list follows set iteration order and should
    not be relied upon.
    """
    common = set(list2) & set(list1)
    return list(common)
def get_intersection_withIDF(list1, list2, IDF_vals, default_idf=3):
    """Return the summed IDF weight of the distinct terms shared by two lists.

    Args:
        list1: First collection of hashable terms.
        list2: Second collection of hashable terms.
        IDF_vals: Mapping from term to its IDF weight.
        default_idf: Weight added for a shared term that is missing from
            IDF_vals. Defaults to 3, the value that was previously hard-coded.

    Returns:
        The sum of the weights of the shared terms; 0 when nothing is shared.
    """
    # Terms are de-duplicated on purpose: counting repeated occurrences (TF)
    # in the coverage was tried and made no difference.
    shared_terms = set(list2).intersection(set(list1))
    total = 0
    for term in shared_terms:
        if term in IDF_vals:
            total += IDF_vals[term]
            continue
        # Unexpected: every shared term is supposed to have an IDF entry.
        total += default_idf
        print ("this IDF value not found case should not come. ")
    return total
def get_union_dummy(list1, list2):
    """Return a 0/1 list marking positions where the two values sum to >= 1.

    Presumably the inputs are binary indicator vectors of equal length
    (confirm against callers); one entry is produced per position of list1.
    """
    flags = []
    for pos, left in enumerate(list1):
        is_set = left + list2[pos] >= 1
        flags.append(1 if is_set else 0)
    return flags
def get_intersection_dummy(list1, list2):
    """Return a 0/1 list marking positions where both values equal 1.

    A position is flagged only when list2's value is 1 and list1's value
    equals it; one entry is produced per position of list1.
    """
    flags = []
    for pos in range(len(list1)):
        both_one = list2[pos] == 1 and list1[pos] == list2[pos]
        if both_one:
            flags.append(1)
        else:
            flags.append(0)
    return flags
def get_normalized_scores(dict1):
    """Rescale the values of dict1 in place so that they sum to 1.

    Args:
        dict1: Mapping from any hashable key to a numeric score.

    Returns:
        The same dict object, with each value replaced by its share of the
        total. An empty dict is returned unchanged.

    Raises:
        ZeroDivisionError: If dict1 is non-empty and its values sum to zero.
    """
    # Compute the total once rather than once per entry.
    total = sum(dict1.values())
    # Write each normalized value back under its own key. The previous code
    # indexed a list of scores by the dict key, which failed for non-integer
    # keys and mixed up values for integer keys not in 0..n-1 order.
    for key in dict1:
        dict1[key] = float(dict1[key]) / total
    return dict1