-
Notifications
You must be signed in to change notification settings - Fork 1
/
formatResults.py
62 lines (54 loc) · 1.87 KB
/
formatResults.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python
'''
Formatting the results of the dataset tests so they can be compared
'''
def main(resultsFileName="ZackCorpusResults.csv"):
    '''
    Condense a results .csv into one row of weighted scores per query file.

    The first line of the input is treated as a header and skipped. The
    remaining lines are consumed in groups of six rows. Each row is split on
    ',' and read as: column 0 = the name written at the start of the output
    row, columns 3/5/7/9/11 = the five matched document names in rank order,
    column 4 = a percentage attached to the first match. Columns 1 and 2 are
    not used here (presumably the tool and model names - confirm against the
    producer of the input file).

    Within a group every matched document collects 5/4/3/2/1 points for
    appearing in rank 1..5 of a row. A row's score is then
    (points of its first match / 30.0) * (its percentage / 100).

    The output is written next to the input, with the last four characters
    of the input name (".csv") replaced by "_Scores.csv". Each group's name
    is also printed to stdout as progress output.

    resultsFileName -- path of the input .csv; defaults to the original
                       hard-coded "ZackCorpusResults.csv".

    A trailing group with fewer than six rows is ignored. Raises IOError /
    OSError if a file cannot be opened, and IndexError / ValueError if a row
    inside a complete group is malformed.
    '''
    rowsPerGroup = 6
    # (column holding a matched document name, points awarded for that rank)
    rankPoints = ((3, 5), (5, 4), (7, 3), (9, 2), (11, 1))
    # A document ranked first by all six rows collects 6 * 5 = 30 points, so
    # dividing by 30 scales that case to 1.0.
    maxPoints = 30.0

    resultsScoreFileName = resultsFileName[:-4] + "_Scores.csv"

    with open(resultsFileName, 'r') as fin:
        data = fin.readlines()

    # Only complete groups after the header are processed; counting the
    # header as data would schedule a group that runs past the end of file.
    groupCount = max(len(data) - 1, 0) // rowsPerGroup

    with open(resultsScoreFileName, 'w') as fout:
        fout.write("Filename,SciKit-tfidf,SciKit-lsa,SciKit-lda,GenSim-tfidf,"
                   "GenSim-lsa,GenSim-lda\n")
        for group in range(groupCount):
            start = 1 + group * rowsPerGroup  # +1 skips the header line
            rows = [row.split(',')
                    for row in data[start:start + rowsPerGroup]]

            # Tally the rank points of every document named in the group.
            matches = {}
            for info in rows:
                for column, points in rankPoints:
                    name = info[column]
                    matches[name] = matches.get(name, 0) + points

            line = rows[0][0]
            print(line)
            for info in rows:
                # Weight the row's percentage by how strongly the whole
                # group agrees on that row's first match.
                score = (matches[info[3]] / maxPoints) * (float(info[4]) / 100)
                line += "," + str(score)
            fout.write(line + '\n')
# Run the formatter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()