-
Notifications
You must be signed in to change notification settings - Fork 0
/
statsCalculator.py
74 lines (59 loc) · 2.66 KB
/
statsCalculator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
import csv
from sklearn.metrics import f1_score
from sklearn import metrics
def _first_component(vector_text):
    """Return the first number found in a bracketed vector string.

    Brackets, double quotes and commas are treated as separators, and runs of
    whitespace are collapsed, so ``[0.9 0.1]``, ``[ 0.9  0.1]`` and
    ``[0.9, 0.1]`` all yield ``0.9``.

    Raises:
        ValueError: if the text holds no number at all, or its first token is
            not parseable as a float.
    """
    separators = str.maketrans('[]",', '    ')
    tokens = vector_text.translate(separators).split()
    if not tokens:
        raise ValueError(f"no numeric component in prediction vector: {vector_text!r}")
    return float(tokens[0])


def makesheetOOD(filename, outputname,
                 categories_file='ood_categories.csv',
                 ratings_file='oodVectors-200-new.csv'):
    """Combine prediction vectors with categories and true ratings into one CSV.

    Reads the ``predictionVector`` column of ``filename`` and derives a
    predicted rating for each row: ``'0'`` when the first component of the
    vector is greater than 0.5, otherwise ``'1'``. The result is written to
    ``outputname`` with the columns ``predictionVector`` (the original text
    wrapped in double quotes), ``category``, ``trueRating`` and
    ``predictedRating``.

    Args:
        filename: CSV file with a ``predictionVector`` column.
        outputname: Path of the CSV file to write.
        categories_file: CSV file with a ``category`` column, one row per
            prediction. Defaults to the path the function always used.
        ratings_file: CSV file with a ``rating`` column, one row per
            prediction. Defaults to the path the function always used.

    Raises:
        ValueError: if a prediction vector cannot be parsed, or (raised by
            pandas) if the three inputs do not have the same number of rows.
    """
    predictions = pd.read_csv(filename)
    vector_texts = [str(value) for value in predictions['predictionVector'].tolist()]

    # Only the first component decides the label; the rest of the vector is
    # carried through to the output untouched.
    pred_rating = [
        '0' if _first_component(text) > 0.5 else '1'
        for text in vector_texts
    ]
    # The vector text is wrapped in literal double quotes by hand because the
    # file is written with QUOTE_NONE below.
    vectors = ['"' + text + '"' for text in vector_texts]

    categories = pd.read_csv(categories_file)['category'].tolist()
    overallrating = pd.read_csv(ratings_file)['rating'].tolist()

    dftotal = pd.DataFrame(data={
        "predictionVector": vectors,
        "category": categories,
        "trueRating": overallrating,
        "predictedRating": pred_rating,
    })
    dftotal.to_csv(outputname, quoting=csv.QUOTE_NONE, escapechar=' ', index=False)
def statCalcOOD(filename, categories=None):
    """Print per-category F1 and ROC AUC scores for an out-of-domain results CSV.

    The CSV must have ``category``, ``trueRating`` and ``predictedRating``
    columns. For every category name the matching rows are selected and
    ``f1_score`` and ``metrics.roc_auc_score`` are computed from their true and
    predicted ratings. Two dictionaries keyed by category name are printed:
    first the F1 scores, then the ROC AUC scores.

    Args:
        filename: Path of the results CSV.
        categories: Iterable of category names to score. When omitted, the
            built-in list of 24 category names is used.
    """
    if categories is None:
        categories = ['Video_Games', 'Luxury_Beauty', 'Patio_Lawn_and_Garden', 'Prime_Pantry', 'Musical_Instruments', 'Digital_Music', 'Software', 'Automotive', 'Industrial_and_Scientific', 'Cell_Phones_and_Accessories', 'All_Beauty', 'Grocery_and_Gourmet_Food', 'Electronics', 'Gift_Cards', 'Office_Products', 'CDs_and_Vinyl', 'Tools_and_Home_Improvement', 'Home_and_Kitchen', 'Sports_and_Outdoors', 'Arts_Crafts_and_Sewing', 'Clothing_Shoes_and_Jewelry', 'Magazine_Subscriptions', 'Toys_and_Games', 'Pet_Supplies']

    df = pd.read_csv(filename)
    f1_scorevals = {}
    roc_vals = {}
    for name in categories:
        # A vectorised filter replaces a row-by-row scan of the whole frame
        # for every category; row order within the category is preserved.
        in_category = df[df['category'] == name]
        trueRating = in_category['trueRating'].tolist()
        predictedRating = in_category['predictedRating'].tolist()

        f1_scorevals[name] = f1_score(trueRating, predictedRating)
        try:
            roc_vals[name] = metrics.roc_auc_score(trueRating, predictedRating)
        except ValueError:
            # Deliberate best-effort: a category whose ROC AUC cannot be
            # computed is simply left out of roc_vals. Presumably this covers
            # categories where only one class is present; confirm against the
            # installed scikit-learn version.
            continue
    print(f1_scorevals)
    print(roc_vals)
def statCalcInDomain(filename):
    """Print the F1 score and ROC AUC for an in-domain results CSV.

    The CSV must have ``trueRating`` and ``predictedRating`` columns. The F1
    score is printed on its own line, followed by the ROC AUC prefixed with
    ``rocval: ``. Any exception raised by the metric functions propagates to
    the caller.

    Args:
        filename: Path of the results CSV.
    """
    results = pd.read_csv(filename)
    y_true = results['trueRating'].tolist()
    y_pred = results['predictedRating'].tolist()

    # Each metric is computed immediately before it is printed, so the F1 line
    # is already out if the ROC AUC computation raises.
    print(f1_score(y_true, y_pred))
    print("rocval: ", metrics.roc_auc_score(y_true, y_pred))