forked from kzlecha/RecommenderSystem
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalgorithm_comparision.py
146 lines (120 loc) · 4.63 KB
/
algorithm_comparision.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from datetime import datetime
from pandas import DataFrame, Series, read_csv
# HELPER METHODS
def sim(L1, L2, D1, D2):
    '''
    @param L1: set of items user1 liked
    @param L2: set of items user2 liked
    @param D1: set of items user1 disliked
    @param D2: set of items user2 disliked
    ---
    Compare the sets and return the signed set similarity of the two users.
    Similarity Calculation:
        s(u1, u2) = (|L1 intersection L2| + |D1 intersection D2| -
                     |L1 intersection D2| - |L2 intersection D1|) /
                    |L1 union L2 union D1 union D2|
    Shared likes/dislikes raise the score, opposite opinions lower it.
    Returns 0.0 when all four sets are empty (nothing to compare).
    The arguments must be set-like (set or frozenset): the set methods
    are called on L1, D1 and L2 directly.
    '''
    divisor = len(L1.union(L2, D1, D2))
    if divisor == 0:
        # No rated items at all: avoid dividing by zero and report
        # "no evidence of similarity" instead.
        return 0.0
    agreements = len(L1.intersection(L2)) + len(D1.intersection(D2))
    disagreements = len(L1.intersection(D2)) + len(L2.intersection(D1))
    return (agreements - disagreements) / divisor
def compareInv(A,B):
    '''
    @param A: list of rankings
    @param B: list of rankings, indexed over the same positions as A
    ---
    Walk every ordered pair of positions (i, j) with both i and j ranging
    over len(A), and count the pairs where i != j and A[i] differs from
    B[j]. The count is returned as an int (0 for an empty A).
    '''
    size = len(A)
    return sum(
        1
        for i, left in enumerate(A)
        for j in range(size)
        # B[j] is read before the position check, as in a plain nested loop
        if left != B[j] and i != j
    )
# ALGORITHMS
def recommend_brute_force(likes, dislikes, index):
    '''
    @param likes: matrix of n users by m items likes
    @param dislikes: matrix of n users by m items dislikes
    @param index: int index of the given user
    ---
    Return the list of recommended items via the pairwise (compareInv)
    comparison of the given user's rows against every other user's rows.

    Each other user gets a score equal to
        compareInv(given likes, their likes) +
        compareInv(given dislikes, their dislikes)
    and users are visited from the lowest score to the highest. The items
    they liked are collected in that order, skipping duplicates and
    anything the given user has already liked or disliked.
    '''
    given_user_likes = likes.loc[index].values.tolist()
    given_user_dislikes = dislikes.loc[index].values.tolist()

    # create the series of similarity scores (lower score = visited earlier).
    # The given user's own entry stays at the initial 0; all of their items
    # are filtered out below as already reviewed.
    similarity_series = Series(0, index=likes.index)
    for i in likes.index:
        if i == index:
            continue
        like_score = compareInv(given_user_likes,
                                likes.loc[i].values.tolist())
        # compare against the OTHER user's dislikes (row i), not the given
        # user's own row, otherwise this term is identical for every user
        dislike_score = compareInv(given_user_dislikes,
                                   dislikes.loc[i].values.tolist())
        similarity_series.loc[i] = like_score + dislike_score

    # sort the users by score
    similarity_series = similarity_series.sort_values(ascending=True)

    # everything the user has reviewed is in their likes and dislikes;
    # built as a separate set so the like list is not mutated
    already_reviewed = set(given_user_likes) | set(given_user_dislikes)

    list_items = []
    seen = set()  # constant-time duplicate check for list_items
    for user_id in similarity_series.index:
        # get the items the user has reviewed and liked
        for item in likes.loc[user_id].values.tolist():
            if item not in seen and item not in already_reviewed:
                seen.add(item)
                list_items.append(item)
    return list_items
def recommend_set_operations(likes, dislikes, index):
    '''
    @param likes: matrix of n users by m items likes
    @param dislikes: matrix of n users by m items dislikes
    @param index: int index of the given user
    ---
    Return the list of recommended items via the set similarity
    calculation (see sim).

    Every other user is scored with sim(given likes, their likes,
    given dislikes, their dislikes) and users are visited from the highest
    score to the lowest. The items they liked are collected in that order
    (row order within a user), skipping duplicates and anything the given
    user has already liked or disliked.
    '''
    given_user_likes = set(likes.loc[index])
    given_user_dislikes = set(dislikes.loc[index])

    # create the series of similarity scores. It is initialised with a
    # float because sim() returns fractional values; assigning those into
    # an integer Series relies on silent upcasting, which newer pandas
    # versions deprecate.
    # The given user's own entry stays at the initial 0.0; all of their
    # items are filtered out below as already reviewed.
    similarity_series = Series(0.0, index=likes.index)
    for i in likes.index:
        if i == index:
            continue
        user_likes = set(likes.loc[i])
        user_dislikes = set(dislikes.loc[i])
        similarity_series.loc[i] = sim(given_user_likes, user_likes,
                                       given_user_dislikes, user_dislikes)

    # sort the users, most similar first
    similarity_series = similarity_series.sort_values(ascending=False)

    # everything the user has reviewed is in their likes and dislikes;
    # built as a new set so given_user_likes is not mutated
    already_reviewed = given_user_likes | given_user_dislikes

    list_items = []
    seen = set()  # constant-time duplicate check for list_items
    for user_id in similarity_series.index:
        # get the items the user has reviewed and liked, in row order so
        # the result does not depend on set iteration order
        for item in likes.loc[user_id]:
            if item not in seen and item not in already_reviewed:
                seen.add(item)
                list_items.append(item)
    return list_items
# Benchmark driver: load the like/dislike tables, then time both
# recommendation algorithms for one user and print the runtimes.
# read in data (paths are relative to the current working directory)
# NOTE(review): `movies` is loaded but not referenced again in this section.
movies = read_csv('data/movies.csv')
# the "User ID" column becomes the row index, so users are looked up by ID
df_likes = read_csv('data/users_likes.csv', index_col="User ID")
df_dislikes = read_csv('data/users_dislikes.csv', index_col = "User ID")
# select user (a "User ID" label; it must be present in both tables)
user = 6925
# time the brute-force recommender; the returned recommendations are
# discarded, only the elapsed wall-clock time is kept
start = datetime.now()
recommend_brute_force(df_likes, df_dislikes, user)
stop = datetime.now()
brute_force_time = stop - start
print("Time to calculate with inversions(brute force):", brute_force_time)
# time the set-operation recommender the same way
start = datetime.now()
recommend_set_operations(df_likes, df_dislikes, user)
stop = datetime.now()
set_op_time = stop - start
print("Time to calculate with set operations:", set_op_time)
# positive difference means the brute-force version took longer
difference = brute_force_time - set_op_time
print("Difference in runtime:", difference)