Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizing operation using redis internal functions. #108

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 130 additions & 53 deletions lib/recommendable/helpers/calculations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,77 @@ class << self
# @return [Float] the numeric similarity between this user and the passed user
# @note Similarity values are asymmetrical. `Calculations.similarity_between(user_id, other_user_id)` will not necessarily equal `Calculations.similarity_between(other_user_id, user_id)`
def similarity_between(user_id, other_user_id)
  # Ids are normalised to strings before being handed to Redis.
  uid = user_id.to_s
  other_uid = other_user_id.to_s
  own_similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(uid)

  redis = Recommendable.redis
  script = similarity_between_lua

  # KEYS: both user ids, this user's similarity set, then the two namespace
  # segments the script uses to assemble the liked/disliked set names.
  script_keys = [
    uid,
    other_uid,
    own_similarity_set,
    Recommendable.config.redis_namespace,
    Recommendable.config.user_class.to_s.tableize
  ]

  # ARGV: one tableized name per ratable class.
  ratable_names = Recommendable.config.ratable_classes.map do |ratable|
    ratable.to_s.tableize
  end

  # The script replies with a string; convert it to the documented Float.
  reply = redis.eval(script, script_keys, ratable_names)
  reply.to_f
end

similarity = liked_count = disliked_count = 0
in_common = Recommendable.config.ratable_classes.each do |klass|
liked_set = Recommendable::Helpers::RedisKeyMapper.liked_set_for(klass, user_id)
other_liked_set = Recommendable::Helpers::RedisKeyMapper.liked_set_for(klass, other_user_id)
disliked_set = Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, user_id)
other_disliked_set = Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, other_user_id)
def similarity_between_lua_func
<<-LUA.strip_heredoc
local function similarity_between(klasses, user_id, other_user_id, similarity_set, redis_namespace, user_namespace)
local similarity = 0
local liked_count = 0
local disliked_count = 0

results = Recommendable.redis.pipelined do
# Agreements
Recommendable.redis.sinter(liked_set, other_liked_set)
Recommendable.redis.sinter(disliked_set, other_disliked_set)
for i=1, #klasses do
local klass = klasses[i]

# Disagreements
Recommendable.redis.sinter(liked_set, other_disliked_set)
Recommendable.redis.sinter(disliked_set, other_liked_set)
local liked_set = table.concat({redis_namespace, user_namespace, user_id, 'liked_'..klass}, ':')
local other_liked_set = table.concat({redis_namespace, user_namespace, other_user_id, 'liked_'..klass}, ':')
local disliked_set = table.concat({redis_namespace, user_namespace, user_id, 'disliked_'..klass}, ':')
local other_disliked_set = table.concat({redis_namespace, user_namespace, other_user_id, 'disliked_'..klass}, ':')

Recommendable.redis.scard(liked_set)
Recommendable.redis.scard(disliked_set)
end
local agreements_set = table.concat({redis_namespace, klass, user_id, other_user_id, 'agreements'}, ':')
local disagreements_set = table.concat({redis_namespace, klass, user_id, other_user_id, 'disagreements'}, ':')

local similarity0 = redis.call('SINTERSTORE', agreements_set, liked_set, other_liked_set)
local similarity1 = redis.call('SINTERSTORE', agreements_set, disliked_set, other_disliked_set)
local similarity2 = redis.call('SINTERSTORE', disagreements_set, liked_set, other_disliked_set)
local similarity3 = redis.call('SINTERSTORE', disagreements_set, disliked_set, other_liked_set)

# Agreements
similarity += results[0].size
similarity += results[1].size
similarity = similarity + similarity0 + similarity1 - similarity2 - similarity3

# Disagreements
similarity -= results[2].size
similarity -= results[3].size
redis.call('DEL', agreements_set)
redis.call('DEL', disagreements_set)

liked_count += results[4]
disliked_count += results[5]
liked_count = liked_count + redis.call('SCARD', liked_set)
disliked_count = disliked_count + redis.call('SCARD', disliked_set)
end

return ((liked_count + disliked_count) > 0) and similarity / (liked_count + disliked_count) or 0.0
end
LUA
end

similarity / (liked_count + disliked_count).to_f
# Source of the Lua script that computes one similarity value.
#
# The script is the text returned by +similarity_between_lua_func+ (which
# defines a local Lua function +similarity_between+) followed by a single
# +return+ that calls that function with ARGV as its first argument and the
# unpacked KEYS as the remaining arguments.
#
# The result is wrapped in +tostring+, so the script replies with a string;
# the Ruby caller converts the reply with +to_f+.
# NOTE(review): presumably this avoids Redis truncating Lua numbers to
# integers in script replies -- confirm against the Redis EVAL documentation.
#
# @return [String] Lua source to pass to +Recommendable.redis.eval+
def similarity_between_lua
<<-LUA.strip_heredoc
#{similarity_between_lua_func}

return tostring(similarity_between(ARGV, unpack(KEYS)))
LUA
end

# Source of the Lua script that scores one user against a whole set of other
# users and stores every score in a sorted set.
#
# The script is the text returned by +similarity_between_lua_func+ followed by
# a loop. Inputs, as read by the script:
#   KEYS[1] - id of the user being updated
#   KEYS[2] - key of a set whose members (read with SMEMBERS) are the other
#             user ids to compare against
#   KEYS[3] - key of the sorted set that receives the scores
#   KEYS[4], KEYS[5] - forwarded to +similarity_between+ as its
#             +redis_namespace+ and +user_namespace+ arguments
#   ARGV    - forwarded to +similarity_between+ as its +klasses+ argument
#
# For every member of KEYS[2] that differs from KEYS[1], the script ZADDs that
# member to KEYS[3] with the computed similarity as its score. The script has
# no +return+ statement.
#
# @return [String] Lua source to pass to +Recommendable.redis.eval+
def similarity_between_multi_zadd_lua
<<-LUA.strip_heredoc
#{similarity_between_lua_func}

local user_id = KEYS[1]
local other_user_ids = redis.call('SMEMBERS', KEYS[2])
local similarity_set = KEYS[3]

for i=1, #other_user_ids do
if user_id ~= other_user_ids[i] then
local other_user_id = other_user_ids[i]
redis.call('ZADD',
similarity_set,
similarity_between(ARGV, user_id, other_user_id, similarity_set, KEYS[4], KEYS[5]),
other_user_id)
end
end
LUA
end

# Used internally to update the similarity values between this user and all
Expand All @@ -58,34 +93,39 @@ def update_similarities_for(user_id)

# Only calculate similarities for users who have rated the items that
# this user has rated
relevant_user_ids = Recommendable.config.ratable_classes.inject([]) do |memo, klass|
temp_set = Recommendable::Helpers::RedisKeyMapper.temp_set_for(Recommendable.config.user_class, user_id)
Recommendable.config.ratable_classes.each do |klass|
liked_set = Recommendable::Helpers::RedisKeyMapper.liked_set_for(klass, user_id)
disliked_set = Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, user_id)
temp_klass_set = Recommendable::Helpers::RedisKeyMapper.temp_set_for(klass, user_id)
item_count = Recommendable.redis.sunionstore(temp_klass_set, liked_set, disliked_set)

item_ids = Recommendable.redis.sunion(liked_set, disliked_set)

unless item_ids.empty?
sets = item_ids.map do |id|
liked_by_set = Recommendable::Helpers::RedisKeyMapper.liked_by_set_for(klass, id)
disliked_by_set = Recommendable::Helpers::RedisKeyMapper.disliked_by_set_for(klass, id)

[liked_by_set, disliked_by_set]
end

memo | Recommendable.redis.sunion(*sets.flatten)
else
memo
if item_count > 0
Recommendable.redis.eval(sunion_sets_lua,
[temp_set],
[temp_klass_set, Recommendable.config.redis_namespace, klass.to_s.tableize])
end
end

similarity_values = relevant_user_ids.map { |id| similarity_between(user_id, id) }
Recommendable.redis.pipelined do
relevant_user_ids.zip(similarity_values).each do |id, similarity_value|
next if id == user_id # Skip comparing with self.
Recommendable.redis.zadd(similarity_set, similarity_value, id)
Recommendable.config.ratable_classes.each do |klass|
Recommendable.redis.del Recommendable::Helpers::RedisKeyMapper.temp_set_for(klass, user_id)
end
end

temp_sub_set = Recommendable::Helpers::RedisKeyMapper.temp_sub_set_for(Recommendable.config.user_class, user_id)
similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)
klasses = Recommendable.config.ratable_classes.map { |klass| klass.to_s.tableize }
scan_slice(temp_set, temp_sub_set, count: 300) do
Recommendable.redis.eval(similarity_between_multi_zadd_lua,
[ user_id, temp_sub_set, similarity_set,
Recommendable.config.redis_namespace,
Recommendable.config.user_class.to_s.tableize ],
klasses)
end

Recommendable.redis.del temp_set

if knn = Recommendable.config.nearest_neighbors
length = Recommendable.redis.zcard(similarity_set)
kfn = Recommendable.config.furthest_neighbors || 0
Expand All @@ -96,6 +136,33 @@ def update_similarities_for(user_id)
true
end

# Iterates over the members of +set+ one SSCAN page at a time. For each
# non-empty page the members are copied into the scratch set +sub_set+, the
# caller's block is invoked (with no arguments), and +sub_set+ is deleted
# again before the next page is fetched.
#
# @param [String] set key of the Redis set to walk
# @param [String] sub_set key of the scratch set that holds the current page
# @param [Hash] options passed straight through to +sscan+ (e.g. +count: 300+)
# @yield once per non-empty page, while +sub_set+ holds that page's members
# @return [nil]
def scan_slice(set, sub_set, options={})
  cursor = 0
  loop do
    cursor, members = Recommendable.redis.sscan(set, cursor, options)
    unless members.empty?
      begin
        Recommendable.redis.sadd(sub_set, members)
        yield
      ensure
        # Always drop the scratch set, even when the block raises, so a failed
        # run does not leave a stale page behind in Redis.
        Recommendable.redis.del sub_set
      end
    end
    # Compare as a string so the loop also terminates if the client hands the
    # final cursor back as the Integer 0 rather than the String '0'.
    break if cursor.to_s == '0'
  end
end

# Source of the Lua script that folds the raters of a batch of items into one
# set.
#
# Inputs, as read by the script:
#   KEYS[1] - key of the destination set; it is also a source of every union,
#             so existing members are kept
#   ARGV[1] - key of a set whose members (read with SMEMBERS) are item ids
#   ARGV[2], ARGV[3] - the first two ':'-separated segments of each item's
#             '<ARGV[2]>:<ARGV[3]>:<item id>:liked_by' / '...:disliked_by' key
#
# The union is performed in fixed-size chunks rather than with a single
# +unpack(sets)+: Lua's +unpack+ pushes every element onto the stack and fails
# with "too many results to unpack" once the table grows to several thousand
# entries, which a user with many rated items would trigger.
#
# When ARGV[1] has no members, no SUNIONSTORE is issued and KEYS[1] is left
# untouched.
#
# @return [String] Lua source to pass to +Recommendable.redis.eval+
def sunion_sets_lua
  <<-LUA.strip_heredoc
    local item_ids = redis.call('SMEMBERS', ARGV[1])

    local sets = {}
    for i=1, #item_ids do
      sets[#sets + 1] = table.concat({ARGV[2], ARGV[3], item_ids[i], 'liked_by'}, ':')
      sets[#sets + 1] = table.concat({ARGV[2], ARGV[3], item_ids[i], 'disliked_by'}, ':')
    end

    -- Union in bounded chunks; KEYS[1] is a source each time, so the result
    -- accumulates across iterations.
    local chunk_size = 1000
    for first=1, #sets, chunk_size do
      local last = math.min(first + chunk_size - 1, #sets)
      redis.call('SUNIONSTORE', KEYS[1], KEYS[1], unpack(sets, first, last))
    end
  LUA
end

# Used internally to update this user's prediction values across all
# recommendable types. This is called by the background worker.
#
Expand All @@ -111,7 +178,7 @@ def update_recommendations_for(user_id)
Recommendable::Helpers::RedisKeyMapper.hidden_set_for(klass, user_id),
Recommendable::Helpers::RedisKeyMapper.bookmarked_set_for(klass, user_id)
]
temp_set = Recommendable::Helpers::RedisKeyMapper.temp_set_for(Recommendable.config.user_class, user_id)
temp_set = Recommendable::Helpers::RedisKeyMapper.temp_set_for(klass, user_id)
similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)
recommended_set = Recommendable::Helpers::RedisKeyMapper.recommended_set_for(klass, user_id)
most_similar_user_ids, least_similar_user_ids = Recommendable.redis.pipelined do
Expand Down Expand Up @@ -181,15 +248,25 @@ def predict_for(user_id, klass, item_id)
prediction.finite? ? prediction : 0.0
end

def similarity_total_for(user_id, set)
similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)
ids = Recommendable.redis.smembers(set)
similarity_values = Recommendable.redis.pipelined do
ids.each do |id|
Recommendable.redis.zscore(similarity_set, id)
def sum_of_scores_lua
<<-LUA.strip_heredoc
local sum=0
local z=redis.call('ZRANGE', KEYS[2], 0, -1, 'WITHSCORES')

for i=1, #z, 2 do
if redis.call('SISMEMBER', KEYS[1], z[i]) == 1 then
sum=sum+z[i+1]
end
end
similarity_values.map(&:to_f).reduce(&:+).to_f

return tostring(sum)
LUA
end

# Sums this user's stored similarity scores over the members of +set+ by
# evaluating the script from +sum_of_scores_lua+ inside Redis.
#
# @param user_id id whose similarity sorted set supplies the scores
# @param [String] set key of the Redis set passed to the script as KEYS[1]
# @return [Float] the script's reply converted with +to_f+
def similarity_total_for(user_id, set)
  script_keys = [set, Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)]
  reply = Recommendable.redis.eval(sum_of_scores_lua, keys: script_keys)
  reply.to_f
end

def update_score_for(klass, id)
Expand Down
12 changes: 12 additions & 0 deletions lib/recommendable/helpers/redis_key_mapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ def temp_set_for(klass, id)
[redis_namespace, ratable_namespace(klass), id, 'temp'].compact.join(':')
end

# Key of the 'temp_sub' scratch set for +id+ under +klass+'s namespace.
# Nil segments are dropped before the parts are joined with ':'.
def temp_sub_set_for(klass, id)
  segments = [redis_namespace, ratable_namespace(klass), id, 'temp_sub']
  segments.compact.join(':')
end

# Key of the 'agreements' set for the pair (+id+, +other_id+) under +klass+'s
# namespace. Nil segments are dropped before the parts are joined with ':'.
def agreements_set_for(klass, id, other_id)
  segments = [redis_namespace, ratable_namespace(klass), id, other_id, 'agreements']
  segments.compact.join(':')
end

# Key of the 'disagreements' set for the pair (+id+, +other_id+) under
# +klass+'s namespace. Nil segments are dropped before the parts are joined
# with ':'.
def disagreements_set_for(klass, id, other_id)
  segments = [redis_namespace, ratable_namespace(klass), id, other_id, 'disagreements']
  segments.compact.join(':')
end

private

def redis_namespace
Expand Down