Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

z3 optimize for computing min cost matching with other constraints #3

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions csec_mentor_matching/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
*.xls
*.xlsx
costs
*.png
77 changes: 70 additions & 7 deletions csec_mentor_matching/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import itertools as it
import random
import warnings
import z3
import matplotlib.pyplot as plt

#warnings.filterwarnings('error')

Expand Down Expand Up @@ -149,6 +151,7 @@ def mentee_analysis(mentees) :
# cost with number of extra subjects that dont match with mentor
extra_cost = np.exp(np.arange(12)/3.0)

U = int('1'*11, 2)

def count_bits(x) :
t = 0
Expand All @@ -161,7 +164,6 @@ def eval_cost(x) :
# remember more the cost, less chances of matching
(tee_int, tee_year) , (tor_int, pro, tor_year) = x

U = int('1'*11, 2)
tor_int = int(tor_int)
tee_int = int(tee_int)

Expand All @@ -170,7 +172,6 @@ def eval_cost(x) :
return 0.0

# 0 common very high cost, less common low cost, more chances of matching.
# lot of matches considered superflous.
common = dot_cost[count_bits(tee_int & tor_int)]

# more the tee extra more the cost, less chances of matching.
Expand All @@ -181,14 +182,14 @@ def eval_cost(x) :
# lower the chances of getting that mentor.
# More focussed the mentor on particular topics, the
# lesser the chance of getting the mentor.
pro_cost = np.exp(pro* (0.5/count_bits(tor_int)))
pro_cost = np.exp(10 * (1/pro) * (1/count_bits(tor_int)))

# Chances of matching a mentor in lower year than you
# should be negligible
year_cost = 100 if tee_year > tor_year else 0
year_cost = 100 if tee_year > tor_year else (10 if tee_year == tor_year else 0)

# random luck
luck = random.uniform(3.0, 9.0)
luck = random.uniform(2.0, 10.0)

"""
print("=" * 80)
Expand All @@ -206,8 +207,7 @@ def eval_cost(x) :
def costs(multi, mentee_int, mentee_year, mentor_int, proficiency, mentor_year) :
# the matcher is a minimum cost matching algorithm
# Make the costs higher to decrease the possibility of matching.

# add dummy candidates with very high weight
# multi is number of mentees
mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int))))
mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year))))
costs = list(it.product(list(zip(mentee_int, mentee_year)), list(zip(mentor_int, proficiency, mentor_year))))
Expand All @@ -219,3 +219,66 @@ def detect_duplicates(mentors, mentees) :
for pt in (set(mentors[0]) & set(mentees[0])) :
n = mentors[0].index(pt)
print( mentors[1][n] + " <" + pt + "> Filled both mentor and mentee forms.")

# to encode matching as SAT; every mentee-mentor pair will have a cost and a boolean var to denote whether they are picked or not
# min cost matching = optimise cost of chosen pairs + choose only one edge per mentee
# other constraints to be added separately

# vars have mentor as first index and mentee as second
def mentor_count_constraint(tee_tor_vars, costs, min_cnt):
l = []
for i in range(len(tee_tor_vars)):
l.append(z3.AtLeast(*tee_tor_vars[i], min_cnt))
return z3.And(l)

def matching(optimiser, tee_tor_vars, costs):
for j in range(len(tee_tor_vars[0])):
optimiser.add(z3.simplify(z3.PbEq([(tee_tor_vars[i][j],1) for i in range(len(tee_tor_vars))], 1)))

v = 0
for i in range(len(tee_tor_vars)):
for j in range(len(tee_tor_vars[0])):
optimiser.add_soft(tee_tor_vars[i][j], costs[i][j])
# optimiser.minimize(v)

# print(optimiser)

return optimiser.check()

def visualise_interests(mentees):
interests = mentees[2]
BLK = 20
img = np.zeros((len(MENTEE_INTEREST_FULL)*BLK, BLK*len(interests), 3), dtype=np.float)

for i, v in enumerate(interests):
mask = v
for j, val in enumerate(bin(mask)[2:].zfill(len(MENTEE_INTEREST_FULL))):
img[j*BLK:(j+1)*BLK, i*BLK:(i+1)*BLK, :] = int(val)

# print(img)
plt.imsave('mentee_interests.png', img)


def visualize_mapping(mapping, mentors, mentees):
BLK = 20
img = np.zeros((BLK*7, BLK*len(mentors[0]), 3), dtype=np.uint8)
cnts = np.zeros((len(mentors[0])), dtype=np.int)
for tor, tee in mapping:
tor_int = mentors[2][tor]
tee_int = mentees[2][tee]
val = [[[0, 255, 0]]] # green is default

if count_bits(tor_int & tee_int) <= 3:
# if count_bits(tee_int & (~(tor_int & tee_int) & U)) > 2:
# set difference of tee_int with tor_int more than, say, 2
val = [[[255, 0, 0]]] # red
# elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 2:
# set difference of tor_int with tee_int
#val = [[[0, 0, 255]]] # blue

img[cnts[tor]*BLK:(cnts[tor]+1)*BLK, tor*BLK:(tor+1)*BLK, :] = val
cnts[tor] += 1

# print(img)

plt.imsave('mapping.png', img)
24 changes: 19 additions & 5 deletions csec_mentor_matching/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
import numpy as np
from functools import reduce
import sys
import z3

z3.set_param('parallel.enable', True)
z3.set_param('parallel.threads.max', 32)
z3.set_param('sat.local_search_threads', 4)
z3.set_param('sat.threads', 4)

MENTEE_SPREADSHEET = '1byO5GRxBVTaDVXNR_JeykVWwA9LrirKhmjjn_8TIUXk'
MENTOR_SPREADSHEET = '1SMl0cjfrLgU6D2iYtiQwdl1O5kHTdGdyDqWdysdwTWg'
Expand All @@ -31,10 +37,13 @@
mentors = analysis.mentor_analysis((mentors, mentors_year))
mentees = analysis.mentee_analysis(mentees)

mentees_og = mentees
print('+' * 80)

analysis.detect_duplicates(mentors, mentees)

analysis.visualise_interests(mentees)

mentee_set1, mentee_set2 = analysis.arrange_mentees(mentees, len(mentors[0]))

#mentors = list(mentors)
Expand Down Expand Up @@ -74,22 +83,27 @@
multi = len(mentees[0])
costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4])
temp = matcher.match(multi, len(mentors[0]), costs)
temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
temp1 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )

assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp))
assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp1))

print('+' * 80)

mentees = mentee_set2
multi = len(mentors[0])
costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4])
temp = matcher.match(multi, len(mentors[0]), costs)
temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
temp2 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )

assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp))
assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp2))
assignment = pd.DataFrame(assignment)

print('+' * 80, file=sys.stderr)
mapping = temp1 + temp2
mapping.sort()

analysis.visualize_mapping(mapping, mentors, mentees_og)

print('+' * 80)
assignment.columns = ['Mentor Email', 'Mentor Name', 'Mentor Year', 'Mentee Email', 'Mentee Name', 'Mentee Year']
print("The assertion that Mentor Year >= Mentee Year: " + ("Failed" if np.all(assignment['Mentor Year'] >= assignment['Mentee Year'] ) else "Passed"))

Expand Down