Owl-A · jsahil730 · Jan 30, 2022 · Jan 30, 2022 · Feb 1, 2022 · Feb 1, 2022
diff --git a/csec_mentor_matching/.gitignore b/csec_mentor_matching/.gitignore
@@ -3,3 +3,4 @@
 *.xls
 *.xlsx
 costs
+*.png
diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py
@@ -4,6 +4,8 @@
 import itertools as it
 import random
 import warnings
+import z3
+import matplotlib.pyplot as plt
 
 #warnings.filterwarnings('error')
 
@@ -149,6 +151,7 @@ def mentee_analysis(mentees) :
 # cost with number of extra subjects that dont match with mentor
 extra_cost = np.exp(np.arange(12)/3.0)
 
+U = int('1'*11, 2)
 
 def count_bits(x) :
     t = 0
@@ -161,7 +164,6 @@ def eval_cost(x) :
     # remember more the cost, less chances of matching
     (tee_int, tee_year) , (tor_int, pro, tor_year) = x
 
-    U = int('1'*11, 2)
     tor_int = int(tor_int)
     tee_int = int(tee_int)
 
@@ -170,7 +172,6 @@ def eval_cost(x) :
         return 0.0
 
     # 0 common very high cost, less common low cost, more chances of matching.
-    # lot of matches considered superflous.
     common    = dot_cost[count_bits(tee_int & tor_int)]
 
     # more the tee extra more the cost, less chances of matching.
@@ -181,14 +182,14 @@ def eval_cost(x) :
     # lower the chances of getting that mentor.
     # More focussed the mentor on particular topics, the
     # lesser the chance of getting the mentor.
-    pro_cost  = np.exp(pro* (0.5/count_bits(tor_int)))
+    pro_cost  = np.exp(10 * (1/pro) * (1/count_bits(tor_int)))
 
     # Chances of matching a mentor in lower year than you
     # should be negligible
-    year_cost = 100 if tee_year > tor_year else 0
+    year_cost = 100 if tee_year > tor_year else (10 if tee_year == tor_year else 0)
 
     # random luck
-    luck      = random.uniform(3.0, 9.0)
+    luck      = random.uniform(2.0, 10.0)
 
     """
     print("=" * 80)
@@ -206,8 +207,7 @@ def eval_cost(x) :
 def costs(multi, mentee_int, mentee_year, mentor_int, proficiency, mentor_year) :
     # the matcher is a minimum cost matching algorithm
     # Make the costs higher to decrease the possibility of matching.
-
-    # add dummy candidates with very high weight
+    # multi is number of mentees
     mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int))))
     mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year))))
     costs = list(it.product(list(zip(mentee_int, mentee_year)), list(zip(mentor_int, proficiency, mentor_year))))
@@ -219,3 +219,66 @@ def detect_duplicates(mentors, mentees) :
     for pt in (set(mentors[0]) & set(mentees[0])) :
         n = mentors[0].index(pt)
         print( mentors[1][n] + " <" + pt + "> Filled both mentor and mentee forms.")
+
+# to encode matching as SAT; every mentee-mentor pair will have a cost and a boolean var to denote whether they are picked or not
+# min cost matching = optimise cost of chosen pairs + choose only one edge per mentee
+# other constraints to be added separately
+
+# vars have mentor as first index and mentee as second
+def mentor_count_constraint(tee_tor_vars, costs, min_cnt):
+    l = []
+    for i in range(len(tee_tor_vars)):
+        l.append(z3.AtLeast(*tee_tor_vars[i], min_cnt))
+    return z3.And(l)
+
+def matching(optimiser, tee_tor_vars, costs):
+    for j in range(len(tee_tor_vars[0])):
+        optimiser.add(z3.simplify(z3.PbEq([(tee_tor_vars[i][j],1) for i in range(len(tee_tor_vars))], 1)))
+
+    v = 0
+    for i in range(len(tee_tor_vars)):
+        for j in range(len(tee_tor_vars[0])):
+            optimiser.add_soft(tee_tor_vars[i][j], costs[i][j])
+#    optimiser.minimize(v)
+
+#    print(optimiser)
+
+    return optimiser.check()
+
+def visualise_interests(mentees):
+    interests = mentees[2]
+    BLK = 20 
+    img = np.zeros((len(MENTEE_INTEREST_FULL)*BLK, BLK*len(interests), 3), dtype=np.float)
+
+    for i, v in enumerate(interests):
+        mask = v
+        for j, val in enumerate(bin(mask)[2:].zfill(len(MENTEE_INTEREST_FULL))):
+            img[j*BLK:(j+1)*BLK, i*BLK:(i+1)*BLK, :] = int(val)
+
+#    print(img)
+    plt.imsave('mentee_interests.png', img)
+
+
+def visualize_mapping(mapping, mentors, mentees):
+    BLK = 20
+    img = np.zeros((BLK*7, BLK*len(mentors[0]), 3), dtype=np.uint8)
+    cnts = np.zeros((len(mentors[0])), dtype=np.int)
+    for tor, tee in mapping:
+        tor_int = mentors[2][tor]
+        tee_int = mentees[2][tee]
+        val = [[[0, 255, 0]]] # green is default
+
+        if count_bits(tor_int & tee_int) <= 3:
+        # if count_bits(tee_int & (~(tor_int & tee_int) & U)) > 2:
+            # set difference of tee_int with tor_int more than, say, 2
+            val = [[[255, 0, 0]]] # red
+        # elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 2:
+            # set difference of tor_int with tee_int
+            #val = [[[0, 0, 255]]] # blue
+
+        img[cnts[tor]*BLK:(cnts[tor]+1)*BLK, tor*BLK:(tor+1)*BLK, :] = val
+        cnts[tor] += 1
+
+#    print(img)
+
+    plt.imsave('mapping.png', img)
diff --git a/csec_mentor_matching/main.py b/csec_mentor_matching/main.py
@@ -13,6 +13,12 @@
 import numpy as np
 from functools import reduce
 import sys
+import z3
+
+z3.set_param('parallel.enable', True)
+z3.set_param('parallel.threads.max', 32)
+z3.set_param('sat.local_search_threads', 4)
+z3.set_param('sat.threads', 4)
 
 MENTEE_SPREADSHEET = '1byO5GRxBVTaDVXNR_JeykVWwA9LrirKhmjjn_8TIUXk'
 MENTOR_SPREADSHEET = '1SMl0cjfrLgU6D2iYtiQwdl1O5kHTdGdyDqWdysdwTWg'
@@ -31,10 +37,13 @@
 mentors = analysis.mentor_analysis((mentors, mentors_year))
 mentees = analysis.mentee_analysis(mentees)
 
+mentees_og = mentees
 print('+' * 80)
 
 analysis.detect_duplicates(mentors, mentees)
 
+analysis.visualise_interests(mentees)
+
 mentee_set1, mentee_set2 = analysis.arrange_mentees(mentees, len(mentors[0]))
 
 #mentors = list(mentors)
@@ -74,22 +83,27 @@
 multi = len(mentees[0]) 
 costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4])
 temp = matcher.match(multi, len(mentors[0]), costs)
-temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
+temp1 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
 
-assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp))
+assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp1))
 
 print('+' * 80)
 
 mentees = mentee_set2
 multi = len(mentors[0])
 costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4])
 temp = matcher.match(multi, len(mentors[0]), costs)
-temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
+temp2 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) )
 
-assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp))
+assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp2))
 assignment = pd.DataFrame(assignment)
 
-print('+' * 80, file=sys.stderr)
+mapping = temp1 + temp2
+mapping.sort()
+
+analysis.visualize_mapping(mapping, mentors, mentees_og)
+
+print('+' * 80)
 assignment.columns = ['Mentor Email', 'Mentor Name', 'Mentor Year', 'Mentee Email', 'Mentee Name', 'Mentee Year']
 print("The assertion that Mentor Year >= Mentee Year: " + ("Failed" if np.all(assignment['Mentor Year'] >= assignment['Mentee Year'] ) else "Passed"))
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,3 +3,4 @@ @@
     *.xls
     *.xlsx
     costs
+    *.png