Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arbitrary precision Mallows Model under Hamming distance + solved numpy float type deprecation bug #3

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**/__pycache__/**
33 changes: 22 additions & 11 deletions mallows_hamming.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import itertools as it
from scipy.optimize import linear_sum_assignment
import mallows_model as mm

from mpmath import mp


#************* Distance **************#
Expand Down Expand Up @@ -33,7 +33,7 @@ def dist_at_uniform(n): return n

#************ Sampling ************#

def sample(m, n, *, theta=None, phi=None, s0=None):
def sample(m, n, *, theta=None, phi=None, s0=None, precision: int = 50):
"""This function generates m permutations (rankings) according to Mallows Models.
Parameters
----------
Expand All @@ -47,6 +47,9 @@ def sample(m, n, *, theta=None, phi=None, s0=None):
Dispersion parameter phi
s0: ndarray
Consensus ranking
precision: int
Number of decimal digits of precision (assigned to mpmath's ``mp.dps``)
used when computing the probability of each Hamming distance
Returns
-------
ndarray
Expand All @@ -55,21 +58,29 @@ def sample(m, n, *, theta=None, phi=None, s0=None):
sample = np.zeros((m, n))
theta, phi = mm.check_theta_phi(theta, phi)

facts_ = np.array([1, 1]+[0]*(n-1), dtype=np.float)
deran_num_ = np.array([1, 0]+[0]*(n-1), dtype=np.float)
# Set the precision
mp.dps = precision

# Calculate probability distribution over distances
facts_ = mp.zeros(n+1, 1)
deran_num_ = mp.zeros(n+1, 1)
facts_[0] = mp.mpf('1'); facts_[1] = mp.mpf('1')
deran_num_[0] = mp.mpf('1'); deran_num_[1] = mp.mpf('0')
for i in range(2, n+1):
facts_[i] = facts_[i-1] * i
deran_num_[i] = deran_num_[i-1]*(i-1) + deran_num_[i-2]*(i-1);
hamm_count_ = np.array([ deran_num_[d]*facts_[n] / (facts_[d] * facts_[n - d]) for d in range(n+1)], dtype=np.float)
probsd = np.array([hamm_count_[d] * np.exp(-theta * d) for d in range(n+1)], dtype=np.float)
deran_num_[i] = deran_num_[i-1]*(i-1) + deran_num_[i-2]*(i-1)
hamm_count_ = [deran_num_[d]*facts_[n] / (facts_[d] * facts_[n - d]) for d in range(n+1)]
probsd = [hamm_count_[d] * mp.exp(-theta * d) for d in range(n+1)]
probsd = [p / mp.fsum(probsd) for p in probsd]
distance_probabilities = np.array(probsd, dtype=float)

# Draw sample
for m_ in range(m):
target_distance = np.random.choice(n+1,p=probsd/probsd.sum())
target_distance = np.random.choice(n+1,p=distance_probabilities)
sample[m_,:] = sample_at_dist(n, target_distance, s0)

return sample


def sample_at_dist(n, dist, sigma0=None):
"""This function randomly generates a permutation with length n at distance
dist to a given permutation sigma0.
Expand Down Expand Up @@ -117,7 +128,7 @@ def expected_dist_mm(n, theta=None, phi=None):
"""
theta, phi = mm.check_theta_phi(theta, phi)

facts_ = np.array([1,1] + [0]*(n-1), dtype=np.float)
facts_ = np.array([1,1] + [0]*(n-1), dtype=np.float64)
for i in range(2, n+1):
facts_[i] = facts_[i-1] * i
x_n_1 , x_n= 0, 0
Expand Down Expand Up @@ -177,7 +188,7 @@ def prob(sigma, sigma0, theta=None, phi=None):
theta, phi = mm.check_theta_phi(theta, phi)
d = distance(sigma, sigma0)
n = len(sigma)
facts_ = np.array([1, 1] + [0]*(n-1), dtype=np.float)
facts_ = np.array([1, 1] + [0]*(n-1), dtype=np.float64)

for i in range(2, n+1):
facts_[i] = facts_[i-1] * i
Expand Down
2 changes: 1 addition & 1 deletion mallows_kendall.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def ranking_to_v(sigma, k=None):
n = len(sigma)
if k is not None:
sigma = sigma[:k]
sigma = np.concatenate((sigma, np.array([np.float(i) for i in range(n) if i not in sigma])))
sigma = np.concatenate((sigma, np.array([np.float64(i) for i in range(n) if i not in sigma])))
V = []
for j, sigma_j in enumerate(sigma):
V_j = 0
Expand Down
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
numpy
scipy
pandas
mpmath