Commit

Basic implementation of RankNet. Added RankNetLoss into NeuralNetwork's component.
vinhkhuc committed Jul 20, 2016
1 parent debf70d commit 7b4ba90
Showing 6 changed files with 263 additions and 2 deletions.
1 change: 1 addition & 0 deletions tests/supervised/ranking/__init__.py
@@ -0,0 +1 @@

33 changes: 33 additions & 0 deletions tests/supervised/ranking/test_rank_net.py
@@ -0,0 +1,33 @@
import unittest

from vanilla_ml.supervised.ranking.rank_net import RankNet
from vanilla_ml.util import data_io
from vanilla_ml.util.metrics.ranking import ndcg


class TestRankNet(unittest.TestCase):

    def test_iris_two_classes(self):
        train_X, test_X, train_y, test_y = data_io.get_ranking_train_test()
        print("train_X's shape = %s, train_y's shape = %s" % (train_X.shape, train_y.shape))
        print("test_X's shape = %s, test_y's shape = %s" % (test_X.shape, test_y.shape))

        layers = [100]
        rnk = RankNet(layers, batch_size=train_X.shape[0], n_epochs=10, learning_rate=0.1)
        print("rnk: %s" % rnk)

        print("Fitting ...")
        rnk.fit(train_X, train_y)

        print("Predicting ...")
        pred_proba_y = rnk.rank_score(test_X)
        pred_y = rnk.rank(test_X)
        print("y = %s" % test_y)
        print("pred_proba_y = %s" % pred_proba_y)
        print("pred_y = %s" % pred_y)

        k = 5
        ndcg_score = ndcg(test_y, pred_proba_y, k)
        print("NDCG@%d = %g" % (k, ndcg_score))

        self.assertGreaterEqual(ndcg_score, 0.91)
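For reference, the test's pass criterion is NDCG@5 >= 0.91. One common definition of NDCG (the exact gain and discount used by vanilla_ml.util.metrics.ranking.ndcg may differ slightly) is:

\mathrm{DCG@}k = \sum_{r=1}^{k} \frac{2^{\,y_{\pi(r)}} - 1}{\log_2(r + 1)},
\qquad
\mathrm{NDCG@}k = \frac{\mathrm{DCG@}k}{\mathrm{IDCG@}k}

where \pi(r) is the index of the item placed at position r when sorting by predicted score, y_i is that item's relevance label, and IDCG@k is the DCG@k of the ideal (label-sorted) ordering, so NDCG@k lies in [0, 1].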
71 changes: 69 additions & 2 deletions vanilla_ml/base/neural_network/loss.py
@@ -1,3 +1,4 @@
import itertools
from abc import ABCMeta, abstractmethod

import numpy as np
@@ -17,8 +18,8 @@ def bprop(self, input_data, target_data):

class MSELoss(Loss):

-    def __init__(self):
-        self.size_average = True
+    def __init__(self, size_average=True):
+        self.size_average = size_average

    def fprop(self, input_data, target_data):
        cost = np.sum(np.square(target_data - input_data))
@@ -74,3 +75,69 @@ def bprop(self, input_data, target_data):
        grad_input /= input_data.shape[0]

        return grad_input


class RankNetLoss(Loss):
    """
    Loss for RankNet.
    See Section 2 on RankNet in
    "From RankNet to LambdaRank to LambdaMART: An Overview", Chris Burges.
    """
    def __init__(self, sigma=1., size_average=True):
        self.sigma = sigma
        self.size_average = size_average

    def fprop(self, input_data, target_data):
        # Try to use the same notation as in the paper
        s, y, sigma = input_data, target_data, self.sigma

        # Iterate over all pairs of indices, i.e. (0, 1), (0, 2), ...
        n_samples = s.shape[0]
        cost = 0
        # TODO: This loop makes the computation slow
        for i, j in itertools.combinations(range(n_samples), 2):
            s_ij = s[i] - s[j]
            S_ij = 1 if y[i] > y[j] else -1 if y[i] < y[j] else 0
            cost += 0.5 * sigma * (1 - S_ij) * s_ij + np.log(1 + np.exp(-sigma * s_ij))

        # ij = [(i, j) for i, j in itertools.combinations(range(n_samples), 2)]
        # s_ij = [s[i] - s[j] for i, j in ij]
        # S_ij = [1 if y[i] > y[j] else -1 if y[i] < y[j] else 0 for i, j in ij]
        # s_ij, S_ij = np.array(s_ij), np.array(S_ij)
        #
        # cost = np.sum(0.5 * sigma * (1 - S_ij) * s_ij + np.log(1 + np.exp(-sigma * s_ij)))

        if self.size_average:
            cost /= 0.5 * n_samples * (n_samples + 1)  # normalize by 0.5 * n * (n + 1), roughly the number of pairs
        return cost

    def bprop(self, input_data, target_data):
        """ Back-propagation. Here we use the approach for calculating the gradient
        shown in Section 2.1 of the paper.
        """
        s, y, sigma = input_data, target_data, self.sigma
        n_samples = s.shape[0]

        grad_input = np.zeros_like(input_data, np.float32)  # grad_input holds the lambdas (as in the paper)
        # TODO: This loop makes the computation slow
        for i, j in itertools.combinations(range(n_samples), 2):
            S_ij = 1 if y[i] > y[j] else -1 if y[i] < y[j] else 0
            s_ij = s[i] - s[j]
            lambda_ij = sigma * (0.5 * (1 - S_ij) - 1. / (1 + np.exp(sigma * s_ij)))  # dcost/ds_i, Section 2.1
            grad_input[i] += lambda_ij
            grad_input[j] -= lambda_ij

        # ij = [(i, j) for i, j in itertools.combinations(range(n_samples), 2)]
        # s_ij = [s[i] - s[j] for i, j in ij]
        # S_ij = [1 if y[i] > y[j] else -1 if y[i] < y[j] else 0 for i, j in ij]
        # s_ij, S_ij = np.array(s_ij), np.array(S_ij)
        #
        # lambda_ij = sigma * (0.5 * (1 - S_ij) - 1 / (1 + np.exp(sigma * s_ij)))
        # for k, (i, j) in enumerate(ij):
        #     grad_input[i] += lambda_ij[k]
        #     grad_input[j] -= lambda_ij[k]

        if self.size_average:
            grad_input /= 0.5 * n_samples * (n_samples + 1)

        return grad_input
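The TODO comments above flag the pairwise Python loops as the bottleneck. As a rough illustration only (a hypothetical stand-alone helper, not part of this commit), the same cost and per-document lambda accumulation can be vectorized with NumPy; the sketch assumes 1-D arrays of scores and labels and keeps the class's normalizer:

import numpy as np

def ranknet_loss_and_grad(s, y, sigma=1.0, size_average=True):
    """Vectorized sketch of RankNetLoss.fprop/bprop for 1-D scores s and labels y."""
    s = np.asarray(s, dtype=np.float64).ravel()
    y = np.asarray(y, dtype=np.float64).ravel()
    n = s.shape[0]
    i_idx, j_idx = np.triu_indices(n, k=1)          # all pairs (i, j) with i < j
    s_ij = s[i_idx] - s[j_idx]
    S_ij = np.sign(y[i_idx] - y[j_idx])             # +1, -1 or 0, as in the loops above
    cost = np.sum(0.5 * sigma * (1 - S_ij) * s_ij + np.log1p(np.exp(-sigma * s_ij)))
    lambda_ij = sigma * (0.5 * (1 - S_ij) - 1.0 / (1.0 + np.exp(sigma * s_ij)))
    grad = np.zeros(n)
    np.add.at(grad, i_idx, lambda_ij)               # grad[i] += lambda_ij
    np.add.at(grad, j_idx, -lambda_ij)              # grad[j] -= lambda_ij
    if size_average:
        norm = 0.5 * n * (n + 1)                    # same normalizer as RankNetLoss
        cost, grad = cost / norm, grad / norm
    return cost, grad

np.add.at is used instead of a fancy-indexed += so that repeated indices accumulate correctly.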
46 changes: 46 additions & 0 deletions vanilla_ml/supervised/ranking/abstract_ranker.py
@@ -0,0 +1,46 @@
from abc import ABCMeta, abstractmethod


class AbstractRanker(object):
    """
    Abstract ranker
    """
    __metaclass__ = ABCMeta

    @abstractmethod
    def fit(self, X, y, sample_weights=None):
        """ Fit the model using the given training data set with N data points and P features.
        Args:
            X (ndarray): training data set, shape N x P.
            y (ndarray): training ranks, shape N x 1.
            sample_weights (Optional[ndarray]): sample weights, shape N x 1.
        """
        pass

    @abstractmethod
    def rank_score(self, X):
        """ Compute ranking scores for the test set.
        Args:
            X (ndarray): test set, shape M x P.
        Returns:
            ndarray: ranking scores, shape M.
        """
        pass

    def rank(self, X):
        """ Rank elements from the test set. The elements are sorted in descending
        order of ranking scores.
        Args:
            X (ndarray): test set, shape M x P.
        Returns:
            ndarray: ranked elements' indices, shape M.
        """
        scores = self.rank_score(X).ravel()
        return scores.argsort()[::-1]
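As a quick illustration of the rank_score()/rank() contract, a hypothetical toy subclass (for this sketch only, not part of the commit) that scores each row by its first feature:

import numpy as np

from vanilla_ml.supervised.ranking.abstract_ranker import AbstractRanker


class FirstFeatureRanker(AbstractRanker):
    """Toy ranker whose score for each row is simply its first feature."""

    def fit(self, X, y, sample_weights=None):
        pass  # nothing to learn

    def rank_score(self, X):
        return X[:, 0]


ranker = FirstFeatureRanker()
X = np.array([[0.2], [0.9], [0.5]])
print(ranker.rank_score(X))   # scores: [0.2, 0.9, 0.5]
print(ranker.rank(X))         # [1 2 0] -- indices in descending order of score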
103 changes: 103 additions & 0 deletions vanilla_ml/supervised/ranking/rank_net.py
@@ -0,0 +1,103 @@
"""
RankNet using Feed-forward Neural Network.
1) "From RankNet to LambdaRank to LambdaMART: An Overview", Christ Burges.
2) "Learning to Rank using Gradient Descent", Chris Burges et. al.
"""
import numpy as np

from vanilla_ml.base.neural_network.activators import Sigmoid
from vanilla_ml.base.neural_network.containers import Sequential
from vanilla_ml.base.neural_network.layers import Linear
from vanilla_ml.base.neural_network.loss import RankNetLoss
from vanilla_ml.supervised.ranking.abstract_ranker import AbstractRanker
from vanilla_ml.util.metrics.ranking import ndcg


class RankNet(AbstractRanker):

    def __init__(self, layers, learning_rate=1.0, batch_size=10,
                 n_epochs=50, tol=1e-5, verbose=True, random_state=42):

        assert learning_rate > 0, "Learning rate must be positive."

        self.layers = layers
        self.lr = learning_rate
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state
        self.input_size = None
        self.model = None
        self.loss = None

    def fit(self, X, y, sample_weights=None):
        assert sample_weights is None, "Specifying sample weights is not supported!"
        assert len(X) == len(y), "Length mismatches: len(X) = %d, len(y) = %d" % (len(X), len(y))

        np.random.seed(self.random_state)
        n_samples, self.input_size = X.shape

        # Model
        self.model, self.loss = self._build_model()

        # SGD params
        params = {"lrate": self.lr, "max_grad_norm": 40}

        indices = np.arange(n_samples)

        # Run SGD
        for epoch in range(self.n_epochs):
            if self.verbose and (epoch + 1) % 10 == 0:
                print("\n * Epoch %d ..." % (epoch + 1))

            # For report
            # total_ndcg_score = 0.
            # total_cost = 0.
            # total_num = 0

            for it in range(n_samples / self.batch_size):

                # batch = np.random.choice(indices, size=self.batch_size, replace=False)
                start = it * self.batch_size
                end = min((it + 1) * self.batch_size, n_samples)
                batch = indices[start:end]
                input_data, target_data = X[batch], y[batch]

                # Forward propagation
                pred = self.model.fprop(input_data)
                # total_cost += self.loss.fprop(pred, target_data)
                # total_num += self.batch_size
                ndcg_score = ndcg(target_data, pred, k=10)
                # total_ndcg_score += ndcg_score

                if self.verbose:
                    print("\n* Iter %d" % (it + 1))
                    print("Train NDCG@10: %g" % ndcg_score)

                # Backward propagation
                grad_output = self.loss.bprop(pred, target_data)
                self.model.bprop(input_data, grad_output)
                self.model.update(params)

    def rank_score(self, X):
        return self.model.fprop(X).ravel()

    def _build_model(self):
        input_size, layer_sizes = self.input_size, self.layers
        model = Sequential()
        for i in range(len(layer_sizes)):
            if i == 0:
                model.add(Linear(input_size, layer_sizes[i]))
            else:
                model.add(Linear(layer_sizes[i - 1], layer_sizes[i]))
            model.add(Sigmoid())
            # model.add(ReLU())

        model.add(Linear(layer_sizes[-1], 1))

        # Cost
        loss = RankNetLoss(sigma=1, size_average=True)

        return model, loss
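A note on why fit() above only has to call self.loss.bprop followed by self.model.bprop: as sketched in Section 2.1 of the Burges overview, the pairwise gradients factor through per-document lambdas (here C is the total pairwise cost, w_k any network weight, and the sums run over the evaluated pairs):

\lambda_{ij} = \frac{\partial C(s_i - s_j)}{\partial s_i}
             = \sigma\!\left(\tfrac{1}{2}(1 - S_{ij}) - \frac{1}{1 + e^{\sigma (s_i - s_j)}}\right),
\qquad
\frac{\partial C}{\partial w_k} = \sum_i \lambda_i \, \frac{\partial s_i}{\partial w_k},
\quad \lambda_i = \sum_{j:(i,j)} \lambda_{ij} - \sum_{j:(j,i)} \lambda_{ji}

RankNetLoss.bprop accumulates exactly these per-document \lambda_i into grad_input, and model.bprop then pushes them through \partial s_i / \partial w_k by the ordinary chain rule.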
11 changes: 11 additions & 0 deletions vanilla_ml/util/data_io.py
@@ -164,6 +164,17 @@ def get_regression_curve(noise=True):
    return _get_train_test_split(X, y)


# Adapted from http://qiita.com/sz_dr/items/0e50120318527a928407 (Japanese)
def get_ranking_train_test(n_dim=50, n_rank=5, n_sample=1000, sigma=5., random_state=42):
    rand = np.random
    rand.seed(random_state)
    y = rand.random_integers(n_rank, size=n_sample)
    w = rand.standard_normal(n_dim)
    X = [sigma * np.random.standard_normal(n_dim) + w * y_i for y_i in y]
    X = np.array(X, np.float32)
    return _get_train_test_split(X, y)


def get_accuracy(model, train_test):
    tr_X, te_X, tr_y, te_y = train_test

