From 4ed3685272f7597ab5919d0fef7ac3774638bbc5 Mon Sep 17 00:00:00 2001 From: Sean Kruzel Date: Fri, 19 Feb 2016 22:38:08 -0500 Subject: [PATCH 1/2] added ability to push transition probability constraints on perceptron --- seqlearn/_utils/__init__.py | 1 + seqlearn/_utils/transmatrix.py | 31 +++++++++++++++++++++++++++++++ seqlearn/perceptron.py | 29 ++++++++++++++++++++++------- 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/seqlearn/_utils/__init__.py b/seqlearn/_utils/__init__.py index d0d5f6a..87dbf56 100644 --- a/seqlearn/_utils/__init__.py +++ b/seqlearn/_utils/__init__.py @@ -18,6 +18,7 @@ from .ctrans import count_trans from .safeadd import safe_add from .transmatrix import make_trans_matrix +from .transmatrix import make_trans_mask def _assert_all_finite(X): diff --git a/seqlearn/_utils/transmatrix.py b/seqlearn/_utils/transmatrix.py index 7d83b64..4f6e19b 100644 --- a/seqlearn/_utils/transmatrix.py +++ b/seqlearn/_utils/transmatrix.py @@ -23,3 +23,34 @@ def make_trans_matrix(y, n_classes, dtype=np.float64): return csr_matrix((np.ones(len(y), dtype=dtype), indices, indptr), shape=(len(y), n_classes ** 2)) + + +def make_trans_mask(trans_constraints, classes): + """ Build a 0/1 mask of constrained transitions from a list of tuples that match elements in the list classes + + Parameters + ---------- + trans_constraints : list + A list of tuples of length two. The first element is the prev_state, + the latter element is the current_state. 
The existence of a constraint + pair (prev_state, current_state) significantly lowers the transition + probability between elements + + classes : list + The list of classes + + """ + n_classes = len(classes) + classdict = {c:i for i,c in enumerate(classes)} + + trans_mask = np.zeros((n_classes, n_classes), dtype=int) + + for src, dest in trans_constraints: + r = classdict.get(src,-1) + c = classdict.get(dest,-1) + + # Check if valid constraint + if r > -1 and c > -1: + trans_mask[r,c] = 1 + + return trans_mask \ No newline at end of file diff --git a/seqlearn/perceptron.py b/seqlearn/perceptron.py index a716dc8..eec4a14 100644 --- a/seqlearn/perceptron.py +++ b/seqlearn/perceptron.py @@ -11,7 +11,8 @@ from .base import BaseSequenceClassifier from ._utils import (atleast2d_or_csr, check_random_state, count_trans, - make_trans_matrix, safe_add, safe_sparse_dot) + make_trans_matrix, safe_add, safe_sparse_dot, + make_trans_mask) class StructuredPerceptron(BaseSequenceClassifier): """Structured perceptron for sequence classification. @@ -41,6 +42,11 @@ class StructuredPerceptron(BaseSequenceClassifier): individual labels. This requires more time, more memory and more samples to train properly. + trans_constraints : list of 2-tuples, optional + A list of tuples where each tuple is a constraint on the transition + matrix. Each tuple is of the form (from_state_string, to_state_string). + It overrides the transition weight of each listed pair with a fixed low value (CONSTRAINT_VALUE). + verbose : integer, optional Verbosity level. Defaults to zero (quiet mode). 
@@ -54,13 +60,15 @@ class StructuredPerceptron(BaseSequenceClassifier): """ def __init__(self, decode="viterbi", lr_exponent=.1, max_iter=10, - random_state=None, trans_features=False, verbose=0): + random_state=None, trans_features=False, trans_constraints=None, verbose=0): self.decode = decode self.lr_exponent = lr_exponent self.max_iter = max_iter self.random_state = random_state self.trans_features = trans_features + self.trans_constraints = trans_constraints self.verbose = verbose + self.CONSTRAINT_VALUE = -20 def fit(self, X, y, lengths): """Fit to a set of sequences. @@ -81,6 +89,7 @@ def fit(self, X, y, lengths): ------- self : StructuredPerceptron """ + import numpy.ma as ma decode = self._get_decoder() @@ -94,6 +103,11 @@ def fit(self, X, y, lengths): class_range = np.arange(n_classes) Y_true = y.reshape(-1, 1) == class_range + if self.trans_constraints: + trans_mask = make_trans_mask(self.trans_constraints, classes) + else : + trans_mask = make_trans_mask([], classes) + lengths = np.asarray(lengths) n_samples, n_features = X.shape @@ -101,7 +115,9 @@ def fit(self, X, y, lengths): start = end - lengths w = np.zeros((n_classes, n_features), order='F') - b_trans = np.zeros((n_classes, n_classes)) + b_trans = ma.masked_array(np.zeros((n_classes, n_classes)), + mask=trans_mask, + fill_value=self.CONSTRAINT_VALUE).harden_mask() b_init = np.zeros(n_classes) b_final = np.zeros(n_classes) @@ -124,7 +140,6 @@ def fit(self, X, y, lengths): for it in six.moves.xrange(1, self.max_iter + 1): lr = 1. / (it ** lr_exponent) - if self.verbose: print("Iteration {0:2d}".format(it), end="... 
") sys.stdout.flush() @@ -132,7 +147,6 @@ def fit(self, X, y, lengths): rng.shuffle(sequence_ids) sum_loss = 0 - for i in sequence_ids: X_i = X[start[i]:end[i]] score = safe_sparse_dot(X_i, w.T) @@ -141,7 +155,7 @@ def fit(self, X, y, lengths): trans_score = trans_score.reshape(-1, n_classes, n_classes) else: trans_score = None - y_pred = decode(score, trans_score, b_trans, b_init, b_final) + y_pred = decode(score, trans_score, b_trans.filled(), b_init, b_final) y_t_i = y[start[i]:end[i]] loss = (y_pred != y_t_i).sum() @@ -153,6 +167,7 @@ def fit(self, X, y, lengths): Y_pred = Y_pred.astype(np.float64) Y_diff = csc_matrix(Y_pred - Y_t_i) + Y_diff *= -lr w_update = safe_sparse_dot(Y_diff.T, X_i) @@ -209,7 +224,7 @@ def fit(self, X, y, lengths): if self.trans_features: self.coef_trans_ = w_trans self.intercept_init_ = b_init - self.intercept_trans_ = b_trans + self.intercept_trans_ = b_trans.filled() self.intercept_final_ = b_final self.classes_ = classes From 8b6a60dc655547f578867c4a2398c8d73b23d1cf Mon Sep 17 00:00:00 2001 From: Sean Kruzel Date: Fri, 19 Feb 2016 22:38:39 -0500 Subject: [PATCH 2/2] added test for transition probability constraints --- seqlearn/tests/test_perceptron.py | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/seqlearn/tests/test_perceptron.py b/seqlearn/tests/test_perceptron.py index abb1a38..15ba1ee 100644 --- a/seqlearn/tests/test_perceptron.py +++ b/seqlearn/tests/test_perceptron.py @@ -1,4 +1,5 @@ from numpy.testing import assert_array_equal +from numpy.testing import assert_raises import numpy as np from scipy.sparse import coo_matrix, csc_matrix @@ -49,3 +50,44 @@ def test_perceptron_single_iter(): """Assert that averaging works after a single iteration.""" clf = StructuredPerceptron(max_iter=1) clf.fit([[1, 2, 3]], [1], [1]) # no exception + +def test_perceptron_mask(): + X = [[0, 1, 0], + [0, 1, 0], + [1, 0, 0], + [0, 1, 0], + [1, 0, 0], + [0, 0, 1], + [0, 0, 1], + [0, 1, 0], + [1, 0, 0], + [1, 0, 
0]] + + y = [0, 0, 0, 0, 0, 1, 1, 0, 2, 2] + + trans_constraints = [('spam','eggs'), ('spam', 'ham')] + + clf = StructuredPerceptron(verbose=True, random_state=42, max_iter=15, + trans_constraints=trans_constraints) + + # Try again with string labels and sparse input. + y_str = np.array(["eggs", "ham", "spam"])[y] + + + clf.fit(csc_matrix(X), y_str, [len(y_str)]) + + # Still fits + assert_array_equal(y_str, clf.predict(coo_matrix(X))) + # Weights are overridden properly + assert_array_equal([clf.intercept_trans_[2,0], clf.intercept_trans_[2,1]], + [clf.CONSTRAINT_VALUE]*2) + + # Add impossible constraints and model should fail to converge + impossible_constraints = [('spam','eggs'), ('eggs', 'ham')] + clf2 = StructuredPerceptron(verbose=True, random_state=12, max_iter=15, + trans_constraints=impossible_constraints) + + clf2.fit(csc_matrix(X), y_str, [len(y_str)]) + + # Should raise error saying that prediction is incorrect + assert_raises(AssertionError, assert_array_equal, y_str, clf2.predict(coo_matrix(X))) \ No newline at end of file