larsmans · closedLoop · Feb 20, 2016 · Feb 20, 2016
diff --git a/seqlearn/_utils/__init__.py b/seqlearn/_utils/__init__.py
@@ -18,6 +18,7 @@
 from .ctrans import count_trans
 from .safeadd import safe_add
 from .transmatrix import make_trans_matrix
+from .transmatrix import make_trans_mask
 
 
 def _assert_all_finite(X):

diff --git a/seqlearn/_utils/transmatrix.py b/seqlearn/_utils/transmatrix.py
@@ -23,3 +23,34 @@ def make_trans_matrix(y, n_classes, dtype=np.float64):
 
     return csr_matrix((np.ones(len(y), dtype=dtype), indices, indptr),
                       shape=(len(y), n_classes ** 2))
+
+
+def make_trans_mask(trans_constraints, classes):
+    """Build a 0/1 mask marking the constrained class transitions.
+
+    Parameters
+    ----------
+    trans_constraints : list
+        A list of tuples of length two.  The first element is the prev_state,
+        the latter element is the current_state.  The existence of a constraint
+        pair (prev_state, current_state) significantly lowers the transition
+        probability between elements
+
+    classes : list
+        The list of classes
+
+    """
+    n_classes = len(classes)
+    classdict = {c:i for i,c in enumerate(classes)}
+
+    trans_mask = np.zeros((n_classes, n_classes), dtype=int)
+
+    for src, dest in trans_constraints:
+        r = classdict.get(src,-1)
+        c = classdict.get(dest,-1)
+
+        # Check if valid constraint
+        if r > -1 and c > -1:
+            trans_mask[r,c] = 1
+
+    return trans_mask
diff --git a/seqlearn/perceptron.py b/seqlearn/perceptron.py
@@ -11,7 +11,8 @@
 
 from .base import BaseSequenceClassifier
 from ._utils import (atleast2d_or_csr, check_random_state, count_trans,
-                     make_trans_matrix, safe_add, safe_sparse_dot)
+                     make_trans_matrix, safe_add, safe_sparse_dot,
+                     make_trans_mask)
 
 class StructuredPerceptron(BaseSequenceClassifier):
     """Structured perceptron for sequence classification.
@@ -41,6 +42,11 @@ class StructuredPerceptron(BaseSequenceClassifier):
         individual labels. This requires more time, more memory and more
         samples to train properly.
 
+    trans_constraints : list of tuples, optional
+        A list of tuples where each tuple is a constraint on the transition
+        matrix.  Each tuple is of the form (from_state, to_state); the
+        transition bias for each listed pair is fixed at CONSTRAINT_VALUE.
+
     verbose : integer, optional
         Verbosity level. Defaults to zero (quiet mode).
 
@@ -54,13 +60,15 @@ class StructuredPerceptron(BaseSequenceClassifier):
 
     """
     def __init__(self, decode="viterbi", lr_exponent=.1, max_iter=10,
-                 random_state=None, trans_features=False, verbose=0):
+                 random_state=None, trans_features=False, trans_constraints=None, verbose=0):
         self.decode = decode
         self.lr_exponent = lr_exponent
         self.max_iter = max_iter
         self.random_state = random_state
         self.trans_features = trans_features
+        self.trans_constraints = trans_constraints
         self.verbose = verbose
+        self.CONSTRAINT_VALUE = -20
 
     def fit(self, X, y, lengths):
         """Fit to a set of sequences.
@@ -81,6 +89,7 @@ def fit(self, X, y, lengths):
         -------
         self : StructuredPerceptron
         """
+        import numpy.ma as ma        
 
         decode = self._get_decoder()
 
@@ -94,14 +103,21 @@ def fit(self, X, y, lengths):
         class_range = np.arange(n_classes)
         Y_true = y.reshape(-1, 1) == class_range
 
+        if self.trans_constraints:
+            trans_mask = make_trans_mask(self.trans_constraints, classes)
+        else :
+            trans_mask = make_trans_mask([], classes)
+
         lengths = np.asarray(lengths)
         n_samples, n_features = X.shape
 
         end = np.cumsum(lengths)
         start = end - lengths
 
         w = np.zeros((n_classes, n_features), order='F')
-        b_trans = np.zeros((n_classes, n_classes))
+        b_trans = ma.masked_array(np.zeros((n_classes, n_classes)),
+                                  mask=trans_mask,
+                                  fill_value=self.CONSTRAINT_VALUE).harden_mask()
         b_init = np.zeros(n_classes)
         b_final = np.zeros(n_classes)
 
@@ -124,15 +140,13 @@ def fit(self, X, y, lengths):
 
         for it in six.moves.xrange(1, self.max_iter + 1):
             lr = 1. / (it ** lr_exponent)
-
             if self.verbose:
                 print("Iteration {0:2d}".format(it), end="... ")
                 sys.stdout.flush()
 
             rng.shuffle(sequence_ids)
 
             sum_loss = 0
-
             for i in sequence_ids:
                 X_i = X[start[i]:end[i]]
                 score = safe_sparse_dot(X_i, w.T)
@@ -141,7 +155,7 @@ def fit(self, X, y, lengths):
                     trans_score = trans_score.reshape(-1, n_classes, n_classes)
                 else:
                     trans_score = None
-                y_pred = decode(score, trans_score, b_trans, b_init, b_final)
+                y_pred = decode(score, trans_score, b_trans.filled(), b_init, b_final)
                 y_t_i = y[start[i]:end[i]]
                 loss = (y_pred != y_t_i).sum()
 
@@ -153,6 +167,7 @@ def fit(self, X, y, lengths):
                     Y_pred = Y_pred.astype(np.float64)
 
                     Y_diff = csc_matrix(Y_pred - Y_t_i)
+
                     Y_diff *= -lr
                     w_update = safe_sparse_dot(Y_diff.T, X_i)
 
@@ -209,7 +224,7 @@ def fit(self, X, y, lengths):
         if self.trans_features:
             self.coef_trans_ = w_trans
         self.intercept_init_ = b_init
-        self.intercept_trans_ = b_trans
+        self.intercept_trans_ = b_trans.filled()
         self.intercept_final_ = b_final
 
         self.classes_ = classes

diff --git a/seqlearn/tests/test_perceptron.py b/seqlearn/tests/test_perceptron.py
@@ -1,4 +1,5 @@
 from numpy.testing import assert_array_equal
+from numpy.testing import assert_raises
 
 import numpy as np
 from scipy.sparse import coo_matrix, csc_matrix
@@ -49,3 +50,44 @@ def test_perceptron_single_iter():
     """Assert that averaging works after a single iteration."""
     clf = StructuredPerceptron(max_iter=1)
     clf.fit([[1, 2, 3]], [1], [1])  # no exception
+
+def test_perceptron_mask():
+    X = [[0, 1, 0],
+         [0, 1, 0],
+         [1, 0, 0],
+         [0, 1, 0],
+         [1, 0, 0],
+         [0, 0, 1],
+         [0, 0, 1],
+         [0, 1, 0],
+         [1, 0, 0],
+         [1, 0, 0]]
+
+    y = [0, 0, 0, 0, 0, 1, 1, 0, 2, 2]
+
+    trans_constraints = [('spam','eggs'), ('spam', 'ham')]
+
+    clf = StructuredPerceptron(verbose=True, random_state=42, max_iter=15,
+                               trans_constraints=trans_constraints)
+
+    # Fit with string labels and sparse input.
+    y_str = np.array(["eggs", "ham", "spam"])[y]
+
+
+    clf.fit(csc_matrix(X), y_str, [len(y_str)])
+
+    # Still fits
+    assert_array_equal(y_str, clf.predict(coo_matrix(X)))
+    # Weights are overridden properly
+    assert_array_equal([clf.intercept_trans_[2,0], clf.intercept_trans_[2,1]], 
+                       [clf.CONSTRAINT_VALUE]*2)
+
+    # Add impossible constraints and model should fail to converge
+    impossible_constraints = [('spam','eggs'), ('eggs', 'ham')]
+    clf2 = StructuredPerceptron(verbose=True, random_state=12, max_iter=15,
+                               trans_constraints=impossible_constraints)
+
+    clf2.fit(csc_matrix(X), y_str, [len(y_str)])
+
+    # Should raise error saying that prediction is incorrect
+    assert_raises(AssertionError, assert_array_equal, y_str, clf2.predict(coo_matrix(X)))