Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Constraints on Transition Probabilities #22

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions seqlearn/_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from .ctrans import count_trans
from .safeadd import safe_add
from .transmatrix import make_trans_matrix
from .transmatrix import make_trans_mask


def _assert_all_finite(X):
Expand Down
31 changes: 31 additions & 0 deletions seqlearn/_utils/transmatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,34 @@ def make_trans_matrix(y, n_classes, dtype=np.float64):

return csr_matrix((np.ones(len(y), dtype=dtype), indices, indptr),
shape=(len(y), n_classes ** 2))


def make_trans_mask(trans_constraints, classes):
    """Build a 0/1 matrix marking which state transitions are constrained.

    Parameters
    ----------
    trans_constraints : iterable of tuple, or None
        Pairs ``(prev_state, current_state)`` whose members are labels from
        ``classes``. Each pair marks the transition prev_state ->
        current_state as constrained. ``None`` is treated as "no
        constraints". Pairs that mention a label not found in ``classes``
        are silently skipped.

    classes : sequence
        The class labels. The position of a label in this sequence is its
        row/column index in the returned mask.

    Returns
    -------
    trans_mask : ndarray of int, shape (n_classes, n_classes)
        ``trans_mask[i, j]`` is 1 when the transition from ``classes[i]`` to
        ``classes[j]`` is constrained and 0 otherwise.

    """
    n_classes = len(classes)
    class_index = {label: i for i, label in enumerate(classes)}

    trans_mask = np.zeros((n_classes, n_classes), dtype=int)

    # The estimator's default for its constraint list is None; accept it here
    # so callers do not have to substitute an empty list themselves.
    if trans_constraints is None:
        trans_constraints = ()

    for src, dest in trans_constraints:
        row = class_index.get(src)
        col = class_index.get(dest)

        # Only mark constraints whose both endpoints are known labels.
        if row is not None and col is not None:
            trans_mask[row, col] = 1

    return trans_mask
29 changes: 22 additions & 7 deletions seqlearn/perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@

from .base import BaseSequenceClassifier
from ._utils import (atleast2d_or_csr, check_random_state, count_trans,
make_trans_matrix, safe_add, safe_sparse_dot)
make_trans_matrix, safe_add, safe_sparse_dot,
make_trans_mask)

class StructuredPerceptron(BaseSequenceClassifier):
"""Structured perceptron for sequence classification.
Expand Down Expand Up @@ -41,6 +42,11 @@ class StructuredPerceptron(BaseSequenceClassifier):
individual labels. This requires more time, more memory and more
samples to train properly.

trans_constraints : list of tuples, optional
A list of (from_state, to_state) label pairs, each naming a transition
to constrain. The transition bias for every listed pair is held fixed at
CONSTRAINT_VALUE instead of being learned. Pairs that name a label not
present in the training data are ignored.

verbose : integer, optional
Verbosity level. Defaults to zero (quiet mode).

Expand All @@ -54,13 +60,15 @@ class StructuredPerceptron(BaseSequenceClassifier):

"""
def __init__(self, decode="viterbi", lr_exponent=.1, max_iter=10,
             random_state=None, trans_features=False,
             trans_constraints=None, verbose=0):
    """Store the hyper-parameters on the instance; nothing is fitted here."""
    # Value used as the fill_value of the masked transition-bias matrix in
    # fit, i.e. the bias that constrained transitions end up with.
    self.CONSTRAINT_VALUE = -20

    # Decoding and learning-schedule settings.
    self.decode = decode
    self.lr_exponent = lr_exponent
    self.max_iter = max_iter
    self.random_state = random_state

    # Transition modelling options.
    self.trans_features = trans_features
    self.trans_constraints = trans_constraints

    self.verbose = verbose

def fit(self, X, y, lengths):
"""Fit to a set of sequences.
Expand All @@ -81,6 +89,7 @@ def fit(self, X, y, lengths):
-------
self : StructuredPerceptron
"""
import numpy.ma as ma

decode = self._get_decoder()

Expand All @@ -94,14 +103,21 @@ def fit(self, X, y, lengths):
class_range = np.arange(n_classes)
Y_true = y.reshape(-1, 1) == class_range

if self.trans_constraints:
trans_mask = make_trans_mask(self.trans_constraints, classes)
else :
trans_mask = make_trans_mask([], classes)

lengths = np.asarray(lengths)
n_samples, n_features = X.shape

end = np.cumsum(lengths)
start = end - lengths

w = np.zeros((n_classes, n_features), order='F')
b_trans = np.zeros((n_classes, n_classes))
b_trans = ma.masked_array(np.zeros((n_classes, n_classes)),
mask=trans_mask,
fill_value=self.CONSTRAINT_VALUE).harden_mask()
b_init = np.zeros(n_classes)
b_final = np.zeros(n_classes)

Expand All @@ -124,15 +140,13 @@ def fit(self, X, y, lengths):

for it in six.moves.xrange(1, self.max_iter + 1):
lr = 1. / (it ** lr_exponent)

if self.verbose:
print("Iteration {0:2d}".format(it), end="... ")
sys.stdout.flush()

rng.shuffle(sequence_ids)

sum_loss = 0

for i in sequence_ids:
X_i = X[start[i]:end[i]]
score = safe_sparse_dot(X_i, w.T)
Expand All @@ -141,7 +155,7 @@ def fit(self, X, y, lengths):
trans_score = trans_score.reshape(-1, n_classes, n_classes)
else:
trans_score = None
y_pred = decode(score, trans_score, b_trans, b_init, b_final)
y_pred = decode(score, trans_score, b_trans.filled(), b_init, b_final)
y_t_i = y[start[i]:end[i]]
loss = (y_pred != y_t_i).sum()

Expand All @@ -153,6 +167,7 @@ def fit(self, X, y, lengths):
Y_pred = Y_pred.astype(np.float64)

Y_diff = csc_matrix(Y_pred - Y_t_i)

Y_diff *= -lr
w_update = safe_sparse_dot(Y_diff.T, X_i)

Expand Down Expand Up @@ -209,7 +224,7 @@ def fit(self, X, y, lengths):
if self.trans_features:
self.coef_trans_ = w_trans
self.intercept_init_ = b_init
self.intercept_trans_ = b_trans
self.intercept_trans_ = b_trans.filled()
self.intercept_final_ = b_final

self.classes_ = classes
Expand Down
42 changes: 42 additions & 0 deletions seqlearn/tests/test_perceptron.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from numpy.testing import assert_array_equal
from numpy.testing import assert_raises

import numpy as np
from scipy.sparse import coo_matrix, csc_matrix
Expand Down Expand Up @@ -49,3 +50,44 @@ def test_perceptron_single_iter():
"""Assert that averaging works after a single iteration."""
clf = StructuredPerceptron(max_iter=1)
clf.fit([[1, 2, 3]], [1], [1]) # no exception

def test_perceptron_mask():
    """Constrained transitions keep the fixed bias and fitting still works."""
    features = [[0, 1, 0],
                [0, 1, 0],
                [1, 0, 0],
                [0, 1, 0],
                [1, 0, 0],
                [0, 0, 1],
                [0, 0, 1],
                [0, 1, 0],
                [1, 0, 0],
                [1, 0, 0]]

    label_ids = [0, 0, 0, 0, 0, 1, 1, 0, 2, 2]

    # String labels, so that index 0 is "eggs", 1 is "ham" and 2 is "spam".
    labels = np.array(["eggs", "ham", "spam"])[label_ids]

    # Neither spam -> eggs nor spam -> ham occurs in the training sequence,
    # so constraining them must not prevent a perfect fit.
    harmless = [('spam', 'eggs'), ('spam', 'ham')]
    model = StructuredPerceptron(verbose=True, random_state=42, max_iter=15,
                                 trans_constraints=harmless)
    model.fit(csc_matrix(features), labels, [len(labels)])

    # The training sequence is still reproduced exactly.
    assert_array_equal(labels, model.predict(coo_matrix(features)))

    # Both constrained entries in the "spam" row hold the fixed value.
    pinned = [model.intercept_trans_[2, col] for col in (0, 1)]
    assert_array_equal(pinned, [model.CONSTRAINT_VALUE] * 2)

    # eggs -> ham does occur in the data, so constraining it should leave
    # the model unable to reproduce the training labels.
    conflicting = [('spam', 'eggs'), ('eggs', 'ham')]
    broken = StructuredPerceptron(verbose=True, random_state=12, max_iter=15,
                                  trans_constraints=conflicting)
    broken.fit(csc_matrix(features), labels, [len(labels)])

    # Predict first, outside the guarded block, so only the comparison is
    # expected to raise.
    mismatched = broken.predict(coo_matrix(features))
    with assert_raises(AssertionError):
        assert_array_equal(labels, mismatched)