From f57ad84d49dc536da27f1270e3f43a775c5add5c Mon Sep 17 00:00:00 2001 From: Chyi-Kwei Yau Date: Tue, 8 Apr 2014 19:23:31 -0400 Subject: [PATCH 1/8] add forward_backward algorithm --- seqlearn/_inference/__init__.py | 1 + seqlearn/_inference/forward_backward.pyx | 166 ++++++++++++++++++ .../_inference/tests/test_forward_backward.py | 49 ++++++ setup.py | 3 +- 4 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 seqlearn/_inference/__init__.py create mode 100644 seqlearn/_inference/forward_backward.pyx create mode 100644 seqlearn/_inference/tests/test_forward_backward.py diff --git a/seqlearn/_inference/__init__.py b/seqlearn/_inference/__init__.py new file mode 100644 index 0000000..151b1e2 --- /dev/null +++ b/seqlearn/_inference/__init__.py @@ -0,0 +1 @@ +from . import forward_backward \ No newline at end of file diff --git a/seqlearn/_inference/forward_backward.pyx b/seqlearn/_inference/forward_backward.pyx new file mode 100644 index 0000000..e4b495c --- /dev/null +++ b/seqlearn/_inference/forward_backward.pyx @@ -0,0 +1,166 @@ +# Author: Chyi-Kwei Yau + +"""Forward-Backward algorithm for CRF training & posterior calculation""" + +cimport cython +cimport numpy as np +import numpy as np + +from .._utils import logsumexp + + +np.import_array() + +@cython.boundscheck(False) +@cython.wraparound(False) +def _forward(np.ndarray[ndim=2, dtype=np.float64_t] score, + np.ndarray[ndim=3, dtype=np.float64_t] trans_score, + np.ndarray[ndim=2, dtype=np.float64_t] b_trans, + np.ndarray[ndim=1, dtype=np.float64_t] init, + np.ndarray[ndim=1, dtype=np.float64_t] final): + """ + Forward Algorithm + + Parameters + ---------- + score : array, shape = (n_samples, n_states) + Scores per sample/class combination; in a linear model, X * w.T. + May be overwritten. + trans_score : array, shape = (n_samples, n_states, n_states), optional + Scores per sample/transition combination. + b_trans : array, shape = (n_states, n_states) + Transition weights. + init : array, shape = (n_states,) + final : array, shape = (n_states,) + Initial and final state weights. + + Return + ------ + forward : array, shape = (n_samples, n_states) + + References + ---------- + L. R. Rabiner (1989). A tutorial on hidden Markov models and selected + applications in speech recognition. Proc. IEEE 77(2):257-286. 
+ + """ + + cdef np.ndarray[ndim=2, dtype=np.float64_t] forward + cdef np.npy_intp i, j, k, m, n_samples, n_states, last_index + + n_samples, n_states = score.shape[0], score.shape[1] + last_index = n_samples - 1 + forward = np.empty((n_samples, n_states), dtype=np.float64) + + # initialize + for j in range(n_states): + forward[0, j] = init[j] + score[0, j] + + for i in range(1, n_samples): + for k in range(n_states): + temp_array = forward[i-1, :] + b_trans[:, k] + score[i, k] + #if trans_score is not None: + # temp_array += trans_score[i-1, k, :] + if i == last_index: + temp_array += final[k] + forward[i, k] = logsumexp(temp_array) + + return forward + + +@cython.boundscheck(False) +@cython.wraparound(False) +def _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, + np.ndarray[ndim=3, dtype=np.float64_t] trans_score, + np.ndarray[ndim=2, dtype=np.float64_t] b_trans, + np.ndarray[ndim=1, dtype=np.float64_t] init, + np.ndarray[ndim=1, dtype=np.float64_t] final): + + """ + Backward Algorithm (similar to forward Algorithm) + + Parameters + ---------- + Same as Forward function + + + Returns + ------- + backward : array, shape = (n_samples, n_states) + + """ + + cdef np.ndarray[ndim=2, dtype=np.float64_t] backward + cdef np.npy_intp i, j, k, m, n_samples, n_states, last_index + + n_samples, n_states = score.shape[0], score.shape[1] + last_index = n_samples - 1 + + backward = np.empty((n_samples, n_states), dtype=np.float64) + + # initialize + for j in range(n_states): + backward[last_index, j] = final[j] + + for i in range(last_index-1, -1, -1): + for k in range(n_states): + temp_array = backward[i+1, :] + b_trans[k, :] + score[i+1, :] + #if trans_score is not None: + # temp_array += trans_score[i, :, k] + print temp_array + backward[i, k] = logsumexp(temp_array) + + return backward + + +@cython.boundscheck(False) +@cython.wraparound(False) +def _posterior(np.ndarray[ndim=2, dtype=np.float64_t] score, + np.ndarray[ndim=3, dtype=np.float64_t] trans_score, + np.ndarray[ndim=2, dtype=np.float64_t] b_trans, + np.ndarray[ndim=1, dtype=np.float64_t] init, + np.ndarray[ndim=1, dtype=np.float64_t] final): + + """ + Calculate posterior distrubtion based on Forward-Backward algorithm + + Parameters + ---------- + Same as Forward function + """ + + cdef np.ndarray[ndim=2, dtype=np.float64_t] forward, backward, state_posterior, trans_posterior + cdef np.npy_intp i, j, k, n_samples, n_states + # log likelihood value + cdef np.float64_t ll, temp_trans_val + + n_samples, n_states = score.shape[0], score.shape[1] + + # initialize + state_posterior = np.empty((n_samples, n_states), dtype=np.float64) + trans_posterior = np.zeros((n_states, n_states), dtype=np.float64) + + # get forward-backward values + forward = _forward(score, trans_score, b_trans, init, final) + backward = _backward(score, trans_score, b_trans, init, final) + + # get log likelihood + ll = logsumexp(forward[n_samples-1, :] + final) + + # states poterior + for i in range(n_samples): + for j in range(n_states): + state_posterior[i, j] = forward[i, j] + backward[i, j] - ll + state_posterior = np.exp(state_posterior) + + # transition posterior + for i in range(n_samples-1): + for j in range(n_states): + for k in range(n_states): + temp_trans_val = forward[i, j] + b_trans[j, k] + score[i+1, k] + backward[i+1, k] - ll + #if trans_score is not None: + # temp_trans_val += trans_score[i, j, k] + trans_posterior[j, k] += np.exp(temp_trans_val) + + return state_posterior, trans_posterior + diff --git 
a/seqlearn/_inference/tests/test_forward_backward.py b/seqlearn/_inference/tests/test_forward_backward.py new file mode 100644 index 0000000..b954783 --- /dev/null +++ b/seqlearn/_inference/tests/test_forward_backward.py @@ -0,0 +1,49 @@ +import numpy as np + +from numpy.testing import assert_almost_equal +from seqlearn._inference.forward_backward import _forward, _backward, _posterior + + +init = np.array([.2, .1]) +final = np.array([.1, .2]) + +trans = np.array([[.1, .2], + [.4, .3]]) + +score = np.array([[.3, .2], + [.4, .1], + [.5, .4]]) + + +def test_forward(): + forward = _forward(score, None, trans, init, final) + + true_forward = np.array([[0.5, 0.3], + [1.7443, 1.4444], + [3.1375, 3.1425]]) + + # assert equal + assert_almost_equal(true_forward, forward, decimal=3) + + +def test_backward(): + backward = _backward(score, None, trans, init, final) + + #true value + true_backward = np.array([[2.6375, 2.8425], + [1.4444, 1.6444], + [0.1, 0.2]]) + # assert equal + assert_almost_equal(true_backward, backward, decimal=3) + + +def test_forward_backward(): + + forward = _forward(score, None, trans, init, final) + backward = _backward(score, None, trans, init, final) + + assert_almost_equal(forward[-1, :], backward[0, :] + score[0, :] + init) + +def test_posterior(): + pass + #assert_almost_equal \ No newline at end of file diff --git a/setup.py b/setup.py index f382328..3d0c946 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ maintainer_email="L.J.Buitinck@uva.nl", license="MIT", url="https://github.com/larsmans/seqlearn", - packages=["seqlearn", "seqlearn._utils", "seqlearn._decode", "seqlearn.datasets"], + packages=["seqlearn", "seqlearn._utils", "seqlearn._decode", "seqlearn.datasets", "seqlearn._inference"], classifiers=[ "Intended Audience :: Developers", "Intended Audience :: Science/Research", @@ -27,6 +27,7 @@ Extension("seqlearn._decode.viterbi", ["seqlearn/_decode/viterbi.pyx"]), Extension("seqlearn._utils.ctrans", ["seqlearn/_utils/ctrans.pyx"]), Extension("seqlearn._utils.safeadd", ["seqlearn/_utils/safeadd.pyx"]), + Extension("seqlearn._inference.forward_backward", ["seqlearn/_inference/forward_backward.pyx"]), ], ) From 4aa47ba2e86957b90cc39f70f48c689e1ad7e9a2 Mon Sep 17 00:00:00 2001 From: Chyi-Kwei Yau Date: Wed, 9 Apr 2014 13:34:29 -0400 Subject: [PATCH 2/8] change init backward value to 0.0 --- seqlearn/_inference/forward_backward.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/seqlearn/_inference/forward_backward.pyx b/seqlearn/_inference/forward_backward.pyx index e4b495c..0d3186f 100644 --- a/seqlearn/_inference/forward_backward.pyx +++ b/seqlearn/_inference/forward_backward.pyx @@ -100,7 +100,8 @@ def _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, # initialize for j in range(n_states): - backward[last_index, j] = final[j] + # inital backward value = 1.0 = exp(0.0) + backward[last_index, j] = 0.0 for i in range(last_index-1, -1, -1): for k in range(n_states): @@ -108,8 +109,12 @@ def _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, #if trans_score is not None: # temp_array += trans_score[i, :, k] print temp_array + if i == last_index-1: + temp_array += final + backward[i, k] = logsumexp(temp_array) + return backward From 5cb2220973959159f1d4b5e78cb419b373754b36 Mon Sep 17 00:00:00 2001 From: Chyi-Kwei Yau Date: Wed, 9 Apr 2014 13:35:19 -0400 Subject: [PATCH 3/8] fix test error --- seqlearn/_inference/tests/test_forward_backward.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/seqlearn/_inference/tests/test_forward_backward.py b/seqlearn/_inference/tests/test_forward_backward.py index b954783..618aae0 100644 --- a/seqlearn/_inference/tests/test_forward_backward.py +++ b/seqlearn/_inference/tests/test_forward_backward.py @@ -32,7 +32,7 @@ def test_backward(): #true value true_backward = np.array([[2.6375, 2.8425], [1.4444, 1.6444], - [0.1, 0.2]]) + [0.0, 0.0]]) # assert equal assert_almost_equal(true_backward, backward, decimal=3) From e9534593fbf3b2507089b581745efd63034a8773 Mon Sep 17 00:00:00 2001 From: Chyi-Kwei Yau Date: Fri, 11 Apr 2014 14:32:07 -0400 Subject: [PATCH 4/8] add CRF model and its test file --- seqlearn/_inference/forward_backward.pyx | 28 ++- .../_inference/tests/test_forward_backward.py | 20 +- seqlearn/crf.py | 199 ++++++++++++++++++ seqlearn/tests/test_crf.py | 26 +++ 4 files changed, 262 insertions(+), 11 deletions(-) create mode 100644 seqlearn/crf.py create mode 100644 seqlearn/tests/test_crf.py diff --git a/seqlearn/_inference/forward_backward.pyx b/seqlearn/_inference/forward_backward.pyx index 0d3186f..458cc8b 100644 --- a/seqlearn/_inference/forward_backward.pyx +++ b/seqlearn/_inference/forward_backward.pyx @@ -48,6 +48,9 @@ def _forward(np.ndarray[ndim=2, dtype=np.float64_t] score, cdef np.ndarray[ndim=2, dtype=np.float64_t] forward cdef np.npy_intp i, j, k, m, n_samples, n_states, last_index + if trans_score is not None: + raise NotImplementedError("No transition scores for forward algorithm yet.") + n_samples, n_states = score.shape[0], score.shape[1] last_index = n_samples - 1 forward = np.empty((n_samples, n_states), dtype=np.float64) @@ -93,6 +96,9 @@ def _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, cdef np.ndarray[ndim=2, dtype=np.float64_t] backward cdef np.npy_intp i, j, k, m, n_samples, n_states, last_index + if trans_score is not None: + raise NotImplementedError("No transition scores for backward yet.") + n_samples, n_states = score.shape[0], score.shape[1] last_index = n_samples - 1 @@ -108,7 +114,6 @@ def _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, temp_array = backward[i+1, :] + b_trans[k, :] + score[i+1, :] #if trans_score is not None: # temp_array += trans_score[i, :, k] - print temp_array if i == last_index-1: temp_array += final @@ -132,6 +137,12 @@ def _posterior(np.ndarray[ndim=2, dtype=np.float64_t] score, Parameters ---------- Same as Forward function + + References + ---------- + C. 
Sutton (2006) An Introduction to Conditional Random Fields for + Relational Learning + """ cdef np.ndarray[ndim=2, dtype=np.float64_t] forward, backward, state_posterior, trans_posterior @@ -139,6 +150,9 @@ def _posterior(np.ndarray[ndim=2, dtype=np.float64_t] score, # log likelihood value cdef np.float64_t ll, temp_trans_val + if trans_score is not None: + raise NotImplementedError("No transition scores for posterior func yet.") + n_samples, n_states = score.shape[0], score.shape[1] # initialize @@ -150,9 +164,9 @@ def _posterior(np.ndarray[ndim=2, dtype=np.float64_t] score, backward = _backward(score, trans_score, b_trans, init, final) # get log likelihood - ll = logsumexp(forward[n_samples-1, :] + final) + ll = logsumexp(forward[n_samples-1, :]) - # states poterior + # states posterior for i in range(n_samples): for j in range(n_states): state_posterior[i, j] = forward[i, j] + backward[i, j] - ll @@ -163,9 +177,11 @@ def _posterior(np.ndarray[ndim=2, dtype=np.float64_t] score, for j in range(n_states): for k in range(n_states): temp_trans_val = forward[i, j] + b_trans[j, k] + score[i+1, k] + backward[i+1, k] - ll - #if trans_score is not None: - # temp_trans_val += trans_score[i, j, k] + # add final feature + if i == n_samples-2: + temp_trans_val += final[k] + # Note: get tranistion posterior from log scale and sum up from position 1 to (n_samples-1) trans_posterior[j, k] += np.exp(temp_trans_val) - return state_posterior, trans_posterior + return state_posterior, trans_posterior, ll diff --git a/seqlearn/_inference/tests/test_forward_backward.py b/seqlearn/_inference/tests/test_forward_backward.py index 618aae0..21058db 100644 --- a/seqlearn/_inference/tests/test_forward_backward.py +++ b/seqlearn/_inference/tests/test_forward_backward.py @@ -1,9 +1,7 @@ import numpy as np - from numpy.testing import assert_almost_equal from seqlearn._inference.forward_backward import _forward, _backward, _posterior - init = np.array([.2, .1]) final = np.array([.1, .2]) @@ -38,12 +36,24 @@ def test_backward(): def test_forward_backward(): - forward = _forward(score, None, trans, init, final) backward = _backward(score, None, trans, init, final) assert_almost_equal(forward[-1, :], backward[0, :] + score[0, :] + init) + def test_posterior(): - pass - #assert_almost_equal \ No newline at end of file + state_posterior, trans_posterior, ll = _posterior(score, None, trans, init, final) + + state_posterior_true = np.array([[0.4987, 0.5012], + [0.5249, 0.4750], + [0.4987, 0.5012]]) + + trans_posterior_true = np.array([[0.4987, 0.5249], + [0.5249, 0.4512]]) + + assert_almost_equal(state_posterior_true, state_posterior, decimal=3) + assert_almost_equal(trans_posterior_true, trans_posterior, decimal=3) + + # sum of transition posterior should sum up to (n_samples-1) + assert_almost_equal(np.sum(trans_posterior), state_posterior.shape[0]-1) diff --git a/seqlearn/crf.py b/seqlearn/crf.py new file mode 100644 index 0000000..00daafc --- /dev/null +++ b/seqlearn/crf.py @@ -0,0 +1,199 @@ +# Linear Chain CRF with Stochastic Gradient Descent. +# author: Chyi-Kwei Yau, 2014 + +from __future__ import division, print_function + +import numpy as np + +from .base import BaseSequenceClassifier +from ._utils import (atleast2d_or_csr, check_random_state, count_trans, + safe_add, safe_sparse_dot) + +from ._inference.forward_backward import _posterior + + +class LinearChainCRF(BaseSequenceClassifier): + """Linear Chian Conditional Random Field for sequence classification. 
+ + This implements a linear chain CRF with Stochastic Gradient Descent. + + Parameters + ---------- + decode : string, optional + Decoding algorithm, either "bestfirst" or "viterbi" (default). + + lr : float, optional + Initial learning rate + + lr_exponent : float, optional + Exponent for inverse scaling learning rate. The effective learning + rate is lr / (t ** lr_exponent), where t is the iteration number. + + max_iter : integer, optional + Number of iterations (aka. epochs). Each sequence is visited once in + each iteration. + + random_state : {integer, np.random.RandomState}, optional + Random state or seed used for shuffling sequences within each + iteration. + + reg: L2 regularization value + + compute_obj_val: compute objective value. Set this to True to check whether the objective + value converges. + + verbose : integer, optional + Verbosity level. Defaults to zero (quiet mode). + + References + ---------- + J. Lafferty (2001). Conditional random fields: Probabilistic models + for segmenting and labeling sequence data + + C. Sutton (2006) An Introduction to Conditional Random Fields for + Relational Learning + + N. Schraudolph (2006). Accelerated Training of Conditional Random + Fields with Stochastic Gradient Methods + + """ + + def __init__(self, decode="viterbi", lr=1.0, lr_exponent=.1, max_iter=10, + random_state=None, reg=.01, compute_obj_val=False, verbose=0): + self.decode = decode + self.lr = lr + self.lr_exponent = lr_exponent + self.max_iter = max_iter + self.random_state = random_state + self.reg = reg + self.compute_obj_val = compute_obj_val + self.verbose = verbose + + def fit(self, X, y, lengths): + """Fit to a set of sequences. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Feature matrix of individual samples. + + y : array-like, shape (n_samples,) + Target labels. + + lengths : array-like of integers, shape (n_sequences,) + Lengths of the individual sequences in X, y. The sum of these + should be n_samples. + + Returns + ------- + self : LinearChainCRF + """ + + X = atleast2d_or_csr(X) + + classes, y = np.unique(y, return_inverse=True) + n_classes = len(classes) + + class_range = np.arange(n_classes) + Y_true = y.reshape(-1, 1) == class_range + + lengths = np.asarray(lengths) + n_samples, n_features = X.shape + n_sequence = lengths.shape[0] + + end = np.cumsum(lengths) + start = end - lengths + + # initialize parameters + w = np.zeros((n_classes, n_features), order='F') + b_trans = np.zeros((n_classes, n_classes)) + b_init = np.zeros(n_classes) + b_final = np.zeros(n_classes) + + w_avg = np.zeros_like(w) + b_trans_avg = np.zeros_like(b_trans) + b_init_avg = np.zeros_like(b_init) + b_final_avg = np.zeros_like(b_final) + + sequence_ids = np.arange(n_sequence) + rng = check_random_state(self.random_state) + + avg_count = 1. 
+ for it in xrange(1, self.max_iter + 1): + if self.verbose: + print("Iteration {0:2d}...".format(it)) + + if self.compute_obj_val: + sample_count = 0 + sum_obj_val = 0.0 + + rng.shuffle(sequence_ids) + + lr = self.lr / (it ** self.lr_exponent) + + reg = self.reg / n_sequence + + for i in sequence_ids: + X_i = X[start[i]:end[i]] + y_t_i = Y_true[start[i]:end[i]] + t_trans = count_trans(y[start[i]:end[i]], n_classes) + + score = safe_sparse_dot(X_i, w.T) + + w_true = safe_sparse_dot(y_t_i.T, X_i) + + # posterior distribution for states & transtion + post_state, post_trans, ll = _posterior(score, None, b_trans, b_init, b_final) + + if self.compute_obj_val: + feature_val = np.sum(w_true * w) + trans_val = np.sum(t_trans * b_trans) + init_val = np.sum(y_t_i[0] * b_init) + final_val = np.sum(y_t_i[-1] * b_final) + sum_obj_val += feature_val + trans_val + init_val + final_val - ll - (0.5 * reg * np.sum(w * w)) + + sample_count += 1 + if sample_count % 1000 == 0: + avg_obj_val = sum_obj_val / sample_count + print("iter: {0:d}, sample: {1:d}, avg. objective value {2:.4f}".format( + it, sample_count, avg_obj_val)) + + # update feature w + w_update = lr * (safe_sparse_dot(y_t_i.T, X_i) - safe_sparse_dot(post_state.T, X_i) - (reg * w)) + + # update init & final matrix + b_init_update = lr * (post_state[0, :] - y_t_i[0] + reg * b_init) + b_final_update = lr * (post_state[-1, :] - y_t_i[-1] + reg * b_final) + + # update transition matrix + b_trans_update = lr * (post_trans - t_trans + reg * b_trans) + + safe_add(w, w_update) + b_init -= b_init_update + b_final -= b_final_update + b_trans -= b_trans_update + + w_update *= avg_count + b_trans_update *= avg_count + b_init_update *= avg_count + b_final_update *= avg_count + + safe_add(w_avg, w_update) + b_trans_avg -= b_trans_update + b_init_avg -= b_init_update + b_final_avg -= b_final_update + + avg_count += 1. 
+ + w -= w_avg / avg_count + b_init -= b_init_avg / avg_count + b_trans -= b_trans_avg / avg_count + b_final -= b_final_avg / avg_count + + self.coef_ = w + self.intercept_init_ = b_init + self.intercept_trans_ = b_trans + self.intercept_final_ = b_final + self.classes_ = classes + + return self diff --git a/seqlearn/tests/test_crf.py b/seqlearn/tests/test_crf.py new file mode 100644 index 0000000..525a396 --- /dev/null +++ b/seqlearn/tests/test_crf.py @@ -0,0 +1,26 @@ +import numpy as np +from numpy.testing import assert_array_equal +from scipy.sparse import csc_matrix + +from seqlearn.crf import LinearChainCRF + + +def test_crf(): + X = np.array([[1, 0, 0], + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [1, 0, 0], + [0, 1, 0], + [0, 0, 1]]) + + X = csc_matrix(X) + y = np.array(['0', '0', '1', '1', '0', '1', '1']) + lengths = np.array([4, 3]) + + for it in [1, 5, 10]: + clf = LinearChainCRF(max_iter=it) + clf.fit(X, y, lengths) + + y_pred = clf.predict(X, lengths) + assert_array_equal(y, y_pred) From 17726914e18037c5909f9e21d42b66c643dc2dbf Mon Sep 17 00:00:00 2001 From: Chyi-Kwei Yau Date: Mon, 14 Apr 2014 18:54:14 -0400 Subject: [PATCH 5/8] implement logsumexp() in cython code --- seqlearn/_inference/forward_backward.pyx | 37 ++++++++++++++++++++---- setup.py | 4 ++- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/seqlearn/_inference/forward_backward.pyx b/seqlearn/_inference/forward_backward.pyx index 458cc8b..15fc6c1 100644 --- a/seqlearn/_inference/forward_backward.pyx +++ b/seqlearn/_inference/forward_backward.pyx @@ -6,11 +6,36 @@ cimport cython cimport numpy as np import numpy as np -from .._utils import logsumexp - +from libc.math cimport exp, log np.import_array() + +cdef np.float64_t _logsumexp(np.ndarray[ndim=1, dtype=np.float64_t] arr): + """ + simple 1-D logsumexp function + """ + cdef np.npy_intp i, j, arr_length + cdef np.float64_t v_max, v_sum + + arr_length = arr.shape[0] + + # find max + v_max = arr[0] + for i from 1 <= i < arr_length: + if arr[i] > v_max: + v_max = arr[i] + + #sum of exp value + v_sum = 0.0 + for j from 0 <= j < arr_length: + v_sum += exp(arr[j] - v_max) + + # logsumexp value + v_sum = log(v_sum) + v_max + return v_sum + + @cython.boundscheck(False) @cython.wraparound(False) def _forward(np.ndarray[ndim=2, dtype=np.float64_t] score, @@ -46,6 +71,7 @@ def _forward(np.ndarray[ndim=2, dtype=np.float64_t] score, """ cdef np.ndarray[ndim=2, dtype=np.float64_t] forward + cdef np.ndarray[ndim=1, dtype=np.float64_t] temp_array cdef np.npy_intp i, j, k, m, n_samples, n_states, last_index if trans_score is not None: @@ -66,7 +92,7 @@ def _forward(np.ndarray[ndim=2, dtype=np.float64_t] score, # temp_array += trans_score[i-1, k, :] if i == last_index: temp_array += final[k] - forward[i, k] = logsumexp(temp_array) + forward[i, k] = _logsumexp(temp_array) return forward @@ -94,6 +120,7 @@ def _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, """ cdef np.ndarray[ndim=2, dtype=np.float64_t] backward + cdef np.ndarray[ndim=1, dtype=np.float64_t] temp_array cdef np.npy_intp i, j, k, m, n_samples, n_states, last_index if trans_score is not None: @@ -117,7 +144,7 @@ def _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, if i == last_index-1: temp_array += final - backward[i, k] = logsumexp(temp_array) + backward[i, k] = _logsumexp(temp_array) return backward @@ -164,7 +191,7 @@ def _posterior(np.ndarray[ndim=2, dtype=np.float64_t] score, backward = _backward(score, trans_score, b_trans, init, final) # get log likelihood - ll = 
logsumexp(forward[n_samples-1, :]) + ll = _logsumexp(forward[n_samples-1, :]) # states posterior for i in range(n_samples): diff --git a/setup.py b/setup.py index 3d0c946..3b56ba6 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,9 @@ Extension("seqlearn._decode.viterbi", ["seqlearn/_decode/viterbi.pyx"]), Extension("seqlearn._utils.ctrans", ["seqlearn/_utils/ctrans.pyx"]), Extension("seqlearn._utils.safeadd", ["seqlearn/_utils/safeadd.pyx"]), - Extension("seqlearn._inference.forward_backward", ["seqlearn/_inference/forward_backward.pyx"]), + Extension("seqlearn._inference.forward_backward", + ["seqlearn/_inference/forward_backward.pyx"], + libraries=["m"]), ], ) From 45e82a7c9e4ef93c87127c735946196ec136adc1 Mon Sep 17 00:00:00 2001 From: Lars Buitinck Date: Mon, 21 Apr 2014 12:52:39 +0200 Subject: [PATCH 6/8] ENH optimize forward-backward algorithm for LCCRF Training is about 5% faster. --- seqlearn/_inference/forward_backward.pyx | 30 +++++++++++++----------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/seqlearn/_inference/forward_backward.pyx b/seqlearn/_inference/forward_backward.pyx index 15fc6c1..be013c9 100644 --- a/seqlearn/_inference/forward_backward.pyx +++ b/seqlearn/_inference/forward_backward.pyx @@ -11,6 +11,8 @@ from libc.math cimport exp, log np.import_array() +@cython.boundscheck(False) +@cython.wraparound(False) cdef np.float64_t _logsumexp(np.ndarray[ndim=1, dtype=np.float64_t] arr): """ simple 1-D logsumexp function @@ -38,11 +40,11 @@ cdef np.float64_t _logsumexp(np.ndarray[ndim=1, dtype=np.float64_t] arr): @cython.boundscheck(False) @cython.wraparound(False) -def _forward(np.ndarray[ndim=2, dtype=np.float64_t] score, - np.ndarray[ndim=3, dtype=np.float64_t] trans_score, - np.ndarray[ndim=2, dtype=np.float64_t] b_trans, - np.ndarray[ndim=1, dtype=np.float64_t] init, - np.ndarray[ndim=1, dtype=np.float64_t] final): +cdef _forward(np.ndarray[ndim=2, dtype=np.float64_t] score, + np.ndarray[ndim=3, dtype=np.float64_t] trans_score, + np.ndarray[ndim=2, dtype=np.float64_t] b_trans, + np.ndarray[ndim=1, dtype=np.float64_t] init, + np.ndarray[ndim=1, dtype=np.float64_t] final): """ Forward Algorithm @@ -99,12 +101,11 @@ def _forward(np.ndarray[ndim=2, dtype=np.float64_t] score, @cython.boundscheck(False) @cython.wraparound(False) -def _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, - np.ndarray[ndim=3, dtype=np.float64_t] trans_score, - np.ndarray[ndim=2, dtype=np.float64_t] b_trans, - np.ndarray[ndim=1, dtype=np.float64_t] init, - np.ndarray[ndim=1, dtype=np.float64_t] final): - +cdef _backward(np.ndarray[ndim=2, dtype=np.float64_t] score, + np.ndarray[ndim=3, dtype=np.float64_t] trans_score, + np.ndarray[ndim=2, dtype=np.float64_t] b_trans, + np.ndarray[ndim=1, dtype=np.float64_t] init, + np.ndarray[ndim=1, dtype=np.float64_t] final): """ Backward Algorithm (similar to forward Algorithm) @@ -197,7 +198,7 @@ def _posterior(np.ndarray[ndim=2, dtype=np.float64_t] score, for i in range(n_samples): for j in range(n_states): state_posterior[i, j] = forward[i, j] + backward[i, j] - ll - state_posterior = np.exp(state_posterior) + np.exp(state_posterior, out=state_posterior) # transition posterior for i in range(n_samples-1): @@ -207,8 +208,9 @@ def _posterior(np.ndarray[ndim=2, dtype=np.float64_t] score, # add final feature if i == n_samples-2: temp_trans_val += final[k] - # Note: get tranistion posterior from log scale and sum up from position 1 to (n_samples-1) - trans_posterior[j, k] += np.exp(temp_trans_val) + # Note: get transition posterior from log 
scale and sum up + # from position 1 to (n_samples-1) + trans_posterior[j, k] += exp(temp_trans_val) return state_posterior, trans_posterior, ll From 8011747c2e0c58f474b03bd962de7fe53425157d Mon Sep 17 00:00:00 2001 From: Chyi-Kwei Yau Date: Mon, 21 Apr 2014 21:19:51 -0400 Subject: [PATCH 7/8] only use safe_sparse_dot once in computing w_update --- seqlearn/_inference/forward_backward.pyx | 1 + seqlearn/crf.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/seqlearn/_inference/forward_backward.pyx b/seqlearn/_inference/forward_backward.pyx index be013c9..6016b5f 100644 --- a/seqlearn/_inference/forward_backward.pyx +++ b/seqlearn/_inference/forward_backward.pyx @@ -1,3 +1,4 @@ +# cython: profile=True # Author: Chyi-Kwei Yau """Forward-Backward algorithm for CRF training & posterior calculation""" diff --git a/seqlearn/crf.py b/seqlearn/crf.py index 00daafc..e93bfe9 100644 --- a/seqlearn/crf.py +++ b/seqlearn/crf.py @@ -159,7 +159,7 @@ def fit(self, X, y, lengths): it, sample_count, avg_obj_val)) # update feature w - w_update = lr * (safe_sparse_dot(y_t_i.T, X_i) - safe_sparse_dot(post_state.T, X_i) - (reg * w)) + w_update = safe_sparse_dot(lr * (y_t_i - post_state).T, X_i) - ((lr * reg) * w) # update init & final matrix b_init_update = lr * (post_state[0, :] - y_t_i[0] + reg * b_init) From 523ec03d69a38d473357a04ce8c32d303287aede Mon Sep 17 00:00:00 2001 From: Chyi-Kwei Yau Date: Mon, 21 Apr 2014 22:09:17 -0400 Subject: [PATCH 8/8] don't need to compute w_true when compute_obj_val=False --- seqlearn/crf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/seqlearn/crf.py b/seqlearn/crf.py index e93bfe9..0ea2155 100644 --- a/seqlearn/crf.py +++ b/seqlearn/crf.py @@ -140,12 +140,11 @@ def fit(self, X, y, lengths): score = safe_sparse_dot(X_i, w.T) - w_true = safe_sparse_dot(y_t_i.T, X_i) - # posterior distribution for states & transtion post_state, post_trans, ll = _posterior(score, None, b_trans, b_init, b_final) if self.compute_obj_val: + w_true = safe_sparse_dot(y_t_i.T, X_i) feature_val = np.sum(w_true * w) trans_val = np.sum(t_trans * b_trans) init_val = np.sum(y_t_i[0] * b_init)
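
Editor's note (not part of the patch series): the sketch below restates in plain NumPy what forward_backward.pyx computes once PATCH 5/8 and 6/8 are applied -- log-space forward and backward passes with the `final` weights folded into the last forward step and the first backward step, a max-shifted log-sum-exp, and the state/transition posteriors returned by _posterior. Shapes and conventions (score = X * w.T, b_trans[j, k] scoring a j -> k transition) follow the docstrings above; the function and variable names here are illustrative only, and trans_score is omitted because the Cython code raises NotImplementedError for it.

import numpy as np


def log_sum_exp(arr):
    # max-shift trick, as in the Cython _logsumexp from PATCH 5/8
    m = arr.max()
    return m + np.log(np.exp(arr - m).sum())


def forward_backward(score, b_trans, init, final):
    """score: (n_samples, n_states); b_trans: (n_states, n_states);
    init, final: (n_states,). Returns state/transition posteriors and log Z."""
    n_samples, n_states = score.shape

    fwd = np.empty((n_samples, n_states))
    fwd[0] = init + score[0]
    for i in range(1, n_samples):
        for k in range(n_states):
            v = fwd[i - 1] + b_trans[:, k] + score[i, k]
            if i == n_samples - 1:          # final weights enter the last step only
                v = v + final[k]
            fwd[i, k] = log_sum_exp(v)

    bwd = np.zeros((n_samples, n_states))   # exp(0) = 1 at the last position
    for i in range(n_samples - 2, -1, -1):
        for k in range(n_states):
            v = bwd[i + 1] + b_trans[k] + score[i + 1]
            if i == n_samples - 2:
                v = v + final
            bwd[i, k] = log_sum_exp(v)

    ll = log_sum_exp(fwd[-1])                        # log partition function
    state_post = np.exp(fwd + bwd - ll)              # P(y_i = k | x); each row is a distribution
    trans_post = np.zeros((n_states, n_states))      # joint transition marginals, summed over i
    for i in range(n_samples - 1):
        for j in range(n_states):
            for k in range(n_states):
                v = fwd[i, j] + b_trans[j, k] + score[i + 1, k] + bwd[i + 1, k] - ll
                if i == n_samples - 2:
                    v += final[k]
                trans_post[j, k] += np.exp(v)
    return state_post, trans_post, ll

Run on the toy arrays from test_forward_backward.py, this reproduces the properties the tests rely on: each row of the state posterior sums to 1 (consistent with the tabulated values in test_posterior) and the transition posterior sums to n_samples - 1.

init = np.array([.2, .1])
final = np.array([.1, .2])
trans = np.array([[.1, .2], [.4, .3]])
score = np.array([[.3, .2], [.4, .1], [.5, .4]])

state_post, trans_post, ll = forward_backward(score, trans, init, final)
assert np.allclose(state_post.sum(axis=1), 1.0)
assert np.isclose(trans_post.sum(), score.shape[0] - 1)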
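
Editor's note (not part of the patch series): per sequence, LinearChainCRF.fit takes a gradient-ascent step on the L2-regularized conditional log-likelihood -- empirical feature counts minus expected counts under the posterior, shrunk by the regularizer -- and additionally maintains running averages of the parameters. The snippet below is a dense-NumPy restatement of that single step as it stands after PATCH 7/8 and 8/8; the averaging buffers are omitted, and all argument names are placeholders for quantities computed inside the training loop (the real code uses safe_sparse_dot and safe_add to stay sparse-friendly).

import numpy as np


def sgd_step(w, b_trans, b_init, b_final, X_i, Y_i,
             post_state, post_trans, t_trans, lr, reg):
    """One update on one sequence.

    X_i: (T, n_features) features; Y_i: (T, n_states) one-hot labels;
    post_state, post_trans: posteriors from _posterior; t_trans: empirical
    transition counts (as from count_trans).
    """
    w += lr * (Y_i.T.dot(X_i) - post_state.T.dot(X_i) - reg * w)   # emission weights
    b_init += lr * (Y_i[0] - post_state[0] - reg * b_init)         # start-of-sequence weights
    b_final += lr * (Y_i[-1] - post_state[-1] - reg * b_final)     # end-of-sequence weights
    b_trans += lr * (t_trans - post_trans - reg * b_trans)         # transition weights
    return w, b_trans, b_init, b_final

This is the same ascent direction as in crf.py, which builds each bias update as (posterior - empirical + reg * param) and subtracts it, and builds the w update as (empirical - posterior - reg * w) and adds it: every parameter moves toward the empirical counts and away from the model's expected counts.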