rnn_dynamic_graph.py (forked from erickrf/treernn)
import os, sys, shutil, time, itertools
import math, random
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tree as tr
from utils import Vocab
RESET_AFTER = 50
MODEL_STR = 'rnn_embed=%d_l2=%f_lr=%f.weights'
SAVE_DIR = './weights/'
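# Note: every tree gets its own computation graph, so both training and
# prediction rebuild the graph from scratch every RESET_AFTER trees
# (checkpointing weights to disk in between) to keep the graph from growing
# without bound. See RNN_Model.run_epoch and RNN_Model.predict below.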
class Config(object):
"""Holds model hyperparams and data information.
Model objects are passed a Config() object at instantiation.
"""
embed_size = 35
label_size = 2
early_stopping = 2
anneal_threshold = 0.99
anneal_by = 1.5
max_epochs = 30
lr = 0.01
l2 = 0.02
model_name = MODEL_STR % (embed_size, l2, lr)
class RNN_Model():
def load_data(self):
"""Loads train/dev/test data and builds vocabulary."""
self.train_data, self.dev_data, self.test_data = tr.simplified_data(
700, 100, 200)
# build vocab from training data
self.vocab = Vocab()
train_sents = [t.get_words() for t in self.train_data]
self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
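  # The vocabulary is built from the training split only; words seen only in
  # dev/test are presumably mapped to an unknown token by Vocab (see utils.py).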
def inference(self, tree, predict_only_root=False):
"""For a given tree build the RNN models computation graph up to where it
may be used for inference.
Args:
tree: a Tree object on which to build the computation graph for the
RNN
Returns:
softmax_linear: Output tensor with the computed logits.
"""
node_tensors = self.add_model(tree.root)
if predict_only_root:
node_tensors = node_tensors[tree.root]
else:
node_tensors = [tensor for node, tensor in node_tensors.iteritems()
if node.label != 2]
node_tensors = tf.concat(0, node_tensors)
return self.add_projections(node_tensors)
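  # Note: nodes labeled 2 are filtered out above; in this binary sentiment
  # setup, label 2 appears to mark neutral/unlabeled phrases that carry no
  # training signal (run_epoch filters the same label from tree.labels).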
def add_model_vars(self):
    '''
    Your model contains the following parameters:
      embedding: tensor(vocab_size, embed_size)
      W1: tensor(2 * embed_size, embed_size)
      b1: tensor(1, embed_size)
      U: tensor(embed_size, output_size)
      bs: tensor(1, output_size)
    Hint: Add the TensorFlow variables to the graph here and *reuse* them
      while building the computation graphs for composition and projection
      for each tree.
    Hint: Use a variable_scope "Composition" for the composition layer, and
      "Projection" for the linear transformations preceding the softmax.
    '''
with tf.variable_scope('Embeddings'):
tf.get_variable('embeddings', [len(self.vocab), self.config.embed_size])
with tf.variable_scope('Composition'):
tf.get_variable('W1',
[2 * self.config.embed_size, self.config.embed_size])
tf.get_variable('b1', [1, self.config.embed_size])
with tf.variable_scope('Projection'):
tf.get_variable('U', [self.config.embed_size, self.config.label_size])
tf.get_variable('bs', [1, self.config.label_size])
def embed_word(self, word):
with tf.variable_scope('Embeddings', reuse=True):
embeddings = tf.get_variable('embeddings')
with tf.device('/cpu:0'):
return tf.expand_dims(
tf.nn.embedding_lookup(embeddings, self.vocab.encode(word)), 0)
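  # The lookup above is pinned to /cpu:0; in TensorFlow of this era it was
  # common practice to keep embedding lookups on the CPU, since large gather
  # ops on the GPU were slow or unsupported.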
def add_model(self, node):
"""Recursively build the model to compute the phrase embeddings in the tree
Hint: Refer to tree.py and vocab.py before you start. Refer to
the model's vocab with self.vocab
Hint: Reuse the "Composition" variable_scope here
Hint: Store a node's vector representation in node.tensor so it can be
used by it's parent
Hint: If node is a leaf node, it's vector representation is just that of
the
word vector (see tf.gather()).
Args:
node: a Node object
Returns:
node_tensors: Dict: key = Node, value = tensor(1, embed_size)
"""
with tf.variable_scope('Composition', reuse=True):
W1 = tf.get_variable('W1')
b1 = tf.get_variable('b1')
node_tensors = OrderedDict()
curr_node_tensor = None
if node.isLeaf:
curr_node_tensor = self.embed_word(node.word)
else:
node_tensors.update(self.add_model(node.left))
node_tensors.update(self.add_model(node.right))
node_input = tf.concat(
1, [node_tensors[node.left], node_tensors[node.right]])
curr_node_tensor = tf.nn.relu(tf.matmul(node_input, W1) + b1)
node_tensors[node] = curr_node_tensor
return node_tensors
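  # A minimal trace of the recursion above for a hypothetical 3-node tree
  # (two leaves under one root):
  #   add_model(root)
  #     -> add_model(left):  leaf, tensor = embed_word(left.word)
  #     -> add_model(right): leaf, tensor = embed_word(right.word)
  #     -> root tensor = relu([left_tensor ; right_tensor] . W1 + b1)
  # so node_tensors ends up holding one (1, embed_size) tensor per node.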
def add_projections(self, node_tensors):
"""Add projections to the composition vectors to compute the raw sentiment scores
Hint: Reuse the "Projection" variable_scope here
Args:
node_tensors: tensor(?, embed_size)
Returns:
output: tensor(?, label_size)
"""
with tf.variable_scope('Projection', reuse=True):
U = tf.get_variable('U')
bs = tf.get_variable('bs')
logits = tf.matmul(node_tensors, U) + bs
return logits
def loss(self, logits, labels):
"""Adds loss ops to the computational graph.
Hint: Use sparse_softmax_cross_entropy_with_logits
Hint: Remember to add l2_loss (see tf.nn.l2_loss)
Args:
logits: tensor(num_nodes, output_size)
labels: python list, len = num_nodes
Returns:
loss: tensor 0-D
"""
    softmax_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, tf.constant(labels)))
with tf.variable_scope('Composition', reuse=True):
W1 = tf.get_variable('W1')
with tf.variable_scope('Projection', reuse=True):
U = tf.get_variable('U')
    return softmax_loss + self.config.l2 * (
        tf.nn.l2_loss(W1) + tf.nn.l2_loss(U))
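  # Note that only W1 and U are L2-regularized above; the embeddings and the
  # bias terms b1 and bs are left out of the penalty.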
def training(self, loss_tensor):
"""Sets up the training Ops.
Creates an optimizer and applies the gradients to all trainable
variables.
The Op returned by this function is what must be passed to the
`sess.run()` call to cause the model to train. See
https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer
for more information.
Hint: Use tf.train.GradientDescentOptimizer for this model.
Calling optimizer.minimize() will return a train_op object.
Args:
loss: tensor 0-D
Returns:
train_op: tensorflow op for training.
"""
return tf.train.GradientDescentOptimizer(self.config.lr).minimize(
loss_tensor)
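  # A fresh train_op is built for every tree (see run_epoch). Plain gradient
  # descent works with that pattern because GradientDescentOptimizer keeps no
  # slot variables (unlike e.g. Adam), so restoring only the model weights
  # from the checkpoint is enough.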
def predictions(self, y):
"""Returns predictions from sparse scores
Args:
y: tensor(?, label_size)
Returns:
predictions: tensor(?)
"""
return tf.argmax(y, 1)
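  # For example, logits [[0.2, 1.3], [2.0, -1.0]] yield predictions [1, 0]:
  # tf.argmax picks the highest-scoring class along axis 1 (per row).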
def __init__(self, config):
self.config = config
self.load_data()
def predict(self, trees, weights_path, get_loss=False):
"""Make predictions from the provided model."""
results = []
losses = []
for i in xrange(int(math.ceil(len(trees) / float(RESET_AFTER)))):
with tf.Graph().as_default(), tf.Session() as sess:
self.add_model_vars()
saver = tf.train.Saver()
saver.restore(sess, weights_path)
for tree in trees[i * RESET_AFTER:(i + 1) * RESET_AFTER]:
logits = self.inference(tree, True)
predictions = self.predictions(logits)
root_prediction = sess.run(predictions)[0]
if get_loss:
root_label = tree.root.label
loss = sess.run(self.loss(logits, [root_label]))
losses.append(loss)
results.append(root_prediction)
return results, losses
def run_epoch(self, new_model=False, verbose=True):
step = 0
loss_history = []
random.shuffle(self.train_data)
while step < len(self.train_data):
with tf.Graph().as_default(), tf.Session() as sess:
self.add_model_vars()
if new_model:
init = tf.initialize_all_variables()
sess.run(init)
new_model = False
else:
saver = tf.train.Saver()
saver.restore(sess, SAVE_DIR + '%s.temp' % self.config.model_name)
for _ in xrange(RESET_AFTER):
if step >= len(self.train_data):
break
tree = self.train_data[step]
logits = self.inference(tree)
labels = [l for l in tree.labels if l != 2]
loss_tensor = self.loss(logits, labels)
train_op = self.training(loss_tensor)
loss_value, _ = sess.run([loss_tensor, train_op])
loss_history.append(loss_value)
if verbose:
sys.stdout.write('\r{} / {} : loss = {}'.format(step, len(
self.train_data), np.mean(loss_history)))
sys.stdout.flush()
step += 1
saver = tf.train.Saver()
if not os.path.exists(SAVE_DIR):
os.makedirs(SAVE_DIR)
saver.save(sess, SAVE_DIR + '%s.temp' % self.config.model_name)
train_preds, _ = self.predict(self.train_data,
SAVE_DIR + '%s.temp' % self.config.model_name)
val_preds, val_losses = self.predict(
self.dev_data,
SAVE_DIR + '%s.temp' % self.config.model_name,
get_loss=True)
train_labels = [t.root.label for t in self.train_data]
val_labels = [t.root.label for t in self.dev_data]
train_acc = np.equal(train_preds, train_labels).mean()
val_acc = np.equal(val_preds, val_labels).mean()
print
print 'Training acc (only root node): {}'.format(train_acc)
    print 'Validation acc (only root node): {}'.format(val_acc)
print self.make_conf(train_labels, train_preds)
print self.make_conf(val_labels, val_preds)
return train_acc, val_acc, loss_history, np.mean(val_losses)
def train(self, verbose=True):
complete_loss_history = []
train_acc_history = []
val_acc_history = []
prev_epoch_loss = float('inf')
best_val_loss = float('inf')
best_val_epoch = 0
stopped = -1
for epoch in xrange(self.config.max_epochs):
print 'epoch %d' % epoch
if epoch == 0:
train_acc, val_acc, loss_history, val_loss = self.run_epoch(
new_model=True)
else:
train_acc, val_acc, loss_history, val_loss = self.run_epoch()
complete_loss_history.extend(loss_history)
train_acc_history.append(train_acc)
val_acc_history.append(val_acc)
      # learning rate annealing
epoch_loss = np.mean(loss_history)
if epoch_loss > prev_epoch_loss * self.config.anneal_threshold:
self.config.lr /= self.config.anneal_by
print 'annealed lr to %f' % self.config.lr
prev_epoch_loss = epoch_loss
      # save if model has improved on validation
if val_loss < best_val_loss:
shutil.copyfile(SAVE_DIR + '%s.temp' % self.config.model_name,
SAVE_DIR + '%s' % self.config.model_name)
best_val_loss = val_loss
best_val_epoch = epoch
      # if the model has not improved for a while, stop early
      if epoch - best_val_epoch > self.config.early_stopping:
        stopped = epoch
        break
if verbose:
sys.stdout.write('\r')
sys.stdout.flush()
print '\n\nstopped at %d\n' % stopped
return {
'loss_history': complete_loss_history,
'train_acc_history': train_acc_history,
'val_acc_history': val_acc_history,
}
def make_conf(self, labels, predictions):
confmat = np.zeros([2, 2])
for l, p in itertools.izip(labels, predictions):
confmat[l, p] += 1
return confmat
def plot_loss_history(stats):
plt.plot(stats['loss_history'])
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.savefig('loss_history.png')
plt.show()
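# A companion plot for the accuracy curves; a minimal sketch that assumes only
# the 'train_acc_history' and 'val_acc_history' keys returned by
# RNN_Model.train above.
def plot_accuracy_history(stats):
  plt.figure()
  plt.plot(stats['train_acc_history'], label='train')
  plt.plot(stats['val_acc_history'], label='validation')
  plt.title('Root-node accuracy per epoch')
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  plt.legend()
  plt.savefig('accuracy_history.png')
  plt.show()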
def test_RNN():
"""Test RNN model implementation.
You can use this function to test your implementation of the Named Entity
Recognition network. When debugging, set max_epochs in the Config object to
1
so you can rapidly iterate.
"""
config = Config()
model = RNN_Model(config)
start_time = time.time()
stats = model.train(verbose=True)
print 'Training time: {}'.format(time.time() - start_time)
plot_loss_history(stats)
start_time = time.time()
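  # Note: this evaluates the '.temp' checkpoint, i.e. the most recently
  # trained weights; train() also copies the best validation checkpoint to
  # SAVE_DIR + model_name (without '.temp') if early-stopped weights are
  # preferred.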
val_preds, val_losses = model.predict(
model.dev_data,
SAVE_DIR + '%s.temp' % model.config.model_name,
get_loss=True)
val_labels = [t.root.label for t in model.dev_data]
val_acc = np.equal(val_preds, val_labels).mean()
  print 'Validation acc: {}'.format(val_acc)
print 'Test'
print '=-=-='
predictions, _ = model.predict(model.test_data,
SAVE_DIR + '%s.temp' % model.config.model_name)
labels = [t.root.label for t in model.test_data]
print model.make_conf(labels, predictions)
test_acc = np.equal(predictions, labels).mean()
print 'Test acc: {}'.format(test_acc)
print 'Time to run inference on dev+test: {}'.format(time.time() - start_time)
if __name__ == '__main__':
test_RNN()