This repository has been archived by the owner on Jul 7, 2023. It is now read-only.

Commit

merge from github
PiperOrigin-RevId: 160671813
Lukasz Kaiser committed Jul 1, 2017
1 parent 302d0ca commit f3e5859
Showing 3 changed files with 40 additions and 14 deletions.
9 changes: 8 additions & 1 deletion .gitignore
@@ -1,9 +1,16 @@
 # Compiled python modules.
 *.pyc
 
+# Byte-compiled
+_pycache__/
+
 # Python egg metadata, regenerated from source files by setuptools.
 /*.egg-info
 
-# PyPI distribution artificats
+# PyPI distribution artifacts.
 build/
 dist/
+
+# Sublime project files
+*.sublime-project
+*.sublime-workspace
11 changes: 7 additions & 4 deletions tensor2tensor/data_generators/text_encoder.py
@@ -310,21 +310,24 @@ def build_to_target_size(cls,
     tf.logging.info("Alphabet contains %d characters" % len(alphabet_set))
 
     def bisect(min_val, max_val):
+      """Bisection to find the right size."""
       present_count = (max_val + min_val) // 2
       tf.logging.info("Trying min_count %d" % present_count)
       subtokenizer = cls()
       subtokenizer.build_from_token_counts(token_counts, alphabet_set,
                                            present_count, num_iterations)
       if min_val >= max_val or subtokenizer.vocab_size == target_size:
         return subtokenizer
+
       if subtokenizer.vocab_size > target_size:
         other_subtokenizer = bisect(present_count + 1, max_val)
       else:
         other_subtokenizer = bisect(min_val, present_count - 1)
-        if (abs(other_subtokenizer.vocab_size - target_size) <
-            abs(subtokenizer.vocab_size - target_size)):
-          return other_subtokenizer
-        return subtokenizer
+
+      if (abs(other_subtokenizer.vocab_size - target_size) <
+          abs(subtokenizer.vocab_size - target_size)):
+        return other_subtokenizer
+      return subtokenizer
 
     return bisect(min_val, max_val)
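
This hunk appears to move the closeness comparison out of the else branch so that it runs after either recursive call, meaning bisect always returns whichever candidate vocabulary lands nearer target_size. A minimal standalone sketch of the same bisection idea; bisect_to_target_size and the build_vocab callable are hypothetical stand-ins for SubwordTextEncoder.build_to_target_size and build_from_token_counts:

def bisect_to_target_size(build_vocab, target_size, min_val, max_val):
  """Binary-search a min_count whose vocabulary size is close to target_size.

  build_vocab(min_count) is a hypothetical stand-in that builds a subword
  vocabulary and returns an object with a vocab_size attribute.
  """
  present_count = (max_val + min_val) // 2
  candidate = build_vocab(present_count)
  if min_val >= max_val or candidate.vocab_size == target_size:
    return candidate

  if candidate.vocab_size > target_size:
    # Too many subtokens: a larger min_count prunes the rarer ones.
    other = bisect_to_target_size(build_vocab, target_size,
                                  present_count + 1, max_val)
  else:
    other = bisect_to_target_size(build_vocab, target_size,
                                  min_val, present_count - 1)

  # After either recursive call, keep whichever candidate lands closer.
  if abs(other.vocab_size - target_size) < abs(candidate.vocab_size - target_size):
    return other
  return candidate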

34 changes: 25 additions & 9 deletions tensor2tensor/models/bluenet.py
@@ -77,7 +77,8 @@ def run_binary_modules(modules, cur1, cur2, hparams):
   """Run binary modules."""
   selection_var = tf.get_variable("selection", [len(modules)],
                                   initializer=tf.zeros_initializer())
-  inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01)
+  inv_t = 100.0 * common_layers.inverse_exp_decay(
+      hparams.anneal_until, min_value=0.01)
   selected_weights = tf.nn.softmax(selection_var * inv_t)
   all_res = [modules[n](cur1, cur2, hparams) for n in xrange(len(modules))]
   all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0)
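
In run_binary_modules (and the unary variants below), the softmax over selection_var is sharpened by inv_t, which is 100.0 times an inverse-exponential-decay term that now ramps over hparams.anneal_until steps instead of a hard-coded 100000. A toy plain-Python illustration of why a growing multiplier makes the selection increasingly one-hot; the logit values here are made up:

import math

def softmax(logits):
  exps = [math.exp(l - max(logits)) for l in logits]
  total = sum(exps)
  return [e / total for e in exps]

selection_var = [0.2, 0.5, 0.1]   # made-up learned logits, one per module
for inv_t in [1.0, 10.0, 100.0]:  # inv_t grows as training progresses
  print(inv_t, [round(w, 3) for w in softmax([s * inv_t for s in selection_var])])
# As inv_t grows, the weights concentrate on the highest-logit module,
# so the mixture gradually commits to a single module.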
@@ -89,7 +90,8 @@ def run_unary_modules_basic(modules, cur, hparams):
   """Run unary modules."""
   selection_var = tf.get_variable("selection", [len(modules)],
                                   initializer=tf.zeros_initializer())
-  inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01)
+  inv_t = 100.0 * common_layers.inverse_exp_decay(
+      hparams.anneal_until, min_value=0.01)
   selected_weights = tf.nn.softmax(selection_var * inv_t)
   all_res = [modules[n](cur, hparams) for n in xrange(len(modules))]
   all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0)
@@ -109,7 +111,8 @@ def run_unary_modules_sample(modules, cur, hparams, k):
                      lambda: tf.zeros_like(cur),
                      lambda i=n: modules[i](cur, hparams))
              for n in xrange(len(modules))]
-  inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01)
+  inv_t = 100.0 * common_layers.inverse_exp_decay(
+      hparams.anneal_until, min_value=0.01)
   selected_weights = tf.nn.softmax(selection_var * inv_t - 1e9 * (1.0 - to_run))
   all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0)
   res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1])
@@ -122,6 +125,14 @@ def run_unary_modules(modules, cur, hparams):
   return run_unary_modules_sample(modules, cur, hparams, 4)
 
 
+def batch_deviation(x):
+  """Average deviation of the batch."""
+  x_mean = tf.reduce_mean(x, axis=[0], keep_dims=True)
+  x_variance = tf.reduce_mean(
+      tf.square(x - x_mean), axis=[0], keep_dims=True)
+  return tf.reduce_mean(tf.sqrt(x_variance))
+
+
 @registry.register_model
 class BlueNet(t2t_model.T2TModel):
 
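
The new batch_deviation(x) takes the standard deviation across the batch axis at each position and averages it over the remaining dimensions. A NumPy sketch of the same computation for reference; the shapes are made up, and this is an illustration rather than the T2T code:

import numpy as np

def batch_deviation_np(x):
  # Variance across the batch axis at each position, then the mean of the
  # per-position standard deviations.
  x_mean = x.mean(axis=0, keepdims=True)
  x_variance = ((x - x_mean) ** 2).mean(axis=0, keepdims=True)
  return np.sqrt(x_variance).mean()

x = np.random.randn(8, 4, 4, 16)  # made-up [batch, height, width, channels]
print(batch_deviation_np(x))      # close to 1.0 for unit-variance inputs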
@@ -153,14 +164,15 @@ def run_unary(x, name):
         with tf.variable_scope("conv"):
           x = run_unary_modules(conv_modules, x, hparams)
           x.set_shape(x_shape)
-      return x
+      return tf.nn.dropout(x, 1.0 - hparams.dropout), batch_deviation(x)
 
-    cur1, cur2 = inputs, inputs
+    cur1, cur2, extra_loss = inputs, inputs, 0.0
     cur_shape = inputs.get_shape()
     for i in xrange(hparams.num_hidden_layers):
       with tf.variable_scope("layer_%d" % i):
-        cur1 = run_unary(cur1, "unary1")
-        cur2 = run_unary(cur2, "unary2")
+        cur1, loss1 = run_unary(cur1, "unary1")
+        cur2, loss2 = run_unary(cur2, "unary2")
+        extra_loss += (loss1 + loss2) / float(hparams.num_hidden_layers)
         with tf.variable_scope("binary1"):
           next1 = run_binary_modules(binary_modules, cur1, cur2, hparams)
           next1.set_shape(cur_shape)
@@ -169,7 +181,9 @@ def run_unary(x, name):
           next2.set_shape(cur_shape)
         cur1, cur2 = next1, next2
 
-    return cur1
+    anneal = common_layers.inverse_exp_decay(hparams.anneal_until)
+    extra_loss *= hparams.batch_deviation_loss_factor * anneal
+    return cur1, extra_loss
 
 
 @registry.register_hparams
@@ -185,7 +199,7 @@ def bluenet_base():
   hparams.num_hidden_layers = 8
   hparams.kernel_height = 3
   hparams.kernel_width = 3
-  hparams.learning_rate_decay_scheme = "exp50k"
+  hparams.learning_rate_decay_scheme = "exp10k"
   hparams.learning_rate = 0.05
   hparams.learning_rate_warmup_steps = 3000
   hparams.initializer_gain = 1.0
@@ -196,6 +210,8 @@ def bluenet_base():
   hparams.optimizer_adam_beta1 = 0.85
   hparams.optimizer_adam_beta2 = 0.997
   hparams.add_hparam("imagenet_use_2d", True)
+  hparams.add_hparam("anneal_until", 40000)
+  hparams.add_hparam("batch_deviation_loss_factor", 0.001)
   return hparams
 
 
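
The two new hyperparameters work together: the model body scales the accumulated batch-deviation loss by batch_deviation_loss_factor times an anneal term from common_layers.inverse_exp_decay(hparams.anneal_until). Assuming that schedule ramps roughly exponentially from about 0.01 at step 0 up to 1.0 at anneal_until (approx_anneal below is an assumed stand-in, not the library implementation), the penalty weight evolves like this:

def approx_anneal(step, anneal_until=40000, min_value=0.01):
  # Assumed shape of the schedule: exponential ramp from min_value to 1.0,
  # flat at 1.0 after anneal_until steps.
  frac = min(float(step) / anneal_until, 1.0)
  return min_value ** (1.0 - frac)

for step in [0, 10000, 20000, 40000, 80000]:
  weight = 0.001 * approx_anneal(step)  # batch_deviation_loss_factor * anneal
  print(step, weight)
# The batch-deviation penalty is nearly off early in training and reaches
# its full 0.001 weight once the global step passes anneal_until.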
