Merge pull request #646 from rsepassi/push
v1.5.5
lukaszkaiser authored Mar 10, 2018
2 parents af82068 + 688f4d5 commit 8bdecbe
Showing 32 changed files with 883 additions and 280 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -16,7 +16,7 @@ matrix:
- python: "3.6"
env: TF_VERSION="1.4.*"
- python: "3.6"
env: TF_VERSION="1.6.*"
env: TF_VERSION="1.5.*"
before_install:
- echo "deb [arch=amd64] http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | sudo tee /etc/apt/sources.list.d/tensorflow-serving.list
- curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | sudo apt-key add -
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE.md → ISSUE_TEMPLATE.md
@@ -6,7 +6,7 @@
### *TensorFlow* and *tensor2tensor* versions

<!-- **Note** Run `pip list | grep tensor` to include TensorFlow and tensor2tensor versions -->
<!-- **Note** Run `pip freeze | grep tensor` to get versions -->

>
@@ -16,7 +16,7 @@
### In case of bug report: Error log

<!-- Please use code markdown to format output messages. -->
<!-- Please use code markdown (```) to format output messages. -->
<!-- See https://help.github.com/articles/creating-and-highlighting-code-blocks/ -->

>
2 changes: 1 addition & 1 deletion README.md
@@ -15,7 +15,7 @@ welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CO
of deep learning models and datasets designed to make deep learning more
accessible and [accelerate ML
research](https://research.googleblog.com/2017/06/accelerating-deep-learning-research.html).
is actively used and maintained by researchers and engineers within the
T2T is actively used and maintained by researchers and engineers within the
[Google Brain team](https://research.google.com/teams/brain/) and a community
of users. We're eager to collaborate with you too, so feel free to
[open an issue on GitHub](https://github.com/tensorflow/tensor2tensor/issues)
11 changes: 11 additions & 0 deletions docs/cloud_tpu.md
@@ -18,6 +18,17 @@ See the official tutorial for [running Transfomer
on Cloud TPUs](https://cloud.google.com/tpu/docs/tutorials/transformer)
for some examples and try out your own problems.

Image Transformer:
* `imagetransformer` with `imagetransformer_base_tpu` (or
`imagetransformer_tiny_tpu`)
* `img2img_transformer` with `img2img_transformer_base_tpu` (or
`img2img_transformer_tiny_tpu`)

You can run the `ImageTransformer` model on problems like unconditional or
conditional image generation, and the `Img2ImgTransformer` model on
super-resolution. We run these models on datasets such as CelebA, CIFAR, and
ImageNet, but they should work with any other image dataset.
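
As a rough sketch of how this fits together (assuming the `t2t-trainer` flag
names from the T2T quick start and the Cloud TPU tutorial linked above; the
problem name, directories, and TPU master address below are placeholders, not
part of this commit):

```
# A minimal sketch, not part of this commit: flag names follow the T2T
# quick start of this era and may differ across versions; the problem,
# directories, and TPU master address are placeholders.
t2t-trainer \
  --model=imagetransformer \
  --hparams_set=imagetransformer_base_tpu \
  --problems=image_celeba \
  --data_dir=$DATA_DIR \
  --output_dir=$OUT_DIR \
  --train_steps=100000 \
  --eval_steps=100 \
  --use_tpu=True \
  --master=$TPU_MASTER
```

For the super-resolution setting, the analogous sketch would swap in
`img2img_transformer` with `img2img_transformer_base_tpu`.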

Residual networks:
* `resnet` with `resnet_50` (or `resnet_18` or `resnet_34`)
* `revnet` with `revnet_104` (or `revnet_38_cifar`)
4 changes: 2 additions & 2 deletions docs/walkthrough.md
@@ -15,7 +15,7 @@ welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CO
of deep learning models and datasets designed to make deep learning more
accessible and [accelerate ML
research](https://research.googleblog.com/2017/06/accelerating-deep-learning-research.html).
is actively used and maintained by researchers and engineers within the
T2T is actively used and maintained by researchers and engineers within the
[Google Brain team](https://research.google.com/teams/brain/) and a community
of users. We're eager to collaborate with you too, so feel free to
[open an issue on GitHub](https://github.com/tensorflow/tensor2tensor/issues)
@@ -154,7 +154,7 @@ For all translation problems, we suggest to try the Transformer model:
this should reach a BLEU score of about 28 on the English-German data-set,
which is close to state-of-the art. If training on a single GPU, try the
`--hparams_set=transformer_base_single_gpu` setting. For very good results
or larger data-sets (e.g., for English-French)m, try the big model
or larger data-sets (e.g., for English-French), try the big model
with `--hparams_set=transformer_big`.
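
For illustration, a single-GPU English-German run might be launched along
these lines (a sketch assuming the quick-start flag names of this T2T era;
the plural `--problems` flag and the directory variables are assumptions,
not text from this commit):

```
# Sketch only: flag names are assumed from the T2T quick start of this era
# and may differ across versions; $DATA_DIR and $TRAIN_DIR are placeholders
# for your data and checkpoint directories.
t2t-trainer \
  --problems=translate_ende_wmt32k \
  --model=transformer \
  --hparams_set=transformer_base_single_gpu \
  --data_dir=$DATA_DIR \
  --output_dir=$TRAIN_DIR
```

For the big model on larger data-sets, the same command would use
`--hparams_set=transformer_big`.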

## Basics
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@

setup(
name='tensor2tensor',
version='1.5.4',
version='1.5.5',
description='Tensor2Tensor',
author='Google Inc.',
author_email='[email protected]',
20 changes: 12 additions & 8 deletions tensor2tensor/data_generators/cifar.py
@@ -124,7 +124,8 @@ def preprocess_example(self, example, mode, unused_hparams):
image.set_shape([_CIFAR10_IMAGE_SIZE, _CIFAR10_IMAGE_SIZE, 3])
if mode == tf.estimator.ModeKeys.TRAIN:
image = image_utils.cifar_image_augmentation(image)
image = tf.image.per_image_standardization(image)
if not self._was_reversed:
image = tf.image.per_image_standardization(image)
example["inputs"] = image
return example

@@ -151,7 +152,8 @@ class ImageCifar10Plain(ImageCifar10):
def preprocess_example(self, example, mode, unused_hparams):
image = example["inputs"]
image.set_shape([_CIFAR10_IMAGE_SIZE, _CIFAR10_IMAGE_SIZE, 3])
image = tf.image.per_image_standardization(image)
if not self._was_reversed:
image = tf.image.per_image_standardization(image)
example["inputs"] = image
return example

@@ -179,7 +181,8 @@ def dataset_filename(self):
def preprocess_example(self, example, mode, unused_hparams):
image = example["inputs"]
image = image_utils.resize_by_area(image, 8)
image = tf.image.per_image_standardization(image)
if not self._was_reversed:
image = tf.image.per_image_standardization(image)
example["inputs"] = image
return example

@@ -192,7 +195,6 @@ def dataset_filename(self):
return "image_cifar10_plain" # Reuse CIFAR-10 plain data.

def preprocess_example(self, example, unused_mode, unused_hparams):

inputs = example["inputs"]
# For Img2Img resize input and output images as desired.
example["inputs"] = image_utils.resize_by_area(inputs, 8)
@@ -330,7 +332,8 @@ def preprocess_example(self, example, mode, unused_hparams):
image.set_shape([_CIFAR100_IMAGE_SIZE, _CIFAR100_IMAGE_SIZE, 3])
if mode == tf.estimator.ModeKeys.TRAIN:
image = image_utils.cifar_image_augmentation(image)
image = tf.image.per_image_standardization(image)
if not self._was_reversed:
image = tf.image.per_image_standardization(image)
example["inputs"] = image
return example

@@ -357,7 +360,8 @@ class ImageCifar100Plain(ImageCifar100):
def preprocess_example(self, example, mode, unused_hparams):
image = example["inputs"]
image.set_shape([_CIFAR100_IMAGE_SIZE, _CIFAR100_IMAGE_SIZE, 3])
image = tf.image.per_image_standardization(image)
if not self._was_reversed:
image = tf.image.per_image_standardization(image)
example["inputs"] = image
return example

@@ -385,7 +389,8 @@ def dataset_filename(self):
def preprocess_example(self, example, mode, unused_hparams):
image = example["inputs"]
image = image_utils.resize_by_area(image, 8)
image = tf.image.per_image_standardization(image)
if not self._was_reversed:
image = tf.image.per_image_standardization(image)
example["inputs"] = image
return example

@@ -398,7 +403,6 @@ def dataset_filename(self):
return "image_cifar100_plain" # Reuse CIFAR-100 plain data.

def preprocess_example(self, example, unused_mode, unused_hparams):

inputs = example["inputs"]
# For Img2Img resize input and output images as desired.
example["inputs"] = image_utils.resize_by_area(inputs, 8)
5 changes: 3 additions & 2 deletions tensor2tensor/data_generators/gym.py
@@ -35,6 +35,8 @@
import tensorflow as tf




flags = tf.flags
FLAGS = flags.FLAGS

@@ -157,7 +159,6 @@ def num_steps(self):
return 5000



@registry.register_problem
class GymPongTrajectoriesFromPolicy(GymDiscreteProblem):
"""Pong game, loaded actions."""
Expand Down Expand Up @@ -197,7 +198,7 @@ def generator(self, data_dir, tmp_dir):
model_saver.restore(sess, FLAGS.model_path)
for item in super(GymPongTrajectoriesFromPolicy,
self).generator(data_dir, tmp_dir):
yield item
yield item

# TODO(blazej0): For training of atari agents wrappers are usually used.
# Below we have a hacky solution which is a workaround to be used together
13 changes: 12 additions & 1 deletion tensor2tensor/data_generators/image_utils.py
@@ -26,6 +26,7 @@
from tensor2tensor.data_generators import generator_utils
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_encoder
from tensor2tensor.utils import metrics
from tensor2tensor.utils import registry

import tensorflow as tf
@@ -64,9 +65,19 @@ def example_reading_spec(self, label_repr=None):
return data_fields, data_items_to_decoders

def preprocess_example(self, example, mode, hparams):
example["inputs"] = tf.image.per_image_standardization(example["inputs"])
if not self._was_reversed:
example["inputs"] = tf.image.per_image_standardization(example["inputs"])
return example

def eval_metrics(self):
eval_metrics = [
metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5,
metrics.Metrics.ACC_PER_SEQ, metrics.Metrics.NEG_LOG_PERPLEXITY
]
if self._was_reversed:
eval_metrics += [metrics.Metrics.IMAGE_SUMMARY]
return eval_metrics


class Image2ClassProblem(ImageProblem):
"""Base class for image classification problems."""
3 changes: 2 additions & 1 deletion tensor2tensor/data_generators/imagenet.py
@@ -334,7 +334,8 @@ def distorted_bounding_box_crop(image,
Returns:
(cropped image `Tensor`, distorted bbox `Tensor`).
"""
with tf.name_scope(scope, default_name="distorted_bounding_box_crop", values=[image, bbox]):
with tf.name_scope(scope, default_name="distorted_bounding_box_crop",
values=[image, bbox]):
# Each bounding box has shape [1, num_boxes, box coords] and
# the coordinates are ordered [ymin, xmin, ymax, xmax].

75 changes: 65 additions & 10 deletions tensor2tensor/data_generators/librispeech.py
@@ -39,7 +39,7 @@
"train-other-500"
],
]
_LIBRISPEECH_TEST_DATASETS = [
_LIBRISPEECH_DEV_DATASETS = [
[
"http://www.openslr.org/resources/12/dev-clean.tar.gz",
"dev-clean"
@@ -49,6 +49,16 @@
"dev-other"
],
]
_LIBRISPEECH_TEST_DATASETS = [
[
"http://www.openslr.org/resources/12/test-clean.tar.gz",
"test-clean"
],
[
"http://www.openslr.org/resources/12/test-other.tar.gz",
"test-other"
],
]


def _collect_data(directory, input_ext, transcription_ext):
@@ -72,7 +82,7 @@ def _collect_data(directory, input_ext, transcription_ext):
assert key not in data_files
media_name = "%s.%s"%(media_base, input_ext)
media_path = os.path.join(root, media_name)
data_files[key] = (media_path, label)
data_files[key] = (media_base, media_path, label)
return data_files


@@ -82,7 +92,8 @@ class Librispeech(speech_recognition.SpeechRecognitionProblem):

# Select only the clean data
TRAIN_DATASETS = _LIBRISPEECH_TRAIN_DATASETS
DEV_DATASETS = _LIBRISPEECH_TEST_DATASETS
DEV_DATASETS = _LIBRISPEECH_DEV_DATASETS
TEST_DATASETS = _LIBRISPEECH_TEST_DATASETS

@property
def num_shards(self):
@@ -96,6 +107,10 @@ def use_subword_tokenizer(self):
def num_dev_shards(self):
return 1

@property
def num_test_shards(self):
return 1

@property
def use_train_shards_for_dev(self):
"""If true, we only generate training data and hold out shards for dev."""
@@ -127,20 +142,31 @@ def generator(self, data_dir, tmp_dir, datasets,
audio_encoder = encoders["waveforms"]
text_encoder = encoders["targets"]

for media_file, text_data in sorted(data_pairs)[start_from:]:
for utt_id, media_file, text_data in sorted(data_pairs)[start_from:]:
if how_many > 0 and i == how_many:
return
i += 1
wav_data = audio_encoder.encode(media_file)
spk_id, unused_book_id, _ = utt_id.split("-")
yield {
"waveforms": audio_encoder.encode(media_file),
"targets": text_encoder.encode(text_data)
"waveforms": wav_data,
"waveform_lens": [len(wav_data)],
"targets": text_encoder.encode(text_data),
"raw_transcript": [text_data],
"utt_id": [utt_id],
"spk_id": [spk_id],
}

def generate_data(self, data_dir, tmp_dir, task_id=-1):
train_paths = self.training_filepaths(
data_dir, self.num_shards, shuffled=False)
dev_paths = self.dev_filepaths(
data_dir, self.num_dev_shards, shuffled=False)
test_paths = self.test_filepaths(
data_dir, self.num_test_shards, shuffled=True)

generator_utils.generate_files(
self.generator(data_dir, tmp_dir, self.TEST_DATASETS), test_paths)

if self.use_train_shards_for_dev:
all_paths = train_paths + dev_paths
@@ -153,22 +179,51 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1):
self.generator(data_dir, tmp_dir, self.DEV_DATASETS), dev_paths)


@registry.register_problem()
class LibrispeechTrainFullTestClean(Librispeech):
"""Problem to train on full 960h, but evaluate on clean data only."""

def training_filepaths(self, data_dir, num_shards, shuffled):
return Librispeech.training_filepaths(data_dir, num_shards, shuffled)

def dev_filepaths(self, data_dir, num_shards, shuffled):
return LibrispeechClean.dev_filepaths(data_dir, num_shards, shuffled)

def test_filepaths(self, data_dir, num_shards, shuffled):
return LibrispeechClean.test_filepaths(data_dir, num_shards, shuffled)

def generate_data(self, data_dir, tmp_dir, task_id=-1):
raise Exception("Generate librispeech and librispeech_clean data.")


@registry.register_problem()
class LibrispeechCleanSmall(Librispeech):
"""Problem spec for Librispeech using 100h clean train data."""
"""Problem spec for Librispeech using 100h clean train and clean eval data."""

# Select only the clean data
TRAIN_DATASETS = _LIBRISPEECH_TRAIN_DATASETS[:1]
DEV_DATASETS = _LIBRISPEECH_TEST_DATASETS[:1]
DEV_DATASETS = _LIBRISPEECH_DEV_DATASETS[:1]
TEST_DATASETS = _LIBRISPEECH_TEST_DATASETS[:1]


@registry.register_problem()
class LibrispeechClean(Librispeech):
"""Problem spec for Librispeech using 460h clean train data."""
"""Problem spec for Librispeech using 460h clean train and clean eval data."""

# Select only the clean data
TRAIN_DATASETS = _LIBRISPEECH_TRAIN_DATASETS[:2]
DEV_DATASETS = _LIBRISPEECH_TEST_DATASETS[:1]
DEV_DATASETS = _LIBRISPEECH_DEV_DATASETS[:1]
TEST_DATASETS = _LIBRISPEECH_TEST_DATASETS[:1]


@registry.register_problem()
class LibrispeechNoisy(Librispeech):
"""Problem spec for Librispeech using 400h noisy train and noisy eval data."""

# Select only the clean data
TRAIN_DATASETS = _LIBRISPEECH_TRAIN_DATASETS[2:]
DEV_DATASETS = _LIBRISPEECH_DEV_DATASETS[1:]
TEST_DATASETS = _LIBRISPEECH_TEST_DATASETS[1:]


# TODO(lukaszkaiser): clean up hparams or remove from here.
3 changes: 2 additions & 1 deletion tensor2tensor/data_generators/mnist.py
@@ -162,7 +162,8 @@ def train_shards(self):
def preprocess_example(self, example, mode, unused_hparams):
image = example["inputs"]
image.set_shape([_MNIST_IMAGE_SIZE, _MNIST_IMAGE_SIZE, 1])
image = tf.image.per_image_standardization(image)
if not self._was_reversed:
image = tf.image.per_image_standardization(image)
example["inputs"] = image
return example

4 changes: 4 additions & 0 deletions tensor2tensor/data_generators/ptb.py
@@ -82,6 +82,10 @@ def _maybe_download_corpus(tmp_dir, vocab_type):
Args:
tmp_dir: directory containing dataset.
vocab_type: which vocabulary are we using.
Returns:
The list of names of files.
"""
filename = os.path.basename(PTB_URL)
compressed_filepath = generator_utils.maybe_download(
Expand Down