Merge pull request #75 from rshin/master

Add image augmentation for CIFAR-10
tensorflow · Jun 30, 2017 · aae9966
2 parents f95b7c9 + 24571fb
commit aae9966
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 0 deletions.
diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py
@@ -132,6 +132,17 @@ def image_augmentation(images, do_colors=False):
   return images
 
 
+def cifar_image_augmentation(images):
+  """Randomly crop and horizontally flip an image, CIFAR-10/100 style.
+
+  Recipe from https://arxiv.org/pdf/1608.06993v3.pdf (page 5). The rank-3 crop
+  size implies one HxWx3 image per call; TODO confirm against callers.
+  """
+  resized = tf.image.resize_image_with_crop_or_pad(images, 40, 40)
+  cropped = tf.random_crop(resized, [32, 32, 3])
+  return tf.image.random_flip_left_right(cropped)
+
+
 def flatten4d3d(x):
   """Flatten a 4d-tensor into a 3d-tensor by joining width and height."""
   xshape = tf.shape(x)

diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py
@@ -203,6 +203,12 @@ def preprocess(img):
             lambda img=inputs: resize(img))
       else:
         examples["inputs"] = tf.to_int64(resize(inputs))
+
+    elif ("image_cifar10" in data_file_pattern
+        and mode == tf.contrib.learn.ModeKeys.TRAIN):
+      examples["inputs"] = common_layers.cifar_image_augmentation(
+          examples["inputs"])
+
   elif "audio" in data_file_pattern:
     # Reshape audio to proper shape
     sample_count = tf.to_int32(examples.pop("audio/sample_count"))