No public description

PiperOrigin-RevId: 714957630
tensorflow · Jan 13, 2025 · 65339fa · 65339fa
1 parent 41cc801
commit 65339fa
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 8 deletions.
diff --git a/official/vision/dataloaders/segmentation_input.py b/official/vision/dataloaders/segmentation_input.py
@@ -157,9 +157,14 @@ def _prepare_image_and_label(self, data):
         dtype=tf.uint8,
     )
     image = tf.reshape(image, (height, width, self._image_feature.num_channels))
-    # Normalizes the image feature with mean and std values, which are divided
-    # by 255 because an uint8 image are re-scaled automatically. Images other
-    # than uint8 type will be wrongly normalized.
+    # Normalizes the image feature.
+    # The mean and stddev values are divided by 255 to ensure correct
+    # normalization, as the input `uint8` image is automatically converted to
+    # `float32` and rescaled to values in the range [0, 1] before the
+    # normalization happens (as a pre-processing step). So, we re-scale the
+    # mean and stddev values to the range [0, 1] beforehand.
+    # See `preprocess_ops.normalize_image` for details on the expected ranges
+    # for the image mean (`offset`) and stddev (`scale`).
     image = preprocess_ops.normalize_image(
         image,
         [mean / 255.0 for mean in self._image_feature.mean],

diff --git a/official/vision/ops/preprocess_ops.py b/official/vision/ops/preprocess_ops.py
@@ -82,13 +82,28 @@ def normalize_image(
 ) -> tf.Tensor:
   """Normalizes the image to zero mean and unit variance.
 
-  If the input image dtype is float, it is expected to either have values in
-  [0, 1) and offset is MEAN_NORM, or have values in [0, 255] and offset is
-  MEAN_RGB.
+  This function normalizes the input image by subtracting the `offset`
+  and dividing by the `scale`.
+
+  **Important Note about Input Types and Normalization:**
+
+  * **Integer Images:** If the input `image` is an integer type (e.g., `uint8`),
+    the provided `offset` and `scale` values should already be **normalized**
+    to the range [0, 1]. This is because the function converts integer images to
+    float32 with values in the range [0, 1] before the normalization happens.
+
+  * **Float Images:** If the input `image` is a float type (e.g., `float32`),
+    the `offset` and `scale` values should be in the **same range** as the
+    image data.
+      - If the image has values in [0, 1], the `offset` and `scale` should
+        also be in [0, 1].
+      - If the image has values in [0, 255], the `offset` and `scale` should
+        also be in [0, 255].
 
   Args:
-    image: A tf.Tensor in either (1) float dtype with values in range [0, 1) or
-      [0, 255], or (2) int type with values in range [0, 255].
+    image: A `tf.Tensor` in either:
+           (1) float dtype with values in range [0, 1] or [0, 255], or
+           (2) int type with values in range [0, 255].
     offset: A tuple of mean values to be subtracted from the image.
     scale: A tuple of normalization factors.