From 65339fa1e660773a4e0a8c303afda819c12212c9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jan 2025 07:18:51 -0800 Subject: [PATCH] No public description PiperOrigin-RevId: 714957630 --- .../vision/dataloaders/segmentation_input.py | 11 +++++--- official/vision/ops/preprocess_ops.py | 25 +++++++++++++++---- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/official/vision/dataloaders/segmentation_input.py b/official/vision/dataloaders/segmentation_input.py index 472855f0900..f91dbc4a77a 100644 --- a/official/vision/dataloaders/segmentation_input.py +++ b/official/vision/dataloaders/segmentation_input.py @@ -157,9 +157,14 @@ def _prepare_image_and_label(self, data): dtype=tf.uint8, ) image = tf.reshape(image, (height, width, self._image_feature.num_channels)) - # Normalizes the image feature with mean and std values, which are divided - # by 255 because an uint8 image are re-scaled automatically. Images other - # than uint8 type will be wrongly normalized. + # Normalizes the image feature. + # The mean and stddev values are divided by 255 to ensure correct + # normalization, as the input `uint8` image is automatically converted to + # `float32` and rescaled to values in the range [0, 1] before the + # normalization happens (as a pre-processing step). So, we re-scale the + # mean and stddev values to the range [0, 1] beforehand. + # See `preprocess_ops.normalize_image` for details on the expected ranges + # for the image mean (`offset`) and stddev (`scale`). image = preprocess_ops.normalize_image( image, [mean / 255.0 for mean in self._image_feature.mean], diff --git a/official/vision/ops/preprocess_ops.py b/official/vision/ops/preprocess_ops.py index a409ddefcbf..59e05500700 100644 --- a/official/vision/ops/preprocess_ops.py +++ b/official/vision/ops/preprocess_ops.py @@ -82,13 +82,28 @@ def normalize_image( ) -> tf.Tensor: """Normalizes the image to zero mean and unit variance. 
- If the input image dtype is float, it is expected to either have values in - [0, 1) and offset is MEAN_NORM, or have values in [0, 255] and offset is - MEAN_RGB. + This function normalizes the input image by subtracting the `offset` + and dividing by the `scale`. + + **Important Note about Input Types and Normalization:** + + * **Integer Images:** If the input `image` is an integer type (e.g., `uint8`), + the provided `offset` and `scale` values should already be **normalized** + to the range [0, 1]. This is because the function converts integer images to + float32 with values in the range [0, 1] before the normalization happens. + + * **Float Images:** If the input `image` is a float type (e.g., `float32`), + the `offset` and `scale` values should be in the **same range** as the + image data. + - If the image has values in [0, 1], the `offset` and `scale` should + also be in [0, 1]. + - If the image has values in [0, 255], the `offset` and `scale` should + also be in [0, 255]. Args: - image: A tf.Tensor in either (1) float dtype with values in range [0, 1) or - [0, 255], or (2) int type with values in range [0, 255]. + image: A `tf.Tensor` in either: + (1) float dtype with values in range [0, 1) or [0, 255], or + (2) int type with values in range [0, 255]. offset: A tuple of mean values to be subtracted from the image. scale: A tuple of normalization factors.