Add VectorizedBaseImageAugmentation layer (#1373)

LukeWood · web-flow · commit 08ca8d6697e1 · 2023-02-08T18:08:28.000-08:00
* implement vectorized base image augmentation layer

* Implement vectorized RandomContrast layer

* KPL performance

* Random contrast vectorized

* Vectorized contrast

* Fix vectorized base layer

* Fix vectorized base layer

* Add vectorized grayscale layer

* Remove random contrast

* Remove random contrast

* test_preserves_ragged_status_Grayscale

* test_preserves_ragged_status_Grayscale

* Fix

* Fix masks

* Fix masks

* rename to 'batched'

* Fix docstrings

* Fix docstrings

* Remove ragged method

* Begin ragged image support

* Begin ragged image support

* Begin ragged image support

* Begin ragged image support

* Begin ragged image support

* Performance benchmark

* Reformat

* Vectorized grayscale

* Fix ragged test case

* Fix ragged test case

* Fix ragged test case

* Fix ragged test case

* Fix ragged test case
diff --git a/benchmarks/vectorized_grayscale.py b/benchmarks/vectorized_grayscale.py
@@ -0,0 +1,178 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import time
+
+import matplotlib.pyplot as plt
+import tensorflow as tf
+import tensorflow.keras as keras
+
+from keras_cv.layers import Grayscale
+from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
+    BaseImageAugmentationLayer,
+)
+
+
+class OldGrayscale(BaseImageAugmentationLayer):
+    """Grayscale is a preprocessing layer that transforms RGB images to Grayscale images.
+    Input images should have values in the range of [0, 255].
+    Input shape:
+        3D (unbatched) or 4D (batched) tensor with shape:
+        `(..., height, width, channels)`, in `"channels_last"` format
+    Output shape:
+        3D (unbatched) or 4D (batched) tensor with shape:
+        `(..., height, width, channels)`, in `"channels_last"` format
+    Args:
+        output_channels:
+            Number of color channels present in the output image.
+            The output_channels can be 1 or 3. RGB image with shape
+            (..., height, width, 3) will have the following shapes
+            after the `Grayscale` operation:
+                 a. (..., height, width, 1) if output_channels = 1
+                 b. (..., height, width, 3) if output_channels = 3.
+    Usage:
+    ```python
+    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
+    to_grayscale = keras_cv.layers.preprocessing.Grayscale()
+    augmented_images = to_grayscale(images)
+    ```
+    """
+
+    def __init__(self, output_channels=1, **kwargs):
+        super().__init__(**kwargs)
+        self.output_channels = output_channels
+        # This layer may raise an error when running on GPU using auto_vectorize
+        self.auto_vectorize = False
+
+    def compute_image_signature(self, images):
+        # required because of the `output_channels` argument
+        if isinstance(images, tf.RaggedTensor):
+            ragged_spec = tf.RaggedTensorSpec(
+                shape=images.shape[1:3] + [self.output_channels],
+                ragged_rank=1,
+                dtype=self.compute_dtype,
+            )
+            return ragged_spec
+        return tf.TensorSpec(
+            images.shape[1:3] + [self.output_channels], self.compute_dtype
+        )
+
+    def _check_input_params(self, output_channels):
+        if output_channels not in [1, 3]:
+            raise ValueError(
+                "Received invalid argument output_channels. "
+                f"output_channels must be in 1 or 3. Got {output_channels}"
+            )
+        self.output_channels = output_channels
+
+    def augment_image(self, image, transformation=None, **kwargs):
+        grayscale = tf.image.rgb_to_grayscale(image)
+        if self.output_channels == 1:
+            return grayscale
+        elif self.output_channels == 3:
+            return tf.image.grayscale_to_rgb(grayscale)
+        else:
+            raise ValueError("Unsupported value for `output_channels`.")
+
+    def augment_bounding_boxes(self, bounding_boxes, **kwargs):
+        return bounding_boxes
+
+    def augment_label(self, label, transformation=None, **kwargs):
+        return label
+
+    def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
+        return segmentation_mask
+
+    def get_config(self):
+        config = {
+            "output_channels": self.output_channels,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+
+(x_train, _), _ = keras.datasets.cifar10.load_data()
+x_train = x_train.astype(float)
+
+x_train.shape
+
+
+images = []
+
+num_images = [1000, 2000, 5000, 10000]
+
+results = {}
+
+for aug in [Grayscale, OldGrayscale]:
+    c = aug.__name__
+
+    layer = aug()
+
+    runtimes = []
+    print(f"Timing {c}")
+
+    for n_images in num_images:
+        # warmup
+        layer(x_train[:n_images])
+
+        t0 = time.time()
+        r1 = layer(x_train[:n_images])
+        t1 = time.time()
+        runtimes.append(t1 - t0)
+        print(f"Runtime for {c}, n_images={n_images}: {t1-t0}")
+
+    results[c] = runtimes
+
+    c = aug.__name__ + " Graph Mode"
+
+    layer = aug()
+
+    @tf.function()
+    def apply_aug(inputs):
+        return layer(inputs)
+
+    runtimes = []
+    print(f"Timing {c}")
+
+    for n_images in num_images:
+        # warmup
+        apply_aug(x_train[:n_images])
+
+        t0 = time.time()
+        r1 = apply_aug(x_train[:n_images])
+        t1 = time.time()
+        runtimes.append(t1 - t0)
+        print(f"Runtime for {c}, n_images={n_images}: {t1-t0}")
+
+    results[c] = runtimes
+
+plt.figure()
+for key in results:
+    plt.plot(num_images, results[key], label=key)
+    plt.xlabel("Number images")
+
+plt.ylabel("Runtime (seconds)")
+plt.legend()
+plt.show()
+
+# Drop the eager OldGrayscale series so the remaining margins are easier to see
+del results["OldGrayscale"]
+
+plt.figure()
+for key in results:
+    plt.plot(num_images, results[key], label=key)
+    plt.xlabel("Number images")
+
+plt.ylabel("Runtime (seconds)")
+plt.legend()
+plt.show()
diff --git a/keras_cv/layers/preprocessing/grayscale.py b/keras_cv/layers/preprocessing/grayscale.py
@@ -14,13 +14,13 @@
 
 import tensorflow as tf
 
-from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
-    BaseImageAugmentationLayer,
+from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import (
+    VectorizedBaseImageAugmentationLayer,
 )
 
 
 @tf.keras.utils.register_keras_serializable(package="keras_cv")
-class Grayscale(BaseImageAugmentationLayer):
+class Grayscale(VectorizedBaseImageAugmentationLayer):
     """Grayscale is a preprocessing layer that transforms RGB images to Grayscale images.
     Input images should have values in the range of [0, 255].
 
@@ -50,21 +50,7 @@ class Grayscale(BaseImageAugmentationLayer):
     def __init__(self, output_channels=1, **kwargs):
         super().__init__(**kwargs)
         self.output_channels = output_channels
-        # This layer may raise an error when running on GPU using auto_vectorize
-        self.auto_vectorize = False
-
-    def compute_image_signature(self, images):
-        # required because of the `output_channels` argument
-        if isinstance(images, tf.RaggedTensor):
-            ragged_spec = tf.RaggedTensorSpec(
-                shape=images.shape[1:3] + [self.output_channels],
-                ragged_rank=1,
-                dtype=self.compute_dtype,
-            )
-            return ragged_spec
-        return tf.TensorSpec(
-            images.shape[1:3] + [self.output_channels], self.compute_dtype
-        )
+        self._check_input_params(output_channels)
 
     def _check_input_params(self, output_channels):
         if output_channels not in [1, 3]:
@@ -74,8 +60,19 @@ def _check_input_params(self, output_channels):
             )
         self.output_channels = output_channels
 
-    def augment_image(self, image, transformation=None, **kwargs):
-        grayscale = tf.image.rgb_to_grayscale(image)
+    def compute_ragged_image_signature(self, images):
+        ragged_spec = tf.RaggedTensorSpec(
+            shape=images.shape[1:3] + (self.output_channels,),
+            ragged_rank=1,
+            dtype=self.compute_dtype,
+        )
+        return ragged_spec
+
+    def augment_ragged_image(self, image, transformation, **kwargs):
+        return self.augment_images(image, transformations=transformation, **kwargs)
+
+    def augment_images(self, images, transformations=None, **kwargs):
+        grayscale = tf.image.rgb_to_grayscale(images)
         if self.output_channels == 1:
             return grayscale
         elif self.output_channels == 3:
@@ -86,11 +83,11 @@ def augment_image(self, image, transformation=None, **kwargs):
     def augment_bounding_boxes(self, bounding_boxes, **kwargs):
         return bounding_boxes
 
-    def augment_label(self, label, transformation=None, **kwargs):
-        return label
+    def augment_labels(self, labels, transformations=None, **kwargs):
+        return labels
 
-    def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
-        return segmentation_mask
+    def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs):
+        return segmentation_masks
 
     def get_config(self):
         config = {
diff --git a/keras_cv/layers/preprocessing/grayscale_test.py b/keras_cv/layers/preprocessing/grayscale_test.py
@@ -18,7 +18,7 @@
 
 class GrayscaleTest(tf.test.TestCase):
     def test_return_shapes(self):
-        xs = tf.ones((2, 512, 512, 3))
+        xs = tf.ones((2, 52, 24, 3))
 
         layer = preprocessing.Grayscale(
             output_channels=1,
@@ -30,12 +30,12 @@ def test_return_shapes(self):
         )
         xs2 = layer(xs, training=True)
 
-        self.assertEqual(xs1.shape, [2, 512, 512, 1])
-        self.assertEqual(xs2.shape, [2, 512, 512, 3])
+        self.assertEqual(xs1.shape, [2, 52, 24, 1])
+        self.assertEqual(xs2.shape, [2, 52, 24, 3])
 
     def test_in_tf_function(self):
         xs = tf.cast(
-            tf.stack([2 * tf.ones((100, 100, 3)), tf.ones((100, 100, 3))], axis=0),
+            tf.stack([2 * tf.ones((10, 10, 3)), tf.ones((10, 10, 3))], axis=0),
             tf.float32,
         )
 
@@ -61,12 +61,12 @@ def augment(x):
 
         xs2 = augment(xs)
 
-        self.assertEqual(xs1.shape, [2, 100, 100, 1])
-        self.assertEqual(xs2.shape, [2, 100, 100, 3])
+        self.assertEqual(xs1.shape, [2, 10, 10, 1])
+        self.assertEqual(xs2.shape, [2, 10, 10, 3])
 
     def test_non_square_image(self):
         xs = tf.cast(
-            tf.stack([2 * tf.ones((512, 1024, 3)), tf.ones((512, 1024, 3))], axis=0),
+            tf.stack([2 * tf.ones((52, 24, 3)), tf.ones((52, 24, 3))], axis=0),
             tf.float32,
         )
 
@@ -80,12 +80,12 @@ def test_non_square_image(self):
         )
         xs2 = layer(xs, training=True)
 
-        self.assertEqual(xs1.shape, [2, 512, 1024, 1])
-        self.assertEqual(xs2.shape, [2, 512, 1024, 3])
+        self.assertEqual(xs1.shape, [2, 52, 24, 1])
+        self.assertEqual(xs2.shape, [2, 52, 24, 3])
 
     def test_in_single_image(self):
         xs = tf.cast(
-            tf.ones((512, 512, 3)),
+            tf.ones((52, 24, 3)),
             dtype=tf.float32,
         )
 
@@ -99,5 +99,5 @@ def test_in_single_image(self):
         )
         xs2 = layer(xs, training=True)
 
-        self.assertEqual(xs1.shape, [512, 512, 1])
-        self.assertEqual(xs2.shape, [512, 512, 3])
+        self.assertEqual(xs1.shape, [52, 24, 1])
+        self.assertEqual(xs2.shape, [52, 24, 3])
diff --git a/keras_cv/layers/preprocessing/ragged_image_test.py b/keras_cv/layers/preprocessing/ragged_image_test.py
@@ -126,8 +126,8 @@ def test_preserves_ragged_status(self, layer_cls, init_args):
         layer = layer_cls(**init_args)
         inputs = tf.ragged.stack(
             [
-                tf.ones((512, 512, 3)),
-                tf.ones((600, 300, 3)),
+                tf.ones((5, 5, 3)),
+                tf.ones((8, 8, 3)),
             ]
         )
         outputs = layer(inputs)
@@ -138,8 +138,8 @@ def test_converts_ragged_to_dense(self, layer_cls, init_args):
         layer = layer_cls(**init_args)
         inputs = tf.ragged.stack(
             [
-                tf.ones((512, 512, 3)),
-                tf.ones((600, 300, 3)),
+                tf.ones((5, 5, 3)),
+                tf.ones((8, 8, 3)),
             ]
         )
         outputs = layer(inputs)
diff --git a/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py
diff --git a/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py

Original file line number	Diff line number	Diff line change
`@@ -126,8 +126,8 @@ def test_preserves_ragged_status(self, layer_cls, init_args):`
`126`	`126`	`layer = layer_cls(**init_args)`
`127`	`127`	`inputs = tf.ragged.stack(`
`128`	`128`	`[`
`129`		`- tf.ones((512, 512, 3)),`
`130`		`- tf.ones((600, 300, 3)),`
	`129`	`+ tf.ones((5, 5, 3)),`
	`130`	`+ tf.ones((8, 8, 3)),`
`131`	`131`	`]`
`132`	`132`	`)`
`133`	`133`	`outputs = layer(inputs)`
`@@ -138,8 +138,8 @@ def test_converts_ragged_to_dense(self, layer_cls, init_args):`
`138`	`138`	`layer = layer_cls(**init_args)`
`139`	`139`	`inputs = tf.ragged.stack(`
`140`	`140`	`[`
`141`		`- tf.ones((512, 512, 3)),`
`142`		`- tf.ones((600, 300, 3)),`
	`141`	`+ tf.ones((5, 5, 3)),`
	`142`	`+ tf.ones((8, 8, 3)),`
`143`	`143`	`]`
`144`	`144`	`)`
`145`	`145`	`outputs = layer(inputs)`