From 420fa87d039425a906b7f755e4562b65947f016a Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Fri, 20 Dec 2024 14:54:03 +0400 Subject: [PATCH] support any input resolution in stable diffusion models (#1087) * support any input resolution in stable diffusion models * Update optimum/exporters/openvino/model_configs.py --- optimum/exporters/openvino/model_configs.py | 17 ++++++++++ tests/openvino/test_diffusion.py | 37 +++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 802cd02418..02a8c300a8 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -1783,6 +1783,23 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int return super().generate(input_name, framework, int_dtype, float_dtype) +class DummyUnetVisionInputGenerator(DummyVisionInputGenerator): + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name not in ["sample", "latent_sample"]: + return super().generate(input_name, framework, int_dtype, float_dtype) + # subtract 1 from height and width to enable generation at any resolution + return self.random_float_tensor( + shape=[self.batch_size, self.num_channels, self.height - 1, self.width - 1], + framework=framework, + dtype=float_dtype, + ) + + +@register_in_tasks_manager("unet", *["semantic-segmentation"], library_name="diffusers") +class UnetOpenVINOConfig(UNetOnnxConfig): + DUMMY_INPUT_GENERATOR_CLASSES = (DummyUnetVisionInputGenerator,) + UNetOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES[1:] + + @register_in_tasks_manager("sd3-transformer", *["semantic-segmentation"], library_name="diffusers") class SD3TransformerOpenVINOConfig(UNetOnnxConfig): DUMMY_INPUT_GENERATOR_CLASSES = ( diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py index 805953c8a4..0cce8c3dea 100644 --- 
a/tests/openvino/test_diffusion.py +++ b/tests/openvino/test_diffusion.py @@ -144,6 +144,17 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): np.testing.assert_allclose(ov_output, diffusers_output, atol=6e-3, rtol=1e-2) + # test on inputs not divisible by 64 + height, width, batch_size = 96, 96, 1 + + for output_type in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + + ov_output = ov_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + + np.testing.assert_allclose(ov_output, diffusers_output, atol=6e-3, rtol=1e-2) + @parameterized.expand(CALLBACK_SUPPORT_ARCHITECTURES) @require_diffusers def test_callback(self, model_arch: str): @@ -541,6 +552,20 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): np.testing.assert_allclose(ov_output, diffusers_output, atol=6e-3, rtol=1e-2) + # test generation when input resolution is not divisible by 64 + height, width, batch_size = 96, 96, 1 + + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_type=model_arch) + + for output_type in ["latent", "np", "pt"]: + print(output_type) + inputs["output_type"] = output_type + + ov_output = ov_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + + np.testing.assert_allclose(ov_output, diffusers_output, atol=6e-3, rtol=1e-2) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_image_reproducibility(self, model_arch: str): @@ -777,6 +802,18 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): np.testing.assert_allclose(ov_output, diffusers_output, atol=6e-3, rtol=1e-2) + # test generation when input resolution is not divisible by 64 + height, width, batch_size = 96, 96, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + for output_type 
in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + + ov_output = ov_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + + np.testing.assert_allclose(ov_output, diffusers_output, atol=6e-3, rtol=1e-2) + @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_image_reproducibility(self, model_arch: str):