Skip to content

Commit

Permalink
Added Python API for other image generation models (#1349)
Browse files Browse the repository at this point in the history
  • Loading branch information
ilya-lavrenov authored Dec 10, 2024
1 parent 1373314 commit c4857e3
Show file tree
Hide file tree
Showing 9 changed files with 455 additions and 141 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace genai {

class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel {
public:
struct Config {
struct OPENVINO_GENAI_EXPORTS Config {
size_t in_channels = 64;
bool guidance_embeds = false;
size_t m_default_sample_size = 128;
Expand Down Expand Up @@ -69,7 +69,7 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel {

template <typename... Properties>
ov::util::EnableIfAllStringAny<FluxTransformer2DModel&, Properties...> compile(const std::string& device,
Properties&&... properties) {
Properties&&... properties) {
return compile(device, ov::AnyMap{std::forward<Properties>(properties)...});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace genai {

class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel {
public:
struct Config {
struct OPENVINO_GENAI_EXPORTS Config {
size_t sample_size = 128;
size_t patch_size = 2;
size_t in_channels = 16;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
const CLIPTextModel& clip_text_model,
const T5EncoderModel t5_encoder_model,
const FluxTransformer2DModel& transformer,
const AutoencoderKL& vae_decoder);
const AutoencoderKL& vae);

ImageGenerationConfig get_generation_config() const;
void set_generation_config(const ImageGenerationConfig& generation_config);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ FluxTransformer2DModel::Config::Config(const std::filesystem::path& config_path)

read_json_param(data, "in_channels", in_channels);
read_json_param(data, "guidance_embeds", guidance_embeds);
file.close();
}

FluxTransformer2DModel::FluxTransformer2DModel(const std::filesystem::path& root_dir)
Expand Down
3 changes: 3 additions & 0 deletions src/python/openvino_genai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@
from .py_openvino_genai import (
CLIPTextModel,
CLIPTextModelWithProjection,
T5EncoderModel,
UNet2DConditionModel,
FluxTransformer2DModel,
SD3Transformer2DModel,
AutoencoderKL,
Text2ImagePipeline,
Scheduler,
Expand Down
5 changes: 4 additions & 1 deletion src/python/openvino_genai/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,20 @@ from openvino_genai.py_openvino_genai import ContinuousBatchingPipeline
from openvino_genai.py_openvino_genai import CppStdGenerator
from openvino_genai.py_openvino_genai import DecodedResults
from openvino_genai.py_openvino_genai import EncodedResults
from openvino_genai.py_openvino_genai import FluxTransformer2DModel
from openvino_genai.py_openvino_genai import GenerationConfig
from openvino_genai.py_openvino_genai import GenerationResult
from openvino_genai.py_openvino_genai import Generator
from openvino_genai.py_openvino_genai import ImageGenerationConfig
from openvino_genai.py_openvino_genai import LLMPipeline
from openvino_genai.py_openvino_genai import PerfMetrics
from openvino_genai.py_openvino_genai import RawPerfMetrics
from openvino_genai.py_openvino_genai import SD3Transformer2DModel
from openvino_genai.py_openvino_genai import Scheduler
from openvino_genai.py_openvino_genai import SchedulerConfig
from openvino_genai.py_openvino_genai import StopCriteria
from openvino_genai.py_openvino_genai import StreamerBase
from openvino_genai.py_openvino_genai import T5EncoderModel
from openvino_genai.py_openvino_genai import Text2ImagePipeline
from openvino_genai.py_openvino_genai import TokenizedInputs
from openvino_genai.py_openvino_genai import Tokenizer
Expand All @@ -38,5 +41,5 @@ from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics
from openvino_genai.py_openvino_genai import draft_model
import os as os
from . import py_openvino_genai
__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'GenerationConfig', 'GenerationResult', 'Generator', 'ImageGenerationConfig', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'openvino', 'os', 'py_openvino_genai']
__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'ImageGenerationConfig', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'openvino', 'os', 'py_openvino_genai']
__version__: str = '2025.0.0.0'
143 changes: 141 additions & 2 deletions src/python/openvino_genai/py_openvino_genai.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ from __future__ import annotations
import openvino._pyopenvino
import os
import typing
__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'ImageGenerationConfig', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model']
__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'ImageGenerationConfig', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model']
class Adapter:
"""
Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier.
Expand Down Expand Up @@ -222,7 +222,7 @@ class CLIPTextModel:
"""
max_position_embeddings: int
num_hidden_layers: int
def __init__(self, config_path: str) -> None:
def __init__(self, config_path: os.PathLike) -> None:
...
@typing.overload
def __init__(self, root_dir: os.PathLike) -> None:
Expand Down Expand Up @@ -470,6 +470,53 @@ class EncodedResults:
@property
def tokens(self) -> list[list[int]]:
...
class FluxTransformer2DModel:
    """
    FluxTransformer2DModel class.

    Type stub for the FLUX diffusion transformer binding used by
    Text2ImagePipeline.flux(...).
    """
    class Config:
        """
        This class is used for storing FluxTransformer2DModel config.
        """
        # Populated from the model's config JSON (see __init__).
        default_sample_size: int
        in_channels: int
        def __init__(self, config_path: os.PathLike) -> None:
            """
            config_path (os.PathLike): Path to the model config JSON file
            the fields above are read from.
            """
            ...
    @typing.overload
    def __init__(self, root_dir: os.PathLike) -> None:
        """
        FluxTransformer2DModel class
        root_dir (os.PathLike): Model root directory.
        """
    @typing.overload
    def __init__(self, root_dir: os.PathLike, device: str, **kwargs) -> None:
        """
        FluxTransformer2DModel class
        root_dir (os.PathLike): Model root directory.
        device (str): Device on which inference will be done.
        kwargs: Device properties.
        """
    @typing.overload
    def __init__(self, model: FluxTransformer2DModel) -> None:
        """
        FluxTransformer2DModel class
        model (FluxTransformer2DModel): FluxTransformer2DModel model to copy-construct from.
        """
    def compile(self, device: str, **kwargs) -> None:
        """
        Compiles the model.
        device (str): Device to run the model on (e.g., CPU, GPU).
        kwargs: Device properties.
        """
    def get_config(self) -> FluxTransformer2DModel.Config:
        """
        Returns the model's Config.
        """
        ...
    def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
        """
        Runs inference for the given latent sample and timestep tensors and
        returns the resulting tensor.
        """
        ...
    def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> FluxTransformer2DModel:
        """
        Reshapes the model to static shapes for the given dimensions.
        Returns the model itself, allowing call chaining.
        """
        ...
    def set_hidden_states(self, tensor_name: str, encoder_hidden_states: openvino._pyopenvino.Tensor) -> None:
        """
        Sets the named input tensor (e.g. encoder hidden states) to be used
        on subsequent infer() calls.
        """
        ...
class GenerationConfig:
"""
Expand Down Expand Up @@ -1068,6 +1115,55 @@ class RawPerfMetrics:
@property
def tokenization_durations(self) -> list[float]:
...
class SD3Transformer2DModel:
    """
    SD3Transformer2DModel class.

    Type stub for the Stable Diffusion 3 transformer binding used by
    Text2ImagePipeline.stable_diffusion_3(...).
    """
    class Config:
        """
        This class is used for storing SD3Transformer2DModel config.
        """
        # Populated from the model's config JSON (see __init__).
        in_channels: int
        joint_attention_dim: int
        patch_size: int
        sample_size: int
        def __init__(self, config_path: os.PathLike) -> None:
            """
            config_path (os.PathLike): Path to the model config JSON file
            the fields above are read from.
            """
            ...
    @typing.overload
    def __init__(self, root_dir: os.PathLike) -> None:
        """
        SD3Transformer2DModel class
        root_dir (os.PathLike): Model root directory.
        """
    @typing.overload
    def __init__(self, root_dir: os.PathLike, device: str, **kwargs) -> None:
        """
        SD3Transformer2DModel class
        root_dir (os.PathLike): Model root directory.
        device (str): Device on which inference will be done.
        kwargs: Device properties.
        """
    @typing.overload
    def __init__(self, model: SD3Transformer2DModel) -> None:
        """
        SD3Transformer2DModel class
        model (SD3Transformer2DModel): SD3Transformer2DModel model to copy-construct from.
        """
    def compile(self, device: str, **kwargs) -> None:
        """
        Compiles the model.
        device (str): Device to run the model on (e.g., CPU, GPU).
        kwargs: Device properties.
        """
    def get_config(self) -> SD3Transformer2DModel.Config:
        """
        Returns the model's Config.
        """
        ...
    def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor:
        """
        Runs inference for the given latent sample and timestep tensors and
        returns the resulting tensor.
        """
        ...
    def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> SD3Transformer2DModel:
        """
        Reshapes the model to static shapes for the given dimensions.
        Returns the model itself, allowing call chaining.
        """
        ...
    def set_hidden_states(self, tensor_name: str, encoder_hidden_states: openvino._pyopenvino.Tensor) -> None:
        """
        Sets the named input tensor (e.g. encoder hidden states) to be used
        on subsequent infer() calls.
        """
        ...
class Scheduler:
"""
Scheduler for image generation pipelines.
Expand Down Expand Up @@ -1220,17 +1316,60 @@ class StreamerBase:
"""
Put is called every time new token is decoded. Returns a bool flag to indicate whether generation should be stopped, if return true generation stops
"""
class T5EncoderModel:
    """
    T5EncoderModel class.

    Type stub for the T5 text-encoder binding used by the FLUX and
    Stable Diffusion 3 variants of Text2ImagePipeline.
    """
    @typing.overload
    def __init__(self, root_dir: os.PathLike) -> None:
        """
        T5EncoderModel class
        root_dir (os.PathLike): Model root directory.
        """
    @typing.overload
    def __init__(self, root_dir: os.PathLike, device: str, **kwargs) -> None:
        """
        T5EncoderModel class
        root_dir (os.PathLike): Model root directory.
        device (str): Device on which inference will be done.
        kwargs: Device properties.
        """
    @typing.overload
    def __init__(self, model: T5EncoderModel) -> None:
        """
        T5EncoderModel class
        model (T5EncoderModel): T5EncoderModel model to copy-construct from.
        """
    def compile(self, device: str, **kwargs) -> None:
        """
        Compiles the model.
        device (str): Device to run the model on (e.g., CPU, GPU).
        kwargs: Device properties.
        """
    def get_output_tensor(self, idx: int) -> openvino._pyopenvino.Tensor:
        """
        Returns the model output tensor at the given index.
        """
        ...
    def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, max_sequence_length: int) -> openvino._pyopenvino.Tensor:
        """
        Encodes the positive (and, when do_classifier_free_guidance is set,
        the negative) prompt and returns the resulting embeddings tensor.
        max_sequence_length (int): Maximum tokenized prompt length.
        """
        ...
    def reshape(self, batch_size: int, max_sequence_length: int) -> T5EncoderModel:
        """
        Reshapes the model to static shapes for the given batch size and
        sequence length. Returns the model itself, allowing call chaining.
        """
        ...
class Text2ImagePipeline:
"""
This class is used for generation with text-to-image models.
"""
@staticmethod
def flux(scheduler: Scheduler, clip_text_model: CLIPTextModel, t5_encoder_model: T5EncoderModel, transformer: FluxTransformer2DModel, vae: AutoencoderKL) -> Text2ImagePipeline:
...
@staticmethod
def latent_consistency_model(scheduler: Scheduler, clip_text_model: CLIPTextModel, unet: UNet2DConditionModel, vae: AutoencoderKL) -> Text2ImagePipeline:
...
@staticmethod
def stable_diffusion(scheduler: Scheduler, clip_text_model: CLIPTextModel, unet: UNet2DConditionModel, vae: AutoencoderKL) -> Text2ImagePipeline:
...
@staticmethod
def stable_diffusion_3(scheduler: Scheduler, clip_text_model_1: CLIPTextModelWithProjection, clip_text_model_2: CLIPTextModelWithProjection, t5_encoder_model: T5EncoderModel, transformer: SD3Transformer2DModel, vae: AutoencoderKL) -> Text2ImagePipeline:
...
@staticmethod
def stable_diffusion_xl(scheduler: Scheduler, clip_text_model: CLIPTextModel, clip_text_model_with_projection: CLIPTextModelWithProjection, unet: UNet2DConditionModel, vae: AutoencoderKL) -> Text2ImagePipeline:
...
@typing.overload
Expand Down
Loading

0 comments on commit c4857e3

Please sign in to comment.