diff --git a/README.md b/README.md
index 8da6db37e..6503c8f63 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ See documentation at https://onnxruntime.ai/docs/genai.

 | Support matrix | Supported now | Under development | On the roadmap |
 | -------------- | ------------- | ----------------- | -------------- |
-| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> | Whisper | Stable diffusion |
+| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> AMD OLMo | Whisper | Stable diffusion |
 | API | Python <br/> C# <br/> C/C++ <br/> Java ^ | Objective-C | |
 | Platform | Linux <br/> Windows <br/> Mac ^ <br/> Android ^ | | iOS |
 | Architecture | x86 <br/> x64 <br/> Arm64 ~ | | |
diff --git a/src/models/model.cpp b/src/models/model.cpp
index b766b369f..1841d2e0f 100644
--- a/src/models/model.cpp
+++ b/src/models/model.cpp
@@ -590,7 +590,7 @@ std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, const char* config_path, con
 }

 std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, std::unique_ptr<Config> config) {
-  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
+  std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "olmo", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
   if (config->model.type == "gpt2")
     return std::make_shared<Gpt_Model>(std::move(config), ort_env);
   if (llm_types.find(config->model.type) != llm_types.end())
diff --git a/src/python/py/models/README.md b/src/python/py/models/README.md
index e216a0ca1..c10683ef1 100644
--- a/src/python/py/models/README.md
+++ b/src/python/py/models/README.md
@@ -39,6 +39,7 @@ The tool currently supports the following model architectures.
 - Nemotron
 - Phi
 - Qwen
+- AMD OLMo

 It is intended for supporting the latest, popular state-of-the-art models.
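Note on the builder change that follows: AMD OLMo checkpoints use non-parametric LayerNorm (elementwise_affine=False, i.e. no learned scale or bias), which is why the new OLMoModel class below substitutes an identity weight and a zero bias before handing off to the shared make_layernorm export path. A minimal sketch of why that substitution is lossless, assuming PyTorch; the hidden size of 2048 is the AMD-OLMo-1B value and is used here only for illustration:

    import torch

    hidden_size = 2048  # AMD-OLMo-1B value, assumed for illustration
    x = torch.randn(4, hidden_size)

    # A LayerNorm with weight=1 and bias=0 computes exactly what a
    # non-parametric LayerNorm computes, so exporting those constants
    # preserves the model's arithmetic.
    non_parametric = torch.nn.LayerNorm(hidden_size, elementwise_affine=False)
    parametric = torch.nn.LayerNorm(hidden_size)  # default init: ones weight, zeros bias
    torch.testing.assert_close(non_parametric(x), parametric(x))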
"context_length": self.context_length, "decoder": { "session_options" : { @@ -3068,6 +3068,14 @@ def make_layer(self, layer_id, layer): layer.self_attn = layer.self_attn if hasattr(layer, 'self_attn') else layer.self_attention super().make_layer(layer_id, layer) +class OLMoModel(Model): + def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options): + super().__init__(config, io_dtype, onnx_dtype, ep, cache_dir, extra_options) + + def make_layernorm(self, layer_id, layernorm, skip, simple, location): + layernorm.weight = torch.ones(self.hidden_size) + layernorm.bias = torch.zeros(self.hidden_size) + super().make_layernorm(layer_id, layernorm, skip, simple, location) class GraniteModel(MistralModel): def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options): @@ -3200,6 +3208,8 @@ def create_model(model_name, input_path, output_dir, precision, execution_provid onnx_model = MistralModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options) elif config.architectures[0] == "NemotronForCausalLM": onnx_model = NemotronModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options) + elif config.architectures[0] == "OlmoForCausalLM": + onnx_model = OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options) elif config.architectures[0] == "PhiForCausalLM": onnx_model = PhiModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options) elif config.architectures[0] == "Phi3ForCausalLM" and config.max_position_embeddings == config.original_max_position_embeddings: diff --git a/test/python/_test_utils.py b/test/python/_test_utils.py index da5785bb3..d5a2aef10 100644 --- a/test/python/_test_utils.py +++ b/test/python/_test_utils.py @@ -55,6 +55,8 @@ def run_subprocess( def get_model_paths(): hf_paths = { "phi-2": "microsoft/phi-2", + "olmo": "amd/AMD-OLMo-1B-SFT-DPO", + "qwen": "Qwen/Qwen2.5-0.5B", "phi-3.5": "microsoft/Phi-3.5-mini-instruct", # "llama-3.2": "meta-llama/Llama-3.2-1B-instruct", "granite-3.0": "ibm-granite/granite-3.0-2b-instruct",