diff --git a/README.md b/README.md
index 8da6db37e..6503c8f63 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ See documentation at https://onnxruntime.ai/docs/genai.
| Support matrix | Supported now | Under development | On the roadmap |
| -------------- | ------------- | ----------------- | -------------- |
-| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite | Whisper | Stable diffusion |
+| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> AMD OLMo | Whisper | Stable diffusion |
| API | Python <br/> C# <br/> C/C++ <br/> Java ^ | Objective-C | |
| Platform | Linux <br/> Windows <br/> Mac ^ <br/> Android ^ | | iOS |
| Architecture | x86 <br/> x64 <br/> Arm64 ~ | | |
diff --git a/src/models/model.cpp b/src/models/model.cpp
index b766b369f..1841d2e0f 100644
--- a/src/models/model.cpp
+++ b/src/models/model.cpp
@@ -590,7 +590,7 @@ std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, const char* config_path, con
}
std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, std::unique_ptr<Config> config) {
- std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
+ std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "olmo", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
if (config->model.type == "gpt2")
return std::make_shared<Gpt_Model>(std::move(config), ort_env);
if (llm_types.find(config->model.type) != llm_types.end())
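
With "olmo" registered in `llm_types`, an exported OLMo model is served by the same decoder-only pipeline as the other entries. A minimal sketch of driving it from the Python bindings follows (the output folder name is hypothetical, and on older releases the prompt is fed through `params.input_ids` rather than `append_tokens`):

```python
import onnxruntime_genai as og

model = og.Model("./amd-olmo-1b-onnx")  # hypothetical folder produced by the model builder
tokenizer = og.Tokenizer(model)

params = og.GeneratorParams(model)
params.set_search_options(max_length=128)

generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("What is ONNX Runtime?"))
while not generator.is_done():
    generator.generate_next_token()
print(tokenizer.decode(generator.get_sequence(0)))
```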
diff --git a/src/python/py/models/README.md b/src/python/py/models/README.md
index e216a0ca1..c10683ef1 100644
--- a/src/python/py/models/README.md
+++ b/src/python/py/models/README.md
@@ -39,6 +39,7 @@ The tool currently supports the following model architectures.
- Nemotron
- Phi
- Qwen
+- AMD OLMo
It is intended to support the latest, popular state-of-the-art models.
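
For example, the newly supported checkpoint can be exported with the usual builder invocation (the output folder, precision, and execution provider below are arbitrary choices, not part of this change):

```bash
python3 -m onnxruntime_genai.models.builder \
    -m amd/AMD-OLMo-1B-SFT-DPO \
    -o ./amd-olmo-1b-onnx \
    -p int4 \
    -e cpu
```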
diff --git a/src/python/py/models/builder.py b/src/python/py/models/builder.py
index e3ac63a04..49741304d 100644
--- a/src/python/py/models/builder.py
+++ b/src/python/py/models/builder.py
@@ -330,7 +330,7 @@ def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):
        genai_config = {
            "model": {
-               "bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") else 1,  # config.bos_token_id not present in ChatGLM model configs.
+               "bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") and config.bos_token_id is not None else 1,  # config.bos_token_id absent in ChatGLM model configs and null in others (e.g. OLMo).
                "context_length": self.context_length,
                "decoder": {
                    "session_options" : {
@@ -3068,6 +3068,14 @@ def make_layer(self, layer_id, layer):
        layer.self_attn = layer.self_attn if hasattr(layer, 'self_attn') else layer.self_attention
        super().make_layer(layer_id, layer)
+class OLMoModel(Model):
+    def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
+        super().__init__(config, io_dtype, onnx_dtype, ep, cache_dir, extra_options)
+
+    def make_layernorm(self, layer_id, layernorm, skip, simple, location):
+        # OLMo's LayerNorm is non-parametric (no learned scale or shift), so give the
+        # exporter an identity weight and zero bias before reusing the base logic.
+        layernorm.weight = torch.ones(self.hidden_size)
+        layernorm.bias = torch.zeros(self.hidden_size)
+        super().make_layernorm(layer_id, layernorm, skip, simple, location)
class GraniteModel(MistralModel):
def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
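
The identity weights in `OLMoModel.make_layernorm` above reflect that OLMo's LayerNorm carries no affine parameters, while the exporter's LayerNorm path expects a weight and bias; ones and zeros reproduce the unparameterized op exactly. A standalone PyTorch check of that equivalence (illustration only, not part of the patch):

```python
import torch

x = torch.randn(2, 8)

# LayerNorm without learned affine parameters, as in OLMo...
a = torch.nn.functional.layer_norm(x, normalized_shape=(8,))

# ...matches an affine LayerNorm whose weight is all ones and bias all zeros.
ln = torch.nn.LayerNorm(8)
torch.nn.init.ones_(ln.weight)
torch.nn.init.zeros_(ln.bias)
b = ln(x)

print(torch.allclose(a, b))  # True
```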
@@ -3200,6 +3208,8 @@ def create_model(model_name, input_path, output_dir, precision, execution_provid
        onnx_model = MistralModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif config.architectures[0] == "NemotronForCausalLM":
        onnx_model = NemotronModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+    elif config.architectures[0] == "OlmoForCausalLM":
+        onnx_model = OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif config.architectures[0] == "PhiForCausalLM":
        onnx_model = PhiModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
    elif config.architectures[0] == "Phi3ForCausalLM" and config.max_position_embeddings == config.original_max_position_embeddings:
diff --git a/test/python/_test_utils.py b/test/python/_test_utils.py
index da5785bb3..d5a2aef10 100644
--- a/test/python/_test_utils.py
+++ b/test/python/_test_utils.py
@@ -55,6 +55,8 @@ def run_subprocess(
def get_model_paths():
    hf_paths = {
        "phi-2": "microsoft/phi-2",
+        "olmo": "amd/AMD-OLMo-1B-SFT-DPO",
+        "qwen": "Qwen/Qwen2.5-0.5B",
        "phi-3.5": "microsoft/Phi-3.5-mini-instruct",
        # "llama-3.2": "meta-llama/Llama-3.2-1B-instruct",
        "granite-3.0": "ibm-granite/granite-3.0-2b-instruct",