Added OLMo support to builder.py #1061

Merged
2 changes: 1 addition & 1 deletion README.md
@@ -15,7 +15,7 @@ See documentation at https://onnxruntime.ai/docs/genai.

| Support matrix | Supported now | Under development | On the roadmap |
| -------------- | ------------- | ----------------- | -------------- |
-| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> | Whisper | Stable diffusion |
+| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> AMD OLMo | Whisper | Stable diffusion |
| API | Python <br/> C# <br/> C/C++ <br/> Java ^ | Objective-C | |
| Platform | Linux <br/> Windows <br/> Mac ^ <br/> Android ^ | | iOS |
| Architecture | x86 <br/> x64 <br/> Arm64 ~ | | |
2 changes: 1 addition & 1 deletion src/models/model.cpp
@@ -590,7 +590,7 @@ std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, const char* config_path, con
}

std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, std::unique_ptr<Config> config) {
-std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
+std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "olmo", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
if (config->model.type == "gpt2")
return std::make_shared<Gpt_Model>(std::move(config), ort_env);
if (llm_types.find(config->model.type) != llm_types.end())
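With "olmo" added to `llm_types`, a genai_config.json whose `model.type` is "olmo" is dispatched down the same decoder-only path as Llama, Mistral, and the other listed types. Below is a minimal sketch of running an exported AMD OLMo folder with the onnxruntime-genai Python API; the folder path is illustrative (see the builder example further down), and the exact generation calls (e.g. `append_tokens`) vary between package releases, so treat this as an assumption rather than the package's fixed API.

```python
import onnxruntime_genai as og

model = og.Model("./amd-olmo-1b-onnx")   # folder produced by the model builder; path is illustrative
tokenizer = og.Tokenizer(model)

params = og.GeneratorParams(model)
params.set_search_options(max_length=128)

generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("What is ONNX Runtime GenAI?"))
while not generator.is_done():
    generator.generate_next_token()

print(tokenizer.decode(generator.get_sequence(0)))
```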
1 change: 1 addition & 0 deletions src/python/py/models/README.md
@@ -39,6 +39,7 @@ The tool currently supports the following model architectures.
- Nemotron
- Phi
- Qwen
+- AMD OLMo

It is intended for supporting the latest, popular state-of-the-art models.

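For reference, here is a minimal sketch of exporting the model used in this PR's tests with the model builder. It assumes the builder's documented CLI flags (`-m` Hugging Face model id, `-o` output folder, `-p` precision, `-e` execution provider, `-c` cache directory); the module path, flag set, and local folders below are assumptions that may vary by onnxruntime-genai release.

```python
import subprocess
import sys

# Invoke the model builder the same way the test utilities drive it: as a subprocess.
subprocess.run(
    [
        sys.executable, "-m", "onnxruntime_genai.models.builder",
        "-m", "amd/AMD-OLMo-1B-SFT-DPO",  # Hugging Face model id added to the tests in this PR
        "-o", "./amd-olmo-1b-onnx",       # output folder for the ONNX model and genai_config.json (illustrative)
        "-p", "int4",                     # precision
        "-e", "cpu",                      # execution provider
        "-c", "./hf_cache",               # cache directory for downloaded Hugging Face files (illustrative)
    ],
    check=True,
)
```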
12 changes: 11 additions & 1 deletion src/python/py/models/builder.py
@@ -330,7 +330,7 @@ def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):

         genai_config = {
             "model": {
-                "bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") else 1, # config.bos_token_id not present in ChatGLM model configs.
+                "bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") and config.bos_token_id != None else 1, # config.bos_token_id not present in ChatGLM model configs.
                 "context_length": self.context_length,
                 "decoder": {
                     "session_options" : {
@@ -3068,6 +3068,14 @@ def make_layer(self, layer_id, layer):
         layer.self_attn = layer.self_attn if hasattr(layer, 'self_attn') else layer.self_attention
         super().make_layer(layer_id, layer)

+class OLMoModel(Model):
+    def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
+        super().__init__(config, io_dtype, onnx_dtype, ep, cache_dir, extra_options)
+
+    def make_layernorm(self, layer_id, layernorm, skip, simple, location):
+        layernorm.weight = torch.ones(self.hidden_size)
+        layernorm.bias = torch.zeros(self.hidden_size)
+        super().make_layernorm(layer_id, layernorm, skip, simple, location)

class GraniteModel(MistralModel):
def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
@@ -3200,6 +3208,8 @@ def create_model(model_name, input_path, output_dir, precision, execution_provid
         onnx_model = MistralModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
     elif config.architectures[0] == "NemotronForCausalLM":
         onnx_model = NemotronModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+    elif config.architectures[0] == "OlmoForCausalLM":
+        onnx_model = OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
     elif config.architectures[0] == "PhiForCausalLM":
         onnx_model = PhiModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
     elif config.architectures[0] == "Phi3ForCausalLM" and config.max_position_embeddings == config.original_max_position_embeddings:
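The only architecture-specific override OLMo needs is `make_layernorm`. OLMo checkpoints (Hugging Face's `OlmoForCausalLM`) use a non-parametric LayerNorm with no learnable weight or bias, while the builder's shared layernorm path expects both tensors to exist; a LayerNorm whose weight is all ones and whose bias is all zeros is numerically identical, so the override fabricates those constants before deferring to the base class. A minimal sketch of that equivalence (the hidden size is an assumption for illustration, not read from the model config):

```python
import torch

hidden_size = 2048  # illustrative; roughly AMD-OLMo-1B's hidden size, not read from the config here
x = torch.randn(4, hidden_size)

non_parametric = torch.nn.LayerNorm(hidden_size, elementwise_affine=False)  # OLMo-style: no weight/bias
parametric = torch.nn.LayerNorm(hidden_size)                                # what the builder's path expects
with torch.no_grad():
    parametric.weight.copy_(torch.ones(hidden_size))   # same constants OLMoModel.make_layernorm injects
    parametric.bias.copy_(torch.zeros(hidden_size))

assert torch.allclose(non_parametric(x), parametric(x), atol=1e-6)
```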
2 changes: 2 additions & 0 deletions test/python/_test_utils.py
@@ -55,6 +55,8 @@ def run_subprocess(
 def get_model_paths():
     hf_paths = {
         "phi-2": "microsoft/phi-2",
+        "olmo": "amd/AMD-OLMo-1B-SFT-DPO",
+        "qwen": "Qwen/Qwen2.5-0.5B",
         "phi-3.5": "microsoft/Phi-3.5-mini-instruct",
         # "llama-3.2": "meta-llama/Llama-3.2-1B-instruct",
         "granite-3.0": "ibm-granite/granite-3.0-2b-instruct",