diff --git a/examples/embedding_model.py b/examples/embedding_model.py
new file mode 100644
index 00000000..4babfe9a
--- /dev/null
+++ b/examples/embedding_model.py
@@ -0,0 +1,43 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
+# This is a working example of running an embedding model on Cloud AI 100
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer
+
+from QEfficient import QEFFAutoModel as AutoModel
+
+
+def mean_pooling(model_output, attention_mask):
+    token_embeddings = model_output  # model_output already holds the token embeddings
+    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+
+# Sentence we want an embedding for
+sentences = "This is an example sentence"
+
+# Load the tokenizer from the HuggingFace Hub
+tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+
+# Load the model through QEfficient and compile it for Cloud AI 100
+qeff_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+qeff_model.compile(num_cores=14)
+
+# Tokenize the sentence and run inference on the device
+encoded_input = tokenizer(sentences, return_tensors="pt")
+qeff_output = torch.tensor(qeff_model.generate(encoded_input))
+
+# Perform mean pooling over the token embeddings
+sentence_embeddings = mean_pooling(qeff_output, encoded_input["attention_mask"])
+
+# Normalize embeddings
+sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
+
+print("Sentence embeddings:")
+print(sentence_embeddings)
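
A minimal sketch (not part of the diff above) of how the same pipeline could be reused to compare two sentences by cosine similarity. It assumes the qeff_model, tokenizer, and mean_pooling defined in examples/embedding_model.py are already in scope and that generate() returns token embeddings convertible with torch.tensor, exactly as shown there; the embed() helper is hypothetical and not part of the QEfficient API. Sentences are embedded one at a time to avoid any assumptions about batching or padded sequence lengths in the compiled model.

import torch
import torch.nn.functional as F


def embed(sentence):
    # Hypothetical helper: tokenize, run on the device, pool, and L2-normalize.
    encoded = tokenizer(sentence, return_tensors="pt")
    token_embeddings = torch.tensor(qeff_model.generate(encoded))
    pooled = mean_pooling(token_embeddings, encoded["attention_mask"])
    return F.normalize(pooled, p=2, dim=1)


query = embed("This is an example sentence")
candidate = embed("Each sentence is converted to an embedding")

# Both embeddings are L2-normalized, so the dot product equals cosine similarity.
similarity = (query @ candidate.T).item()
print(f"Cosine similarity: {similarity:.4f}")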