Fix head dimension in mistral (quic#157)
* Fix head dimension in mistral

Signed-off-by: Mamta Singh <[email protected]>

* Run ruff linter and formatter

Signed-off-by: Mamta Singh <[email protected]>

---------

Signed-off-by: Mamta Singh <[email protected]>
quic-mamta authored Oct 17, 2024
1 parent 08ca83c commit 4b8bacc
Showing 2 changed files with 5 additions and 2 deletions.
QEfficient/transformers/models/mistral/modeling_mistral.py (2 changes: 1 addition & 1 deletion)
@@ -185,7 +185,7 @@ def forward(
 
         attn_output = attn_output.transpose(1, 2).contiguous()
 
-        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+        attn_output = attn_output.reshape(bsz, q_len, -1)
 
         attn_output = self.o_proj(attn_output)
 
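For context, a minimal PyTorch sketch (with hypothetical dimensions, not taken from the commit) of why the reshape target changes: when a config sets head_dim explicitly, num_heads * head_dim can differ from hidden_size, so reshape(bsz, q_len, self.hidden_size) would raise a shape error while reshape(bsz, q_len, -1) keeps the actual num_heads * head_dim feature size.

import torch

# Hypothetical sizes chosen to show the mismatch: head_dim is set explicitly
# and num_heads * head_dim (3072) does not equal hidden_size (4096).
bsz, q_len, hidden_size = 2, 8, 4096
num_heads, head_dim = 32, 96

attn_output = torch.randn(bsz, num_heads, q_len, head_dim)
attn_output = attn_output.transpose(1, 2).contiguous()

# reshape(bsz, q_len, hidden_size) would fail here (3072 features per token,
# not 4096); -1 lets the last dimension follow num_heads * head_dim.
attn_output = attn_output.reshape(bsz, q_len, -1)
print(attn_output.shape)  # torch.Size([2, 8, 3072])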
QEfficient/utils/_utils.py (5 changes: 4 additions & 1 deletion)
@@ -239,7 +239,10 @@ def get_padding_shape_from_config(config, batch_size, seq_len):
         config, "num_attention_heads"
     ): # Check for num_key_value_heads (Llama/Mistral)
         n_heads = config.num_key_value_heads
-        d_head = config.hidden_size // config.num_attention_heads
+        if hasattr(config, "head_dim"):
+            d_head = config.head_dim
+        else:
+            d_head = config.hidden_size // config.num_attention_heads
     elif hasattr(config, "n_heads"): # Check for n_heads and d_model in the config (MPT Model)
         n_heads = config.n_heads
         d_head = config.d_model // config.n_heads
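A standalone sketch of the updated d_head selection, using hypothetical SimpleNamespace configs rather than real transformers config objects: an explicit head_dim wins, otherwise the value is derived from hidden_size.

from types import SimpleNamespace

def pick_d_head(config):
    # Mirrors the updated branch above: prefer an explicit head_dim,
    # otherwise fall back to hidden_size // num_attention_heads.
    if hasattr(config, "head_dim"):
        return config.head_dim
    return config.hidden_size // config.num_attention_heads

# Hypothetical Mistral-like config with an explicit head_dim.
cfg_with_head_dim = SimpleNamespace(hidden_size=4096, num_attention_heads=32, head_dim=96)
# Hypothetical config without head_dim falls back to the derived value.
cfg_without_head_dim = SimpleNamespace(hidden_size=4096, num_attention_heads=32)

print(pick_d_head(cfg_with_head_dim))     # 96
print(pick_d_head(cfg_without_head_dim))  # 128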
