diff --git a/python/llm/src/bigdl/llm/transformers/models/llama.py b/python/llm/src/bigdl/llm/transformers/models/llama.py
index 55bcfb37..181caacf 100644
--- a/python/llm/src/bigdl/llm/transformers/models/llama.py
+++ b/python/llm/src/bigdl/llm/transformers/models/llama.py
@@ -276,8 +276,8 @@ def llama_attention_forward_4_31(
     elif use_esimd_sdp(q_len, self.head_dim, query_states):
         import linear_fp16_esimd
         attn_output = linear_fp16_esimd.sdp_forward(query_states,
-                                                    key_states.contiguous(),
-                                                    value_states.contiguous())
+                                                    key_states,
+                                                    value_states)
         attn_output = attn_output.view(query_states.shape)
         attn_weights = None
    else:
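
For context, a minimal sketch (not part of the patch) of the ESIMD SDP call path after this change. It assumes the `linear_fp16_esimd` extension referenced in the diff is importable (BigDL-LLM XPU build) and that the query/key/value states are fp16 tensors on the XPU device; the helper name `esimd_sdp` is hypothetical.

```python
# Hypothetical helper mirroring the patched call site, for illustration only.
def esimd_sdp(query_states, key_states, value_states):
    import linear_fp16_esimd  # deferred import, as in the patched code (XPU-only extension)
    # key_states / value_states are now passed as-is; the removed .contiguous()
    # calls previously forced an extra device copy whenever the tensors were strided.
    attn_output = linear_fp16_esimd.sdp_forward(query_states,
                                                key_states,
                                                value_states)
    # Reshape to match the query layout, as the original code does.
    return attn_output.view(query_states.shape)
```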