From 3e05c9e11bdb8372e41713fd3218ff66e1b1bed7 Mon Sep 17 00:00:00 2001
From: Ruonan Wang
Date: Tue, 9 Jan 2024 18:10:01 +0800
Subject: [PATCH] LLM: update esimd sdp kernel (#9871)

---
 python/llm/src/bigdl/llm/transformers/models/llama.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/bigdl/llm/transformers/models/llama.py b/python/llm/src/bigdl/llm/transformers/models/llama.py
index 55bcfb37..181caacf 100644
--- a/python/llm/src/bigdl/llm/transformers/models/llama.py
+++ b/python/llm/src/bigdl/llm/transformers/models/llama.py
@@ -276,8 +276,8 @@ def llama_attention_forward_4_31(
     elif use_esimd_sdp(q_len, self.head_dim, query_states):
         import linear_fp16_esimd
         attn_output = linear_fp16_esimd.sdp_forward(query_states,
-                                                    key_states.contiguous(),
-                                                    value_states.contiguous())
+                                                    key_states,
+                                                    value_states)
         attn_output = attn_output.view(query_states.shape)
         attn_weights = None
     else: