From 7d27f134ddd094ef49b3dd71487261c452d46056 Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Wed, 4 Dec 2024 17:54:40 +0800
Subject: [PATCH] Fix hf generate for llama3.2 (#12497)

* fix kv condition]

* meet review
---
 python/llm/src/ipex_llm/transformers/npu_models/convert.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert.py b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
index 55a7d091..b5a2feda 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
@@ -455,7 +455,7 @@ def optimize_llm_single_process(
 def prepare_input_ids(
         self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs):
-    if past_key_values is not None:  # kvcache
+    if past_key_values and isinstance(past_key_values, bool):  # kvcache
         input_ids = input_ids[:, -1]
     else:  # prefill, reset the model here
         from .npu_llm_cpp import reset
@@ -495,7 +495,7 @@ def causal_lm_forward(
     return CausalLMOutputWithPast(
         loss=None,
         logits=logits,
-        past_key_values=1,  # just an indicator
+        past_key_values=True,  # just an indicator
         hidden_states=None,
         attentions=None,
     )
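
A minimal sketch of the condition this patch changes, not part of the patch
itself. Assumptions: for Llama 3.2 the transformers generation loop may pass a
(possibly empty) Cache object such as DynamicCache into
prepare_inputs_for_generation even on the prefill step, so the old check
"past_key_values is not None" routed prefill into the decode branch and
truncated input_ids to the last token; the helper name is_decode_step below is
hypothetical.

    # Hypothetical sketch: only the plain bool returned by causal_lm_forward
    # (past_key_values=True) should select the decode branch.
    from transformers.cache_utils import DynamicCache

    def is_decode_step(past_key_values):
        # Mirrors the patched condition: None or a Cache object means prefill.
        return bool(past_key_values and isinstance(past_key_values, bool))

    print(is_decode_step(None))            # False -> prefill
    print(is_decode_step(DynamicCache()))  # False -> prefill (the old
                                           # "is not None" check took the
                                           # decode branch here)
    print(is_decode_step(True))            # True  -> decode: keep only the
                                           # last generated token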