Fix hf generate for llama3.2 (#12497)
* fix kv condition * meet review
This commit is contained in:
parent
ffa9a9e1b3
commit
7d27f134dd
1 changed files with 2 additions and 2 deletions
|
|
@ -455,7 +455,7 @@ def optimize_llm_single_process(
|
||||||
|
|
||||||
def prepare_input_ids(
|
def prepare_input_ids(
|
||||||
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs):
|
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs):
|
||||||
if past_key_values is not None: # kvcache
|
if past_key_values and isinstance(past_key_values, bool): # kvcache
|
||||||
input_ids = input_ids[:, -1]
|
input_ids = input_ids[:, -1]
|
||||||
else: # prefill, reset the model here
|
else: # prefill, reset the model here
|
||||||
from .npu_llm_cpp import reset
|
from .npu_llm_cpp import reset
|
||||||
|
|
@ -495,7 +495,7 @@ def causal_lm_forward(
|
||||||
return CausalLMOutputWithPast(
|
return CausalLMOutputWithPast(
|
||||||
loss=None,
|
loss=None,
|
||||||
logits=logits,
|
logits=logits,
|
||||||
past_key_values=1, # just an indicator
|
past_key_values=True, # just an indicator
|
||||||
hidden_states=None,
|
hidden_states=None,
|
||||||
attentions=None,
|
attentions=None,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue