Fix hf generate for llama3.2 (#12497)
* fix kv condition
* meet review
This commit is contained in:
parent
ffa9a9e1b3
commit
7d27f134dd
1 changed file with 2 additions and 2 deletions
@@ -455,7 +455,7 @@ def optimize_llm_single_process(
     def prepare_input_ids(
             self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs):
-        if past_key_values is not None:  # kvcache
+        if past_key_values and isinstance(past_key_values, bool):  # kvcache
             input_ids = input_ids[:, -1]
         else:  # prefill, reset the model here
             from .npu_llm_cpp import reset
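Why the old `past_key_values is not None` check broke for Llama 3.2 is not spelled out in the commit, but a plausible reading is that the transformers releases which add Llama 3.2 support hand `prepare_inputs_for_generation` a framework-created (initially empty) cache object on the very first generate() step, so `is not None` routed the prefill through the single-token decode path. Testing specifically for the truthy boolean indicator that `causal_lm_forward` now returns (second hunk below) keeps the two phases apart. A minimal sketch of the intended branching, with a hypothetical `is_decode_step` helper and assuming a `DynamicCache`-style object arrives on the first call:

from transformers.cache_utils import DynamicCache

def is_decode_step(past_key_values):
    # Hypothetical helper mirroring the fixed condition: decode only when
    # past_key_values is the truthy bool indicator set by a previous forward
    # pass, never a framework-provided (possibly empty) cache object.
    return bool(past_key_values) and isinstance(past_key_values, bool)

assert is_decode_step(None) is False            # no cache at all -> prefill
assert is_decode_step(DynamicCache()) is False  # empty cache from generate() -> prefill
assert is_decode_step(True) is True             # indicator from a prior forward -> decode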
@@ -495,7 +495,7 @@ def causal_lm_forward(
     return CausalLMOutputWithPast(
         loss=None,
         logits=logits,
-        past_key_values=1,  # just an indicator
+        past_key_values=True,  # just an indicator
         hidden_states=None,
         attentions=None,
     )
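The two hunks are coupled: the indicator changes from 1 to True because 1 is an int, not a bool, so it would fail the new isinstance(past_key_values, bool) check and push every decode step back into the prefill/reset branch. A quick illustration of the relevant Python semantics (bool is a subclass of int, but not the reverse):

assert isinstance(True, bool) and bool(True)  # new indicator satisfies the new check
assert not isinstance(1, bool)                # old indicator would fall through to prefill
assert isinstance(True, int)                  # bool subclasses int; an int never passes as bool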