Fix hf generate for llama3.2 (#12497)
* fix kv condition * meet review
This commit is contained in:
parent
ffa9a9e1b3
commit
7d27f134dd
1 changed files with 2 additions and 2 deletions
|
|
@ -455,7 +455,7 @@ def optimize_llm_single_process(
|
||||||
|
|
||||||
def prepare_input_ids(
|
def prepare_input_ids(
|
||||||
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs):
|
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs):
|
||||||
if past_key_values is not None: # kvcache
|
if past_key_values and isinstance(past_key_values, bool): # kvcache
|
||||||
input_ids = input_ids[:, -1]
|
input_ids = input_ids[:, -1]
|
||||||
else: # prefill, reset the model here
|
else: # prefill, reset the model here
|
||||||
from .npu_llm_cpp import reset
|
from .npu_llm_cpp import reset
|
||||||
|
|
@ -495,7 +495,7 @@ def causal_lm_forward(
|
||||||
return CausalLMOutputWithPast(
|
return CausalLMOutputWithPast(
|
||||||
loss=None,
|
loss=None,
|
||||||
logits=logits,
|
logits=logits,
|
||||||
past_key_values=1, # just an indicator
|
past_key_values=True, # just an indicator
|
||||||
hidden_states=None,
|
hidden_states=None,
|
||||||
attentions=None,
|
attentions=None,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue