[NPU] fix transpose_value = False for NPU optimize_model=True (#12525)

This commit is contained in:
Ruonan Wang 2024-12-10 23:51:39 -08:00 committed by GitHub
parent 588bfa24dc
commit 41ef4974ab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -225,6 +225,7 @@ class LLMBaseNNFactory(NNFactory):
head_dim=head_dim,
)
new_key_states = key_states
new_value_states = value_states
if mode == "decode":
key_states = self.concat(past_key, key_states, axis=-2)