Fix chatglm2 attention: pass transposed hidden_states to the quantized KV cache check (#10540)

This commit is contained in:
Yishuo Wang 2024-03-26 16:01:00 +08:00 committed by GitHub
parent 2ecd737474
commit 69a28d6b4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -184,7 +184,7 @@ def chatglm2_model_forward(
def chatglm2_attention_forward(
self, hidden_states, attention_mask, rotary_pos_emb, kv_cache=None, use_cache=True
):
-if use_quantize_kv_cache(self.query_key_value, hidden_states):
+if use_quantize_kv_cache(self.query_key_value, hidden_states.transpose(0, 1)):
forward_function = chatglm2_quantized_attention_forward_8eb45c
else:
forward_function = chatglm2_attention_forward_8eb45c