mistral 4.36 use fp16 sdp (#10704)
parent dcb2038aad
commit c7422712fc
1 changed file with 9 additions and 0 deletions
```diff
@@ -896,6 +896,15 @@ def mistral_attention_forward_4_36_original(
         attn_weights = None
         attn_output = attn_output.transpose(1, 2).contiguous()
         attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+    elif use_esimd_sdp(q_len, key_states.shape[2], self.head_dim, query_states):
+        import linear_fp16_esimd
+        attn_output = linear_fp16_esimd.sdp_forward(query_states,
+                                                    key_states,
+                                                    value_states)
+        attn_output = attn_output.view(query_states.shape)
+        attn_weights = None
+        attn_output = attn_output.transpose(1, 2).contiguous()
+        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
     else:
         attn_output, attn_weights = compute_attn_outputs_weights(query_states,
                                                                  key_states,
```
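For reference, the new `elif` branch computes standard scaled-dot-product attention, with `linear_fp16_esimd.sdp_forward` offloading the fp16 computation to an ESIMD kernel. Below is a minimal sketch of the equivalent math and shape handling, using PyTorch's built-in `scaled_dot_product_attention` purely as a semantic stand-in for the kernel; the tensor sizes are illustrative assumptions, not taken from the patch, and float32 is used here only so the sketch runs on any device (the patched path runs in fp16 on Intel GPUs).

```python
# Minimal sketch (assumption: reference semantics only). The actual patch
# calls linear_fp16_esimd.sdp_forward, an ESIMD fp16 kernel; PyTorch's
# built-in SDPA stands in for it here.
import torch
import torch.nn.functional as F

bsz, num_heads, head_dim = 1, 32, 128  # illustrative sizes, not from the patch
q_len, kv_len = 1, 1024                # one decode step against a 1024-token KV cache

query_states = torch.randn(bsz, num_heads, q_len, head_dim)
key_states = torch.randn(bsz, num_heads, kv_len, head_dim)
value_states = torch.randn(bsz, num_heads, kv_len, head_dim)

# softmax(Q @ K^T / sqrt(head_dim)) @ V -- what the ESIMD kernel computes in fp16
attn_output = F.scaled_dot_product_attention(query_states, key_states, value_states)
attn_output = attn_output.view(query_states.shape)  # [bsz, num_heads, q_len, head_dim]

# Same post-processing as the patch: fold the heads back into hidden_size
attn_output = attn_output.transpose(1, 2).contiguous()
attn_output = attn_output.reshape(bsz, q_len, num_heads * head_dim)
print(attn_output.shape)  # torch.Size([1, 1, 4096])
```

Note that the branch sets `attn_weights = None`: like the other fast paths in this function, the fused kernel returns only the attention output and never materializes the attention-weight matrix.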