LLM: fix llama 4.36 forward (#10047)
This commit is contained in:
parent 53a5140eff
commit 3685622f29
1 changed file with 1 addition and 1 deletion
@@ -638,7 +638,7 @@ def llama_attention_forward_4_36(
         "Please make sure use `attention_mask` instead.`"
     )

-    bsz, q_len, _ = hidden_states.size()
+    bsz, q_len, hidden_size = hidden_states.size()
     device = hidden_states.device
     # for flash attention
     original_dtype = hidden_states.dtype
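For context, the one-line change keeps the last dimension of hidden_states instead of discarding it with `_`. Below is a minimal, hypothetical sketch (not the repository's actual forward; attention_forward_sketch and its stand-in projections are invented for illustration) of why capturing hidden_size at runtime can matter, assuming the value is later used to reshape the attention output:

# Minimal sketch under the assumption stated above; not code from the diff.
import torch
import torch.nn.functional as F

def attention_forward_sketch(hidden_states, num_heads, head_dim):
    # Keep hidden_size instead of discarding it, so the final reshape
    # follows the actual runtime shape of hidden_states.
    bsz, q_len, hidden_size = hidden_states.size()
    original_dtype = hidden_states.dtype

    # Stand-in projections so the sketch runs end to end; a real forward
    # would apply q_proj / k_proj / v_proj and rotary embeddings here.
    q = hidden_states.view(bsz, q_len, num_heads, head_dim).transpose(1, 2)
    k, v = q.clone(), q.clone()

    attn_output = F.scaled_dot_product_attention(q, k, v)
    # The captured hidden_size (= num_heads * head_dim in this sketch)
    # keeps the output reshape consistent with the input tensor.
    attn_output = attn_output.transpose(1, 2).reshape(bsz, q_len, hidden_size)
    return attn_output.to(original_dtype)

# Usage: the input's last dimension (64) must equal num_heads * head_dim.
x = torch.randn(2, 16, 64)
out = attention_forward_sketch(x, num_heads=8, head_dim=8)

Reading the size from the tensor rather than from a configured attribute is presumably the point of the fix for the transformers 4.36 code path, since the two can diverge; that rationale is inferred from the diff, not stated in the commit message.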