LLM: Fix rope of chatglm3 to support speculative decoding on CPU (#9926)
This commit is contained in:
parent 18cd1f1432
commit 054952f82f
1 changed file with 2 additions and 1 deletion
@@ -218,7 +218,8 @@ def chatglm2_attention_forward_8eb45c(
     # apply relative positional encoding (rotary embedding)
     if rotary_pos_emb is not None:
-        if len(rotary_pos_emb) == 2:  # use_fuse_rope, see chatglm2_model_forward
+        if len(rotary_pos_emb) == 2 and isinstance(rotary_pos_emb, tuple):
+            # use_fuse_rope, see chatglm2_model_forward
             cos, sin = rotary_pos_emb
             rot_dim = cos.shape[-1]
             query_layer = query_layer.transpose(0, 1)
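For context, the change tightens the fused-rope guard: previously any rotary_pos_emb with len(...) == 2 was unpacked as a (cos, sin) pair, but in the non-fused path rotary_pos_emb is a single tensor, and a tensor whose first dimension happens to be 2 (as can presumably occur during speculative decoding, where more than one token is processed per step) also has len 2. Below is a minimal sketch, with hypothetical shapes and not code from the repository, of why the added isinstance check disambiguates the two cases:

    import torch

    # Fused-RoPE path: chatglm2_model_forward passes a (cos, sin) tuple.
    # Shapes here are illustrative only.
    fused = (torch.randn(1, 32, 64), torch.randn(1, 32, 64))

    # Non-fused path: rotary_pos_emb arrives as one tensor whose first
    # dimension is the number of tokens in the step; with 2 tokens,
    # len(tensor) == 2 as well.
    non_fused = torch.randn(2, 32, 2, 2)

    for emb in (fused, non_fused):
        old_check = len(emb) == 2                             # True for both
        new_check = len(emb) == 2 and isinstance(emb, tuple)  # tuple only
        print(type(emb).__name__, old_check, new_check)

    # Output:
    # tuple  True True
    # Tensor True False

With the old check, the length-2 tensor would be wrongly unpacked as cos, sin; the isinstance test routes it to the non-fused path instead.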