LLM: Fix rope of chatglm3 to support speculative decoding on CPU (#9926)

2024-01-18 09:28:10 +08:00 · 2024-01-18 09:28:10 +08:00 · 054952f82f
commit 054952f82f
parent 18cd1f1432
1 changed file with 2 additions and 1 deletion
--- a/python/llm/src/bigdl/llm/transformers/models/chatglm2.py
+++ b/python/llm/src/bigdl/llm/transformers/models/chatglm2.py
@@ -218,7 +218,8 @@ def chatglm2_attention_forward_8eb45c(

    # apply relative positional encoding (rotary embedding)
    if rotary_pos_emb is not None:
-        if len(rotary_pos_emb) == 2:  # use_fuse_rope, see chatglm2_model_forward
+        if len(rotary_pos_emb) == 2 and isinstance(rotary_pos_emb, tuple):
+            # use_fuse_rope, see chatglm2_model_forward
            cos, sin = rotary_pos_emb
            rot_dim = cos.shape[-1]
            query_layer = query_layer.transpose(0, 1)