LLM: Fix rope of chatglm3 to support speculative decoding on CPU (#9926)
This commit is contained in:
		
							parent
							
								
									18cd1f1432
								
							
						
					
					
						commit
						054952f82f
					
				
					 1 changed file with 2 additions and 1 deletion
				
			
		| 
						 | 
				
			
			@ -218,7 +218,8 @@ def chatglm2_attention_forward_8eb45c(
 | 
			
		|||
 | 
			
		||||
    # apply relative positional encoding (rotary embedding)
 | 
			
		||||
    if rotary_pos_emb is not None:
 | 
			
		||||
        if len(rotary_pos_emb) == 2:  # use_fuse_rope, see chatglm2_model_forward
 | 
			
		||||
        if len(rotary_pos_emb) == 2 and isinstance(rotary_pos_emb, tuple):
 | 
			
		||||
            # use_fuse_rope, see chatglm2_model_forward
 | 
			
		||||
            cos, sin = rotary_pos_emb
 | 
			
		||||
            rot_dim = cos.shape[-1]
 | 
			
		||||
            query_layer = query_layer.transpose(0, 1)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue