fix chatglm lookahead on ARC (#11320)

Commit: e8dd8e97ef
Parent: f5ef94046e
2 changed files with 2 additions and 2 deletions
				
			
		| 
						 | 
				
			
@@ -244,7 +244,7 @@ def chatglm2_attention_forward(
         key_states[..., :rot_dim] = k_rot[...]

     # IPEX-LLM OPT: kv cache and quantize kv
-    use_quantize_kv = use_quantize_kv_cache(self.query_key_value, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, hidden_states.device
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
@@ -171,7 +171,7 @@ def chatglm4_attention_forward(
         key_states[..., :rot_dim] = k_rot[...]

     # IPEX-LLM OPT: kv cache and quantize kv
-    use_quantize_kv = use_quantize_kv_cache(self.query_key_value, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, hidden_states.device
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue