parent
							
								
									a71ae7c22b
								
							
						
					
					
						commit
						d2abc9711b
					
				
					 2 changed files with 2 additions and 2 deletions
				
			
		| 
						 | 
				
			
			@ -152,7 +152,6 @@ def compress_kv(attn_config, key_states, query_states, value_states, attention_m
 | 
			
		|||
    if not hasattr(attn_config, 'pooling'):
 | 
			
		||||
        attn_config.pooling = 'maxpool'
 | 
			
		||||
    bsz, num_heads, q_len, head_dim = query_states.shape
 | 
			
		||||
    print(f"attn_config.max_capacity_prompt: ", attn_config.max_capacity_prompt, " ", q_len)
 | 
			
		||||
    if q_len <= attn_config.max_capacity_prompt:
 | 
			
		||||
        return key_states, value_states
 | 
			
		||||
    else:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -127,7 +127,8 @@ def qwen2_model_forward(
 | 
			
		|||
                                                DynamicCompressCache):
 | 
			
		||||
            past_key_values = DynamicCompressCache.from_legacy_cache(past_key_values)
 | 
			
		||||
        if not use_quantize_kv and not use_compress_kv and not isinstance(past_key_values,
 | 
			
		||||
                                                                          DynamicNormalCache):
 | 
			
		||||
                                                                          (DynamicNormalCache,
 | 
			
		||||
                                                                           DynamicCompressCache)):
 | 
			
		||||
            past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
 | 
			
		||||
        past_key_values_length = past_key_values.get_usable_length(seq_length)
 | 
			
		||||
    # ipex-llm changes end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue