[LLM] vLLM: Delete last_kv_cache before prefilling (#9619)
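Delete the model's `last_kv_cache` attribute (when it is set) before prefilling, so the previous KV cache can be freed before the new one is allocated, reducing peak memory usage.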
This commit is contained in:
		
							parent
							
								
									48b85593b3
								
							
						
					
					
						commit
						deee65785c
					
				
					 1 changed file with 2 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -186,6 +186,8 @@ class BigDLLlamaForCausalLM(BigDLModelForCausalLM):
 | 
			
		|||
                "use_cache": True,
 | 
			
		||||
                # "return_dict": True,
 | 
			
		||||
            }
 | 
			
		||||
            if self.last_kv_cache:
 | 
			
		||||
                del self.last_kv_cache
 | 
			
		||||
        # pdb.set_trace()
 | 
			
		||||
 | 
			
		||||
        if self.device.type == 'xpu':
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue