[LLM] vLLM: Delete last_kv_cache before prefilling (#9619)
Remove last_kv_cache before prefilling to reduce peak memory usage.
parent 48b85593b3
commit deee65785c
1 changed file with 2 additions and 0 deletions
@@ -186,6 +186,8 @@ class BigDLLlamaForCausalLM(BigDLModelForCausalLM):
                 "use_cache": True,
                 # "return_dict": True,
             }
+            if self.last_kv_cache:
+                del self.last_kv_cache
         # pdb.set_trace()
 
         if self.device.type == 'xpu':
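The two added lines drop the KV cache left over from the previous request right before the prefill forward pass, so its tensors can be reclaimed before the new cache is allocated. A minimal sketch of that pattern, not the actual BigDLLlamaForCausalLM code; apart from last_kv_cache and use_cache, every name here is an assumption for illustration, and a transformers-style model output with past_key_values is assumed:

    # Sketch only: illustrates the "delete stale KV cache before prefill" pattern.
    import torch


    class PrefillRunner:
        def __init__(self, model):
            self.model = model          # assumed transformers-style causal LM
            self.last_kv_cache = None   # past_key_values kept from the previous step

        @torch.no_grad()
        def prefill(self, input_ids):
            # Release the stale cache before running prefill so its tensors can be
            # freed before the new, larger cache is built, lowering peak memory usage.
            if self.last_kv_cache:
                del self.last_kv_cache

            outputs = self.model(input_ids, use_cache=True)
            self.last_kv_cache = outputs.past_key_values
            return outputs.logits

Deleting the reference (rather than waiting for it to be overwritten after the forward pass) matters because, for a moment during prefill, both the old and the new cache would otherwise be alive at once.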