[LLM] vLLM: Delete last_kv_cache before prefilling (#9619)
Remove last_kv_cache before prefilling to reduce peak memory usage.
This commit is contained in:
parent
48b85593b3
commit
deee65785c
1 changed file with 2 additions and 0 deletions
@@ -186,6 +186,8 @@ class BigDLLlamaForCausalLM(BigDLModelForCausalLM):
             "use_cache": True,
             # "return_dict": True,
         }
+        if self.last_kv_cache:
+            del self.last_kv_cache
         # pdb.set_trace()

         if self.device.type == 'xpu':
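The two added lines drop the stale KV cache reference before the next prefill, so the old cache and the newly allocated one never coexist in memory. The minimal sketch below illustrates that pattern with plain PyTorch; the KVCacheSketch class and its prefill() method are hypothetical names invented for illustration, not part of the BigDL-LLM or vLLM code changed by this commit.

import torch

class KVCacheSketch:
    # Hypothetical illustration only: without the `del`, the previous
    # request's KV cache tensors stay referenced while the new prefill
    # allocates its own cache, raising peak memory usage.
    def __init__(self, device: str = "cpu"):
        self.device = torch.device(device)
        self.last_kv_cache = None

    def prefill(self, input_ids: torch.Tensor):
        # Release the reference to the previous KV cache so the allocator
        # can reuse that memory for tensors allocated during this prefill.
        if self.last_kv_cache:
            del self.last_kv_cache
            self.last_kv_cache = None
        # Placeholder for the real forward pass; an actual model would
        # return past_key_values here. Shapes are illustrative only.
        seq_len = input_ids.shape[-1]
        new_cache = [(torch.zeros(1, 8, seq_len, 64, device=self.device),
                      torch.zeros(1, 8, seq_len, 64, device=self.device))]
        self.last_kv_cache = new_cache
        return new_cache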