diff --git a/python/llm/src/bigdl/llm/vllm/model_executor/models/bigdl_llama.py b/python/llm/src/bigdl/llm/vllm/model_executor/models/bigdl_llama.py index 331e740e..cecf4df6 100644 --- a/python/llm/src/bigdl/llm/vllm/model_executor/models/bigdl_llama.py +++ b/python/llm/src/bigdl/llm/vllm/model_executor/models/bigdl_llama.py @@ -186,6 +186,8 @@ class BigDLLlamaForCausalLM(BigDLModelForCausalLM): "use_cache": True, # "return_dict": True, } + if self.last_kv_cache: + del self.last_kv_cache # pdb.set_trace() if self.device.type == 'xpu':