diff --git a/python/llm/src/bigdl/llm/serving/model_worker.py b/python/llm/src/bigdl/llm/serving/model_worker.py index f2eef6b4..13b98057 100644 --- a/python/llm/src/bigdl/llm/serving/model_worker.py +++ b/python/llm/src/bigdl/llm/serving/model_worker.py @@ -256,6 +256,8 @@ class ModelWorker(BaseModelWorker): self.context_len, self.stream_interval, ): + if self.device == "xpu": + torch.xpu.empty_cache() ret = { "text": output["text"], "error_code": 0,