From d7b765fd3fff5aa5ad6a584dbee103f5b8c2c68b Mon Sep 17 00:00:00 2001 From: Yina Chen <33650826+cyita@users.noreply.github.com> Date: Mon, 11 Mar 2024 15:21:22 +0800 Subject: [PATCH] serving xpu memory opt (#10358) --- python/llm/src/bigdl/llm/serving/model_worker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/llm/src/bigdl/llm/serving/model_worker.py b/python/llm/src/bigdl/llm/serving/model_worker.py index f2eef6b4..13b98057 100644 --- a/python/llm/src/bigdl/llm/serving/model_worker.py +++ b/python/llm/src/bigdl/llm/serving/model_worker.py @@ -256,6 +256,8 @@ class ModelWorker(BaseModelWorker): self.context_len, self.stream_interval, ): + if self.device == "xpu": + torch.xpu.empty_cache() ret = { "text": output["text"], "error_code": 0,