Serving: optimize XPU memory usage by emptying the XPU cache during generation (#10358)

2024-03-11 15:21:22 +08:00 · 2024-03-11 15:21:22 +08:00 · d7b765fd3f
commit d7b765fd3f
parent be29833b2b
1 changed file with 2 additions and 0 deletions
--- a/python/llm/src/bigdl/llm/serving/model_worker.py
+++ b/python/llm/src/bigdl/llm/serving/model_worker.py
@@ -256,6 +256,8 @@ class ModelWorker(BaseModelWorker):
                self.context_len,
                self.stream_interval,
            ):
                if self.device == "xpu":
                    torch.xpu.empty_cache()
                ret = {
                    "text": output["text"],
                    "error_code": 0,