From 85ef3f1d99c6a1fb6032bfe7e45ff23a457fed5b Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Thu, 21 Mar 2024 10:51:23 +0800
Subject: [PATCH] LLM: add empty cache in deepspeed autotp benchmark script
 (#10488)

---
 python/llm/dev/benchmark/all-in-one/run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
index b88e5311..3a9b5347 100644
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -1519,6 +1519,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
                 output = tokenizer.batch_decode(output_ids)
                 actual_out_len = output_ids.shape[1] - actual_in_len
                 print(output[0])
+                torch.xpu.empty_cache()
                 if i >= warm_up:
                     result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
                                            actual_in_len, actual_out_len, load_time])