From 968d99e6f59b6ce1757b281a2a61fca490c40b95 Mon Sep 17 00:00:00 2001
From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com>
Date: Tue, 12 Dec 2023 17:24:06 +0800
Subject: [PATCH] Remove empty cache between each iteration of generation
 (#9660)

---
 python/llm/dev/benchmark/all-in-one/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
index bc267208..3f3f6011 100644
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -733,7 +733,7 @@ def run_transformer_int4_gpu_win(repo_id,
                     if i >= warm_up:
                         result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
                                             actual_in_len, actual_out_len, gpu_peak_mem])
-                    torch.xpu.empty_cache()
+                    # torch.xpu.empty_cache()  # intentionally disabled: emptying the cache between iterations may make the first token slower
             except RuntimeError:
                 traceback.print_exc()
                 pass