From 13e61738c57bebe5a164cd61ae2fe92945baeaec Mon Sep 17 00:00:00 2001
From: Xin Qiu <qiuxin2012@users.noreply.github.com>
Date: Tue, 30 Jan 2024 16:04:17 +0800
Subject: [PATCH] hide detailed memory for each token in benchmark_util.py
 (#10037)

---
 python/llm/dev/benchmark/benchmark_util.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/python/llm/dev/benchmark/benchmark_util.py b/python/llm/dev/benchmark/benchmark_util.py
index 36f2bcdb..7d40de34 100644
--- a/python/llm/dev/benchmark/benchmark_util.py
+++ b/python/llm/dev/benchmark/benchmark_util.py
@@ -510,9 +510,10 @@ class BenchmarkWrapper:
     learn more about decoding strategies refer to the [text generation strategies guide](../generation_strategies).
     """
     
-    def __init__(self, model, do_print=False):
+    def __init__(self, model, do_print=False, verbose=False):
         self.model = model
         self.do_print = do_print
+        self.verbose = verbose
         self.encoder_time = 0.0
         self.first_cost = 0.0
         self.rest_cost_mean = 0.0
@@ -2469,7 +2470,8 @@ class BenchmarkWrapper:
                 if self.device.type == "xpu":
                     print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                           f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
-                    print(f"Peak memory for every token: {memory_every_token}")
+                    if self.verbose:
+                        print(f"Peak memory for every token: {memory_every_token}")
                 else:
                     print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                           f" tokens in all)=========")
@@ -2783,7 +2785,8 @@ class BenchmarkWrapper:
                 if self.device.type == "xpu":
                     print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                           f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
-                    print(f"Peak memory for every token: {memory_every_token}")
+                    if self.verbose:
+                        print(f"Peak memory for every token: {memory_every_token}")
                 else:
                     print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                           f" tokens in all)=========")
@@ -3133,7 +3136,8 @@ class BenchmarkWrapper:
                 if self.device.type == "xpu":
                     print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                           f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
-                    print(f"Peak memory for every token: {memory_every_token}")
+                    if self.verbose:
+                        print(f"Peak memory for every token: {memory_every_token}")
                 else:
                     print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                           f" tokens in all)=========")
@@ -3498,7 +3502,8 @@ class BenchmarkWrapper:
                 if self.device.type == "xpu":
                     print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                           f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
-                    print(f"Peak memory for every token: {memory_every_token}")
+                    if self.verbose:
+                        print(f"Peak memory for every token: {memory_every_token}")
                 else:
                     print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                           f" tokens in all)=========")