From 13e61738c57bebe5a164cd61ae2fe92945baeaec Mon Sep 17 00:00:00 2001 From: Xin Qiu Date: Tue, 30 Jan 2024 16:04:17 +0800 Subject: [PATCH] hide detail memory for each token in benchmark_utils.py (#10037) --- python/llm/dev/benchmark/benchmark_util.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python/llm/dev/benchmark/benchmark_util.py b/python/llm/dev/benchmark/benchmark_util.py index 36f2bcdb..7d40de34 100644 --- a/python/llm/dev/benchmark/benchmark_util.py +++ b/python/llm/dev/benchmark/benchmark_util.py @@ -510,9 +510,10 @@ class BenchmarkWrapper: learn more about decoding strategies refer to the [text generation strategies guide](../generation_strategies). """ - def __init__(self, model, do_print=False): + def __init__(self, model, do_print=False, verbose=False): self.model = model self.do_print = do_print + self.verbose = verbose self.encoder_time = 0.0 self.first_cost = 0.0 self.rest_cost_mean = 0.0 @@ -2469,7 +2470,8 @@ class BenchmarkWrapper: if self.device.type == "xpu": print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" f" tokens in all) and {np.max(memory_every_token[1:])} GB=========") - print(f"Peak memory for every token: {memory_every_token}") + if self.verbose: + print(f"Peak memory for every token: {memory_every_token}") else: print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" f" tokens in all)=========") @@ -2783,7 +2785,8 @@ class BenchmarkWrapper: if self.device.type == "xpu": print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" f" tokens in all) and {np.max(memory_every_token[1:])} GB=========") - print(f"Peak memory for every token: {memory_every_token}") + if self.verbose: + print(f"Peak memory for every token: {memory_every_token}") else: print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" f" tokens in all)=========") @@ -3133,7 +3136,8 @@ class BenchmarkWrapper: if self.device.type == "xpu": print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" f" tokens in all) and {np.max(memory_every_token[1:])} GB=========") - print(f"Peak memory for every token: {memory_every_token}") + if self.verbose: + print(f"Peak memory for every token: {memory_every_token}") else: print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" f" tokens in all)=========") @@ -3498,7 +3502,8 @@ class BenchmarkWrapper: if self.device.type == "xpu": print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" f" tokens in all) and {np.max(memory_every_token[1:])} GB=========") - print(f"Peak memory for every token: {memory_every_token}") + if self.verbose: + print(f"Peak memory for every token: {memory_every_token}") else: print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" f" tokens in all)=========")