Hide detailed per-token memory output in benchmark_util.py (#10037)

2024-01-30 16:04:17 +08:00 · 2024-01-30 16:04:17 +08:00 · 13e61738c5
commit 13e61738c5
parent 6b63ba23d1
1 changed file with 10 additions and 5 deletions
--- a/python/llm/dev/benchmark/benchmark_util.py
+++ b/python/llm/dev/benchmark/benchmark_util.py
@@ -510,9 +510,10 @@ class BenchmarkWrapper:
    learn more about decoding strategies refer to the [text generation strategies guide](../generation_strategies).
    """
-    def __init__(self, model, do_print=False):
+    def __init__(self, model, do_print=False, verbose=False):
        self.model = model
        self.do_print = do_print
+        self.verbose = verbose
        self.encoder_time = 0.0
        self.first_cost = 0.0
        self.rest_cost_mean = 0.0
@@ -2469,6 +2470,7 @@ class BenchmarkWrapper:
                if self.device.type == "xpu":
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    if self.verbose:
+                        print(f"Peak memory for every token: {memory_every_token}")
                else:
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
@@ -2783,6 +2785,7 @@ class BenchmarkWrapper:
                if self.device.type == "xpu":
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    if self.verbose:
+                        print(f"Peak memory for every token: {memory_every_token}")
                else:
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
@@ -3133,6 +3136,7 @@ class BenchmarkWrapper:
                if self.device.type == "xpu":
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    if self.verbose:
+                        print(f"Peak memory for every token: {memory_every_token}")
                else:
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
@@ -3498,6 +3502,7 @@ class BenchmarkWrapper:
                if self.device.type == "xpu":
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    if self.verbose:
+                        print(f"Peak memory for every token: {memory_every_token}")
                else:
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"