Hide per-token peak-memory details behind a verbose flag in benchmark_utils.py (#10037)

This commit is contained in:
Xin Qiu 2024-01-30 16:04:17 +08:00 committed by GitHub
parent 6b63ba23d1
commit 13e61738c5

View file

@ -510,9 +510,10 @@ class BenchmarkWrapper:
learn more about decoding strategies refer to the [text generation strategies guide](../generation_strategies). learn more about decoding strategies refer to the [text generation strategies guide](../generation_strategies).
""" """
def __init__(self, model, do_print=False): def __init__(self, model, do_print=False, verbose=False):
self.model = model self.model = model
self.do_print = do_print self.do_print = do_print
self.verbose = verbose
self.encoder_time = 0.0 self.encoder_time = 0.0
self.first_cost = 0.0 self.first_cost = 0.0
self.rest_cost_mean = 0.0 self.rest_cost_mean = 0.0
@ -2469,6 +2470,7 @@ class BenchmarkWrapper:
if self.device.type == "xpu": if self.device.type == "xpu":
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
f" tokens in all) and {np.max(memory_every_token[1:])} GB=========") f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
if self.verbose:
print(f"Peak memory for every token: {memory_every_token}") print(f"Peak memory for every token: {memory_every_token}")
else: else:
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
@ -2783,6 +2785,7 @@ class BenchmarkWrapper:
if self.device.type == "xpu": if self.device.type == "xpu":
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
f" tokens in all) and {np.max(memory_every_token[1:])} GB=========") f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
if self.verbose:
print(f"Peak memory for every token: {memory_every_token}") print(f"Peak memory for every token: {memory_every_token}")
else: else:
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
@ -3133,6 +3136,7 @@ class BenchmarkWrapper:
if self.device.type == "xpu": if self.device.type == "xpu":
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
f" tokens in all) and {np.max(memory_every_token[1:])} GB=========") f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
if self.verbose:
print(f"Peak memory for every token: {memory_every_token}") print(f"Peak memory for every token: {memory_every_token}")
else: else:
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
@ -3498,6 +3502,7 @@ class BenchmarkWrapper:
if self.device.type == "xpu": if self.device.type == "xpu":
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
f" tokens in all) and {np.max(memory_every_token[1:])} GB=========") f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
if self.verbose:
print(f"Peak memory for every token: {memory_every_token}") print(f"Peak memory for every token: {memory_every_token}")
else: else:
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}" print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"