fix error for benchmark_util.py running on cpu (#9949)
parent fb91c97fe8
commit 6fb3f40f7e
1 changed file with 48 additions and 20 deletions
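All eight hunks below are in benchmark_util.py and apply the same pattern at four places in class BenchmarkWrapper. The likely cause of the CPU error is visible in the context lines: memory_every_token is only appended to inside the XPU-only branch, so on CPU it stays empty, and the unguarded memory_every_token[0] and np.max(memory_every_token[1:]) in the reporting code fail with an IndexError or ValueError. The fix keeps the timing output on every device but moves all memory reporting (and the self.peak_memory bookkeeping) behind the same device check. A minimal sketch of the idea, using a made-up helper name and simplified arguments rather than the real BenchmarkWrapper code:

```python
# Minimal sketch of the failure and the fix (illustrative only; report_first_token
# is a made-up helper name, not part of benchmark_util.py).
import torch


def report_first_token(device, first_token_time, memory_every_token):
    if device.type == "xpu":
        # On XPU the list has one entry per generated token, so indexing it is safe.
        print(f"=========First token cost {first_token_time:.4f} s"
              f" and {memory_every_token[0]} GB=========")
    else:
        # On CPU nothing ever appends to the list; an unguarded
        # memory_every_token[0] would raise "IndexError: list index out of range".
        print(f"=========First token cost {first_token_time:.4f} s=========")


memory_every_token = []                      # stays empty on CPU
report_first_token(torch.device("cpu"), 0.1234, memory_every_token)
```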
@@ -2443,6 +2443,7 @@ class BenchmarkWrapper:
             if self.device.type == "xpu":
                 torch.xpu.synchronize()
                 memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024**3))
+                self.peak_memory = np.max(memory_every_token)
             end = time.perf_counter()
             if first_token_time is None:
                 first_token_time = end - st
@@ -2457,15 +2458,21 @@ class BenchmarkWrapper:
                     break

         if self.do_print:
-            print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            if self.device.type == "xpu":
+                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            else:
+                print(f"=========First token cost {first_token_time:.4f} s=========")
         if len(last_token_time) > 1:
             self.first_cost = first_token_time
             self.rest_cost_mean = np.mean(last_token_time)
-            self.peak_memory = np.max(memory_every_token[1:])
             if self.do_print:
-                print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
-                      f" tokens in all) and {self.peak_memory} GB=========")
-                print(f"Peak memory for every token: {memory_every_token}")
+                if self.device.type == "xpu":
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    print(f"Peak memory for every token: {memory_every_token}")
+                else:
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all)=========")

         if streamer is not None:
             streamer.end()
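The smaller hunk in each pair touches the per-token measurement loop: timings are taken on every device, while the memory sample and the new running self.peak_memory update stay inside the XPU-only branch. Roughly, the loop behaves as in the sketch below (a simplified, self-contained stand-in rather than the repo code; the dummy workload, loop length and the else-branch bookkeeping for last_token_time are assumptions):

```python
# Simplified stand-in for the per-token measurement loop (not the exact repo code).
import time
import numpy as np
import torch

device = torch.device("cpu")        # would be torch.device("xpu") on an Intel GPU build
memory_every_token = []
last_token_time = []
first_token_time = None
peak_memory = 0.0

for _ in range(4):                  # stand-in for the real decoding loop
    st = time.perf_counter()
    _ = torch.zeros(256, 256) @ torch.zeros(256, 256)   # dummy "generation step"
    if device.type == "xpu":
        torch.xpu.synchronize()     # flush queued XPU work before reading the clock
        memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
        peak_memory = np.max(memory_every_token)         # running peak, XPU only
    end = time.perf_counter()
    if first_token_time is None:
        first_token_time = end - st
    else:
        last_token_time.append(end - st)

print(f"first token: {first_token_time:.4f} s,"
      f" rest mean: {np.mean(last_token_time):.4f} s")
```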
@@ -2750,6 +2757,7 @@ class BenchmarkWrapper:
             if self.device.type == "xpu":
                 torch.xpu.synchronize()
                 memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                self.peak_memory = np.max(memory_every_token)
             end = time.perf_counter()
             if first_token_time is None:
                 first_token_time = end - st
@@ -2764,15 +2772,21 @@ class BenchmarkWrapper:
                     break

         if self.do_print:
-            print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            if self.device.type == "xpu":
+                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            else:
+                print(f"=========First token cost {first_token_time:.4f} s=========")
         if len(last_token_time) > 1:
             self.first_cost = first_token_time
             self.rest_cost_mean = np.mean(last_token_time)
-            self.peak_memory = np.max(memory_every_token[1:])
             if self.do_print:
-                print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
-                      f" tokens in all) and {self.peak_memory} GB=========")
-                print(f"Peak memory for every token: {memory_every_token}")
+                if self.device.type == "xpu":
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    print(f"Peak memory for every token: {memory_every_token}")
+                else:
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all)=========")

         if streamer is not None:
             streamer.end()
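A side effect worth noting in these hunks: self.peak_memory used to be computed once at the end as np.max(memory_every_token[1:]), skipping the first (prefill) token and running regardless of device, which would also fail on the empty list on CPU; after the change it is a running np.max(memory_every_token) over all tokens so far, updated only inside the XPU branch and never assigned on CPU. A small illustration with made-up numbers:

```python
# Made-up per-token reserved-memory values (GB) to contrast the two peak computations.
import numpy as np

memory_every_token = [4.6, 4.3, 4.4, 4.5]

# Old behaviour: computed once after generation, excluding the first token.
peak_old = np.max(memory_every_token[1:])   # 4.5

# New behaviour: running maximum over every token, updated in the XPU-only branch.
peak_new = np.max(memory_every_token)       # 4.6 -- differs when the first token peaks

print(peak_old, peak_new)
```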
@@ -3083,6 +3097,7 @@ class BenchmarkWrapper:
             if self.device.type == "xpu":
                 torch.xpu.synchronize()
                 memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                self.peak_memory = np.max(memory_every_token)
             end = time.perf_counter()
             if first_token_time is None:
                 first_token_time = end - st
@@ -3107,15 +3122,21 @@ class BenchmarkWrapper:
             )

         if self.do_print:
-            print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            if self.device.type == "xpu":
+                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            else:
+                print(f"=========First token cost {first_token_time:.4f} s=========")
         if len(last_token_time) > 1:
             self.first_cost = first_token_time
             self.rest_cost_mean = np.mean(last_token_time)
-            self.peak_memory = np.max(memory_every_token[1:])
             if self.do_print:
-                print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
-                      f" tokens in all) and {self.peak_memory} GB=========")
-                print(f"Peak memory for every token: {memory_every_token}")
+                if self.device.type == "xpu":
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    print(f"Peak memory for every token: {memory_every_token}")
+                else:
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all)=========")

         if return_dict_in_generate:
             if not output_scores:
@@ -3447,6 +3468,7 @@ class BenchmarkWrapper:
             if self.device.type == "xpu":
                 torch.xpu.synchronize()
                 memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                self.peak_memory = np.max(memory_every_token)
             end = time.perf_counter()
             if first_token_time is None:
                 first_token_time = end - st
@@ -3465,15 +3487,21 @@ class BenchmarkWrapper:
             )

         if self.do_print:
-            print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            if self.device.type == "xpu":
+                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            else:
+                print(f"=========First token cost {first_token_time:.4f} s=========")
         if len(last_token_time) > 1:
             self.first_cost = first_token_time
             self.rest_cost_mean = np.mean(last_token_time)
-            self.peak_memory = np.max(memory_every_token[1:])
             if self.do_print:
-                print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
-                      f" tokens in all) and {self.peak_memory} GB=========")
-                print(f"Peak memory for every token: {memory_every_token}")
+                if self.device.type == "xpu":
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    print(f"Peak memory for every token: {memory_every_token}")
+                else:
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all)=========")

         if return_dict_in_generate:
             if not output_scores:
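The same guard also matters for torch.xpu.synchronize() itself: on a CPU-only build the torch.xpu namespace may not be usable, and synchronization is only needed so that wall-clock timings reflect work queued on an asynchronous device. A device-agnostic helper in the same spirit (an illustrative sketch, not part of the repo; the function name is invented):

```python
# Illustrative helper (not from benchmark_util.py): synchronize only when the device
# actually queues work asynchronously, so the same timing code runs on CPU, CUDA and XPU.
import torch


def sync(device: torch.device) -> None:
    if device.type == "xpu" and hasattr(torch, "xpu"):
        torch.xpu.synchronize()          # requires an XPU-enabled PyTorch / IPEX build
    elif device.type == "cuda" and torch.cuda.is_available():
        torch.cuda.synchronize()
    # CPU eager ops have already completed when the call returns, so nothing to wait for.


sync(torch.device("cpu"))                # no-op on CPU
```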