fix error for benchmark_util.py running on cpu (#9949)

commit 6fb3f40f7e
parent fb91c97fe8
Author: Xin Qiu
Date:   2024-01-22 10:14:40 +08:00 (committed by GitHub)

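On CPU the wrapped model never enters the XPU-only branch, so memory_every_token stays empty; the unconditional self.peak_memory = np.max(memory_every_token[1:]) and the memory_every_token[0] print then fail. This commit moves the peak-memory bookkeeping under the XPU check and prints the GB figures only when the device type is "xpu". A minimal standalone sketch of the failure mode (illustration only, not code from the repository; the list name mirrors the diff):

import numpy as np

# On CPU nothing is ever appended, mirroring the skipped XPU branch.
memory_every_token = []

try:
    peak = np.max(memory_every_token[1:])    # the old unconditional peak-memory line
except ValueError as err:
    print(f"np.max on an empty slice fails: {err}")

try:
    first_gb = memory_every_token[0]         # the old first-token GB print
except IndexError as err:
    print(f"indexing an empty list fails: {err}")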

@@ -2443,6 +2443,7 @@ class BenchmarkWrapper:
             if self.device.type == "xpu":
                 torch.xpu.synchronize()
                 memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024**3))
+                self.peak_memory = np.max(memory_every_token)
             end = time.perf_counter()
             if first_token_time is None:
                 first_token_time = end - st
@@ -2457,15 +2458,21 @@ class BenchmarkWrapper:
                 break
         if self.do_print:
-            print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            if self.device.type == "xpu":
+                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            else:
+                print(f"=========First token cost {first_token_time:.4f} s=========")
         if len(last_token_time) > 1:
             self.first_cost = first_token_time
             self.rest_cost_mean = np.mean(last_token_time)
-            self.peak_memory = np.max(memory_every_token[1:])
             if self.do_print:
-                print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
-                      f" tokens in all) and {self.peak_memory} GB=========")
-                print(f"Peak memory for every token: {memory_every_token}")
+                if self.device.type == "xpu":
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    print(f"Peak memory for every token: {memory_every_token}")
+                else:
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all)=========")
         if streamer is not None:
             streamer.end()
@@ -2750,6 +2757,7 @@ class BenchmarkWrapper:
             if self.device.type == "xpu":
                 torch.xpu.synchronize()
                 memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                self.peak_memory = np.max(memory_every_token)
             end = time.perf_counter()
             if first_token_time is None:
                 first_token_time = end - st
@@ -2764,15 +2772,21 @@ class BenchmarkWrapper:
                 break
         if self.do_print:
-            print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            if self.device.type == "xpu":
+                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            else:
+                print(f"=========First token cost {first_token_time:.4f} s=========")
         if len(last_token_time) > 1:
             self.first_cost = first_token_time
             self.rest_cost_mean = np.mean(last_token_time)
-            self.peak_memory = np.max(memory_every_token[1:])
             if self.do_print:
-                print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
-                      f" tokens in all) and {self.peak_memory} GB=========")
-                print(f"Peak memory for every token: {memory_every_token}")
+                if self.device.type == "xpu":
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    print(f"Peak memory for every token: {memory_every_token}")
+                else:
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all)=========")
         if streamer is not None:
             streamer.end()
@@ -3083,6 +3097,7 @@ class BenchmarkWrapper:
             if self.device.type == "xpu":
                 torch.xpu.synchronize()
                 memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                self.peak_memory = np.max(memory_every_token)
             end = time.perf_counter()
             if first_token_time is None:
                 first_token_time = end - st
@@ -3107,15 +3122,21 @@ class BenchmarkWrapper:
         )
         if self.do_print:
-            print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            if self.device.type == "xpu":
+                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            else:
+                print(f"=========First token cost {first_token_time:.4f} s=========")
         if len(last_token_time) > 1:
             self.first_cost = first_token_time
             self.rest_cost_mean = np.mean(last_token_time)
-            self.peak_memory = np.max(memory_every_token[1:])
             if self.do_print:
-                print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
-                      f" tokens in all) and {self.peak_memory} GB=========")
-                print(f"Peak memory for every token: {memory_every_token}")
+                if self.device.type == "xpu":
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    print(f"Peak memory for every token: {memory_every_token}")
+                else:
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all)=========")
         if return_dict_in_generate:
             if not output_scores:
@@ -3447,6 +3468,7 @@ class BenchmarkWrapper:
             if self.device.type == "xpu":
                 torch.xpu.synchronize()
                 memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                self.peak_memory = np.max(memory_every_token)
             end = time.perf_counter()
             if first_token_time is None:
                 first_token_time = end - st
@@ -3465,15 +3487,21 @@ class BenchmarkWrapper:
         )
         if self.do_print:
-            print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            if self.device.type == "xpu":
+                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
+            else:
+                print(f"=========First token cost {first_token_time:.4f} s=========")
         if len(last_token_time) > 1:
             self.first_cost = first_token_time
             self.rest_cost_mean = np.mean(last_token_time)
-            self.peak_memory = np.max(memory_every_token[1:])
             if self.do_print:
-                print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
-                      f" tokens in all) and {self.peak_memory} GB=========")
-                print(f"Peak memory for every token: {memory_every_token}")
+                if self.device.type == "xpu":
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
+                    print(f"Peak memory for every token: {memory_every_token}")
+                else:
+                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
+                          f" tokens in all)=========")
         if return_dict_in_generate:
             if not output_scores:
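
For context, a hedged sketch of exercising the wrapper on CPU after this change; the model name, prompt, import path, and the do_print keyword are illustrative assumptions, not taken from this commit:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from benchmark_util import BenchmarkWrapper  # assumed import path

tokenizer = AutoTokenizer.from_pretrained("gpt2")      # illustrative small model
model = AutoModelForCausalLM.from_pretrained("gpt2")   # stays on CPU by default
model = BenchmarkWrapper(model, do_print=True)         # do_print assumed to map to self.do_print

input_ids = tokenizer("Once upon a time", return_tensors="pt").input_ids
with torch.inference_mode():
    output = model.generate(input_ids, max_new_tokens=32)

# With this fix, CPU runs print only the timing lines
# ("First token cost ..." / "Rest tokens cost average ...");
# the GB figures and the per-token peak memory list appear only on XPU.
print(tokenizer.decode(output[0], skip_special_tokens=True))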