Fix error in benchmark_util.py when running on CPU (#9949)
This commit is contained in:
parent
fb91c97fe8
commit
6fb3f40f7e
1 changed file with 48 additions and 20 deletions
|
|
@ -2443,6 +2443,7 @@ class BenchmarkWrapper:
|
||||||
if self.device.type == "xpu":
|
if self.device.type == "xpu":
|
||||||
torch.xpu.synchronize()
|
torch.xpu.synchronize()
|
||||||
memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024**3))
|
memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024**3))
|
||||||
|
self.peak_memory = np.max(memory_every_token)
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
if first_token_time is None:
|
if first_token_time is None:
|
||||||
first_token_time = end - st
|
first_token_time = end - st
|
||||||
|
|
@ -2457,15 +2458,21 @@ class BenchmarkWrapper:
|
||||||
break
|
break
|
||||||
|
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
|
if self.device.type == "xpu":
|
||||||
|
print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
|
||||||
|
else:
|
||||||
|
print(f"=========First token cost {first_token_time:.4f} s=========")
|
||||||
if len(last_token_time) > 1:
|
if len(last_token_time) > 1:
|
||||||
self.first_cost = first_token_time
|
self.first_cost = first_token_time
|
||||||
self.rest_cost_mean = np.mean(last_token_time)
|
self.rest_cost_mean = np.mean(last_token_time)
|
||||||
self.peak_memory = np.max(memory_every_token[1:])
|
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
if self.device.type == "xpu":
|
||||||
f" tokens in all) and {self.peak_memory} GB=========")
|
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
||||||
print(f"Peak memory for every token: {memory_every_token}")
|
f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
|
||||||
|
print(f"Peak memory for every token: {memory_every_token}")
|
||||||
|
else:
|
||||||
|
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
||||||
|
f" tokens in all)=========")
|
||||||
|
|
||||||
if streamer is not None:
|
if streamer is not None:
|
||||||
streamer.end()
|
streamer.end()
|
||||||
|
|
@ -2750,6 +2757,7 @@ class BenchmarkWrapper:
|
||||||
if self.device.type == "xpu":
|
if self.device.type == "xpu":
|
||||||
torch.xpu.synchronize()
|
torch.xpu.synchronize()
|
||||||
memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
|
memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
|
||||||
|
self.peak_memory = np.max(memory_every_token)
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
if first_token_time is None:
|
if first_token_time is None:
|
||||||
first_token_time = end - st
|
first_token_time = end - st
|
||||||
|
|
@ -2764,15 +2772,21 @@ class BenchmarkWrapper:
|
||||||
break
|
break
|
||||||
|
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
|
if self.device.type == "xpu":
|
||||||
|
print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
|
||||||
|
else:
|
||||||
|
print(f"=========First token cost {first_token_time:.4f} s=========")
|
||||||
if len(last_token_time) > 1:
|
if len(last_token_time) > 1:
|
||||||
self.first_cost = first_token_time
|
self.first_cost = first_token_time
|
||||||
self.rest_cost_mean = np.mean(last_token_time)
|
self.rest_cost_mean = np.mean(last_token_time)
|
||||||
self.peak_memory = np.max(memory_every_token[1:])
|
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
if self.device.type == "xpu":
|
||||||
f" tokens in all) and {self.peak_memory} GB=========")
|
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
||||||
print(f"Peak memory for every token: {memory_every_token}")
|
f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
|
||||||
|
print(f"Peak memory for every token: {memory_every_token}")
|
||||||
|
else:
|
||||||
|
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
||||||
|
f" tokens in all)=========")
|
||||||
|
|
||||||
if streamer is not None:
|
if streamer is not None:
|
||||||
streamer.end()
|
streamer.end()
|
||||||
|
|
@ -3083,6 +3097,7 @@ class BenchmarkWrapper:
|
||||||
if self.device.type == "xpu":
|
if self.device.type == "xpu":
|
||||||
torch.xpu.synchronize()
|
torch.xpu.synchronize()
|
||||||
memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
|
memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
|
||||||
|
self.peak_memory = np.max(memory_every_token)
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
if first_token_time is None:
|
if first_token_time is None:
|
||||||
first_token_time = end - st
|
first_token_time = end - st
|
||||||
|
|
@ -3107,15 +3122,21 @@ class BenchmarkWrapper:
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
|
if self.device.type == "xpu":
|
||||||
|
print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
|
||||||
|
else:
|
||||||
|
print(f"=========First token cost {first_token_time:.4f} s=========")
|
||||||
if len(last_token_time) > 1:
|
if len(last_token_time) > 1:
|
||||||
self.first_cost = first_token_time
|
self.first_cost = first_token_time
|
||||||
self.rest_cost_mean = np.mean(last_token_time)
|
self.rest_cost_mean = np.mean(last_token_time)
|
||||||
self.peak_memory = np.max(memory_every_token[1:])
|
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
if self.device.type == "xpu":
|
||||||
f" tokens in all) and {self.peak_memory} GB=========")
|
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
||||||
print(f"Peak memory for every token: {memory_every_token}")
|
f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
|
||||||
|
print(f"Peak memory for every token: {memory_every_token}")
|
||||||
|
else:
|
||||||
|
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
||||||
|
f" tokens in all)=========")
|
||||||
|
|
||||||
if return_dict_in_generate:
|
if return_dict_in_generate:
|
||||||
if not output_scores:
|
if not output_scores:
|
||||||
|
|
@ -3447,6 +3468,7 @@ class BenchmarkWrapper:
|
||||||
if self.device.type == "xpu":
|
if self.device.type == "xpu":
|
||||||
torch.xpu.synchronize()
|
torch.xpu.synchronize()
|
||||||
memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
|
memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
|
||||||
|
self.peak_memory = np.max(memory_every_token)
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
if first_token_time is None:
|
if first_token_time is None:
|
||||||
first_token_time = end - st
|
first_token_time = end - st
|
||||||
|
|
@ -3465,15 +3487,21 @@ class BenchmarkWrapper:
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
|
if self.device.type == "xpu":
|
||||||
|
print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
|
||||||
|
else:
|
||||||
|
print(f"=========First token cost {first_token_time:.4f} s=========")
|
||||||
if len(last_token_time) > 1:
|
if len(last_token_time) > 1:
|
||||||
self.first_cost = first_token_time
|
self.first_cost = first_token_time
|
||||||
self.rest_cost_mean = np.mean(last_token_time)
|
self.rest_cost_mean = np.mean(last_token_time)
|
||||||
self.peak_memory = np.max(memory_every_token[1:])
|
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
if self.device.type == "xpu":
|
||||||
f" tokens in all) and {self.peak_memory} GB=========")
|
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
||||||
print(f"Peak memory for every token: {memory_every_token}")
|
f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
|
||||||
|
print(f"Peak memory for every token: {memory_every_token}")
|
||||||
|
else:
|
||||||
|
print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
|
||||||
|
f" tokens in all)=========")
|
||||||
|
|
||||||
if return_dict_in_generate:
|
if return_dict_in_generate:
|
||||||
if not output_scores:
|
if not output_scores:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue