LLM: add device id to benchmark utils. (#10877)

2024-04-25 14:01:51 +08:00 · 2024-04-25 14:01:51 +08:00 · cd369c2715
commit cd369c2715
parent 1ce8d7bcd9
1 changed file with 4 additions and 4 deletions
--- a/python/llm/dev/benchmark/benchmark_util.py
+++ b/python/llm/dev/benchmark/benchmark_util.py
@@ -2443,7 +2443,7 @@ class BenchmarkWrapper:

            if self.device.type == "xpu":
                torch.xpu.synchronize()
-                memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024**3))
+                memory_every_token.append(torch.xpu.memory.memory_reserved(self.device) / (1024**3))
                self.peak_memory = np.max(memory_every_token)
            end = time.perf_counter()
            if first_token_time is None:
@@ -2758,7 +2758,7 @@ class BenchmarkWrapper:

            if self.device.type == "xpu":
                torch.xpu.synchronize()
-                memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                memory_every_token.append(torch.xpu.memory.memory_reserved(self.device) / (1024 ** 3))
                self.peak_memory = np.max(memory_every_token)
            end = time.perf_counter()
            if first_token_time is None:
@@ -3099,7 +3099,7 @@ class BenchmarkWrapper:

            if self.device.type == "xpu":
                torch.xpu.synchronize()
-                memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                memory_every_token.append(torch.xpu.memory.memory_reserved(self.device) / (1024 ** 3))
                self.peak_memory = np.max(memory_every_token)
            end = time.perf_counter()
            if first_token_time is None:
@@ -3471,7 +3471,7 @@ class BenchmarkWrapper:

            if self.device.type == "xpu":
                torch.xpu.synchronize()
-                memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
+                memory_every_token.append(torch.xpu.memory.memory_reserved(self.device) / (1024 ** 3))
                self.peak_memory = np.max(memory_every_token)
            end = time.perf_counter()
            if first_token_time is None: