Fix error when running benchmark_util.py on CPU (#9949)
This commit is contained in:
		
							parent
							
								
									fb91c97fe8
								
							
						
					
					
						commit
						6fb3f40f7e
					
				
					 1 changed file with 48 additions and 20 deletions
				
			
		| 
						 | 
				
			
			@ -2443,6 +2443,7 @@ class BenchmarkWrapper:
 | 
			
		|||
            if self.device.type == "xpu":
 | 
			
		||||
                torch.xpu.synchronize()
 | 
			
		||||
                memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024**3))
 | 
			
		||||
                self.peak_memory = np.max(memory_every_token)
 | 
			
		||||
            end = time.perf_counter()
 | 
			
		||||
            if first_token_time is None:
 | 
			
		||||
                first_token_time = end - st
 | 
			
		||||
| 
						 | 
				
			
			@ -2457,15 +2458,21 @@ class BenchmarkWrapper:
 | 
			
		|||
                break
 | 
			
		||||
 | 
			
		||||
        if self.do_print:
 | 
			
		||||
            if self.device.type == "xpu":
 | 
			
		||||
                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
 | 
			
		||||
            else:
 | 
			
		||||
                print(f"=========First token cost {first_token_time:.4f} s=========")
 | 
			
		||||
        if len(last_token_time) > 1:
 | 
			
		||||
            self.first_cost = first_token_time
 | 
			
		||||
            self.rest_cost_mean = np.mean(last_token_time)
 | 
			
		||||
            self.peak_memory = np.max(memory_every_token[1:])
 | 
			
		||||
            if self.do_print:
 | 
			
		||||
                if self.device.type == "xpu":
 | 
			
		||||
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
 | 
			
		||||
                      f" tokens in all) and {self.peak_memory} GB=========")
 | 
			
		||||
                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
 | 
			
		||||
                    print(f"Peak memory for every token: {memory_every_token}")
 | 
			
		||||
                else:
 | 
			
		||||
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
 | 
			
		||||
                          f" tokens in all)=========")
 | 
			
		||||
 | 
			
		||||
        if streamer is not None:
 | 
			
		||||
            streamer.end()
 | 
			
		||||
| 
						 | 
				
			
			@ -2750,6 +2757,7 @@ class BenchmarkWrapper:
 | 
			
		|||
            if self.device.type == "xpu":
 | 
			
		||||
                torch.xpu.synchronize()
 | 
			
		||||
                memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
 | 
			
		||||
                self.peak_memory = np.max(memory_every_token)
 | 
			
		||||
            end = time.perf_counter()
 | 
			
		||||
            if first_token_time is None:
 | 
			
		||||
                first_token_time = end - st
 | 
			
		||||
| 
						 | 
				
			
			@ -2764,15 +2772,21 @@ class BenchmarkWrapper:
 | 
			
		|||
                break
 | 
			
		||||
 | 
			
		||||
        if self.do_print:
 | 
			
		||||
            if self.device.type == "xpu":
 | 
			
		||||
                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
 | 
			
		||||
            else:
 | 
			
		||||
                print(f"=========First token cost {first_token_time:.4f} s=========")
 | 
			
		||||
        if len(last_token_time) > 1:
 | 
			
		||||
            self.first_cost = first_token_time
 | 
			
		||||
            self.rest_cost_mean = np.mean(last_token_time)
 | 
			
		||||
            self.peak_memory = np.max(memory_every_token[1:])
 | 
			
		||||
            if self.do_print:
 | 
			
		||||
                if self.device.type == "xpu":
 | 
			
		||||
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
 | 
			
		||||
                      f" tokens in all) and {self.peak_memory} GB=========")
 | 
			
		||||
                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
 | 
			
		||||
                    print(f"Peak memory for every token: {memory_every_token}")
 | 
			
		||||
                else:
 | 
			
		||||
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
 | 
			
		||||
                          f" tokens in all)=========")
 | 
			
		||||
 | 
			
		||||
        if streamer is not None:
 | 
			
		||||
            streamer.end()
 | 
			
		||||
| 
						 | 
				
			
			@ -3083,6 +3097,7 @@ class BenchmarkWrapper:
 | 
			
		|||
            if self.device.type == "xpu":
 | 
			
		||||
                torch.xpu.synchronize()
 | 
			
		||||
                memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
 | 
			
		||||
                self.peak_memory = np.max(memory_every_token)
 | 
			
		||||
            end = time.perf_counter()
 | 
			
		||||
            if first_token_time is None:
 | 
			
		||||
                first_token_time = end - st
 | 
			
		||||
| 
						 | 
				
			
			@ -3107,15 +3122,21 @@ class BenchmarkWrapper:
 | 
			
		|||
        )
 | 
			
		||||
 | 
			
		||||
        if self.do_print:
 | 
			
		||||
            if self.device.type == "xpu":
 | 
			
		||||
                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
 | 
			
		||||
            else:
 | 
			
		||||
                print(f"=========First token cost {first_token_time:.4f} s=========")
 | 
			
		||||
        if len(last_token_time) > 1:
 | 
			
		||||
            self.first_cost = first_token_time
 | 
			
		||||
            self.rest_cost_mean = np.mean(last_token_time)
 | 
			
		||||
            self.peak_memory = np.max(memory_every_token[1:])
 | 
			
		||||
            if self.do_print:
 | 
			
		||||
                if self.device.type == "xpu":
 | 
			
		||||
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
 | 
			
		||||
                      f" tokens in all) and {self.peak_memory} GB=========")
 | 
			
		||||
                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
 | 
			
		||||
                    print(f"Peak memory for every token: {memory_every_token}")
 | 
			
		||||
                else:
 | 
			
		||||
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
 | 
			
		||||
                          f" tokens in all)=========")
 | 
			
		||||
 | 
			
		||||
        if return_dict_in_generate:
 | 
			
		||||
            if not output_scores:
 | 
			
		||||
| 
						 | 
				
			
			@ -3447,6 +3468,7 @@ class BenchmarkWrapper:
 | 
			
		|||
            if self.device.type == "xpu":
 | 
			
		||||
                torch.xpu.synchronize()
 | 
			
		||||
                memory_every_token.append(torch.xpu.memory.memory_reserved() / (1024 ** 3))
 | 
			
		||||
                self.peak_memory = np.max(memory_every_token)
 | 
			
		||||
            end = time.perf_counter()
 | 
			
		||||
            if first_token_time is None:
 | 
			
		||||
                first_token_time = end - st
 | 
			
		||||
| 
						 | 
				
			
			@ -3465,15 +3487,21 @@ class BenchmarkWrapper:
 | 
			
		|||
        )
 | 
			
		||||
 | 
			
		||||
        if self.do_print:
 | 
			
		||||
            if self.device.type == "xpu":
 | 
			
		||||
                print(f"=========First token cost {first_token_time:.4f} s and {memory_every_token[0]} GB=========")
 | 
			
		||||
            else:
 | 
			
		||||
                print(f"=========First token cost {first_token_time:.4f} s=========")
 | 
			
		||||
        if len(last_token_time) > 1:
 | 
			
		||||
            self.first_cost = first_token_time
 | 
			
		||||
            self.rest_cost_mean = np.mean(last_token_time)
 | 
			
		||||
            self.peak_memory = np.max(memory_every_token[1:])
 | 
			
		||||
            if self.do_print:
 | 
			
		||||
                if self.device.type == "xpu":
 | 
			
		||||
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
 | 
			
		||||
                      f" tokens in all) and {self.peak_memory} GB=========")
 | 
			
		||||
                          f" tokens in all) and {np.max(memory_every_token[1:])} GB=========")
 | 
			
		||||
                    print(f"Peak memory for every token: {memory_every_token}")
 | 
			
		||||
                else:
 | 
			
		||||
                    print(f"=========Rest tokens cost average {self.rest_cost_mean:.4f} s ({len(last_token_time)}"
 | 
			
		||||
                          f" tokens in all)=========")
 | 
			
		||||
 | 
			
		||||
        if return_dict_in_generate:
 | 
			
		||||
            if not output_scores:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue