LLM: call torch.xpu.empty_cache() in the DeepSpeed AutoTP benchmark script (#10488)
This commit is contained in:
		
							parent
							
								
									5a5fd5af5b
								
							
						
					
					
						commit
						85ef3f1d99
					
				
					 1 changed file with 1 addition and 0 deletions
				
			
		| 
						 | 
				
			
			@ -1519,6 +1519,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
 | 
			
		|||
                output = tokenizer.batch_decode(output_ids)
 | 
			
		||||
                actual_out_len = output_ids.shape[1] - actual_in_len
 | 
			
		||||
                print(output[0])
 | 
			
		||||
                torch.xpu.empty_cache()
 | 
			
		||||
                if i >= warm_up:
 | 
			
		||||
                    result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
 | 
			
		||||
                                           actual_in_len, actual_out_len, load_time])
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue