diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index b88e5311..3a9b5347 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -1519,6 +1519,7 @@ def run_deepspeed_optimize_model_gpu(repo_id, output = tokenizer.batch_decode(output_ids) actual_out_len = output_ids.shape[1] - actual_in_len print(output[0]) + torch.xpu.empty_cache() if i >= warm_up: result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time, actual_in_len, actual_out_len, load_time])