diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 86d48572..230ca017 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -394,6 +394,7 @@ def run_transformer_int4_gpu(repo_id, actual_in_len, actual_out_len]) except RuntimeError: pass + del model torch.xpu.empty_cache() return result @@ -473,6 +474,7 @@ def run_optimize_model_gpu(repo_id, if i >= warm_up: result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time, actual_in_len, actual_out_len]) + del model torch.xpu.empty_cache() return result @@ -544,6 +546,7 @@ def run_ipex_fp16_gpu(repo_id, if i >= warm_up: result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time, actual_in_len, actual_out_len]) + del model torch.xpu.empty_cache() return result