LLM: call torch.xpu.empty_cache() in the DeepSpeed AutoTP benchmark script (#10488)

2024-03-21 10:51:23 +08:00 · 2024-03-21 10:51:23 +08:00 · 85ef3f1d99
commit 85ef3f1d99
parent 5a5fd5af5b
1 changed file with 1 addition and 0 deletions
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -1519,6 +1519,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
                output = tokenizer.batch_decode(output_ids)
                actual_out_len = output_ids.shape[1] - actual_in_len
                print(output[0])
+                torch.xpu.empty_cache()
                if i >= warm_up:
                    result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
                                           actual_in_len, actual_out_len, load_time])