LLM: add empty cache in deepspeed autotp benchmark script (#10488)
parent 5a5fd5af5b
commit 85ef3f1d99
1 changed file with 1 addition and 0 deletions
@@ -1519,6 +1519,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
             output = tokenizer.batch_decode(output_ids)
             actual_out_len = output_ids.shape[1] - actual_in_len
             print(output[0])
+            torch.xpu.empty_cache()
             if i >= warm_up:
                 result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
                                        actual_in_len, actual_out_len, load_time])
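
For context: the one-line change releases the XPU allocator's cached blocks after each generation, so memory held by a finished iteration does not accumulate across the benchmark loop and skew later runs. Below is a minimal sketch of the same pattern, assuming a PyTorch build with Intel XPU support (e.g. via intel_extension_for_pytorch); the names run_once, warm_up, and num_trials are illustrative, not taken from the benchmark script:

import time
import torch

def benchmark(run_once, warm_up=2, num_trials=5):
    # Hypothetical helper: times run_once() on an XPU device and
    # discards the first warm_up iterations, like the script's loop.
    timings = []
    for i in range(warm_up + num_trials):
        torch.xpu.synchronize()          # flush pending kernels before timing
        start = time.perf_counter()
        run_once()                       # e.g. one model.generate(...) call
        torch.xpu.synchronize()          # wait for this run to finish
        elapsed = time.perf_counter() - start
        torch.xpu.empty_cache()          # release cached XPU memory, as in the patch
        if i >= warm_up:                 # same warm-up guard as the script
            timings.append(elapsed)
    return sum(timings) / len(timings)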