Remove empty cache between each iteration of generation (#9660)

This commit is contained in:
Yuwen Hu 2023-12-12 17:24:06 +08:00 committed by GitHub
parent 0e639b920f
commit 968d99e6f5

View file

@ -733,7 +733,7 @@ def run_transformer_int4_gpu_win(repo_id,
     if i >= warm_up:
         result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
                                actual_in_len, actual_out_len, gpu_peak_mem])
-    torch.xpu.empty_cache()
+    # torch.xpu.empty_cache() # this may make first token slower
 except RuntimeError:
     traceback.print_exc()
     pass