Remove empty cache between each iteration of generation (#9660)
This commit is contained in:
parent
0e639b920f
commit
968d99e6f5
1 changed file with 1 addition and 1 deletion
|
|
@@ -733,7 +733,7 @@ def run_transformer_int4_gpu_win(repo_id,
|
||||||
if i >= warm_up:
|
if i >= warm_up:
|
||||||
result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
|
result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
|
||||||
actual_in_len, actual_out_len, gpu_peak_mem])
|
actual_in_len, actual_out_len, gpu_peak_mem])
|
||||||
torch.xpu.empty_cache()
|
# torch.xpu.empty_cache() # this may make first token slower
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
pass
|
pass
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue