Remove empty cache between each iteration of generation (#9660)

This commit is contained in:
Yuwen Hu 2023-12-12 17:24:06 +08:00 committed by GitHub
parent 0e639b920f
commit 968d99e6f5

View file

@ -733,7 +733,7 @@ def run_transformer_int4_gpu_win(repo_id,
     if i >= warm_up:
         result[in_out].append([model.first_cost, model.rest_cost_mean, model.encoder_time,
                                actual_in_len, actual_out_len, gpu_peak_mem])
-    torch.xpu.empty_cache()
+    # torch.xpu.empty_cache() # this may make first token slower
 except RuntimeError:
     traceback.print_exc()
     pass