From 7f7f6c89f580bc24285d134a058584bb85171786 Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Tue, 27 Aug 2024 15:29:27 +0800
Subject: [PATCH] Quick fix benchmark script (#11938)

---
 python/llm/dev/benchmark/all-in-one/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
index 3dcb4011..4b5a64d6 100644
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -615,9 +615,9 @@ def transformers_int4_npu_win(repo_id,
     # which convert the relevant layers in the model into INT4 format
     st = time.perf_counter()
     if repo_id in CHATGLM_IDS:
-        model = AutoModel.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True, torch_dtype=torch.float16,
+        model = AutoModel.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True,
                                           optimize_model=optimize_model, max_output_len=max_output_len, max_prompt_len=int(in_out_len[0]), transpose_value_cache=True,
-                                          torch_dtype='auto', attn_implementation="eager").eval()
+                                          torch_dtype=torch.float16, attn_implementation="eager").eval()
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     elif repo_id in LLAMA_IDS:
         model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True, torch_dtype=torch.float16,