From 33fd1f9c76a1ed53262c849539048a2863f7f2ee Mon Sep 17 00:00:00 2001 From: WeiguangHan Date: Wed, 10 Jan 2024 18:20:14 +0800 Subject: [PATCH] LLM: fix input length logic for run_transformer_int4_gpu (#9864) * LLM: fix input length logic for run_transformer_int4_gpu * small fix * small fix * small fix --- python/llm/dev/benchmark/all-in-one/run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index c5eef678..553b0b90 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -399,8 +399,10 @@ def run_transformer_int4_gpu(repo_id, # in_len.txt maybe shorter than we need, # use much longer context to make sure input length test_length = min(in_len*2, 8192) - while test_length not in [32, 256, 1024, 2048, 8192]: + while test_length not in [32, 256, 1024, 2048, 8192] and test_length < 8192: test_length = test_length * 2 + # For sequence lengths not in [32, 256, 1024, 2048, 8192], the prompt is read from 8192.txt and truncated to the required length. + test_length = min(test_length, 8192) input_str = open(f"prompt/{test_length}.txt", 'r').read() # As different tokenizer has different encodings, # slice the input_ids to ensure the prompt length is required length.