fix nightly perf test (#11251)

parent 2f809116e2
commit 85df5e7699

2 changed files with 20 additions and 20 deletions
@@ -210,7 +210,7 @@ def run_native_int4(repo_id,
         in_out_len = in_out.split("-")
         in_len = int(in_out_len[0])
         out_len = int(in_out_len[1])
-        input_str = open(f"prompt/{in_len}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{in_len}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         n_ctx = in_len + out_len if in_len + out_len > 512 else 512
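For context: the `in_out` value parsed above is a length pair such as `32-32` or `1024-128`, i.e. prompt length and generation length. A minimal sketch of the step this hunk touches, using the `prompt/continuation/` layout from the diff (the helper name and return shape are invented for illustration):

    def load_native_int4_prompt(in_out: str):
        # "1024-128" -> 1024-token prompt, 128 generated tokens.
        in_len, out_len = (int(x) for x in in_out.split("-"))
        # The context window must cover prompt + output, with a floor of 512.
        n_ctx = max(in_len + out_len, 512)
        with open(f"prompt/continuation/{in_len}.txt") as f:
            input_str = f.read()
        return input_str, n_ctx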
@@ -272,7 +272,7 @@ def run_transformer_int4(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
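The same pattern repeats in every remaining hunk of this file: round the requested `in_len` up to the nearest bundled prompt size, read that file, then (per the comment) slice the encoded ids down to exactly the requested length. A self-contained sketch of that pattern, assuming a Hugging Face-style `tokenizer.encode` (the function name is invented):

    def make_input_ids(tokenizer, in_len: int):
        # Prompt files ship only in a few sizes; start at 2x the requested
        # length (capped at 8192) and double until we land on one of them.
        test_length = min(in_len * 2, 8192)
        while test_length not in [32, 256, 1024, 2048, 8192]:
            test_length = test_length * 2
        with open(f"prompt/continuation/{test_length}.txt") as f:
            input_str = f.read()
        # Different tokenizers encode differently, so trim the ids to the
        # exact prompt length that was asked for.
        input_ids = tokenizer.encode(input_str, return_tensors="pt")
        return input_ids[:, :in_len]

For example, `in_len=512` starts at `test_length=1024` and matches immediately; `in_len=64` goes 128 -> 256.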
@@ -337,7 +337,7 @@ def run_pytorch_autocast_bf16(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -408,7 +408,7 @@ def run_optimize_model(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -597,7 +597,7 @@ def run_optimize_model_gpu(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -669,7 +669,7 @@ def run_ipex_fp16_gpu(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -747,7 +747,7 @@ def run_bigdl_fp16_gpu(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -839,7 +839,7 @@ def run_deepspeed_transformer_int4_cpu(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -926,7 +926,7 @@ def run_transformer_int4_gpu_win(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1035,7 +1035,7 @@ def run_transformer_int4_fp16_gpu_win(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1139,7 +1139,7 @@ def run_transformer_int4_loadlowbit_gpu_win(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1228,7 +1228,7 @@ def run_transformer_autocast_bf16( repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1299,7 +1299,7 @@ def run_bigdl_ipex_bf16(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1369,7 +1369,7 @@ def run_bigdl_ipex_int4(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1439,7 +1439,7 @@ def run_bigdl_ipex_int8(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1550,7 +1550,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192] and test_length < 8192:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
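This hunk's `while` condition carries an extra `test_length < 8192` guard that the others lack. It reads as a termination guard: an `in_len` whose doublings never land on a bundled size (say 3000 -> 6000 -> 12000 -> ...) would loop forever without it, while with it the loop stops once `test_length` passes 8192 (though no prompt file would exist for the resulting value). A tiny demonstration with a hypothetical helper name:

    SIZES = [32, 256, 1024, 2048, 8192]

    def next_prompt_length(in_len: int, guard: bool = True) -> int:
        # Same rounding loop as the benchmark, with the guard switchable.
        test_length = min(in_len * 2, 8192)
        while test_length not in SIZES and (not guard or test_length < 8192):
            test_length *= 2
        return test_length

    print(next_prompt_length(512))   # 1024: matches a bundled file
    print(next_prompt_length(3000))  # 12000: terminates, but no such file
    # next_prompt_length(3000, guard=False) would never return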
@@ -1627,7 +1627,7 @@ def run_speculative_cpu(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1705,7 +1705,7 @@ def run_speculative_gpu(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
@@ -1825,7 +1825,7 @@ def run_pipeline_parallel_gpu(repo_id,
         test_length = min(in_len*2, 8192)
         while test_length not in [32, 256, 1024, 2048, 8192]:
             test_length = test_length * 2
-        input_str = open(f"prompt/{test_length}.txt", 'r').read()
+        input_str = open(f"prompt/continuation/{test_length}.txt", 'r').read()
         # As different tokenizer has different encodings,
         # slice the input_ids to ensure the prompt length is required length.
         input_ids = tokenizer.encode(input_str, return_tensors="pt")
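Every hunk in this first file makes the same path swap, so the nightly run now depends on the `prompt/continuation/` directory containing a file for each bundled length. A quick pre-run sanity check (paths taken from the diff, everything else assumed):

    from pathlib import Path

    # Prompt lengths referenced by the benchmark's rounding loop.
    for n in [32, 256, 1024, 2048, 8192]:
        p = Path("prompt/continuation") / f"{n}.txt"
        assert p.is_file(), f"missing prompt file: {p}"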
@@ -247,7 +247,7 @@ parser.add_argument(
 )
 args = parser.parse_args()
 PROMPT_LENGTH = args.prompt_length
-PROMPT = open(f"prompt/{PROMPT_LENGTH}.txt", "r").read()
+PROMPT = open(f"prompt/continuation/{PROMPT_LENGTH}.txt", "r").read()
 MAX_TOKENS = args.max_new_tokens
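In this second file the prompt length comes from a CLI flag rather than an `in-out` pair. A minimal sketch of how the changed line is reached, with flag names inferred from the variables (hypothetical beyond what the hunk shows):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--prompt-length", dest="prompt_length", type=int, default=1024)
    parser.add_argument("--max-new-tokens", dest="max_new_tokens", type=int, default=128)
    args = parser.parse_args()

    PROMPT_LENGTH = args.prompt_length
    # The changed line: prompts are now read from prompt/continuation/.
    PROMPT = open(f"prompt/continuation/{PROMPT_LENGTH}.txt", "r").read()
    MAX_TOKENS = args.max_new_tokens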