fix vllm_online_benchmark.py (#12056)

This commit is contained in:
Shaojun Liu 2024-09-11 09:45:30 +08:00 committed by GitHub
parent d8c044e79d
commit 52863dd567
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -176,7 +176,7 @@ def benchmark(llm_urls, model, prompt, num_requests, max_concurrent_requests, ma
     cur_len = len(cur_llm_urls)
     if dataset is None:
         payload = {
-            "model": model_name,
+            "model": model,
             "prompt": prompt,
             "n": 1,
            "best_of": 1,
@@ -193,7 +193,7 @@ def benchmark(llm_urls, model, prompt, num_requests, max_concurrent_requests, ma
     for index in range(num_requests):
         prompt, prompt_len, output_len = sampled_requests[index]
         payload = {
-            "model": model_name,
+            "model": model,
             "prompt": prompt,
             "n": 1,
             "best_of": 1,
@@ -279,7 +279,7 @@ max_batch=int(max_seq)
 for MAX_CONCURRENT_REQUESTS in [max_batch]:
     NUM_WARMUP = 2 * MAX_CONCURRENT_REQUESTS
-    NUM_REQUESTS = 5 * MAX_CONCURRENT_REQUESTS # 总请求次数
+    NUM_REQUESTS = 4 * MAX_CONCURRENT_REQUESTS # 总请求次数
     # to avoid warm_up time out
     benchmark(LLM_URLS, MODEL, PROMPT_1024, 2, 1, 32, is_warmup = True)