[fix] vllm-online-benchmark first token latency error (#12271)
This commit is contained in:
parent
0bbc04b5ec
commit
3700e81977
1 changed file with 1 addition and 7 deletions
|
|
@@ -270,13 +270,7 @@ def perform_request(session, url, payload, headers):
|
||||||
json_data = json.loads(data)
|
json_data = json.loads(data)
|
||||||
if 'choices' in json_data and len(json_data['choices']) > 0:
|
if 'choices' in json_data and len(json_data['choices']) > 0:
|
||||||
choice = json_data['choices'][0]
|
choice = json_data['choices'][0]
|
||||||
if 'finish_reason' in choice and (choice['finish_reason'] == 'length' or choice['finish_reason'] == 'stop'):
|
if 'text' in choice:
|
||||||
if 'first_token_time' in choice and isinstance(choice['first_token_time'], float):
|
|
||||||
first_token_inference_time = choice['first_token_time']
|
|
||||||
if 'rest_token_time' in choice and isinstance(choice['rest_token_time'], float):
|
|
||||||
next_token_inference_time = choice['rest_token_time']
|
|
||||||
else:
|
|
||||||
# 记录第一个token的时间
|
|
||||||
if first_token_time is None:
|
if first_token_time is None:
|
||||||
first_token_time = token_time
|
first_token_time = token_time
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue