From fd933c92d85cd5edf167be1eae5fa6dd99c53615 Mon Sep 17 00:00:00 2001 From: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com> Date: Fri, 28 Jun 2024 16:10:51 +0800 Subject: [PATCH] Fix: Correct num_requests in benchmark for Pipeline Parallel Serving (#11462) --- python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py b/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py index dab6146c..5fc87c29 100644 --- a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py +++ b/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py @@ -165,6 +165,7 @@ def benchmark( total_time = time.perf_counter() - start_time log_file = f"{max_concurrent_requests}.log" + num_requests = num_requests - num_warmup_requests with open(log_file, "w") as file: print(