From fd933c92d85cd5edf167be1eae5fa6dd99c53615 Mon Sep 17 00:00:00 2001
From: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com>
Date: Fri, 28 Jun 2024 16:10:51 +0800
Subject: [PATCH] Fix: Correct num_requests in benchmark for Pipeline Parallel
 Serving (#11462)

---
 python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py b/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py
index dab6146c..5fc87c29 100644
--- a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py
+++ b/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py
@@ -165,6 +165,7 @@ def benchmark(
 
             total_time = time.perf_counter() - start_time
             log_file = f"{max_concurrent_requests}.log"
+            num_requests = num_requests - num_warmup_requests
 
             with open(log_file, "w") as file:
                 print(