diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py b/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py index dab6146c..5fc87c29 100644 --- a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py +++ b/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py @@ -165,6 +165,7 @@ def benchmark( total_time = time.perf_counter() - start_time log_file = f"{max_concurrent_requests}.log" + num_requests = num_requests - num_warmup_requests with open(log_file, "w") as file: print(