From f6cd628cd8099b4d6d1d671b8f462a3149299237 Mon Sep 17 00:00:00 2001
From: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com>
Date: Tue, 18 Jun 2024 16:50:48 +0800
Subject: [PATCH] Fix script usage in vLLM CPU Quickstart (#11353)

---
 .../vllm_cpu_docker_quickstart.md             | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/docs/readthedocs/source/doc/LLM/DockerGuides/vllm_cpu_docker_quickstart.md b/docs/readthedocs/source/doc/LLM/DockerGuides/vllm_cpu_docker_quickstart.md
index 36b39ed5..d5b14000 100644
--- a/docs/readthedocs/source/doc/LLM/DockerGuides/vllm_cpu_docker_quickstart.md
+++ b/docs/readthedocs/source/doc/LLM/DockerGuides/vllm_cpu_docker_quickstart.md
@@ -115,4 +115,22 @@ wrk -t8 -c8 -d15m -s payload-1024.lua http://localhost:8000/v1/completions --tim
 
 #### Offline benchmark through benchmark_vllm_throughput.py
 
-Please refer to this [section](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/vLLM_quickstart.html#performing-benchmark) on how to use `benchmark_vllm_throughput.py` for benchmarking.
+```bash
+cd /llm
+wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+
+source ipex-llm-init -t
+export MODEL="YOUR_MODEL"
+
+python3 ./benchmark_vllm_throughput.py \
+  --backend vllm \
+  --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json \
+  --model $MODEL \
+  --num-prompts 1000 \
+  --seed 42 \
+  --trust-remote-code \
+  --enforce-eager \
+  --dtype bfloat16 \
+  --device cpu \
+  --load-in-low-bit bf16
+```
\ No newline at end of file
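
For anyone applying this patch to a local checkout of the repository, the `git am` workflow below is a minimal sketch; the patch filename is an illustrative placeholder, not something named in the patch itself.

```bash
# Minimal sketch: apply this patch from inside a local clone of the target
# repository. The filename below is a hypothetical placeholder; use whatever
# name this patch email was saved under.
git am 0001-Fix-script-usage-in-vLLM-CPU-Quickstart.patch

# Confirm the commit landed and that only the quickstart doc was touched.
git log -1 --stat
```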