From 886c7632b212929896c733115b05f224958ea54f Mon Sep 17 00:00:00 2001 From: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com> Date: Mon, 12 May 2025 13:44:33 +0800 Subject: [PATCH] Add IPEX_LLM_FORCE_BATCH_FORWARD for vLLM docker image (#13151) --- docker/llm/serving/xpu/docker/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile index e9ca4525..1dac2239 100644 --- a/docker/llm/serving/xpu/docker/Dockerfile +++ b/docker/llm/serving/xpu/docker/Dockerfile @@ -186,5 +186,8 @@ RUN set -eux && \ # Re-enable sym_int4 sed -i 's/qtype = ggml_tensor_qtype\["woq_int4"\]/qtype = ggml_tensor_qtype["sym_int4"]/' /usr/local/lib/python3.11/dist-packages/ipex_llm/transformers/convert.py +# Set environment variable to force use of batch_forward_new() +ENV IPEX_LLM_FORCE_BATCH_FORWARD=1 + WORKDIR /llm/ ENTRYPOINT ["bash", "/llm/start-vllm-service.sh"]