Add IPEX_LLM_FORCE_BATCH_FORWARD for vLLM docker image (#13151)

This commit is contained in:
Xiangyu Tian 2025-05-12 13:44:33 +08:00 committed by GitHub
parent 5df03ced2c
commit 886c7632b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -186,5 +186,8 @@ RUN set -eux && \
# Re-enable sym_int4
sed -i 's/qtype = ggml_tensor_qtype\["woq_int4"\]/qtype = ggml_tensor_qtype["sym_int4"]/' /usr/local/lib/python3.11/dist-packages/ipex_llm/transformers/convert.py
# Set environment variable to force use of batch_forward_new()
# (read at runtime, presumably by the ipex_llm library — consumer not visible here; confirm)
ENV IPEX_LLM_FORCE_BATCH_FORWARD=1
# Working directory for the vLLM service container
WORKDIR /llm/
# Exec-form entrypoint: bash runs the vLLM startup script as the container's main process
ENTRYPOINT ["bash", "/llm/start-vllm-service.sh"]