diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile index fb346332..e4bfb1a4 100644 --- a/docker/llm/serving/xpu/docker/Dockerfile +++ b/docker/llm/serving/xpu/docker/Dockerfile @@ -53,7 +53,7 @@ RUN set -eux && \ dpkg -i *.deb && \ # # Install Intel PyTorch extension for LLM inference - pip install --pre --upgrade ipex-llm[xpu_2.6] --extra-index-url https://download.pytorch.org/whl/xpu && \ + pip install --pre --upgrade ipex-llm[xpu_2.6]==2.3.0b20250407 --extra-index-url https://download.pytorch.org/whl/xpu && \ # # Build torch-ccl mkdir -p /build && \ @@ -123,7 +123,7 @@ RUN set -eux && \ wget https://bootstrap.pypa.io/get-pip.py -O get-pip.py && \ python3 get-pip.py && rm get-pip.py && \ pip install --upgrade requests argparse urllib3 && \ - pip install --pre --upgrade ipex-llm[xpu_2.6] --extra-index-url https://download.pytorch.org/whl/xpu && \ + pip install --pre --upgrade ipex-llm[xpu_2.6]==2.3.0b20250407 --extra-index-url https://download.pytorch.org/whl/xpu && \ pip install transformers_stream_generator einops tiktoken && \ pip install --upgrade colorama && \ # diff --git a/docker/llm/serving/xpu/docker/vllm_for_multi_arc.patch b/docker/llm/serving/xpu/docker/vllm_for_multi_arc.patch index 672ad407..7e4c62d6 100644 --- a/docker/llm/serving/xpu/docker/vllm_for_multi_arc.patch +++ b/docker/llm/serving/xpu/docker/vllm_for_multi_arc.patch @@ -12211,6 +12211,19 @@ index 000000000..75a99ffc1 +# Pass the collected arguments to the main entrypoint +exec python3 -m vllm.entrypoints.openai.api_server "${ARGS[@]}" \ No newline at end of file +diff --git a/requirements-common.txt b/requirements-common.txt +index 6c390bcfd..b22364c5a 100644 +--- a/requirements-common.txt ++++ b/requirements-common.txt +@@ -21,7 +21,7 @@ tiktoken >= 0.6.0 # Required for DBRX tokenizer + lm-format-enforcer >= 0.10.9, < 0.11 + outlines == 0.1.11 # Requires pytorch + lark == 1.2.2 +-xgrammar >= 0.1.6; platform_machine == "x86_64" ++xgrammar <= 0.1.17; platform_machine == "x86_64" + typing_extensions >= 4.10 + filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 + partial-json-parser # used for parsing partial JSON outputs diff --git a/requirements-neuron.txt b/requirements-neuron.txt index 148fdbe0d..5e08d101f 100644 --- a/requirements-neuron.txt