diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index 1dac2239..abaa99e6 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -125,7 +125,7 @@ RUN set -eux && \
     python3 get-pip.py && rm get-pip.py && \
     pip install --upgrade requests argparse urllib3 && \
     pip install --pre --upgrade ipex-llm[xpu_2.6] --extra-index-url https://download.pytorch.org/whl/xpu && \
-    pip install transformers_stream_generator einops tiktoken && \
+    pip install transformers_stream_generator einops tiktoken librosa && \
     pip install --upgrade colorama && \
     # git clone https://github.com/intel/ipex-llm.git && \
diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
index 551357a0..b842c2a8 100644
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -124,6 +124,8 @@ def get_load_function(low_bit):
             modules = ["vision_model", "mlp1"]
         if "deepseek-v2" in self.vllm_config.model_config.model.lower():
             modules = ["down_proj"]
+        if "whisper" in self.vllm_config.model_config.model.lower():
+            modules = ["proj_out"]
         optimize_model(self.model,
                        low_bit=low_bit,
                        torch_dtype=self.vllm_config.model_config.dtype,
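
Note on the model_convert.py change: the new branch follows the existing per-model pattern of excluding certain modules from low-bit conversion, keeping Whisper's proj_out (the decoder's output projection) in its original dtype; the librosa package added in the Dockerfile is presumably pulled in for Whisper audio preprocessing. A minimal standalone sketch of the selection logic, for illustration only (the helper name modules_to_skip is hypothetical; in the patch the checks live inline in get_load_function):

    def modules_to_skip(model_path):
        """Pick modules to leave out of low-bit conversion, by model name."""
        name = model_path.lower()
        if "deepseek-v2" in name:
            return ["down_proj"]
        if "whisper" in name:
            # Assumption: proj_out stays unquantized to preserve output quality.
            return ["proj_out"]
        return []

    # Example: modules_to_skip("openai/whisper-large-v3") -> ["proj_out"]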