From f2598b119eb07259e6f16d1131e7188cfbfa0e05 Mon Sep 17 00:00:00 2001 From: "Wang, Jian4" <61138589+hzjane@users.noreply.github.com> Date: Wed, 7 May 2025 16:59:52 +0800 Subject: [PATCH] update for bge-m3 (#13138) --- python/llm/src/ipex_llm/vllm/xpu/model_convert.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py index 346ecb70..551357a0 100644 --- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py +++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py @@ -65,13 +65,14 @@ def _model_sample_convert(): def _ipex_llm_convert(load_in_low_bit): # import pdb # pdb.set_trace() - from vllm.worker.xpu_model_runner import XPUModelRunner + from vllm.worker.xpu_model_runner import XPUModelRunner, XPUModelRunnerBase from ipex_llm.vllm.xpu.ipex_llm_wrapper import get_ipex_llm_wrapper from ipex_llm.vllm.xpu.ipex_llm_v1_wrapper import get_ipex_llm_v1_wrapper import vllm.executor.ray_utils as ray_utils_v0 import vllm.v1.executor.ray_utils as ray_utils_v1 from vllm.v1.worker.gpu_model_runner import GPUModelRunner setattr(XPUModelRunner, "load_model", get_load_function(load_in_low_bit)) + setattr(XPUModelRunnerBase, "load_model", get_load_function(load_in_low_bit)) setattr(GPUModelRunner, "load_model", get_load_function(load_in_low_bit)) setattr(ray_utils_v0, "RayWorkerWrapper", get_ipex_llm_wrapper(load_in_low_bit)) setattr(ray_utils_v1, "RayWorkerWrapper", get_ipex_llm_v1_wrapper(load_in_low_bit))