From 6711a48a36d0276f8c3d81b6d04802421cdbe5ac Mon Sep 17 00:00:00 2001
From: "Wang, Jian4" <61138589+hzjane@users.noreply.github.com>
Date: Fri, 3 Jan 2025 14:49:36 +0800
Subject: [PATCH] Enable internvl2-8b on vllm (#12645)

---
 python/llm/src/ipex_llm/vllm/xpu/model_convert.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
index 1cd1e2c6..3d88d8f9 100644
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -105,6 +105,8 @@ def get_load_function(low_bit):
         if "minicpm-v" in self.model_config.model.lower():
             from ipex_llm.transformers.models.minicpmv import merge_qkv
             self.model.vpm.apply(merge_qkv)
+        if "internvl2" in self.model_config.model.lower():
+            modules = ["vision_model", "mlp1"]
         optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
                        modules_to_not_convert=modules)
         self.model = self.model.to(device=self.device_config.device,
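
For context, the two added lines keep InternVL2's vision tower ("vision_model") and its
vision-to-language projector ("mlp1") out of the low-bit conversion, so only the language
model is quantized when optimize_model() runs. Below is a minimal, self-contained sketch
of that selection pattern. The pick_modules_to_not_convert helper is hypothetical, written
for illustration only and not part of ipex-llm; the substring check and the module list
mirror the diff above.

    # Minimal sketch of the module-exclusion pattern used by this patch.
    # NOTE: pick_modules_to_not_convert is a hypothetical helper, not an
    # ipex-llm API. The "internvl2" check and the ["vision_model", "mlp1"]
    # list come from the diff above.

    from typing import List, Optional


    def pick_modules_to_not_convert(model_path: str) -> Optional[List[str]]:
        # For InternVL2 checkpoints, skip the vision encoder and the
        # vision-to-LLM projector so that only the language model is
        # converted to the low-bit format.
        if "internvl2" in model_path.lower():
            return ["vision_model", "mlp1"]
        return None  # convert the whole model


    if __name__ == "__main__":
        print(pick_modules_to_not_convert("OpenGVLab/InternVL2-8B"))
        # -> ['vision_model', 'mlp1']
        print(pick_modules_to_not_convert("meta-llama/Meta-Llama-3-8B"))
        # -> None

The returned list would then be passed as modules_to_not_convert to optimize_model(),
matching the call site visible in the hunk.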