From e1809a6295362d49c5581e950c03c920dac1c3e5 Mon Sep 17 00:00:00 2001
From: "Wang, Jian4" <61138589+hzjane@users.noreply.github.com>
Date: Wed, 19 Feb 2025 10:04:42 +0800
Subject: [PATCH] Update multimodal on vllm 0.6.6 (#12816)

* add glm4v and minicpmv example

* fix
---
 .../vllm_offline_inference_vision_language.py | 34 +++++++++++++++++++
 .../src/ipex_llm/vllm/xpu/model_convert.py    |  4 ---
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/docker/llm/serving/xpu/docker/vllm_offline_inference_vision_language.py b/docker/llm/serving/xpu/docker/vllm_offline_inference_vision_language.py
index f860c760..bff9c60e 100644
--- a/docker/llm/serving/xpu/docker/vllm_offline_inference_vision_language.py
+++ b/docker/llm/serving/xpu/docker/vllm_offline_inference_vision_language.py
@@ -6,8 +6,39 @@ import requests
 
 model_path = "/llm/models/MiniCPM-V-2_6"
 model_path = "/llm/models/Qwen2-VL-7B-Instruct"
+model_path = "/llm/models/glm-4v-9b"
+model_path = "/llm/models/InternVL2-8B"
+
 prompt = "What is in the image?"
 
+def run_internvl(question: str, modality: str):
+    assert modality == "image"
+
+    tokenizer = AutoTokenizer.from_pretrained(model_path,
+                                              trust_remote_code=True)
+    messages = [{'role': 'user', 'content': f"<image>\n{question}"}]
+    prompt = tokenizer.apply_chat_template(messages,
+                                           tokenize=False,
+                                           add_generation_prompt=True)
+
+    # Stop tokens for InternVL
+    # model variants may have different stop tokens;
+    # please refer to the model card for the correct "stop words":
+    # https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/conversation.py
+    stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
+    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
+    return prompt, stop_token_ids
+
+def run_glm4v(question: str, modality: str):
+    assert modality == "image"
+    model_name = "THUDM/glm-4v-9b"
+
+    prompt = f"<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>\
+        {question}<|assistant|>"
+
+    stop_token_ids = [151329, 151336, 151338]
+    return prompt, stop_token_ids
+
 def run_minicpmv(question, modality):
     assert modality == "image"
     tokenizer = AutoTokenizer.from_pretrained(model_path,
@@ -38,6 +69,9 @@ def run_qwen2_vl(question, modality):
 model_example_map = {
     "minicpmv": run_minicpmv,
     "qwen2_vl": run_qwen2_vl,
+    # only for glm4v
+    "chatglm": run_glm4v,
+    "internvl_chat": run_internvl,
 }
 
 llm = LLM(
diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
index 7cce3c38..12963b72 100644
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -103,10 +103,6 @@ def get_load_function(low_bit):
         modules = None
         if "minicpm" in self.vllm_config.model_config.model.lower():
             modules = ["vpm", "resampler"]
-        # only for minicpm_2_6
-        if "minicpm-v" in self.vllm_config.model_config.model.lower():
-            from ipex_llm.transformers.models.minicpmv import merge_qkv
-            self.model.vpm.apply(merge_qkv)
         if "internvl2" in self.vllm_config.model_config.model.lower():
            modules = ["vision_model", "mlp1"]
         if "deepseek-v2" in self.vllm_config.model_config.model.lower():
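
A minimal sketch of how the prompt builders added by this patch are consumed by the rest of vllm_offline_inference_vision_language.py. The model path, image URL, sampling settings, and the plain vllm.LLM constructor below are assumptions for illustration; the actual docker example script may build the engine with IPEX-LLM/XPU-specific arguments.

# Minimal usage sketch (assumed names: model path, image URL, plain vllm.LLM
# constructor). It inlines the prompt template and stop-token ids that
# run_glm4v() in the patch returns, then feeds the prompt plus a PIL image
# to vLLM's multimodal generate() API.
from io import BytesIO

import requests
from PIL import Image
from vllm import LLM, SamplingParams

model_path = "/llm/models/glm-4v-9b"        # assumed local model path
image_url = "https://example.com/demo.jpg"  # assumed: any RGB image URL
question = "What is in the image?"

# Same prompt template and stop-token ids that run_glm4v() returns.
prompt = (f"<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>"
          f"{question}<|assistant|>")
stop_token_ids = [151329, 151336, 151338]

llm = LLM(model=model_path, trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.0,
                                 max_tokens=128,
                                 stop_token_ids=stop_token_ids)

# Load the image as an RGB PIL image.
image = Image.open(BytesIO(requests.get(image_url).content)).convert("RGB")

# vLLM multimodal input: the text prompt plus the image under the "image" key.
outputs = llm.generate({"prompt": prompt,
                        "multi_modal_data": {"image": image}},
                       sampling_params=sampling_params)
print(outputs[0].outputs[0].text)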