Update multimodal on vllm 0.6.6 (#12816)
* add glm4v and minicpmv example
* fix
parent 09150b6058
commit e1809a6295

2 changed files with 34 additions and 4 deletions
@@ -6,8 +6,39 @@ import requests
 model_path = "/llm/models/MiniCPM-V-2_6"
 model_path = "/llm/models/Qwen2-VL-7B-Instruct"
+model_path = "/llm/models/glm-4v-9b"
+model_path = "/llm/models/InternVL2-8B"
 
 prompt = "What is in the image?"
 
+def run_internvl(question: str, modality: str):
+    assert modality == "image"
+
+    tokenizer = AutoTokenizer.from_pretrained(model_path,
+                                              trust_remote_code=True)
+    messages = [{'role': 'user', 'content': f"<image>\n{question}"}]
+    prompt = tokenizer.apply_chat_template(messages,
+                                           tokenize=False,
+                                           add_generation_prompt=True)
+
+    # Stop tokens for InternVL
+    # model variants may have different stop tokens
+    # please refer to the model card for the correct "stop words":
+    # https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/conversation.py
+    stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
+    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
+    return prompt, stop_token_ids
+
+def run_glm4v(question: str, modality: str):
+    assert modality == "image"
+    model_name = "THUDM/glm-4v-9b"
+
+    prompt = f"<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>\
+        {question}<|assistant|>"
+
+    stop_token_ids = [151329, 151336, 151338]
+    return prompt, stop_token_ids
+
+
 def run_minicpmv(question, modality):
     assert modality == "image"
     tokenizer = AutoTokenizer.from_pretrained(model_path,
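Note on the hardcoded glm4v stop IDs in the hunk above: they can also be derived from the tokenizer rather than hardcoded. A minimal sketch, assuming the glm-4v-9b checkpoint sits at the model_path used in the example; the token-name-to-id mapping is what the glm-4 tokenizer is expected to report:

# Sketch: derive glm-4v stop token IDs from the tokenizer instead of
# hardcoding them. Assumes the glm-4v-9b checkpoint is present locally.
from transformers import AutoTokenizer

model_path = "/llm/models/glm-4v-9b"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# <|endoftext|>, <|user|> and <|observation|> terminate a glm-4v turn;
# convert_tokens_to_ids maps each special token string to its vocabulary id.
stop_tokens = ["<|endoftext|>", "<|user|>", "<|observation|>"]
stop_token_ids = [tokenizer.convert_tokens_to_ids(t) for t in stop_tokens]
print(stop_token_ids)  # expected to print [151329, 151336, 151338]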
@@ -38,6 +69,9 @@ def run_qwen2_vl(question, modality):
 model_example_map = {
     "minicpmv": run_minicpmv,
     "qwen2_vl": run_qwen2_vl,
+    # only for glm4v
+    "chatglm": run_glm4v,
+    "internvl_chat": run_internvl,
 }
 
 llm = LLM(
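For context, the run_* helpers above plug into vLLM's offline inference API. A hedged sketch of that wiring, not the example file's exact code: the image path, max_model_len, and sampling settings below are assumptions, while LLM, SamplingParams, and the multi_modal_data prompt dict are vLLM 0.6.x API:

# Sketch of the surrounding usage. Assumes a local image "test.jpg" and the
# run_minicpmv helper registered in model_example_map in the diff above.
from PIL import Image
from vllm import LLM, SamplingParams

model_path = "/llm/models/MiniCPM-V-2_6"
question = "What is in the image?"

# Dispatch through model_example_map; keys follow the HF config's model_type.
prompt, stop_token_ids = model_example_map["minicpmv"](question, "image")

llm = LLM(model=model_path, trust_remote_code=True, max_model_len=4096)
sampling_params = SamplingParams(temperature=0.0,
                                 max_tokens=64,
                                 stop_token_ids=stop_token_ids)

# multi_modal_data carries the image alongside the text prompt.
outputs = llm.generate(
    {
        "prompt": prompt,
        "multi_modal_data": {"image": Image.open("test.jpg").convert("RGB")},
    },
    sampling_params=sampling_params,
)
print(outputs[0].outputs[0].text)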
@@ -103,10 +103,6 @@ def get_load_function(low_bit):
                 modules = None
             if "minicpm" in self.vllm_config.model_config.model.lower():
                 modules = ["vpm", "resampler"]
-            # only for minicpm_2_6
-            if "minicpm-v" in self.vllm_config.model_config.model.lower():
-                from ipex_llm.transformers.models.minicpmv import merge_qkv
-                self.model.vpm.apply(merge_qkv)
             if "internvl2" in self.vllm_config.model_config.model.lower():
                 modules = ["vision_model", "mlp1"]
             if "deepseek-v2" in self.vllm_config.model_config.model.lower():
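The modules list collected in the hunk above is the set of submodules kept out of low-bit conversion, so the vision tower stays in full precision while the language model is quantized. A minimal sketch of that pattern, assuming ipex_llm's optimize_model with its modules_to_not_convert parameter; the convert_model wrapper itself is hypothetical, not code from this repo:

# Sketch: keep vision modules unquantized during low-bit conversion.
from ipex_llm import optimize_model

def convert_model(model, model_id: str, low_bit: str = "sym_int4"):
    modules = None
    if "minicpm" in model_id.lower():
        modules = ["vpm", "resampler"]      # MiniCPM-V vision tower + resampler
    if "internvl2" in model_id.lower():
        modules = ["vision_model", "mlp1"]  # InternVL2 ViT + projector
    # modules_to_not_convert leaves the listed submodules in full precision.
    return optimize_model(model, low_bit=low_bit,
                          modules_to_not_convert=modules)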