Enable vllm multimodal minicpm-v-2-6 (#12074)
* enable minicpm-v-2-6 * add image_url readme
This commit is contained in:
		
							parent
							
								
									a767438546
								
							
						
					
					
						commit
						d703e4f127
					
				
					 2 changed files with 35 additions and 0 deletions
				
			
		| 
						 | 
					@ -128,6 +128,35 @@ curl http://localhost:8000/v1/completions \
 | 
				
			||||||
 }' &
 | 
					 }' &
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					##### Image input
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Image input is currently supported only for [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6).
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					curl http://localhost:8000/v1/chat/completions \
 | 
				
			||||||
 | 
					  -H "Content-Type: application/json" \
 | 
				
			||||||
 | 
					  -d '{
 | 
				
			||||||
 | 
					    "model": "MiniCPM-V-2_6",
 | 
				
			||||||
 | 
					    "messages": [
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        "role": "user",
 | 
				
			||||||
 | 
					        "content": [
 | 
				
			||||||
 | 
					          {
 | 
				
			||||||
 | 
					            "type": "text",
 | 
				
			||||||
 | 
					            "text": "图片里有什么?"
 | 
				
			||||||
 | 
					          },
 | 
				
			||||||
 | 
					          {
 | 
				
			||||||
 | 
					            "type": "image_url",
 | 
				
			||||||
 | 
					            "image_url": {
 | 
				
			||||||
 | 
					              "url": "http://farm6.staticflickr.com/5268/5602445367_3504763978_z.jpg"
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    ],
 | 
				
			||||||
 | 
					    "max_tokens": 128
 | 
				
			||||||
 | 
					  }'
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#### Tensor parallel
 | 
					#### Tensor parallel
 | 
				
			||||||
 | 
					
 | 
				
			||||||
> Note: We recommend using Docker for tensor parallel deployment.
 | 
					> Note: We recommend using Docker for tensor parallel deployment.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -102,6 +102,12 @@ def get_load_function(low_bit):
 | 
				
			||||||
                modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
 | 
					                modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                modules = None
 | 
					                modules = None
 | 
				
			||||||
 | 
					            if "minicpm" in self.model_config.model.lower():
 | 
				
			||||||
 | 
					                modules = ["vpm", "resampler"]
 | 
				
			||||||
 | 
					            # only for minicpm_2_6
 | 
				
			||||||
 | 
					            if "minicpm-v" in self.model_config.model.lower():
 | 
				
			||||||
 | 
					                from ipex_llm.transformers.models.minicpmv import merge_qkv
 | 
				
			||||||
 | 
					                self.model.vpm.apply(merge_qkv)
 | 
				
			||||||
            optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
 | 
					            optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
 | 
				
			||||||
                           modules_to_not_convert=modules)
 | 
					                           modules_to_not_convert=modules)
 | 
				
			||||||
            self.model = self.model.to(device=self.device_config.device,
 | 
					            self.model = self.model.to(device=self.device_config.device,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue