Enable vllm multimodal minicpm-v-2-6 (#12074)
* enable minicpm-v-2-6 * add image_url readme
This commit is contained in:
		
							parent
							
								
									a767438546
								
							
						
					
					
						commit
						d703e4f127
					
				
					 2 changed files with 35 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -128,6 +128,35 @@ curl http://localhost:8000/v1/completions \
 | 
			
		|||
 }' &
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
##### Image input
 | 
			
		||||
 | 
			
		||||
Image input is currently only supported for [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6).
 | 
			
		||||
```bash
 | 
			
		||||
curl http://localhost:8000/v1/chat/completions \
 | 
			
		||||
  -H "Content-Type: application/json" \
 | 
			
		||||
  -d '{
 | 
			
		||||
    "model": "MiniCPM-V-2_6",
 | 
			
		||||
    "messages": [
 | 
			
		||||
      {
 | 
			
		||||
        "role": "user",
 | 
			
		||||
        "content": [
 | 
			
		||||
          {
 | 
			
		||||
            "type": "text",
 | 
			
		||||
            "text": "图片里有什么?"
 | 
			
		||||
          },
 | 
			
		||||
          {
 | 
			
		||||
            "type": "image_url",
 | 
			
		||||
            "image_url": {
 | 
			
		||||
              "url": "http://farm6.staticflickr.com/5268/5602445367_3504763978_z.jpg"
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
        ]
 | 
			
		||||
      }
 | 
			
		||||
    ],
 | 
			
		||||
    "max_tokens": 128
 | 
			
		||||
  }'
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
#### Tensor parallel
 | 
			
		||||
 | 
			
		||||
> Note: We recommend using Docker for tensor parallel deployment.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -102,6 +102,12 @@ def get_load_function(low_bit):
 | 
			
		|||
                modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
 | 
			
		||||
            else:
 | 
			
		||||
                modules = None
 | 
			
		||||
            if "minicpm" in self.model_config.model.lower():
 | 
			
		||||
                modules = ["vpm", "resampler"]
 | 
			
		||||
            # only for minicpm_2_6
 | 
			
		||||
            if "minicpm-v" in self.model_config.model.lower():
 | 
			
		||||
                from ipex_llm.transformers.models.minicpmv import merge_qkv
 | 
			
		||||
                self.model.vpm.apply(merge_qkv)
 | 
			
		||||
            optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
 | 
			
		||||
                           modules_to_not_convert=modules)
 | 
			
		||||
            self.model = self.model.to(device=self.device_config.device,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue