Enable vllm multimodal minicpm-v-2-6 (#12074)

* enable minicpm-v-2-6

* add image_url readme
This commit is contained in:
Wang, Jian4 2024-09-13 13:28:35 +08:00 committed by GitHub
parent a767438546
commit d703e4f127
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 35 additions and 0 deletions

View file

@ -128,6 +128,35 @@ curl http://localhost:8000/v1/completions \
}' &
```
##### Image input
Image input is currently only supported for [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6).
```bash
curl http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "MiniCPM-V-2_6",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "图片里有什么?"
},
{
"type": "image_url",
"image_url": {
"url": "http://farm6.staticflickr.com/5268/5602445367_3504763978_z.jpg"
}
}
]
}
],
"max_tokens": 128
}'
```
#### Tensor parallel
> Note: We recommend using Docker for tensor parallel deployment.

View file

@ -102,6 +102,12 @@ def get_load_function(low_bit):
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
else:
modules = None
if "minicpm" in self.model_config.model.lower():
modules = ["vpm", "resampler"]
# only for minicpm_2_6
if "minicpm-v" in self.model_config.model.lower():
from ipex_llm.transformers.models.minicpmv import merge_qkv
self.model.vpm.apply(merge_qkv)
optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
modules_to_not_convert=modules)
self.model = self.model.to(device=self.device_config.device,