Enable vllm multimodal minicpm-v-2-6 (#12074)
* enable minicpm-v-2-6 * add image_url readme
This commit is contained in:
parent
a767438546
commit
d703e4f127
2 changed files with 35 additions and 0 deletions
|
|
@ -128,6 +128,35 @@ curl http://localhost:8000/v1/completions \
|
|||
}' &
|
||||
```
|
||||
|
||||
##### Image input
|
||||
|
||||
Image input is currently supported only for [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6).
|
||||
```bash
|
||||
curl http://localhost:8000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "MiniCPM-V-2_6",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "图片里有什么?"
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "http://farm6.staticflickr.com/5268/5602445367_3504763978_z.jpg"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"max_tokens": 128
|
||||
}'
|
||||
```
|
||||
|
||||
#### Tensor parallel
|
||||
|
||||
> Note: We recommend using Docker for tensor parallel deployment.
|
||||
|
|
|
|||
|
|
@ -102,6 +102,12 @@ def get_load_function(low_bit):
|
|||
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
|
||||
else:
|
||||
modules = None
|
||||
if "minicpm" in self.model_config.model.lower():
|
||||
modules = ["vpm", "resampler"]
|
||||
# only for minicpm_2_6
|
||||
if "minicpm-v" in self.model_config.model.lower():
|
||||
from ipex_llm.transformers.models.minicpmv import merge_qkv
|
||||
self.model.vpm.apply(merge_qkv)
|
||||
optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
|
||||
modules_to_not_convert=modules)
|
||||
self.model = self.model.to(device=self.device_config.device,
|
||||
|
|
|
|||
Loading…
Reference in a new issue