Enable vllm multimodal minicpm-v-2-6 (#12074)
* enable minicpm-v-2-6 * add image_url readme
This commit is contained in:
parent
a767438546
commit
d703e4f127
2 changed files with 35 additions and 0 deletions
|
|
@ -128,6 +128,35 @@ curl http://localhost:8000/v1/completions \
|
||||||
}' &
|
}' &
|
||||||
```
|
```
|
||||||
|
|
||||||
|
##### Image input
|
||||||
|
|
||||||
|
Image input is currently supported only for [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6).
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "MiniCPM-V-2_6",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "图片里有什么?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": "http://farm6.staticflickr.com/5268/5602445367_3504763978_z.jpg"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 128
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
#### Tensor parallel
|
#### Tensor parallel
|
||||||
|
|
||||||
> Note: We recommend using Docker for tensor parallel deployment.
|
> Note: We recommend using Docker for tensor parallel deployment.
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,12 @@ def get_load_function(low_bit):
|
||||||
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
|
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
|
||||||
else:
|
else:
|
||||||
modules = None
|
modules = None
|
||||||
|
if "minicpm" in self.model_config.model.lower():
|
||||||
|
modules = ["vpm", "resampler"]
|
||||||
|
# only for minicpm_2_6
|
||||||
|
if "minicpm-v" in self.model_config.model.lower():
|
||||||
|
from ipex_llm.transformers.models.minicpmv import merge_qkv
|
||||||
|
self.model.vpm.apply(merge_qkv)
|
||||||
optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
|
optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
|
||||||
modules_to_not_convert=modules)
|
modules_to_not_convert=modules)
|
||||||
self.model = self.model.to(device=self.device_config.device,
|
self.model = self.model.to(device=self.device_config.device,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue