add gradio_web_ui to llm-serving image (#9918)
commit 51aa8b62b2
parent 99ff6cf048

3 changed files with 36 additions and 5 deletions
@@ -76,6 +76,7 @@ calculate_total_cores() {
 # Default values
 controller_host="localhost"
 controller_port="21001"
+gradio_port="8002"
 api_host="localhost"
 api_port="8000"
 worker_host="localhost"
@@ -155,6 +156,10 @@ else
   api_port=$API_PORT
 fi
 
+if [[ -n $GRADIO_PORT ]]; then
+  gradio_port=$GRADIO_PORT
+fi
+
 if [[ -n $WORKER_HOST ]]; then
   worker_host=$WORKER_HOST
 fi
@@ -185,7 +190,9 @@ else
   echo "OpenAI API address: $api_address"
   python3 -m fastchat.serve.controller --host $controller_host --port $controller_port --dispatch-method $dispatch_method &
   # Boot openai api server
-  python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address
+  python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address &
+  # Boot gradio_web_server
+  python3 -m fastchat.serve.gradio_web_server --host $controller_host --port $gradio_port --controller-url $controller_address --model-list-mode reload
 else
   # Logic for non-controller(worker) mode
   worker_address="http://$worker_host:$worker_port"
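For context, the entrypoint changes above make the gradio port overridable at container start. A minimal sketch of exercising that knob when running the image directly (the port value is illustrative, and the model/controller configuration the entrypoint also needs is omitted here):

```bash
# Override the gradio port the entrypoint reads (default 8002) and
# publish it on the host; 9000 is an arbitrary example value.
docker run -it --rm \
  -e GRADIO_PORT=9000 \
  -p 9000:9000 \
  intelanalytics/bigdl-llm-serving-cpu:2.5.0-SNAPSHOT
```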
@@ -1,6 +1,5 @@
 ## Deployment bigdl-llm serving service in K8S environment
-
 
 ## Image
 
 To deploy BigDL-LLM-serving cpu in Kubernetes environment, please use this image: `intelanalytics/bigdl-llm-serving-cpu:2.5.0-SNAPSHOT`
@@ -36,7 +35,9 @@ The entrypoint of the image will try to set `OMP_NUM_THREADS` to the correct num
 If you want to use the vllm AsyncLLMEngine for serving, you should set the args `-w vllm_worker` in the worker part of deployment.yaml.
 
 ### PersistentVolume
 
 We use the following yaml file for PersistentVolume deployment:
 
 ```yaml
 apiVersion: v1
 kind: PersistentVolume
@@ -55,6 +56,7 @@ spec:
     server: YOUR_NFS_SERVER
 ```
 
 Then you should upload the model to `YOUR_NFS_PATH`
 
 ### Controller
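To illustrate the upload step above, assuming the NFS export is mounted on your workstation (both paths below are placeholders, not from this commit):

```bash
# Copy the model directory onto the NFS export backing the
# PersistentVolume; /mnt/nfs stands in for the mounted YOUR_NFS_PATH.
cp -r ./vicuna-7b-v1.5-bigdl /mnt/nfs/
```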
@@ -84,6 +86,8 @@ spec:
           value: "0.0.0.0"
         - name: API_PORT # fixed
           value: "8000"
+        - name: "GRADIO_PORT" # You can change this port
+          value: "8002"
         ports:
         - containerPort: 21005
           name: con-port
@@ -181,6 +185,15 @@ spec:
 
 You may want to change the `MODEL_PATH` variable in the yaml. Also, please remember to change the volume path accordingly.
 
+### Use gradio_web_ui
+
+We have set the port using the `GRADIO_PORT` environment variable in `deployment.yaml`; you can forward it with this command:
+
+```bash
+k port-forward bigdl-fschat-a1234bd-controller --address 0.0.0.0 8002:8002
+```
+
+Then visit http://YOUR_HOST_IP:8002 to access the UI.
+
 ### Testing
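One way to sanity-check the port-forward added above, assuming the default port 8002 (a hypothetical check, not part of this commit):

```bash
# Expect an HTTP response (e.g. 200 OK) from the gradio web server
# once the `k port-forward` command above is running.
curl -I http://localhost:8002
```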
@@ -191,11 +204,13 @@ If you need to access the serving on host, you can use `kubectl get nodes -o wi
 #### Using openai-python
 
 First, install openai-python:
 
 ```bash
 pip install --upgrade openai
 ```
 
 Then, interact with model vicuna-7b-v1.5-bigdl:
 
 ```python
 import openai
 openai.api_key = "EMPTY"
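The hunk above cuts off after `openai.api_key = "EMPTY"`, and the next hunk resumes at the final `print`. The elided middle presumably resembles the sketch below, written against the pre-1.0 openai-python interface; the `api_base` URL is an assumption based on the `API_PORT` of 8000 used elsewhere in this README:

```python
import openai

# Point the (pre-1.0) openai client at the OpenAI-compatible server;
# the key is unused by FastChat but must be non-empty.
openai.api_key = "EMPTY"
openai.api_base = "http://localhost:8000/v1"  # assumed address

completion = openai.ChatCompletion.create(
    model="vicuna-7b-v1.5-bigdl",
    messages=[{"role": "user", "content": "Hello! What is your name?"}],
)
print(completion.choices[0].message.content)
```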
@@ -219,9 +234,11 @@ print(completion.choices[0].message.content)
 ```
 
 #### cURL
 
 cURL is another good tool for observing the output of the API.
 
 Before using cURL, you should set your `http_proxy` and `https_proxy` to empty:
 
 ```bash
 export http_proxy=
 export https_proxy=
@@ -230,16 +247,19 @@ export https_proxy=
 
 For the following examples, you may also change the service deployment address.
 
 List Models:
 
 ```bash
 curl http://localhost:8000/v1/models
 ```
 
 If you have `jq` installed, you can use it to format the output like this:
 
 ```bash
 curl http://localhost:8000/v1/models | jq
 ```
 
 Chat Completions:
 
 ```bash
 curl http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
@@ -250,6 +270,7 @@ curl http://localhost:8000/v1/chat/completions \
 ```
 
 Text Completions:
 
 ```bash
 curl http://localhost:8000/v1/completions \
   -H "Content-Type: application/json" \
@@ -262,6 +283,7 @@ curl http://localhost:8000/v1/completions \
 ```
 
 Embeddings:
 
 ```bash
 curl http://localhost:8000/v1/embeddings \
   -H "Content-Type: application/json" \
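Each cURL hunk above cuts off after the `Content-Type` header. For illustration only, a complete Chat Completions request might look like this; the payload fields are the standard OpenAI-compatible ones, with the model name taken from the openai-python example:

```bash
# Hypothetical complete request; adjust host, port, and prompt as needed.
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "vicuna-7b-v1.5-bigdl",
    "messages": [{"role": "user", "content": "Hello! What is your name?"}]
  }'
```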
@@ -35,6 +35,8 @@ spec:
           value: "0.0.0.0"
         - name: API_PORT # fixed
           value: "8000"
+        - name: "GRADIO_PORT" # You can change this port
+          value: "8002"
         ports:
         - containerPort: 21005
           name: con-port