add gradio_web_ui to llm-serving image (#9918)
parent 99ff6cf048
commit 51aa8b62b2

3 changed files with 36 additions and 5 deletions
@@ -76,6 +76,7 @@ calculate_total_cores() {
 # Default values
 controller_host="localhost"
 controller_port="21001"
+gradio_port="8002"
 api_host="localhost"
 api_port="8000"
 worker_host="localhost"
@@ -155,6 +156,10 @@ else
     api_port=$API_PORT
   fi
 
+  if [[ -n $GRADIO_PORT ]]; then
+    gradio_port=$GRADIO_PORT
+  fi
+
   if [[ -n $WORKER_HOST ]]; then
     worker_host=$WORKER_HOST
   fi
@@ -185,7 +190,9 @@ else
     echo "OpenAI API address: $api_address"
     python3 -m fastchat.serve.controller --host $controller_host --port $controller_port --dispatch-method $dispatch_method &
     # Boot openai api server
-    python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address
+    python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address &
+    # Boot gradio_web_server
+    python3 -m fastchat.serve.gradio_web_server --host $controller_host --port $gradio_port --controller-url $controller_address --model-list-mode reload
   else
     # Logic for non-controller(worker) mode
     worker_address="http://$worker_host:$worker_port"
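The new `GRADIO_PORT` branch in the entrypoint above only takes effect when the variable is set in the container's environment. As a hypothetical illustration (the `docker run` flags are an assumption; how the entrypoint selects controller vs. worker mode is not shown in this diff), the override could be exercised like this:

```bash
# Hypothetical invocation, not part of this commit: run the serving
# image with a non-default web UI port. The entrypoint's new branch
# copies GRADIO_PORT into gradio_port; if unset, the default 8002 is used.
docker run -d \
  -e GRADIO_PORT=9000 \
  -p 9000:9000 \
  intelanalytics/bigdl-llm-serving-cpu:2.5.0-SNAPSHOT
```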
@@ -1,6 +1,5 @@
 ## Deployment bigdl-llm serving service in K8S environment
 
-
 ## Image
 
 To deploy BigDL-LLM-serving cpu in Kubernetes environment, please use this image: `intelanalytics/bigdl-llm-serving-cpu:2.5.0-SNAPSHOT`
@@ -36,7 +35,9 @@ The entrypoint of the image will try to set `OMP_NUM_THREADS` to the correct num
 If you want to use the vllm AsyncLLMEngine for serving, you should set the args -w vllm_worker in worker part of deployment.yaml.
+
 ### PersistentVolume
+
 We use the following yaml file for PersistentVolume deployment:
 
 ```yaml
 apiVersion: v1
 kind: PersistentVolume
@@ -55,6 +56,7 @@ spec:
     server: YOUR_NFS_SERVER
+
 ```
 
 Then you should upload model to `YOUR_NFS_PATH`
 
 ### Controller
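The README's upload step above is left abstract. A minimal sketch, assuming the NFS export is reachable over SSH; `YOUR_NFS_SERVER` and `YOUR_NFS_PATH` are the placeholders from the yaml, and the model directory name is borrowed from the testing section later in this README:

```bash
# Illustrative only: copy the model onto the NFS export that backs the
# PersistentVolume; replace the placeholders with real values.
scp -r ./vicuna-7b-v1.5-bigdl user@YOUR_NFS_SERVER:YOUR_NFS_PATH/
```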
@@ -84,6 +86,8 @@ spec:
       value: "0.0.0.0"
     - name: API_PORT # fixed
       value: "8000"
+    - name: "GRADIO_PORT" # You can change this port
+      value: "8002"
     ports:
       - containerPort: 21005
         name: con-port
@@ -181,6 +185,15 @@ spec:
 
 You may want to change the `MODEL_PATH` variable in the yaml. Also, please remember to change the volume path accordingly.
 
+### Use gradio_web_ui
+
+We set the port with the `GRADIO_PORT` environment variable in `deployment.yaml`; you can forward it to the host with this command:
+
+```bash
+kubectl port-forward bigdl-fschat-a1234bd-controller --address 0.0.0.0 8002:8002
+```
+
+Then visit http://YOUR_HOST_IP:8002 to access the UI.
+
 ### Testing
 
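`bigdl-fschat-a1234bd-controller` in the port-forward command above is an example pod name. A small sketch for finding the real one, assuming the controller pod's generated name still contains `controller`:

```bash
# Illustrative: look up the controller pod, then forward the gradio port.
kubectl get pods -o name | grep controller
kubectl port-forward <controller-pod-name> --address 0.0.0.0 8002:8002
```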
@@ -191,11 +204,13 @@ If you need to access the serving on host, you can use `kubectl get nodes -o wi
 #### Using openai-python
 
 First, install openai-python:
+
 ```bash
 pip install --upgrade openai
 ```
+
 Then, interact with model vicuna-7b-v1.5-bigdl:
 
 ```python
 import openai
 openai.api_key = "EMPTY"
@@ -219,9 +234,11 @@ print(completion.choices[0].message.content)
 ```
 
 #### cURL
 
 cURL is another good tool for observing the output of the api.
+
 Before using cURL, you should set your `http_proxy` and `https_proxy` to empty
+
 ```bash
 export http_proxy=
 export https_proxy=
@@ -230,16 +247,19 @@ export https_proxy=
 For the following examples, you may also change the service deployment address.
 
 List Models:
+
 ```bash
 curl http://localhost:8000/v1/models
 ```
 
 If you have `jq` installed, you can use it to format the output like this:
+
 ```bash
 curl http://localhost:8000/v1/models | jq
 ```
 
 Chat Completions:
+
 ```bash
 curl http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
@@ -250,6 +270,7 @@ curl http://localhost:8000/v1/chat/completions \
 ```
 
 Text Completions:
+
 ```bash
 curl http://localhost:8000/v1/completions \
   -H "Content-Type: application/json" \
@@ -262,6 +283,7 @@ curl http://localhost:8000/v1/completions \
 ```
 
 Embeddings:
+
 ```bash
 curl http://localhost:8000/v1/embeddings \
   -H "Content-Type: application/json" \
@@ -35,6 +35,8 @@ spec:
       value: "0.0.0.0"
     - name: API_PORT # fixed
       value: "8000"
+    - name: "GRADIO_PORT" # You can change this port
+      value: "8002"
     ports:
       - containerPort: 21005
         name: con-port
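Since the same `GRADIO_PORT` entry lands in the standalone `deployment.yaml`, rolling the change out is the usual apply-and-watch loop; a minimal sketch, assuming the manifest sits in the current directory:

```bash
# Illustrative: apply the updated manifest and watch the controller pod
# come back up with the new GRADIO_PORT environment value.
kubectl apply -f deployment.yaml
kubectl get pods -w
```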