Add entry point to llm-serving-xpu (#9339)
* add entry point to llm-serving-xpu * manually build * manually build * add entry point to llm-serving-xpu * manually build * add entry point to llm-serving-xpu * add entry point to llm-serving-xpu * add entry point to llm-serving-xpu
This commit is contained in:
		
							parent
							
								
									4df66f5cbc
								
							
						
					
					
						commit
						74a8ad32dc
					
				
					 3 changed files with 108 additions and 2 deletions
				
			
		| 
						 | 
					@ -279,6 +279,7 @@ An example could be:
 | 
				
			||||||
export DOCKER_IMAGE=intelanalytics/bigdl-llm-serving-cpu:2.4.0-SNAPSHOT
 | 
					export DOCKER_IMAGE=intelanalytics/bigdl-llm-serving-cpu:2.4.0-SNAPSHOT
 | 
				
			||||||
export CONTAINER_NAME=my_container
 | 
					export CONTAINER_NAME=my_container
 | 
				
			||||||
export MODEL_PATH=/llm/models[change to your model path]
 | 
					export MODEL_PATH=/llm/models[change to your model path]
 | 
				
			||||||
 | 
					export SERVICE_MODEL_PATH=/llm/models/chatglm2-6b[a specified model path for running service]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
docker run -itd \
 | 
					docker run -itd \
 | 
				
			||||||
    --net=host \
 | 
					    --net=host \
 | 
				
			||||||
| 
						 | 
					@ -287,8 +288,10 @@ docker run -itd \
 | 
				
			||||||
    --name=$CONTAINER_NAME \
 | 
					    --name=$CONTAINER_NAME \
 | 
				
			||||||
    --shm-size="16g" \
 | 
					    --shm-size="16g" \
 | 
				
			||||||
    -v $MODEL_PATH:/llm/models \
 | 
					    -v $MODEL_PATH:/llm/models \
 | 
				
			||||||
    $DOCKER_IMAGE
 | 
					    -e SERVICE_MODEL_PATH=$SERVICE_MODEL_PATH \
 | 
				
			||||||
 | 
					    $DOCKER_IMAGE --service-model-path $SERVICE_MODEL_PATH
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					You can pass a model path via `--service-model-path` to start the service automatically when the container boots. Alternatively, you can start the service manually after entering the container. Run `/opt/entrypoint.sh --help` inside the container for more information. The steps below also describe in detail how to run the service.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To verify the device is successfully mapped into the container, run `sycl-ls` to check the result. In a machine with Arc A770, the sampled output is:
 | 
					To verify the device is successfully mapped into the container, run `sycl-ls` to check the result. In a machine with Arc A770, the sampled output is:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,13 +6,17 @@ ARG https_proxy
 | 
				
			||||||
# Disable pip's cache behavior
 | 
					# Disable pip's cache behavior
 | 
				
			||||||
ARG PIP_NO_CACHE_DIR=false
 | 
					ARG PIP_NO_CACHE_DIR=false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					COPY ./entrypoint.sh /opt/entrypoint.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Install Serving Dependencies
 | 
					# Install Serving Dependencies
 | 
				
			||||||
RUN cd /llm && \
 | 
					RUN cd /llm && \
 | 
				
			||||||
    git clone https://github.com/analytics-zoo/FastChat.git && \
 | 
					    git clone https://github.com/analytics-zoo/FastChat.git && \
 | 
				
			||||||
    cd FastChat && \
 | 
					    cd FastChat && \
 | 
				
			||||||
    git checkout dev-2023-09-22 && \
 | 
					    git checkout dev-2023-09-22 && \
 | 
				
			||||||
    pip3 install -e ".[model_worker,webui]" && \
 | 
					    pip3 install -e ".[model_worker,webui]" && \
 | 
				
			||||||
    cd /llm
 | 
					    cd /llm && \
 | 
				
			||||||
 | 
					    chmod +x /opt/entrypoint.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
WORKDIR /llm/
 | 
					WORKDIR /llm/
 | 
				
			||||||
 | 
					ENTRYPOINT [ "/opt/entrypoint.sh" ]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										99
									
								
								docker/llm/serving/xpu/docker/entrypoint.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								docker/llm/serving/xpu/docker/entrypoint.sh
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,99 @@
 | 
				
			||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Print the command-line help text to stdout and terminate the script.
					#
					# Arguments:
					#   $1 - optional exit status. Defaults to 1 so that error paths calling
					#        `usage` with no argument still fail; pass 0 for an explicit
					#        help request.
					# Outputs:
					#   The help text, one item per line, on stdout.
					# Returns:
					#   Never returns; exits the current shell with the chosen status.
					usage() {
					  printf '%s\n' \
					    "Usage: $0 [--service-model-path <service model path>] [--help]" \
					    "--help: Print help message." \
					    "--service-model-path: set model path for model worker" \
					    "The following environment variables can be set." \
					    "CONTROLLER_HOST (default: localhost)." \
					    "CONTROLLER_PORT (default: 21001)." \
					    "WORKER_HOST (default: localhost)." \
					    "WORKER_PORT (default: 21002)." \
					    "API_HOST (default: localhost)." \
					    "API_PORT (default: 8000)."
					  exit "${1:-1}"
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Service endpoints. Each value comes from the matching upper-case
					# environment variable when that variable is set and non-empty, and
					# falls back to the built-in default otherwise.
					controller_host="${CONTROLLER_HOST:-localhost}"
					controller_port="${CONTROLLER_PORT:-21001}"
					worker_host="${WORKER_HOST:-localhost}"
					worker_port="${WORKER_PORT:-21002}"
					api_host="${API_HOST:-localhost}"
					api_port="${API_PORT:-8000}"
					service_model_path=""

					# We do not have any arguments, just run bash
					if [[ $# -eq 0 ]]; then
					  echo "[INFO] no command is passed in"
					  echo "[INFO] enter pass-through mode"
					  exec /usr/bin/bash -s -- "bash"
					else
					  # Parse command-line options. Testing the assignment directly keeps
					  # getopt's exit status from being lost between statements.
					  if ! options=$(getopt -o "" --long "service-model-path:,help" -n "$0" -- "$@"); then
					    usage
					  fi
					  # getopt emits a shell-quoted string; eval re-splits it into "$@".
					  eval set -- "$options"

					  while true; do
					    case "$1" in
					      --service-model-path)
					        service_model_path="$2"
					        shift 2
					        ;;
					      --help)
					        # An explicit help request is not an error.
					        usage 0
					        ;;
					      --)
					        shift
					        break
					        ;;
					      *)
					        usage
					        ;;
					    esac
					  done

					  # Without a model path the worker would be started with a bare
					  # --model-path flag that swallows the following option, so fail fast.
					  if [[ -z "$service_model_path" ]]; then
					    echo "[ERROR] --service-model-path is required to start the service" >&2
					    usage
					  fi

					  controller_address="http://${controller_host}:${controller_port}"
					  worker_address="http://${worker_host}:${worker_port}"
					  api_address="http://${api_host}:${api_port}"

					  # The proxy variables are cleared before the services are launched.
					  unset http_proxy
					  unset https_proxy

					  # Launch controller, model worker and OpenAI-compatible API server in
					  # the background. Every expansion is quoted so values containing
					  # spaces reach the programs as single arguments.
					  python3 -m fastchat.serve.controller \
					    --host "$controller_host" --port "$controller_port" &
					  python3 -m bigdl.llm.serving.model_worker \
					    --model-path "$service_model_path" --device xpu \
					    --host "$worker_host" --port "$worker_port" \
					    --worker-address "$worker_address" \
					    --controller-address "$controller_address" &
					  python3 -m fastchat.serve.openai_api_server \
					    --host "$api_host" --port "$api_port" \
					    --controller-address "$controller_address" &

					  echo "Controller address: $controller_address"
					  echo "Worker address: $worker_address"
					  echo "OpenAI API address: $api_address"

					fi

					# Replace this script with a bash process after the services are launched.
					exec /usr/bin/bash -s -- "bash"
 | 
				
			||||||
		Loading…
	
		Reference in a new issue