Add entry point to llm-serving-xpu (#9339)

* add entry point to llm-serving-xpu * manually build * manually build * add entry point to llm-serving-xpu * manually build * add entry point to llm-serving-xpu * add entry point to llm-serving-xpu * add entry point to llm-serving-xpu
2023-11-02 16:31:07 +08:00 · 2023-11-02 16:31:07 +08:00 · 74a8ad32dc
commit 74a8ad32dc
parent 4df66f5cbc
3 changed files with 108 additions and 2 deletions
--- a/docker/llm/README.md
+++ b/docker/llm/README.md
@ -279,6 +279,7 @@ An example could be:
 export DOCKER_IMAGE=intelanalytics/bigdl-llm-serving-cpu:2.4.0-SNAPSHOT
 export CONTAINER_NAME=my_container
 export MODEL_PATH=/llm/models[change to your model path]
+export SERVICE_MODEL_PATH=/llm/models/chatglm2-6b[a specified model path for running service]

 docker run -itd \
    --net=host \
@ -287,8 +288,10 @@ docker run -itd \
    --name=$CONTAINER_NAME \
    --shm-size="16g" \
    -v $MODEL_PATH:/llm/models \
-    $DOCKER_IMAGE
+    -e SERVICE_MODEL_PATH=$SERVICE_MODEL_PATH \
+    $DOCKER_IMAGE --service-model-path $SERVICE_MODEL_PATH
 ```
+You can pass a model path via `--service-model-path` to start the service automatically when the container boots. Alternatively, you can start the service manually after entering the container. Run `/opt/entrypoint.sh --help` inside the container for more information. The steps below also describe in detail how to run the service.

 To verify the device is successfully mapped into the container, run `sycl-ls` to check the result. In a machine with Arc A770, the sampled output is:

--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@ -6,13 +6,17 @@ ARG https_proxy
 # Disable pip's cache behavior
 ARG PIP_NO_CACHE_DIR=false

+COPY ./entrypoint.sh /opt/entrypoint.sh
+
 # Install Serving Dependencies
 RUN cd /llm && \
    git clone https://github.com/analytics-zoo/FastChat.git && \
    cd FastChat && \
    git checkout dev-2023-09-22 && \
    pip3 install -e ".[model_worker,webui]" && \
-    cd /llm
+    cd /llm && \
+    chmod +x /opt/entrypoint.sh


 WORKDIR /llm/
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
--- a/docker/llm/serving/xpu/docker/entrypoint.sh
+++ b/docker/llm/serving/xpu/docker/entrypoint.sh
@ -0,0 +1,99 @@
+#!/bin/bash
+
+usage() {
+  echo "Usage: $0 [--service-model-path <service model path>] [--help]"
+  echo "--help: Print help message."
+  echo "--service-model-path: set model path for model worker"
+  echo "The following environment variables can be set."
+  echo "CONTROLLER_HOST (default: localhost)."
+  echo "CONTROLLER_PORT (default: 21001)."
+  echo "WORKER_HOST (default: localhost)."
+  echo "WORKER_PORT (default: 21002)."
+  echo "API_HOST (default: localhost)."
+  echo "API_PORT (default: 8000)."
+  exit 1
+}
+
+# Default values
+controller_host="localhost"
+controller_port="21001"
+worker_host="localhost"
+worker_port="21002"
+api_host="localhost"
+api_port="8000"
+service_model_path=""
+
+# We do not have any arguments, just run bash
+if [ "$#" == 0 ]; then
+  echo "[INFO] no command is passed in"
+  echo "[INFO] enter pass-through mode"
+  exec /usr/bin/bash -s -- "bash"
+else
+  # Parse command-line options
+  options=$(getopt -o "" --long "service-model-path:,help" -n "$0" -- "$@")
+  if [ $? != 0 ]; then
+    usage
+  fi
+  eval set -- "$options"
+
+  while true; do
+    case "$1" in
+    --service-model-path)
+        service_model_path="$2"
+        shift 2
+        ;;
+    --help)
+      usage
+      ;;
+    --)
+      shift
+      break
+      ;;
+    *)
+      usage
+      ;;
+    esac
+  done
+
+  if [[ -n $CONTROLLER_HOST ]]; then
+    controller_host=$CONTROLLER_HOST
+  fi
+
+  if [[ -n $CONTROLLER_PORT ]]; then
+    controller_port=$CONTROLLER_PORT
+  fi
+
+  if [[ -n $WORKER_HOST ]]; then
+    worker_host=$WORKER_HOST
+  fi
+
+  if [[ -n $WORKER_PORT ]]; then
+    worker_port=$WORKER_PORT
+  fi
+
+  if [[ -n $API_HOST ]]; then
+    api_host=$API_HOST
+  fi
+
+  if [[ -n $API_PORT ]]; then
+    api_port=$API_PORT
+  fi
+
+  controller_address="http://$controller_host:$controller_port"
+  worker_address="http://$worker_host:$worker_port"
+  api_address="http://$api_host:$api_port"
+
+  unset http_proxy
+  unset https_proxy
+
+  python3 -m fastchat.serve.controller --host $controller_host --port $controller_port &
+  python3 -m bigdl.llm.serving.model_worker --model-path $service_model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address &
+  python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address &
+
+  echo "Controller address: $controller_address"
+  echo "Worker address: $worker_address"
+  echo "OpenAI API address: $api_address"
+
+fi
+
+exec /usr/bin/bash -s -- "bash"