From 74a8ad32dc6f58342a06d1e7530195126578d951 Mon Sep 17 00:00:00 2001
From: Lilac09 <74996885+Zhengjin-Wang@users.noreply.github.com>
Date: Thu, 2 Nov 2023 16:31:07 +0800
Subject: [PATCH] Add entry point to llm-serving-xpu (#9339)

* add entry point to llm-serving-xpu
* manually build
* manually build
* add entry point to llm-serving-xpu
* manually build
* add entry point to llm-serving-xpu
* add entry point to llm-serving-xpu
* add entry point to llm-serving-xpu
---
 docker/llm/README.md                        |  5 +-
 docker/llm/serving/xpu/docker/Dockerfile    |  6 +-
 docker/llm/serving/xpu/docker/entrypoint.sh | 99 +++++++++++++++++++++
 3 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 docker/llm/serving/xpu/docker/entrypoint.sh

diff --git a/docker/llm/README.md b/docker/llm/README.md
index b155a319..292e17d7 100644
--- a/docker/llm/README.md
+++ b/docker/llm/README.md
@@ -279,6 +279,7 @@ An example could be:
 export DOCKER_IMAGE=intelanalytics/bigdl-llm-serving-cpu:2.4.0-SNAPSHOT
 export CONTAINER_NAME=my_container
 export MODEL_PATH=/llm/models[change to your model path]
+export SERVICE_MODEL_PATH=/llm/models/chatglm2-6b[a specified model path for running service]
 
 docker run -itd \
         --net=host \
@@ -287,8 +288,10 @@ docker run -itd \
         --name=$CONTAINER_NAME \
         --shm-size="16g" \
         -v $MODEL_PATH:/llm/models \
-        $DOCKER_IMAGE
+        -e SERVICE_MODEL_PATH=$SERVICE_MODEL_PATH \
+        $DOCKER_IMAGE --service-model-path $SERVICE_MODEL_PATH
 ```
+You can pass a specific model path to `--service-model-path` to start the service when the container boots, or start the service manually after entering the container. Run `/opt/entrypoint.sh --help` inside the container for more information. The steps below also describe how to run the service in detail.
 
 To verify the device is successfully mapped into the container, run `sycl-ls` to check the result. In a machine with Arc A770, the sampled output is:
 
diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index 2dc57e72..829778d6 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -6,13 +6,17 @@ ARG https_proxy
 # Disable pip's cache behavior
 ARG PIP_NO_CACHE_DIR=false
 
+COPY ./entrypoint.sh /opt/entrypoint.sh
+
 # Install Serving Dependencies
 RUN cd /llm && \
     git clone https://github.com/analytics-zoo/FastChat.git && \
     cd FastChat && \
     git checkout dev-2023-09-22 && \
     pip3 install -e ".[model_worker,webui]" && \
-    cd /llm
+    cd /llm && \
+    chmod +x /opt/entrypoint.sh
 
 WORKDIR /llm/
 
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/docker/llm/serving/xpu/docker/entrypoint.sh b/docker/llm/serving/xpu/docker/entrypoint.sh
new file mode 100644
index 00000000..92ea43c5
--- /dev/null
+++ b/docker/llm/serving/xpu/docker/entrypoint.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+usage() {
+  echo "Usage: $0 [--service-model-path <model path>] [--help]"
+  echo "--help: Print help message."
+  echo "--service-model-path: set model path for model worker"
+  echo "The following environment variables can be set."
+  echo "CONTROLLER_HOST (default: localhost)."
+  echo "CONTROLLER_PORT (default: 21001)."
+  echo "WORKER_HOST (default: localhost)."
+  echo "WORKER_PORT (default: 21002)."
+  echo "API_HOST (default: localhost)."
+  echo "API_PORT (default: 8000)."
+  exit 1
+}
+
+# Default values
+controller_host="localhost"
+controller_port="21001"
+worker_host="localhost"
+worker_port="21002"
+api_host="localhost"
+api_port="8000"
+service_model_path=""
+
+# We do not have any arguments, just run bash
+if [ "$#" == 0 ]; then
+  echo "[INFO] no command is passed in"
+  echo "[INFO] enter pass-through mode"
+  exec /usr/bin/bash -s -- "bash"
+else
+  # Parse command-line options
+  options=$(getopt -o "" --long "service-model-path:,help" -n "$0" -- "$@")
+  if [ $? != 0 ]; then
+    usage
+  fi
+  eval set -- "$options"
+
+  while true; do
+    case "$1" in
+      --service-model-path)
+        service_model_path="$2"
+        shift 2
+        ;;
+      --help)
+        usage
+        ;;
+      --)
+        shift
+        break
+        ;;
+      *)
+        usage
+        ;;
+    esac
+  done
+
+  if [[ -n $CONTROLLER_HOST ]]; then
+    controller_host=$CONTROLLER_HOST
+  fi
+
+  if [[ -n $CONTROLLER_PORT ]]; then
+    controller_port=$CONTROLLER_PORT
+  fi
+
+  if [[ -n $WORKER_HOST ]]; then
+    worker_host=$WORKER_HOST
+  fi
+
+  if [[ -n $WORKER_PORT ]]; then
+    worker_port=$WORKER_PORT
+  fi
+
+  if [[ -n $API_HOST ]]; then
+    api_host=$API_HOST
+  fi
+
+  if [[ -n $API_PORT ]]; then
+    api_port=$API_PORT
+  fi
+
+  controller_address="http://$controller_host:$controller_port"
+  worker_address="http://$worker_host:$worker_port"
+  api_address="http://$api_host:$api_port"
+
+  unset http_proxy
+  unset https_proxy
+
+  python3 -m fastchat.serve.controller --host $controller_host --port $controller_port &
+  python3 -m bigdl.llm.serving.model_worker --model-path $service_model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address &
+  python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address &
+
+  echo "Controller address: $controller_address"
+  echo "Worker address: $worker_address"
+  echo "OpenAI API address: $api_address"
+
+fi
+
+exec /usr/bin/bash -s -- "bash"
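
For a quick sanity check after the container starts through this entrypoint, the OpenAI-compatible API server launched by `fastchat.serve.openai_api_server` listens on port 8000 unless `API_HOST`/`API_PORT` are overridden. A minimal smoke test might look like the sketch below; it assumes the `--net=host` run command from the README example and that the served model directory is named `chatglm2-6b`, so the model name is an assumption rather than something fixed by this patch:

```bash
# List the models registered with the FastChat controller; the served model should appear here.
curl http://localhost:8000/v1/models

# Send a minimal chat completion request through the OpenAI-compatible API.
# "chatglm2-6b" is assumed from the SERVICE_MODEL_PATH example in the README; replace it with
# the basename of the model path you actually passed to --service-model-path.
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "chatglm2-6b", "messages": [{"role": "user", "content": "Hello"}]}'
```

If the model worker has not yet finished loading the model onto the XPU and registering with the controller, the completion request may fail, so checking `/v1/models` first is the safer order.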