Using original fastchat and add bigdl worker in docker image (#9967)

* add vllm worker
* add options in entrypoint

parent 301425e377
commit 052962dfa5

4 changed files with 126 additions and 94 deletions
Dockerfile (CPU serving image)

@@ -11,11 +11,7 @@ COPY ./entrypoint.sh /opt/entrypoint.sh
 ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /sbin/tini
 
 # Install Serving Dependencies
 RUN cd /llm && \
-    git clone https://github.com/analytics-zoo/FastChat.git && \
-    cd FastChat && \
-    git checkout dev-2023-09-22 && \
-    pip3 install -e ".[model_worker,webui]" && \
     cd /llm && \
     pip install --pre --upgrade bigdl-llm[serving] && \
     chmod +x /opt/entrypoint.sh && \
     chmod +x /sbin/tini && \
     cp /sbin/tini /usr/bin/tini
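With the analytics-zoo fork no longer built from source, FastChat itself presumably comes from PyPI as a dependency of bigdl-llm[serving], which is what the commit title means by "using original fastchat". A minimal sanity check inside the built image, assuming only that both packages install importable modules, might be:

    # Upstream FastChat should now be importable without the forked checkout.
    python3 -c "import fastchat; print('fastchat', fastchat.__version__)"
    # The worker modules referenced by entrypoint.sh come from bigdl-llm.
    python3 -c "import bigdl.llm.serving.model_worker" && echo "bigdl worker OK"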
entrypoint.sh (CPU serving image)

@@ -134,9 +134,9 @@ else
     done
 
     if [ "$worker_type" == "model_worker" ]; then
-        worker_type="fastchat.serve.model_worker"
+        worker_type="bigdl.llm.serving.model_worker"
     elif [ "$worker_type" == "vllm_worker" ]; then
-        worker_type="fastchat.serve.vllm_worker"
+        worker_type="bigdl.llm.serving.vllm_worker"
     fi
 
     if [[ -n $CONTROLLER_HOST ]]; then
@@ -213,9 +213,9 @@ else
         echo "Worker type: $worker_type"
         echo "Worker address: $worker_address"
         echo "Controller address: $controller_address"
-        if [ "$worker_type" == "fastchat.serve.model_worker" ]; then
+        if [ "$worker_type" == "bigdl.llm.serving.model_worker" ]; then
            python3 -m "$worker_type" --model-path $model_path --device cpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address --stream-interval $stream_interval
-        elif [ "$worker_type" == "fastchat.serve.vllm_worker" ]; then
+        elif [ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]; then
            python3 -m "$worker_type" --model-path $model_path --device cpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address
         fi
     fi
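Substituting the script's own defaults (localhost, ports 21001/21002, stream interval 1), the CPU model-worker branch above launches something like the following; the model path here is hypothetical:

    python3 -m bigdl.llm.serving.model_worker \
        --model-path /llm/models/Llama-2-7b-chat-hf \
        --device cpu \
        --host localhost --port 21002 \
        --worker-address http://localhost:21002 \
        --controller-address http://localhost:21001 \
        --stream-interval 1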
Dockerfile (XPU serving image)

@@ -10,11 +10,7 @@ COPY ./entrypoint.sh /opt/entrypoint.sh
 
 # Install Serving Dependencies
 RUN cd /llm && \
-    git clone https://github.com/analytics-zoo/FastChat.git && \
-    cd FastChat && \
-    git checkout dev-2023-09-22 && \
-    pip3 install -e ".[model_worker,webui]" && \
     cd /llm && \
     pip install --pre --upgrade bigdl-llm[serving] && \
     chmod +x /opt/entrypoint.sh
 
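The XPU image gets the same dependency change, minus the tini setup that the CPU image keeps. Building it should be unaffected by this commit; a sketch with an illustrative tag:

    docker build -t bigdl-llm-serving-xpu:test .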
entrypoint.sh (XPU serving image)

@@ -1,17 +1,17 @@
 #!/bin/bash
 
 usage() {
-  echo "Usage: $0 [--service-model-path <service model path>] [--help]"
-  echo "--help: Print help message."
-  echo "--service-model-path: set model path for model worker"
-  echo "The following environment variables can be set."
-  echo "CONTROLLER_HOST (default: localhost)."
-  echo "CONTROLLER_PORT (default: 21001)."
-  echo "WORKER_HOST (default: localhost)."
-  echo "WORKER_PORT (default: 21002)."
-  echo "API_HOST (default: localhost)."
-  echo "API_PORT (default: 8000)."
-  exit 1
+    echo "Usage: $0 [-m --mode <controller|worker>] [-w --worker <model_worker|vllm_worker>] [--help]"
+    echo "--help: Print help message."
+    echo "The following environment variables can be set."
+    echo "MODEL_PATH (default: empty)."
+    echo "CONTROLLER_HOST (default: localhost)."
+    echo "CONTROLLER_PORT (default: 21001)."
+    echo "WORKER_HOST (default: localhost)."
+    echo "WORKER_PORT (default: 21002)."
+    echo "API_HOST (default: localhost)."
+    echo "API_PORT (default: 8000)."
+    exit 1
 }
 
 # Default values
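The new usage line implies invocations like these (sketches; the model path is hypothetical):

    # Controller mode: starts the FastChat controller and OpenAI API server.
    bash /opt/entrypoint.sh -m controller

    # Worker mode: starts the BigDL model worker against a local controller.
    MODEL_PATH=/llm/models/Llama-2-7b-chat-hf bash /opt/entrypoint.sh -m worker -w model_worker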
@@ -21,79 +21,119 @@ worker_host="localhost"
 worker_port="21002"
 api_host="localhost"
 api_port="8000"
-service_model_path=""
+model_path=""
+mode=""
+dispatch_method="shortest_queue" # shortest_queue or lottery
+stream_interval=1
+worker_type="model_worker"
 
 # We do not have any arguments, just run bash
 if [ "$#" == 0 ]; then
-  echo "[INFO] no command is passed in"
-  echo "[INFO] enter pass-through mode"
-  exec /usr/bin/bash -s -- "bash"
+    echo "[INFO] no command is passed in"
+    echo "[INFO] enter pass-through mode"
+    exec /usr/bin/bash -s -- "bash"
 else
-  # Parse command-line options
-  options=$(getopt -o "" --long "service-model-path:,help" -n "$0" -- "$@")
-  if [ $? != 0 ]; then
-    usage
-  fi
-  eval set -- "$options"
-
-  while true; do
-    case "$1" in
-      --service-model-path)
-        service_model_path="$2"
-        shift 2
-        ;;
-      --help)
-        usage
-        ;;
-      --)
-        shift
-        break
-        ;;
-      *)
-        usage
-        ;;
-    esac
-  done
-
-  if [[ -n $CONTROLLER_HOST ]]; then
-    controller_host=$CONTROLLER_HOST
-  fi
-
-  if [[ -n $CONTROLLER_PORT ]]; then
-    controller_port=$CONTROLLER_PORT
-  fi
-
-  if [[ -n $WORKER_HOST ]]; then
-    worker_host=$WORKER_HOST
-  fi
-
-  if [[ -n $WORKER_PORT ]]; then
-    worker_port=$WORKER_PORT
-  fi
-
-  if [[ -n $API_HOST ]]; then
-    api_host=$API_HOST
-  fi
-
-  if [[ -n $API_PORT ]]; then
-    api_port=$API_PORT
-  fi
-
-  controller_address="http://$controller_host:$controller_port"
-  worker_address="http://$worker_host:$worker_port"
-  api_address="http://$api_host:$api_port"
-
-  unset http_proxy
-  unset https_proxy
-
-  python3 -m fastchat.serve.controller --host $controller_host --port $controller_port &
-  python3 -m bigdl.llm.serving.model_worker --model-path $service_model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address &
-  python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address &
-
-  echo "Controller address: $controller_address"
-  echo "Worker address: $worker_address"
-  echo "OpenAI API address: $api_address"
+    # Parse command-line options
+    options=$(getopt -o "m:hw:" --long "mode:,help,worker:" -n "$0" -- "$@")
+    if [ $? != 0 ]; then
+        usage
+    fi
+    eval set -- "$options"
+
+    while true; do
+        case "$1" in
+            -m|--mode)
+                mode="$2"
+                [[ $mode == "controller" || $mode == "worker" ]] || usage
+                shift 2
+                ;;
+            -w|--worker)
+                worker_type="$2"
+                [[ $worker_type == "model_worker" || $worker_type == "vllm_worker" ]] || usage
+                shift 2
+                ;;
+            -h|--help)
+                usage
+                ;;
+            --)
+                shift
+                break
+                ;;
+            *)
+                usage
+                ;;
+        esac
+    done
+
+    if [ "$worker_type" == "model_worker" ]; then
+        worker_type="bigdl.llm.serving.model_worker"
+    elif [ "$worker_type" == "vllm_worker" ]; then
+        worker_type="bigdl.llm.serving.vllm_worker"
+    fi
+
+    if [[ -n $CONTROLLER_HOST ]]; then
+        controller_host=$CONTROLLER_HOST
+    fi
+
+    if [[ -n $CONTROLLER_PORT ]]; then
+        controller_port=$CONTROLLER_PORT
+    fi
+
+    if [[ -n $WORKER_HOST ]]; then
+        worker_host=$WORKER_HOST
+    fi
+
+    if [[ -n $WORKER_PORT ]]; then
+        worker_port=$WORKER_PORT
+    fi
+
+    if [[ -n $MODEL_PATH ]]; then
+        model_path=$MODEL_PATH
+    fi
+
+    if [[ -n $API_HOST ]]; then
+        api_host=$API_HOST
+    fi
+
+    if [[ -n $API_PORT ]]; then
+        api_port=$API_PORT
+    fi
+
+    if [[ -n $DISPATCH_METHOD ]]; then
+        dispatch_method=$DISPATCH_METHOD
+    fi
+
+    if [[ -n $STREAM_INTERVAL ]]; then
+        stream_interval=$STREAM_INTERVAL
+    fi
+
+    controller_address="http://$controller_host:$controller_port"
+
+    unset http_proxy
+    unset https_proxy
+
+    if [[ $mode == "controller" ]]; then
+
+        api_address="http://$api_host:$api_port"
+        echo "Controller address: $controller_address"
+        echo "OpenAI API address: $api_address"
+        python3 -m fastchat.serve.controller --host $controller_host --port $controller_port --dispatch-method $dispatch_method &
+        python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address
+
+    else
+
+        worker_address="http://$worker_host:$worker_port"
+        echo "Worker type: $worker_type"
+        echo "Worker address: $worker_address"
+        echo "Controller address: $controller_address"
+
+        if [ "$worker_type" == "bigdl.llm.serving.model_worker" ]; then
+            python3 -m "$worker_type" --model-path $model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address --stream-interval $stream_interval
+        elif [ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]; then
+            python3 -m "$worker_type" --model-path $model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address
+        fi
+    fi
 
 fi
 
 exec /usr/bin/bash -s -- "bash"
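Taken together, a split deployment driven by the environment variables above might look like this sketch; the host names and model path are illustrative, not part of the commit:

    # Machine A: controller plus OpenAI-compatible API server (port 8000).
    CONTROLLER_HOST=0.0.0.0 API_HOST=0.0.0.0 bash /opt/entrypoint.sh -m controller

    # Machine B: vLLM worker on XPU, registering with the controller on A.
    CONTROLLER_HOST=hostA CONTROLLER_PORT=21001 \
    WORKER_HOST=hostB WORKER_PORT=21002 \
    MODEL_PATH=/llm/models/Llama-2-7b-chat-hf \
    bash /opt/entrypoint.sh -m worker -w vllm_worker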