Using original fastchat and add bigdl worker in docker image (#9967)

* add vllm worker
* add options in entrypoint

parent 301425e377
commit 052962dfa5

4 changed files with 126 additions and 94 deletions
Dockerfile (CPU serving image)

@@ -11,11 +11,7 @@ COPY ./entrypoint.sh /opt/entrypoint.sh
 ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /sbin/tini
 
 # Install Serving Dependencies
 RUN cd /llm && \
-    git clone https://github.com/analytics-zoo/FastChat.git && \
-    cd FastChat && \
-    git checkout dev-2023-09-22 && \
-    pip3 install -e ".[model_worker,webui]" && \
     cd /llm && \
     pip install --pre --upgrade bigdl-llm[serving] && \
     chmod +x /opt/entrypoint.sh && \
     chmod +x /sbin/tini && \
     cp /sbin/tini /usr/bin/tini
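With the analytics-zoo fork no longer built from source, FastChat itself presumably comes from PyPI as a dependency of bigdl-llm[serving], which is what the commit title means by "using original fastchat". A minimal sanity check inside the built image, assuming only that both packages install importable modules, might be:

    # Upstream FastChat should now be importable without the forked checkout.
    python3 -c "import fastchat; print('fastchat', fastchat.__version__)"
    # The worker modules referenced by entrypoint.sh come from bigdl-llm.
    python3 -c "import bigdl.llm.serving.model_worker" && echo "bigdl worker OK"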
entrypoint.sh (CPU serving image)

@@ -134,9 +134,9 @@ else
     done
 
     if [ "$worker_type" == "model_worker" ]; then
-        worker_type="fastchat.serve.model_worker"
+        worker_type="bigdl.llm.serving.model_worker"
     elif [ "$worker_type" == "vllm_worker" ]; then
-        worker_type="fastchat.serve.vllm_worker"
+        worker_type="bigdl.llm.serving.vllm_worker"
     fi
 
     if [[ -n $CONTROLLER_HOST ]]; then
@@ -213,9 +213,9 @@ else
         echo "Worker type: $worker_type"
         echo "Worker address: $worker_address"
         echo "Controller address: $controller_address"
-        if [ "$worker_type" == "fastchat.serve.model_worker" ]; then
+        if [ "$worker_type" == "bigdl.llm.serving.model_worker" ]; then
            python3 -m "$worker_type" --model-path $model_path --device cpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address --stream-interval $stream_interval
-        elif [ "$worker_type" == "fastchat.serve.vllm_worker" ]; then
+        elif [ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]; then
            python3 -m "$worker_type" --model-path $model_path --device cpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address
         fi
     fi
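Substituting the script's own defaults (localhost, ports 21001/21002, stream interval 1), the CPU model-worker branch above launches something like the following; the model path here is hypothetical:

    python3 -m bigdl.llm.serving.model_worker \
        --model-path /llm/models/Llama-2-7b-chat-hf \
        --device cpu \
        --host localhost --port 21002 \
        --worker-address http://localhost:21002 \
        --controller-address http://localhost:21001 \
        --stream-interval 1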
Dockerfile (XPU serving image)

@@ -10,11 +10,7 @@ COPY ./entrypoint.sh /opt/entrypoint.sh
 
 # Install Serving Dependencies
 RUN cd /llm && \
-    git clone https://github.com/analytics-zoo/FastChat.git && \
-    cd FastChat && \
-    git checkout dev-2023-09-22 && \
-    pip3 install -e ".[model_worker,webui]" && \
     cd /llm && \
     pip install --pre --upgrade bigdl-llm[serving] && \
     chmod +x /opt/entrypoint.sh
 
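The XPU image gets the same dependency change, minus the tini setup that the CPU image keeps. Building it should be unaffected by this commit; a sketch with an illustrative tag:

    docker build -t bigdl-llm-serving-xpu:test .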
entrypoint.sh (XPU serving image)

@@ -1,17 +1,17 @@
 #!/bin/bash
 
 usage() {
-  echo "Usage: $0 [--service-model-path <service model path>] [--help]"
-  echo "--help: Print help message."
-  echo "--service-model-path: set model path for model worker"
-  echo "The following environment variables can be set."
-  echo "CONTROLLER_HOST (default: localhost)."
-  echo "CONTROLLER_PORT (default: 21001)."
-  echo "WORKER_HOST (default: localhost)."
-  echo "WORKER_PORT (default: 21002)."
-  echo "API_HOST (default: localhost)."
-  echo "API_PORT (default: 8000)."
-  exit 1
+    echo "Usage: $0 [-m --mode <controller|worker>] [-w --worker <model_worker|vllm_worker>] [--help]"
+    echo "--help: Print help message."
+    echo "The following environment variables can be set."
+    echo "MODEL_PATH (default: empty)."
+    echo "CONTROLLER_HOST (default: localhost)."
+    echo "CONTROLLER_PORT (default: 21001)."
+    echo "WORKER_HOST (default: localhost)."
+    echo "WORKER_PORT (default: 21002)."
+    echo "API_HOST (default: localhost)."
+    echo "API_PORT (default: 8000)."
+    exit 1
 }
 
 # Default values
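The new usage line implies invocations like these (sketches; the model path is hypothetical):

    # Controller mode: starts the FastChat controller and OpenAI API server.
    bash /opt/entrypoint.sh -m controller

    # Worker mode: starts the BigDL model worker against a local controller.
    MODEL_PATH=/llm/models/Llama-2-7b-chat-hf bash /opt/entrypoint.sh -m worker -w model_worker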
@@ -21,79 +21,119 @@ worker_host="localhost"
 worker_port="21002"
 api_host="localhost"
 api_port="8000"
-service_model_path=""
+model_path=""
+mode=""
+dispatch_method="shortest_queue" # shortest_queue or lottery
+stream_interval=1
+worker_type="model_worker"
 
 # We do not have any arguments, just run bash
 if [ "$#" == 0 ]; then
-  echo "[INFO] no command is passed in"
-  echo "[INFO] enter pass-through mode"
-  exec /usr/bin/bash -s -- "bash"
+    echo "[INFO] no command is passed in"
+    echo "[INFO] enter pass-through mode"
+    exec /usr/bin/bash -s -- "bash"
 else
-  # Parse command-line options
-  options=$(getopt -o "" --long "service-model-path:,help" -n "$0" -- "$@")
-  if [ $? != 0 ]; then
-    usage
-  fi
-  eval set -- "$options"
-
-  while true; do
-    case "$1" in
-      --service-model-path)
-        service_model_path="$2"
-        shift 2
-        ;;
-      --help)
-        usage
-        ;;
-      --)
-        shift
-        break
-        ;;
-      *)
-        usage
-        ;;
-    esac
-  done
-
-  if [[ -n $CONTROLLER_HOST ]]; then
-    controller_host=$CONTROLLER_HOST
-  fi
-
-  if [[ -n $CONTROLLER_PORT ]]; then
-    controller_port=$CONTROLLER_PORT
-  fi
-
-  if [[ -n $WORKER_HOST ]]; then
-    worker_host=$WORKER_HOST
-  fi
-
-  if [[ -n $WORKER_PORT ]]; then
-    worker_port=$WORKER_PORT
-  fi
-
-  if [[ -n $API_HOST ]]; then
-    api_host=$API_HOST
-  fi
-
-  if [[ -n $API_PORT ]]; then
-    api_port=$API_PORT
-  fi
-
-  controller_address="http://$controller_host:$controller_port"
-  worker_address="http://$worker_host:$worker_port"
-  api_address="http://$api_host:$api_port"
-
-  unset http_proxy
-  unset https_proxy
-
-  python3 -m fastchat.serve.controller --host $controller_host --port $controller_port &
-  python3 -m bigdl.llm.serving.model_worker --model-path $service_model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address &
-  python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address &
-
-  echo "Controller address: $controller_address"
-  echo "Worker address: $worker_address"
-  echo "OpenAI API address: $api_address"
+    # Parse command-line options
+    options=$(getopt -o "m:hw:" --long "mode:,help,worker:" -n "$0" -- "$@")
+    if [ $? != 0 ]; then
+        usage
+    fi
+    eval set -- "$options"
+
+    while true; do
+        case "$1" in
+            -m|--mode)
+                mode="$2"
+                [[ $mode == "controller" || $mode == "worker" ]] || usage
+                shift 2
+                ;;
+            -w|--worker)
+                worker_type="$2"
+                [[ $worker_type == "model_worker" || $worker_type == "vllm_worker" ]] || usage
+                shift 2
+                ;;
+            -h|--help)
+                usage
+                ;;
+            --)
+                shift
+                break
+                ;;
+            *)
+                usage
+                ;;
+        esac
+    done
+
+    if [ "$worker_type" == "model_worker" ]; then
+        worker_type="bigdl.llm.serving.model_worker"
+    elif [ "$worker_type" == "vllm_worker" ]; then
+        worker_type="bigdl.llm.serving.vllm_worker"
+    fi
+
+    if [[ -n $CONTROLLER_HOST ]]; then
+        controller_host=$CONTROLLER_HOST
+    fi
+
+    if [[ -n $CONTROLLER_PORT ]]; then
+        controller_port=$CONTROLLER_PORT
+    fi
+
+    if [[ -n $WORKER_HOST ]]; then
+        worker_host=$WORKER_HOST
+    fi
+
+    if [[ -n $WORKER_PORT ]]; then
+        worker_port=$WORKER_PORT
+    fi
+
+    if [[ -n $MODEL_PATH ]]; then
+        model_path=$MODEL_PATH
+    fi
+
+    if [[ -n $API_HOST ]]; then
+        api_host=$API_HOST
+    fi
+
+    if [[ -n $API_PORT ]]; then
+        api_port=$API_PORT
+    fi
+
+    if [[ -n $DISPATCH_METHOD ]]; then
+        dispatch_method=$DISPATCH_METHOD
+    fi
+
+    if [[ -n $STREAM_INTERVAL ]]; then
+        stream_interval=$STREAM_INTERVAL
+    fi
+
+    controller_address="http://$controller_host:$controller_port"
+
+    unset http_proxy
+    unset https_proxy
+
+    if [[ $mode == "controller" ]]; then
+
+        api_address="http://$api_host:$api_port"
+        echo "Controller address: $controller_address"
+        echo "OpenAI API address: $api_address"
+        python3 -m fastchat.serve.controller --host $controller_host --port $controller_port --dispatch-method $dispatch_method &
+        python3 -m fastchat.serve.openai_api_server --host $api_host --port $api_port --controller-address $controller_address
+
+    else
+
+        worker_address="http://$worker_host:$worker_port"
+        echo "Worker type: $worker_type"
+        echo "Worker address: $worker_address"
+        echo "Controller address: $controller_address"
+
+        if [ "$worker_type" == "bigdl.llm.serving.model_worker" ]; then
+            python3 -m "$worker_type" --model-path $model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address --stream-interval $stream_interval
+        elif [ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]; then
+            python3 -m "$worker_type" --model-path $model_path --device xpu --host $worker_host --port $worker_port --worker-address $worker_address --controller-address $controller_address
+        fi
+    fi
 
 fi
 
 exec /usr/bin/bash -s -- "bash"
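Taken together, a split deployment driven by the environment variables above might look like this sketch; the host names and model path are illustrative, not part of the commit:

    # Machine A: controller plus OpenAI-compatible API server (port 8000).
    CONTROLLER_HOST=0.0.0.0 API_HOST=0.0.0.0 bash /opt/entrypoint.sh -m controller

    # Machine B: vLLM worker on XPU, registering with the controller on A.
    CONTROLLER_HOST=hostA CONTROLLER_PORT=21001 \
    WORKER_HOST=hostB WORKER_PORT=21002 \
    MODEL_PATH=/llm/models/Llama-2-7b-chat-hf \
    bash /opt/entrypoint.sh -m worker -w vllm_worker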