Add entry point to llm-serving-xpu (#9339)
* add entry point to llm-serving-xpu * manually build * manually build * add entry point to llm-serving-xpu * manually build * add entry point to llm-serving-xpu * add entry point to llm-serving-xpu * add entry point to llm-serving-xpu
This commit is contained in:
parent
4df66f5cbc
commit
74a8ad32dc
3 changed files with 108 additions and 2 deletions
|
|
@ -279,6 +279,7 @@ An example could be:
|
||||||
export DOCKER_IMAGE=intelanalytics/bigdl-llm-serving-cpu:2.4.0-SNAPSHOT
|
export DOCKER_IMAGE=intelanalytics/bigdl-llm-serving-cpu:2.4.0-SNAPSHOT
|
||||||
export CONTAINER_NAME=my_container
|
export CONTAINER_NAME=my_container
|
||||||
export MODEL_PATH=/llm/models[change to your model path]
|
export MODEL_PATH=/llm/models[change to your model path]
|
||||||
|
export SERVICE_MODEL_PATH=/llm/models/chatglm2-6b[a specified model path for running service]
|
||||||
|
|
||||||
docker run -itd \
|
docker run -itd \
|
||||||
--net=host \
|
--net=host \
|
||||||
|
|
@ -287,8 +288,10 @@ docker run -itd \
|
||||||
--name=$CONTAINER_NAME \
|
--name=$CONTAINER_NAME \
|
||||||
--shm-size="16g" \
|
--shm-size="16g" \
|
||||||
-v $MODEL_PATH:/llm/models \
|
-v $MODEL_PATH:/llm/models \
|
||||||
$DOCKER_IMAGE
|
-e SERVICE_MODEL_PATH=$SERVICE_MODEL_PATH \
|
||||||
|
$DOCKER_IMAGE --service-model-path $SERVICE_MODEL_PATH
|
||||||
```
|
```
|
||||||
|
You can assign a specified model path to `--service-model-path` to run the service while booting the container. Alternatively, you can manually run the service after entering the container. Run `/opt/entrypoint.sh --help` in the container for more information. The steps below also describe how to run the service in detail.
|
||||||
|
|
||||||
To verify the device is successfully mapped into the container, run `sycl-ls` to check the result. In a machine with Arc A770, the sampled output is:
|
To verify the device is successfully mapped into the container, run `sycl-ls` to check the result. In a machine with Arc A770, the sampled output is:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,13 +6,17 @@ ARG https_proxy
|
||||||
# Disable pip's cache behavior
|
# Disable pip's cache behavior
|
||||||
ARG PIP_NO_CACHE_DIR=false
|
ARG PIP_NO_CACHE_DIR=false
|
||||||
|
|
||||||
|
COPY ./entrypoint.sh /opt/entrypoint.sh
|
||||||
|
|
||||||
# Install Serving Dependencies
|
# Install Serving Dependencies
|
||||||
RUN cd /llm && \
|
RUN cd /llm && \
|
||||||
git clone https://github.com/analytics-zoo/FastChat.git && \
|
git clone https://github.com/analytics-zoo/FastChat.git && \
|
||||||
cd FastChat && \
|
cd FastChat && \
|
||||||
git checkout dev-2023-09-22 && \
|
git checkout dev-2023-09-22 && \
|
||||||
pip3 install -e ".[model_worker,webui]" && \
|
pip3 install -e ".[model_worker,webui]" && \
|
||||||
cd /llm
|
cd /llm && \
|
||||||
|
chmod +x /opt/entrypoint.sh
|
||||||
|
|
||||||
|
|
||||||
WORKDIR /llm/
|
WORKDIR /llm/
|
||||||
|
ENTRYPOINT [ "/opt/entrypoint.sh" ]
|
||||||
|
|
|
||||||
99
docker/llm/serving/xpu/docker/entrypoint.sh
Normal file
99
docker/llm/serving/xpu/docker/entrypoint.sh
Normal file
|
|
@ -0,0 +1,99 @@
|
||||||
|
#!/bin/bash

# Print the help text for this entrypoint — the single supported flag and
# the environment variables it honors — then terminate with status 1.
usage() {
  cat <<EOF
Usage: $0 [--service-model-path <service model path>] [--help]
--help: Print help message.
--service-model-path: set model path for model worker
The following environment variables can be set.
CONTROLLER_HOST (default: localhost).
CONTROLLER_PORT (default: 21001).
WORKER_HOST (default: localhost).
WORKER_PORT (default: 21002).
API_HOST (default: localhost).
API_PORT (default: 8000).
EOF
  exit 1
}
|
||||||
|
|
||||||
|
# Default endpoint settings. Every host defaults to localhost; each value
# may be overridden later via its UPPER_CASE environment variable (see usage).
controller_host=localhost
controller_port=21001
worker_host=localhost
worker_port=21002
api_host=localhost
api_port=8000
# Model path handed to the worker; stays empty until --service-model-path is parsed.
service_model_path=
|
||||||
|
|
||||||
|
# With no arguments, skip service startup entirely and just hand the
# container an interactive bash (pass-through mode).
if [ "$#" -eq 0 ]; then
  echo "[INFO] no command is passed in"
  echo "[INFO] enter pass-through mode"
  exec /usr/bin/bash -s -- "bash"
else
  # Parse long options (requires util-linux enhanced getopt).
  # Check getopt's status directly instead of via $? after the assignment.
  if ! options=$(getopt -o "" --long "service-model-path:,help" -n "$0" -- "$@"); then
    usage
  fi
  eval set -- "$options"

  while true; do
    case "$1" in
      --service-model-path)
        service_model_path="$2"
        shift 2
        ;;
      --help)
        usage
        ;;
      --)
        shift
        break
        ;;
      *)
        usage
        ;;
    esac
  done

  # Environment overrides: a non-empty variable replaces the default.
  # ${VAR:-fallback} keeps the default when VAR is unset OR empty, which
  # matches the original [[ -n $VAR ]] guards exactly.
  controller_host=${CONTROLLER_HOST:-$controller_host}
  controller_port=${CONTROLLER_PORT:-$controller_port}
  worker_host=${WORKER_HOST:-$worker_host}
  worker_port=${WORKER_PORT:-$worker_port}
  api_host=${API_HOST:-$api_host}
  api_port=${API_PORT:-$api_port}

  controller_address="http://$controller_host:$controller_port"
  worker_address="http://$worker_host:$worker_port"
  api_address="http://$api_host:$api_port"

  # The three services talk to each other over localhost; an inherited
  # proxy would intercept that traffic, so drop it.
  unset http_proxy
  unset https_proxy

  # Launch the FastChat controller, the BigDL model worker on XPU, and the
  # OpenAI-compatible API server in the background. All expansions are
  # quoted so a model path containing spaces (or left empty) cannot be
  # word-split into stray arguments (SC2086).
  python3 -m fastchat.serve.controller --host "$controller_host" --port "$controller_port" &
  python3 -m bigdl.llm.serving.model_worker --model-path "$service_model_path" --device xpu --host "$worker_host" --port "$worker_port" --worker-address "$worker_address" --controller-address "$controller_address" &
  python3 -m fastchat.serve.openai_api_server --host "$api_host" --port "$api_port" --controller-address "$controller_address" &

  echo "Controller address: $controller_address"
  echo "Worker address: $worker_address"
  echo "OpenAI API address: $api_address"
fi

# Keep the container alive with a bash shell after spawning the services.
exec /usr/bin/bash -s -- "bash"
||||||
Loading…
Reference in a new issue