From f37547249d204e6b44e809c5539733fcfacacc16 Mon Sep 17 00:00:00 2001
From: Guancheng Fu <110874468+gc-fu@users.noreply.github.com>
Date: Tue, 24 Oct 2023 12:56:03 +0800
Subject: [PATCH] Refine README/CICD (#9253)

---
 .github/workflows/manually_build.yml       | 7 ++++++-
 python/llm/src/bigdl/llm/serving/README.md | 4 +++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/manually_build.yml b/.github/workflows/manually_build.yml
index 47394651..0601b2d0 100644
--- a/.github/workflows/manually_build.yml
+++ b/.github/workflows/manually_build.yml
@@ -876,18 +876,23 @@ jobs:
         echo "##############################################################"
         echo "############## bigdl-ppml-trusted-bigdl-llm-serving-tdx ###############"
         echo "##############################################################"
-        export image=intelanalytics/bigdl-ppml-trusted-bigdl-llm-serving-tdx
+        export image=intelanalytics/trusted-bigdl-llm-serving-tdx
+        export base_image=intelanalytics/bigdl-llm-serving-cpu
+        docker pull ${base_image}:${TAG}
         cd ppml/tdx/docker/trusted-bigdl-llm/serving/docker
         sudo docker build \
           --no-cache=true \
           --build-arg http_proxy=${HTTP_PROXY} \
           --build-arg https_proxy=${HTTPS_PROXY} \
+          --build-arg BASE_IMAGE_NAME=${base_image} \
+          --build-arg BASE_IMAGE_TAG=${TAG} \
           --build-arg no_proxy=${NO_PROXY} \
           -t ${image}:${TAG} -f ./Dockerfile .
         sudo docker push ${image}:${TAG}
         sudo docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG}
         sudo docker push 10.239.45.10/arda/${image}:${TAG}
         sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG}
+        sudo docker rmi -f ${base_image}:${TAG}
 
   bigdl-ppml-trusted-bigdl-llm-tdx:
     if: ${{ github.event.inputs.artifact == 'bigdl-ppml-trusted-bigdl-llm-tdx' || github.event.inputs.artifact == 'all' }}
diff --git a/python/llm/src/bigdl/llm/serving/README.md b/python/llm/src/bigdl/llm/serving/README.md
index 14aa0253..e967f3da 100644
--- a/python/llm/src/bigdl/llm/serving/README.md
+++ b/python/llm/src/bigdl/llm/serving/README.md
@@ -38,7 +38,9 @@ Using BigDL-LLM in FastChat does not impose any new limitations on model usage.
 FastChat determines the Model adapter to use through path matching. Therefore, in order to load models using BigDL-LLM, you need to make some modifications to the model's name.
 
 For instance, assuming you have downloaded the `llama-7b-hf` from [HuggingFace](https://huggingface.co/decapoda-research/llama-7b-hf). Then, to use the `BigDL-LLM` as backend, you need to change name from `llama-7b-hf` to `bigdl-7b`.
-The key point here is that the model's path should include "bigdl" and should not include paths matched by other model adapters.
+The key point here is that the model's path should include "bigdl" and **should not include paths matched by other model adapters**.
+
+> Note: This is caused by the priority of the name-matching list. The newly added `BigDL-LLM` adapter sits at the tail of that list, so it has the lowest priority. If the model path contains another keyword such as `vicuna` that matches a different adapter with higher priority, the `BigDL-LLM` adapter will not work.
 
 A special case is `ChatGLM` models. For these models, you do not need to do any changes after downloading the model and the `BigDL-LLM` backend will be used automatically.
 
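
The refined CI step pulls the base image explicitly, forwards it to the build through the `BASE_IMAGE_NAME`/`BASE_IMAGE_TAG` build args, and removes it afterwards. Below is a hedged sketch of reproducing that flow locally outside the workflow; the `TAG` value and the assumption that the Dockerfile's `FROM` line is parameterized by these build args are illustrative and not taken from this patch.

```bash
#!/usr/bin/env bash
# Hedged sketch: reproducing the refined build step outside CI.
# Assumptions (not from the patch): the TAG value below, and that the Dockerfile
# starts from something like "FROM ${BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}".
set -euo pipefail

TAG=latest                                          # hypothetical tag; CI supplies ${TAG}
image=intelanalytics/trusted-bigdl-llm-serving-tdx
base_image=intelanalytics/bigdl-llm-serving-cpu

# Pull the base image first so the build uses a fresh copy rather than a stale local one.
docker pull ${base_image}:${TAG}

cd ppml/tdx/docker/trusted-bigdl-llm/serving/docker
docker build \
  --no-cache=true \
  --build-arg BASE_IMAGE_NAME=${base_image} \
  --build-arg BASE_IMAGE_TAG=${TAG} \
  -t ${image}:${TAG} -f ./Dockerfile .

# Mirror the cleanup added at the end of the CI step to keep the build host tidy.
docker rmi -f ${base_image}:${TAG}
```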
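
The README change explains that FastChat selects a model adapter by matching keywords in the model path, and that the `BigDL-LLM` adapter only applies when the path contains "bigdl" and no keyword claimed by a higher-priority adapter. Here is a minimal sketch of preparing a model directory accordingly, reusing the README's own `llama-7b-hf` example; the worker command in the trailing comment is an illustrative assumption, not part of this patch.

```bash
# Hedged sketch: make a downloaded checkpoint resolve to the BigDL-LLM adapter.
# Directory names follow the README's example; adjust to your local paths.

# Rename (or symlink) the model directory so its path contains "bigdl" and no
# keyword such as "vicuna" that a higher-priority adapter would match first.
mv ./llama-7b-hf ./bigdl-7b

# Illustrative follow-up (assumption, not from the patch): point a FastChat
# model worker at the renamed path, e.g.
#   python3 -m fastchat.serve.model_worker --model-path ./bigdl-7b --device cpu
```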