Fix qwen model adapter in docker (#9969)
* fix qwen in docker * add patch for model_adapter.py in fastchat * add patch for model_adapter.py in fastchat
This commit is contained in:
parent
50a851e3b3
commit
a2718038f7
2 changed files with 24 additions and 0 deletions
|
|
@@ -8,10 +8,13 @@ ARG TINI_VERSION=v0.18.0
|
||||||
ARG PIP_NO_CACHE_DIR=false
|
ARG PIP_NO_CACHE_DIR=false
|
||||||
|
|
||||||
COPY ./entrypoint.sh /opt/entrypoint.sh
|
COPY ./entrypoint.sh /opt/entrypoint.sh
|
||||||
|
COPY ./model_adapter.py.patch /llm/model_adapter.py.patch
|
||||||
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /sbin/tini
|
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /sbin/tini
|
||||||
# Install Serving Dependencies
|
# Install Serving Dependencies
|
||||||
RUN cd /llm && \
|
RUN cd /llm && \
|
||||||
pip install --pre --upgrade bigdl-llm[serving] && \
|
pip install --pre --upgrade bigdl-llm[serving] && \
|
||||||
|
# Fix Qwen model adapter in fastchat
|
||||||
|
patch /usr/local/lib/python3.9/dist-packages/fastchat/model/model_adapter.py < /llm/model_adapter.py.patch && \
|
||||||
chmod +x /opt/entrypoint.sh && \
|
chmod +x /opt/entrypoint.sh && \
|
||||||
chmod +x /sbin/tini && \
|
chmod +x /sbin/tini && \
|
||||||
cp /sbin/tini /usr/bin/tini
|
cp /sbin/tini /usr/bin/tini
|
||||||
|
|
|
||||||
21
docker/llm/serving/cpu/docker/model_adapter.py.patch
Normal file
21
docker/llm/serving/cpu/docker/model_adapter.py.patch
Normal file
|
|
@@ -0,0 +1,21 @@
|
||||||
|
--- model_adapter.py.old 2024-01-24 01:56:23.903144335 +0000
|
||||||
|
+++ model_adapter.py 2024-01-24 01:59:22.605062765 +0000
|
||||||
|
@@ -1346,15 +1346,17 @@
|
||||||
|
)
|
||||||
|
# NOTE: if you use the old version of model file, please remove the comments below
|
||||||
|
# config.use_flash_attn = False
|
||||||
|
- config.fp16 = True
|
||||||
|
+ # config.fp16 = True
|
||||||
|
generation_config = GenerationConfig.from_pretrained(
|
||||||
|
model_path, trust_remote_code=True
|
||||||
|
)
|
||||||
|
+ from bigdl.llm.transformers import AutoModelForCausalLM
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
model_path,
|
||||||
|
config=config,
|
||||||
|
low_cpu_mem_usage=True,
|
||||||
|
trust_remote_code=True,
|
||||||
|
+ load_in_4bit=True,
|
||||||
|
**from_pretrained_kwargs,
|
||||||
|
).eval()
|
||||||
|
if hasattr(model.config, "use_dynamic_ntk") and model.config.use_dynamic_ntk:
|
||||||
Loading…
Reference in a new issue