From 146b77f1130623c633852ee042ace5fa29a6db99 Mon Sep 17 00:00:00 2001
From: ZehuaCao <47251317+Romanticoseu@users.noreply.github.com>
Date: Tue, 12 Mar 2024 13:20:06 +0800
Subject: [PATCH] fix qlora-finetune Dockerfile (#10379)

---
Review notes (this area is ignored by `git am`):
  * Line breaks were restored from a whitespace-flattened copy of this patch.
    The indentation of context lines is a best guess, so apply with
    `git am --ignore-whitespace` (or `git apply --ignore-whitespace`).
  * The entrypoint now falls back to 48 threads (the previously hard-coded
    value) when OMP_NUM_THREADS is unset or empty, e.g. when the image is run
    with a values.yaml that predates the `omp_num_threads` key.

 docker/llm/finetune/qlora/cpu/docker/Dockerfile                 | 4 +---
 docker/llm/finetune/qlora/cpu/docker/Dockerfile.k8s             | 4 ++--
 .../qlora/cpu/docker/bigdl-qlora-finetuing-entrypoint.sh        | 2 +-
 .../cpu/kubernetes/templates/bigdl-qlora-finetuning-job.yaml    | 2 ++
 docker/llm/finetune/qlora/cpu/kubernetes/values.yaml            | 1 +
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/docker/llm/finetune/qlora/cpu/docker/Dockerfile b/docker/llm/finetune/qlora/cpu/docker/Dockerfile
index 82f39893..f47a4c88 100644
--- a/docker/llm/finetune/qlora/cpu/docker/Dockerfile
+++ b/docker/llm/finetune/qlora/cpu/docker/Dockerfile
@@ -46,10 +46,8 @@ RUN mkdir -p /bigdl/data && mkdir -p /bigdl/model && \
     rm -r BigDL && \
     chown -R mpiuser /bigdl
 
-# for docker directly run example
+# for standalone
 COPY ./start-qlora-finetuning-on-cpu.sh /bigdl/start-qlora-finetuning-on-cpu.sh
-# for k8s
-COPY ./bigdl-qlora-finetuing-entrypoint.sh /bigdl/bigdl-qlora-finetuing-entrypoint.sh
 
 USER mpiuser
 
diff --git a/docker/llm/finetune/qlora/cpu/docker/Dockerfile.k8s b/docker/llm/finetune/qlora/cpu/docker/Dockerfile.k8s
index f2454890..d2991985 100644
--- a/docker/llm/finetune/qlora/cpu/docker/Dockerfile.k8s
+++ b/docker/llm/finetune/qlora/cpu/docker/Dockerfile.k8s
@@ -65,8 +65,8 @@ RUN mkdir -p /bigdl/data && mkdir -p /bigdl/model && \
     rm -r BigDL && \
     chown -R mpiuser /bigdl
 
-# for standalone
-COPY ./start-qlora-finetuning-on-cpu.sh /bigdl/start-qlora-finetuning-on-cpu.sh
+# for k8s
+COPY ./bigdl-qlora-finetuing-entrypoint.sh /bigdl/bigdl-qlora-finetuing-entrypoint.sh
 
 USER mpiuser
 
diff --git a/docker/llm/finetune/qlora/cpu/docker/bigdl-qlora-finetuing-entrypoint.sh b/docker/llm/finetune/qlora/cpu/docker/bigdl-qlora-finetuing-entrypoint.sh
index 1da7eec8..3ed37dbb 100644
--- a/docker/llm/finetune/qlora/cpu/docker/bigdl-qlora-finetuing-entrypoint.sh
+++ b/docker/llm/finetune/qlora/cpu/docker/bigdl-qlora-finetuing-entrypoint.sh
@@ -20,7 +20,7 @@ then
     -f /home/mpiuser/hostfile \
     -iface eth0 \
     --bind-to socket \
-    -genv OMP_NUM_THREADS=48 \
+    -genv OMP_NUM_THREADS="${OMP_NUM_THREADS:-48}" \
     -genv KMP_AFFINITY="granularity=fine,none" \
     -genv KMP_BLOCKTIME=1 \
     -genv TF_ENABLE_ONEDNN_OPTS=1 \
diff --git a/docker/llm/finetune/qlora/cpu/kubernetes/templates/bigdl-qlora-finetuning-job.yaml b/docker/llm/finetune/qlora/cpu/kubernetes/templates/bigdl-qlora-finetuning-job.yaml
index 2ca952e6..71b1cd03 100644
--- a/docker/llm/finetune/qlora/cpu/kubernetes/templates/bigdl-qlora-finetuning-job.yaml
+++ b/docker/llm/finetune/qlora/cpu/kubernetes/templates/bigdl-qlora-finetuning-job.yaml
@@ -43,6 +43,8 @@ spec:
               value: "{{ .Values.httpProxy }}"
             - name: https_proxy
               value: "{{ .Values.httpsProxy }}"
+            - name: OMP_NUM_THREADS
+              value: "{{ .Values.omp_num_threads }}"
             - name: LOCAL_POD_NAME
               valueFrom:
                 fieldRef:
diff --git a/docker/llm/finetune/qlora/cpu/kubernetes/values.yaml b/docker/llm/finetune/qlora/cpu/kubernetes/values.yaml
index 293cbe05..b195f203 100644
--- a/docker/llm/finetune/qlora/cpu/kubernetes/values.yaml
+++ b/docker/llm/finetune/qlora/cpu/kubernetes/values.yaml
@@ -6,5 +6,6 @@ nfsServerIp: your_nfs_server_ip
 nfsPath: a_nfs_shared_folder_path_on_the_server
 dataSubPath: alpaca_data_cleaned_archive.json # a subpath of the data file under nfs directory
 modelSubPath: Llama-2-7b-chat-hf # a subpath of the model file (dir) under nfs directory
+omp_num_threads: 48 # configure this value based on the number of CPU cores
 httpProxy: "your_http_proxy_like_http://xxx:xxxx_if_needed_else_empty"
 httpsProxy: "your_https_proxy_like_http://xxx:xxxx_if_needed_else_empty"