[LLM] Multi-process and distributed QLoRA on CPU platform (#9491)

* [LLM] Multi-process and distributed QLoRA on CPU platform

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* enable llm-init and bind to socket

* refine

* Update Dockerfile

* add all files of qlora cpu example to /bigdl

* fix

* fix k8s

* Update bigdl-qlora-finetuing-entrypoint.sh

* Update bigdl-qlora-finetuing-entrypoint.sh

* Update bigdl-qlora-finetuning-job.yaml

* fix train sync and performance issues

* add node affinity

* disable user to tune cpu per pod

* Update bigdl-qlora-finetuning-job.yaml
Heyang Sun 2023-12-01 13:47:19 +08:00 committed by GitHub
parent ed0dc57c6e
commit 74fd7077a2
11 changed files with 368 additions and 28 deletions

View file

@ -1,19 +1,43 @@
FROM intel/oneapi-basekit:2023.2.1-devel-ubuntu22.04
FROM ubuntu:20.04 as key-getter
ARG http_proxy
ARG https_proxy
RUN apt-get update && \
apt-get install -y curl gpg && \
curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB | gpg --dearmor | tee /root/intel-oneapi-archive-keyring.gpg
FROM mpioperator/intel as builder
ARG http_proxy
ARG https_proxy
ENV TZ=Asia/Shanghai
ARG PIP_NO_CACHE_DIR=false
ENV TRANSFORMERS_COMMIT_ID=95fe0f5
# retrieve oneapi repo public key
RUN curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \
echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list
# add public key
COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg
RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list
# update dependencies
RUN apt-get update && \
# install basic dependencies
RUN mkdir -p /bigdl/data && mkdir -p /bigdl/model && \
# install pytorch 2.1.0
apt-get update && \
apt-get install -y python3-pip python3.9-dev python3-wheel git software-properties-common && \
pip3 install --upgrade pip && \
export PIP_DEFAULT_TIMEOUT=100 && \
pip install --upgrade torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu && \
# install CPU bigdl-llm
pip3 install --pre --upgrade bigdl-llm[all] -i https://pypi.tuna.tsinghua.edu.cn/simple/ && \
# install ipex and oneccl
pip install intel_extension_for_pytorch==2.0.100 && \
pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \
# install huggingface dependencies
pip install datasets transformers==4.34.0 && \
pip install fire peft==0.5.0 && \
pip install accelerate==0.23.0 && \
# install basic dependencies
apt-get install -y curl wget git gnupg gpg-agent software-properties-common libunwind8-dev vim less && \
# install python 3.9
# install python 3.9
ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
env DEBIAN_FRONTEND=noninteractive apt-get update && \
add-apt-repository ppa:deadsnakes/ppa -y && \
@ -22,18 +46,29 @@ RUN apt-get update && \
ln -s /usr/bin/python3.9 /usr/bin/python3 && \
ln -s /usr/bin/python3 /usr/bin/python && \
apt-get install -y python3-pip python3.9-dev python3-wheel python3.9-distutils && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
# install OpenSSH for MPI to communicate between containers
apt-get install -y --no-install-recommends openssh-client openssh-server && \
mkdir -p /var/run/sshd && \
# allow OpenSSH to talk to containers without asking for confirmation
# by disabling StrictHostKeyChecking.
# mpi-operator mounts the .ssh folder from a Secret. For that to work, we need
# to disable UserKnownHostsFile to avoid write permissions.
# disabling StrictModes avoids directory and files read permission checks.
sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \
# add bigdl-llm qlora cpu example
cd /bigdl && \
git clone https://github.com/intel-analytics/BigDL.git && \
mv BigDL/python/llm/example/CPU/QLoRA-FineTuning/* . && \
rm -r BigDL
# for docker directly run example
COPY ./start-qlora-finetuning-on-cpu.sh /bigdl/start-qlora-finetuning-on-cpu.sh
# for k8s
COPY ./bigdl-qlora-finetuing-entrypoint.sh /bigdl/bigdl-qlora-finetuing-entrypoint.sh
# install torch and oneccl to reduce bigdl-llm size
RUN pip3 install --upgrade pip && \
export PIP_DEFAULT_TIMEOUT=100 && \
pip install --upgrade torch==2.0.1 --index-url https://download.pytorch.org/whl/cpu && \
pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \
# install CPU bigdl-llm
pip install --pre --upgrade bigdl-llm[all] -i https://pypi.tuna.tsinghua.edu.cn/simple/ && \
# install huggingface dependencies
pip install transformers==4.34.0 && \
pip install peft==0.5.0 datasets
ADD ./qlora_finetuning_cpu.py /qlora_finetuning_cpu.py
ADD ./start-qlora-finetuning-on-cpu.sh /start-qlora-finetuning-on-cpu.sh
RUN chown -R mpiuser /bigdl
USER mpiuser
ENTRYPOINT ["/bin/bash"]

View file

@ -38,8 +38,8 @@ docker run -itd \
--name=bigdl-llm-fintune-qlora-cpu \
-e http_proxy=${HTTP_PROXY} \
-e https_proxy=${HTTPS_PROXY} \
-v $BASE_MODE_PATH:/model \
-v $DATA_PATH:/data/english_quotes \
-v $BASE_MODE_PATH:/bigdl/model \
-v $DATA_PATH:/bigdl/data/english_quotes \
intelanalytics/bigdl-llm-finetune-qlora-cpu:2.4.0-SNAPSHOT
```
@ -59,7 +59,7 @@ docker run -itd \
However, we do recommend handling them manually, because the automatic download can be blocked by network restrictions or Hugging Face authentication depending on your environment, and the manual method also lets you fine-tune in a custom way (with a different base model and dataset).
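For example, a manual preparation could look like the sketch below (the exact Hugging Face repositories are assumptions based on this example's defaults; adjust them to your base model and dataset):
```bash
# base model (Llama 2 requires approved access on Hugging Face); needs git-lfs
git lfs install
git clone https://huggingface.co/meta-llama/Llama-2-7b-chat-hf

# fine-tuning data used by this example
git clone https://huggingface.co/datasets/Abirate/english_quotes

# then point BASE_MODE_PATH and DATA_PATH in the docker run command above to these folders
```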
### 3. Start Fine-Tuning
### 3. Start Fine-Tuning (Local Mode)
Enter the running container:
@ -70,9 +70,7 @@ docker exec -it bigdl-llm-fintune-qlora-cpu bash
Then, start QLoRA fine-tuning:
If the machine does not have enough memory, you can try setting `use_gradient_checkpointing=True`.
Also remember to source `bigdl-llm-init` before you start fine-tuning, which can accelerate the job.
```bash
source bigdl-llm-init -t
bash start-qlora-finetuning-on-cpu.sh
```
@ -127,3 +125,32 @@ Inference time: xxx s
-------------------- Output --------------------
“QLoRA fine-tuning using BigDL-LLM 4bit optimizations on Intel CPU is Efficient and convenient” ->: ['bigdl'] ['deep-learning'] ['distributed-computing'] ['intel'] ['optimization'] ['training'] ['training-speed']
```
### 4. Start Multi-Process Fine-Tuning in One Docker Container
<img src="https://github.com/Uxito-Ada/BigDL/assets/60865256/f25c43b3-2b24-4476-a0fe-804c0ef3c36c" height="240px"><br>
Multi-process parallelism enables higher performance for QLoRA fine-tuning; for example, a Xeon server with a multi-socket architecture is well suited to running one QLoRA instance on each socket. This can be done by simply invoking two or more OneCCL instances inside the BigDL QLoRA docker container:
```bash
docker run -itd \
--name=bigdl-llm-fintune-qlora-cpu \
--cpuset-cpus="your_expected_range_of_cpu_numbers" \
-e STANDALONE_DOCKER=TRUE \
-e WORKER_COUNT_DOCKER=your_worker_count \
-v your_downloaded_base_model_path:/bigdl/model \
-v your_downloaded_data_path:/bigdl/data/alpaca_data_cleaned_archive.json \
intelanalytics/bigdl-llm-finetune-qlora-cpu:2.5.0-SNAPSHOT
```
Note that `STANDALONE_DOCKER` is set to **TRUE** here.
Then, follow the same steps as above to enter the docker container and start fine-tuning:
```bash
bash start-qlora-finetuning-on-cpu.sh
```
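To confirm that multiple ranks are actually running, one simple check (a sketch; the container name follows the `docker run` example above) is to list the training processes from the host:
```bash
# expect WORKER_COUNT_DOCKER python training processes plus the mpirun launcher
docker exec bigdl-llm-fintune-qlora-cpu bash -c "ps -ef | grep qlora_finetuning_cpu.py | grep -v grep"
```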
### 5. Start Distributed Fine-Tuning on Kubernetes
Besides multi-process mode, you can also run QLoRA on a Kubernetes cluster. Please refer to [here](https://github.com/intel-analytics/BigDL/blob/main/docker/llm/finetune/qlora/cpu/kubernetes/README.md).

View file

@ -0,0 +1,46 @@
#!/bin/bash
# this is to run alpaca qlora on k8s
set -x
source /opt/intel/oneapi/setvars.sh
export CCL_WORKER_COUNT=$WORLD_SIZE
source bigdl-llm-init -t
if [ "$WORKER_ROLE" = "launcher" ]
then
sed "s/:1/ /g" /etc/mpi/hostfile > /home/mpiuser/hostfile
sleep 10 # wait for worker pods to be ready
export ACCELERATE_USE_CPU=True
mpirun \
-n $WORLD_SIZE \
-ppn 1 \
-f /home/mpiuser/hostfile \
-iface eth0 \
--bind-to socket \
-genv OMP_NUM_THREADS=48 \
-genv KMP_AFFINITY="granularity=fine,none" \
-genv KMP_BLOCKTIME=1 \
-genv TF_ENABLE_ONEDNN_OPTS=1 \
python /bigdl/alpaca-qlora/alpaca_qlora_finetuning_cpu.py \
--base_model '/bigdl/model' \
--data_path "/bigdl/data" \
--output_dir "/home/mpiuser/finetuned_model" \
--batch_size 128 \
--micro_batch_size $MICRO_BATCH_SIZE > /home/mpiuser/launcher.log 2>&1
exit_status=$?
if [ $exit_status -ne 0 ];
then
cat /home/mpiuser/launcher.log
exit $exit_status
else
while true
do
echo "[INFO] Successfully finished fine-tuning"
sleep 900
done
fi
elif [ "$WORKER_ROLE" = "trainer" ]
then
export LOCAL_RANK=$(cut -d "-" -f6 <<< "$LOCAL_POD_NAME")
export PMI_SIZE=$WORLD_SIZE
export PMI_RANK=$LOCAL_RANK
/usr/sbin/sshd -De -f /home/mpiuser/.sshd_config
fi

View file

@ -1,8 +1,10 @@
#!/bin/bash
set -x
cd /bigdl
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
source /opt/intel/oneapi/setvars.sh
source bigdl-llm-init -t
if [ -d "./model" ];
then
@ -14,5 +16,23 @@ then
DATA_PARAM="--dataset ./data/english_quotes" # otherwise, default to download from HF dataset
fi
python qlora_finetuning_cpu.py $MODEL_PARAM $DATA_PARAM
if [ "$STANDALONE_DOCKER" = "TRUE" ]
then
export CONTAINER_IP=$(hostname -i)
export CPU_CORES=$(nproc)
source /opt/intel/oneapi/setvars.sh
export CCL_WORKER_COUNT=$WORKER_COUNT_DOCKER
export CCL_WORKER_AFFINITY=auto
export MASTER_ADDR=$CONTAINER_IP
mpirun \
-n $CCL_WORKER_COUNT \
-ppn $CCL_WORKER_COUNT \
-genv OMP_NUM_THREADS=$((CPU_CORES / CCL_WORKER_COUNT)) \
-genv KMP_AFFINITY="granularity=fine,none" \
-genv KMP_BLOCKTIME=1 \
-genv TF_ENABLE_ONEDNN_OPTS=1 \
python qlora_finetuning_cpu.py $MODEL_PARAM $DATA_PARAM
else
python qlora_finetuning_cpu.py $MODEL_PARAM $DATA_PARAM
fi

View file

@ -0,0 +1,6 @@
apiVersion: v2
name: bigdl-fintune-service
description: A Helm chart for BigDL Finetune Service on Kubernetes
type: application
version: 1.1.27
appVersion: "1.16.0"

View file

@ -0,0 +1,55 @@
## Run NF4&BF16-quantized QLoRA Finetuning on Kubernetes with OneCCL
![image](https://github.com/intel-analytics/BigDL/assets/60865256/825f47d9-c864-4f39-a331-adb1e3cb528e)
BigDL here provides a CPU optimization to accelerate QLoRA fine-tuning of Llama2-7b through mixed-precision and distributed training. Specifically, [Intel OneCCL](https://www.intel.com/content/www/us/en/developer/tools/oneapi/oneccl.html), an available Hugging Face backend, speeds up PyTorch computation with the BF16 data type on CPUs, while [Intel MPI](https://www.intel.com/content/www/us/en/developer/tools/oneapi/mpi-library.html) enables parallel processing on Kubernetes. Moreover, the advanced quantization of BigDL-LLM is applied to improve memory utilization, which makes large-scale fine-tuning on CPU possible, with NF4 model storage at runtime and BF16 as the compute type.
The overall architecture is illustrated in the figure above.
As shown above, BigDL implements its MPI training with the [Kubeflow MPI operator](https://github.com/kubeflow/mpi-operator/tree/master), which encapsulates the deployment as an MPIJob CRD and helps users construct an MPI worker cluster on Kubernetes, including public key distribution, SSH connection setup, and log collection.
Now, let's deploy a QLoRA fine-tuning job to create a new LLM from Llama2-7b.
**Note: Please make sure you already have an available Kubernetes cluster and NFS shared storage, and have installed the [Helm CLI](https://helm.sh/docs/helm/helm_install/) for Kubernetes job submission.**
### 1. Install Kubeflow MPI Operator
Follow [here](https://github.com/kubeflow/mpi-operator/tree/master#installation) to install the Kubeflow MPI operator in your Kubernetes cluster, which will listen for and receive the following MPIJob requests at the backend.
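For reference, a typical installation and sanity check might look like the following (a sketch; confirm the exact manifest URL and version against the linked instructions):
```bash
# install the MPIJob CRD and the operator controller
kubectl apply --server-side -f https://raw.githubusercontent.com/kubeflow/mpi-operator/master/deploy/v2beta1/mpi-operator.yaml

# verify the CRD is registered and the operator pod is running
kubectl get crd mpijobs.kubeflow.org
kubectl get pods -n mpi-operator
```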
### 2. Download Image, Base Model and Finetuning Data
Follow [here](https://github.com/intel-analytics/BigDL/tree/main/docker/llm/finetune/qlora/cpu/docker#1-prepare-docker-image) to prepare the BigDL QLoRA fine-tuning image in your cluster.
As fine-tuning starts from a base model, first download the [Llama2-7b model from Hugging Face](https://huggingface.co/meta-llama/Llama-2-7b). Then, download the [cleaned alpaca data](https://raw.githubusercontent.com/tloen/alpaca-lora/main/alpaca_data_cleaned_archive.json), which covers a wide range of general knowledge and has already been cleaned. Next, move the downloaded files to a shared directory on your NFS server.
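As a concrete sketch (the NFS mount point `/mnt/nfs/bigdl-qlora` is an assumption; use your own shared directory, and keep the file names consistent with `dataSubPath` and `modelSubPath` in `values.yaml`):
```bash
# cleaned alpaca data (URL from above)
wget https://raw.githubusercontent.com/tloen/alpaca-lora/main/alpaca_data_cleaned_archive.json

# copy the data file and the downloaded base model folder to the NFS shared directory
cp alpaca_data_cleaned_archive.json /mnt/nfs/bigdl-qlora/
cp -r Llama-2-7b-chat-hf /mnt/nfs/bigdl-qlora/
```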
### 3. Deploy through Helm Chart
You can edit and experiment with different parameters in `./kubernetes/values.yaml` to improve fine-tuning performance and accuracy. For example, you can adjust `trainerNum` and `cpuPerPod` according to the number of nodes and CPU cores in your cluster to make full use of these resources, and different `microBatchSize` values result in different training speeds and losses (note that `microBatchSize` × `trainerNum` should not exceed 128, as 128 is the global batch size); a worked example is sketched below.
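As a quick illustration of that constraint (assuming the usual alpaca-lora accumulation scheme, with the global `batch_size` fixed at 128 as in the entrypoint script above):
```bash
# illustrative values only
trainerNum=2
microBatchSize=8
# gradient accumulation steps per trainer = 128 / (microBatchSize * trainerNum)
echo $(( 128 / (microBatchSize * trainerNum) ))   # prints 8
```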
**Note: `dataSubPath` and `modelSubPath` need to have the same names as files under the NFS directory in step 2.**
After preparing parameters in `./kubernetes/values.yaml`, submit the job as below:
```bash
cd ./kubernetes
helm install bigdl-qlora-finetuning .
```
### 4. Check Deployment
```bash
kubectl get all -n bigdl-qlora-finetuning # you will see launcher and worker pods running
```
### 5. Check Finetuning Process
After a successful deployment, you can find the launcher pod, enter it, and check the logs collected from all workers.
```bash
kubectl get all -n bigdl-qlora-finetuning # you will see a launcher pod
kubectl exec -it <launcher_pod_name> bash -n bigdl-qlora-finetuning # enter launcher pod
cat launcher.log # display logs collected from other workers
```
From the log, you can see whether the fine-tuning process has been invoked successfully in all MPI worker pods; a progress bar with fine-tuning speed and estimated time will be shown after the data preprocessing steps (which may take quite a while).
The fine-tuned model is written by worker 0 (which holds rank 0), so you can find the model output inside that pod and copy it to the host with tools like `kubectl cp` or `scp`.
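For example (the worker pod name is derived from the MPIJob naming convention shown in the YAML above; list your pods to confirm it):
```bash
# copy the fine-tuned model from the rank-0 worker pod to the local machine
kubectl cp bigdl-qlora-finetuning/bigdl-qlora-finetuning-job-worker-0:/home/mpiuser/finetuned_model ./finetuned_model
```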

View file

@ -0,0 +1,111 @@
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
name: bigdl-qlora-finetuning-job
namespace: bigdl-qlora-finetuning
spec:
slotsPerWorker: 1
runPolicy:
cleanPodPolicy: Running
sshAuthMountPath: /home/mpiuser/.ssh
mpiImplementation: Intel
mpiReplicaSpecs:
Launcher:
replicas: 1
template:
spec:
volumes:
- name: nfs-storage
persistentVolumeClaim:
claimName: nfs-pvc
containers:
- image: {{ .Values.imageName }}
name: bigdl-qlora-finetuning-launcher
securityContext:
runAsUser: 1000
command: ['sh' , '-c', 'bash /bigdl/bigdl-qlora-finetuing-entrypoint.sh']
env:
- name: WORKER_ROLE
value: "launcher"
- name: WORLD_SIZE
value: "{{ .Values.trainerNum }}"
- name: MICRO_BATCH_SIZE
value: "{{ .Values.microBatchSize }}"
- name: MASTER_PORT
value: "42679"
- name: MASTER_ADDR
value: "bigdl-qlora-finetuning-job-worker-0.bigdl-qlora-finetuning-job-worker"
- name: DATA_SUB_PATH
value: "{{ .Values.dataSubPath }}"
- name: http_proxy
value: "{{ .Values.httpProxy }}"
- name: https_proxy
value: "{{ .Values.httpsProxy }}"
- name: LOCAL_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
volumeMounts:
- name: nfs-storage
subPath: {{ .Values.modelSubPath }}
mountPath: /bigdl/model
- name: nfs-storage
subPath: {{ .Values.dataSubPath }}
mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
Worker:
replicas: {{ .Values.trainerNum }}
template:
spec:
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: training.kubeflow.org/job-role
operator: In
values:
- worker
topologyKey: kubernetes.io/hostname
containers:
- image: {{ .Values.imageName }}
name: bigdl-qlora-finetuning-worker
securityContext:
runAsUser: 1000
command: ['sh' , '-c', 'bash /bigdl/bigdl-qlora-finetuing-entrypoint.sh']
env:
- name: WORKER_ROLE
value: "trainer"
- name: WORLD_SIZE
value: "{{ .Values.trainerNum }}"
- name: MICRO_BATCH_SIZE
value: "{{ .Values.microBatchSize }}"
- name: MASTER_PORT
value: "42679"
- name: MASTER_ADDR
value: "bigdl-qlora-finetuning-job-worker-0.bigdl-qlora-finetuning-job-worker"
- name: http_proxy
value: "{{ .Values.httpProxy }}"
- name: https_proxy
value: "{{ .Values.httpsProxy }}"
- name: LOCAL_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
volumeMounts:
- name: nfs-storage
subPath: {{ .Values.modelSubPath }}
mountPath: /bigdl/model
- name: nfs-storage
subPath: {{ .Values.dataSubPath }}
mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
resources:
requests:
cpu: 48
limits:
cpu: 48
volumes:
- name: nfs-storage
persistentVolumeClaim:
claimName: nfs-pvc

View file

@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: bigdl-qlora-finetuning

View file

@ -0,0 +1,15 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: nfs-pv-bigdl-qlora-finetuning
namespace: bigdl-qlora-finetuning
spec:
capacity:
storage: 15Gi
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Retain
storageClassName: nfs
nfs:
path: {{ .Values.nfsPath }}
server: {{ .Values.nfsServerIp }}

View file

@ -0,0 +1,12 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: nfs-pvc
namespace: bigdl-qlora-finetuning
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
storageClassName: nfs

View file

@ -0,0 +1,9 @@
imageName: intelanalytics/bigdl-llm-finetune-qlora-cpu:2.5.0-SNAPSHOT
trainerNum: 2
microBatchSize: 8
nfsServerIp: your_nfs_server_ip
nfsPath: a_nfs_shared_folder_path_on_the_server
dataSubPath: alpaca_data_cleaned_archive.json # a subpath of the data file under nfs directory
modelSubPath: Llama-2-7b-chat-hf # a subpath of the model file (dir) under nfs directory
httpProxy: "your_http_proxy_like_http://xxx:xxxx_if_needed_else_empty"
httpsProxy: "your_https_proxy_like_http://xxx:xxxx_if_needed_else_empty"