changed '/ppml' into '/bigdl' and modified llama-7b (#9209)
This commit is contained in:
		
							parent
							
								
									b9194c5786
								
							
						
					
					
						commit
						2f14f53b1c
					
				
					 5 changed files with 25 additions and 25 deletions
				
			
		| 
						 | 
				
			
			@ -12,13 +12,13 @@ FROM mpioperator/intel as builder
 | 
			
		|||
ARG http_proxy
 | 
			
		||||
ARG https_proxy
 | 
			
		||||
ENV PIP_NO_CACHE_DIR=false
 | 
			
		||||
ADD ./requirements.txt /ppml/requirements.txt
 | 
			
		||||
ADD ./requirements.txt /bigdl/requirements.txt
 | 
			
		||||
 | 
			
		||||
# add public key
 | 
			
		||||
COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg
 | 
			
		||||
RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list
 | 
			
		||||
 | 
			
		||||
RUN mkdir /ppml/data && mkdir /ppml/model && \
 | 
			
		||||
RUN mkdir /bigdl/data && mkdir /bigdl/model && \
 | 
			
		||||
# install pytorch 2.0.1
 | 
			
		||||
    apt-get update && \
 | 
			
		||||
    apt-get install -y python3-pip python3.9-dev python3-wheel git software-properties-common && \
 | 
			
		||||
| 
						 | 
				
			
			@ -29,12 +29,12 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \
 | 
			
		|||
    pip install intel_extension_for_pytorch==2.0.100 && \
 | 
			
		||||
    pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \
 | 
			
		||||
# install transformers etc.
 | 
			
		||||
    cd /ppml && \
 | 
			
		||||
    cd /bigdl && \
 | 
			
		||||
    git clone https://github.com/huggingface/transformers.git && \
 | 
			
		||||
    cd transformers && \
 | 
			
		||||
    git reset --hard 057e1d74733f52817dc05b673a340b4e3ebea08c && \
 | 
			
		||||
    pip install . && \
 | 
			
		||||
    pip install -r /ppml/requirements.txt && \
 | 
			
		||||
    pip install -r /bigdl/requirements.txt && \
 | 
			
		||||
# install python
 | 
			
		||||
    add-apt-repository ppa:deadsnakes/ppa -y && \
 | 
			
		||||
    apt-get install -y python3.9 && \
 | 
			
		||||
| 
						 | 
				
			
			@ -56,9 +56,9 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \
 | 
			
		|||
    echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
 | 
			
		||||
    sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
 | 
			
		||||
 | 
			
		||||
ADD ./bigdl-lora-finetuing-entrypoint.sh /ppml/bigdl-lora-finetuing-entrypoint.sh
 | 
			
		||||
ADD ./lora_finetune.py /ppml/lora_finetune.py
 | 
			
		||||
ADD ./bigdl-lora-finetuing-entrypoint.sh /bigdl/bigdl-lora-finetuing-entrypoint.sh
 | 
			
		||||
ADD ./lora_finetune.py /bigdl/lora_finetune.py
 | 
			
		||||
 | 
			
		||||
RUN chown -R mpiuser /ppml
 | 
			
		||||
RUN chown -R mpiuser /bigdl
 | 
			
		||||
USER mpiuser
 | 
			
		||||
ENTRYPOINT ["/bin/bash"]
 | 
			
		||||
| 
						 | 
				
			
			@ -31,8 +31,8 @@ docker run -itd \
 | 
			
		|||
 --cpuset-cpus="your_expected_range_of_cpu_numbers" \
 | 
			
		||||
 -e STANDALONE_DOCKER=TRUE \
 | 
			
		||||
 -e WORKER_COUNT_DOCKER=your_worker_count \
 | 
			
		||||
 -v your_downloaded_base_model_path:/ppml/model \
 | 
			
		||||
 -v your_downloaded_data_path:/ppml/data/alpaca_data_cleaned_archive.json \
 | 
			
		||||
 -v your_downloaded_base_model_path:/bigdl/model \
 | 
			
		||||
 -v your_downloaded_data_path:/bigdl/data/alpaca_data_cleaned_archive.json \
 | 
			
		||||
 intelanalytics/bigdl-llm-finetune-cpu:2.4.0-SNAPSHOT \
 | 
			
		||||
 bash
 | 
			
		||||
```
 | 
			
		||||
| 
						 | 
				
			
			@ -50,15 +50,15 @@ docker exec -it bigdl-llm-fintune-lora-cpu bash
 | 
			
		|||
Then, run the script to start finetuning:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
bash /ppml/bigdl-lora-finetuing-entrypoint.sh
 | 
			
		||||
bash /bigdl/bigdl-lora-finetuing-entrypoint.sh
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
After minutes, it is expected to get results like:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
Training Alpaca-LoRA model with params:
 | 
			
		||||
base_model: /ppml/model/
 | 
			
		||||
data_path: /ppml/data/alpaca_data_cleaned_archive.json
 | 
			
		||||
base_model: /bigdl/model/
 | 
			
		||||
data_path: /bigdl/data/alpaca_data_cleaned_archive.json
 | 
			
		||||
output_dir: /home/mpiuser/finetuned_model
 | 
			
		||||
batch_size: 128
 | 
			
		||||
micro_batch_size: 8
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -15,9 +15,9 @@ then
 | 
			
		|||
     -genv KMP_AFFINITY="granularity=fine,none" \
 | 
			
		||||
     -genv KMP_BLOCKTIME=1 \
 | 
			
		||||
     -genv TF_ENABLE_ONEDNN_OPTS=1 \
 | 
			
		||||
     python /ppml/lora_finetune.py \
 | 
			
		||||
       --base_model '/ppml/model/'  \
 | 
			
		||||
       --data_path "/ppml/data/alpaca_data_cleaned_archive.json" \
 | 
			
		||||
     python /bigdl/lora_finetune.py \
 | 
			
		||||
       --base_model '/bigdl/model/'  \
 | 
			
		||||
       --data_path "/bigdl/data/alpaca_data_cleaned_archive.json" \
 | 
			
		||||
       --output_dir "/home/mpiuser/finetuned_model" \
 | 
			
		||||
       --micro_batch_size 8 \
 | 
			
		||||
       --bf16 
 | 
			
		||||
| 
						 | 
				
			
			@ -29,7 +29,7 @@ else
 | 
			
		|||
  if [ "$WORKER_ROLE" = "launcher" ]
 | 
			
		||||
  then
 | 
			
		||||
    sed "s/:1/ /g" /etc/mpi/hostfile > /home/mpiuser/hostfile
 | 
			
		||||
    export DATA_PATH="/ppml/data/$DATA_SUB_PATH"
 | 
			
		||||
    export DATA_PATH="/bigdl/data/$DATA_SUB_PATH"
 | 
			
		||||
    sleep 10
 | 
			
		||||
    mpirun \
 | 
			
		||||
      -n $WORLD_SIZE \
 | 
			
		||||
| 
						 | 
				
			
			@ -40,8 +40,8 @@ else
 | 
			
		|||
      -genv KMP_AFFINITY="granularity=fine,none" \
 | 
			
		||||
      -genv KMP_BLOCKTIME=1 \
 | 
			
		||||
      -genv TF_ENABLE_ONEDNN_OPTS=1 \
 | 
			
		||||
      python /ppml/lora_finetune.py \
 | 
			
		||||
        --base_model '/ppml/model/'  \
 | 
			
		||||
      python /bigdl/lora_finetune.py \
 | 
			
		||||
        --base_model '/bigdl/model/'  \
 | 
			
		||||
        --data_path "$DATA_PATH" \
 | 
			
		||||
        --output_dir "/home/mpiuser/finetuned_model" \
 | 
			
		||||
        --micro_batch_size $MICRO_BATCH_SIZE \
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -23,7 +23,7 @@ spec:
 | 
			
		|||
             name: bigdl-ppml-finetuning-launcher
 | 
			
		||||
             securityContext:
 | 
			
		||||
              runAsUser: 1000
 | 
			
		||||
             command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh']
 | 
			
		||||
             command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh']
 | 
			
		||||
             env:
 | 
			
		||||
             - name: WORKER_ROLE
 | 
			
		||||
               value: "launcher"
 | 
			
		||||
| 
						 | 
				
			
			@ -46,10 +46,10 @@ spec:
 | 
			
		|||
             volumeMounts:
 | 
			
		||||
             - name: nfs-storage
 | 
			
		||||
               subPath: {{ .Values.modelSubPath }}
 | 
			
		||||
               mountPath: /ppml/model
 | 
			
		||||
               mountPath: /bigdl/model
 | 
			
		||||
             - name: nfs-storage
 | 
			
		||||
               subPath: {{ .Values.dataSubPath }}
 | 
			
		||||
               mountPath: "/ppml/data/{{ .Values.dataSubPath }}"
 | 
			
		||||
               mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
 | 
			
		||||
    Worker:
 | 
			
		||||
      replicas: {{ .Values.trainerNum }}
 | 
			
		||||
      template:
 | 
			
		||||
| 
						 | 
				
			
			@ -59,7 +59,7 @@ spec:
 | 
			
		|||
            name: bigdl-ppml-finetuning-worker
 | 
			
		||||
            securityContext:
 | 
			
		||||
              runAsUser: 1000
 | 
			
		||||
            command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh']
 | 
			
		||||
            command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh']
 | 
			
		||||
            env:
 | 
			
		||||
            - name: WORKER_ROLE
 | 
			
		||||
              value: "trainer"
 | 
			
		||||
| 
						 | 
				
			
			@ -78,10 +78,10 @@ spec:
 | 
			
		|||
            volumeMounts:
 | 
			
		||||
            - name: nfs-storage
 | 
			
		||||
              subPath: {{ .Values.modelSubPath }}
 | 
			
		||||
              mountPath: /ppml/model
 | 
			
		||||
              mountPath: /bigdl/model
 | 
			
		||||
            - name: nfs-storage
 | 
			
		||||
              subPath: {{ .Values.dataSubPath }}
 | 
			
		||||
              mountPath: "/ppml/data/{{ .Values.dataSubPath }}"
 | 
			
		||||
              mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
 | 
			
		||||
            resources:
 | 
			
		||||
              requests:
 | 
			
		||||
                cpu: {{ .Values.cpuPerPod }}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,6 +4,6 @@ microBatchSize: 8
 | 
			
		|||
nfsServerIp: your_nfs_server_ip
 | 
			
		||||
nfsPath: a_nfs_shared_folder_path_on_the_server
 | 
			
		||||
dataSubPath: alpaca_data_cleaned_archive.json # a subpath of the data file under nfs directory
 | 
			
		||||
modelSubPath: llama-7b-hf # a subpath of the model file (dir) under nfs directory
 | 
			
		||||
modelSubPath: Llama-2-7b-chat-hf # a subpath of the model file (dir) under nfs directory
 | 
			
		||||
ompNumThreads: 14
 | 
			
		||||
cpuPerPod: 42
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue