XPU QLoRA Container (#9082)
* XPU QLoRA Container * fix apt issue * refine
This commit is contained in:
		
							parent
							
								
									aad68100ae
								
							
						
					
					
						commit
						2756f9c20d
					
				
					 3 changed files with 145 additions and 0 deletions
				
			
		
							
								
								
									
										38
									
								
								docker/llm/finetune/qlora/xpu/docker/Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								docker/llm/finetune/qlora/xpu/docker/Dockerfile
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,38 @@
 | 
			
		|||
FROM intel/oneapi-basekit:2023.2.1-devel-ubuntu22.04

# Build-time proxy settings (pass with --build-arg; not persisted as ENV).
ARG http_proxy
ARG https_proxy
ENV TZ=Asia/Shanghai
ARG PIP_NO_CACHE_DIR=false
# Pin transformers to a known-good commit for QLoRA on XPU.
ENV TRANSFORMERS_COMMIT_ID=95fe0f5

# retrieve oneapi repo public key
RUN curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list && \
    # retrieve intel gpu driver repo public key
    wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
    echo 'deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy arc' | tee /etc/apt/sources.list.d/intel.gpu.jammy.list && \
    # drop the stale graphics list shipped with the base image so apt-get update succeeds
    rm /etc/apt/sources.list.d/intel-graphics.list && \
    # update dependencies
    apt-get update && \
    # install basic dependencies
    apt-get install -y curl wget git gnupg gpg-agent software-properties-common libunwind8-dev vim less && \
    # install Intel GPU driver
    apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero level-zero-dev && \
    # install python 3.9 (set timezone first so tzdata configures non-interactively)
    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
    env DEBIAN_FRONTEND=noninteractive apt-get update && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get install -y python3.9 && \
    # make python3.9 the default python3/python (order matters: symlink before pip install)
    rm /usr/bin/python3 && \
    ln -s /usr/bin/python3.9 /usr/bin/python3 && \
    ln -s /usr/bin/python3 /usr/bin/python && \
    apt-get install -y python3-pip python3.9-dev python3-wheel python3.9-distutils && \
    # fix: get-pip.py was previously downloaded but never run and left in the image;
    # execute it so pip targets python3.9, then remove the installer
    curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python3 get-pip.py && \
    rm get-pip.py && \
    # install XPU bigdl-llm
    pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu && \
    # install huggingface dependencies
    pip install git+https://github.com/huggingface/transformers.git@${TRANSFORMERS_COMMIT_ID} && \
    pip install peft==0.5.0 datasets && \
    # fetch the example fine-tuning entry script invoked by the start script
    wget https://raw.githubusercontent.com/intel-analytics/BigDL/main/python/llm/example/gpu/qlora_finetuning/qlora_finetuning.py && \
    # clean apt lists in the same layer to keep the image small
    rm -rf /var/lib/apt/lists/*

# COPY (not ADD) for a plain local file
COPY ./start-qlora-finetuning-on-xpu.sh /start-qlora-finetuning-on-xpu.sh
 | 
			
		||||
							
								
								
									
										89
									
								
								docker/llm/finetune/qlora/xpu/docker/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								docker/llm/finetune/qlora/xpu/docker/README.md
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,89 @@
 | 
			
		|||
## Fine-tune LLM with BigDL LLM Container
 | 
			
		||||
 | 
			
		||||
The following shows how to fine-tune LLM with Quantization (QLoRA built on BigDL-LLM 4bit optimizations) in a docker environment, which is accelerated by Intel XPU.
 | 
			
		||||
 | 
			
		||||
### 1. Prepare Docker Image
 | 
			
		||||
 | 
			
		||||
You can download directly from Dockerhub like:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
docker pull intelanalytics/bigdl-llm-finetune-qlora-xpu:2.4.0-SNAPSHOT
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Or build the image from source:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
export HTTP_PROXY=your_http_proxy
 | 
			
		||||
export HTTPS_PROXY=your_https_proxy
 | 
			
		||||
 | 
			
		||||
docker build \
 | 
			
		||||
  --build-arg http_proxy=${HTTP_PROXY} \
 | 
			
		||||
  --build-arg https_proxy=${HTTPS_PROXY} \
 | 
			
		||||
  -t intelanalytics/bigdl-llm-finetune-qlora-xpu:2.4.0-SNAPSHOT \
 | 
			
		||||
  -f ./Dockerfile .
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### 2. Prepare Base Model, Data and Container
 | 
			
		||||
 | 
			
		||||
Here, we fine-tune a [Llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b) model with the [English Quotes](https://huggingface.co/datasets/Abirate/english_quotes) dataset. Please download them first, and then start a docker container with the files mounted as below:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
export BASE_MODE_PATH=<your_downloaded_base_model_path>
 | 
			
		||||
export DATA_PATH=<your_downloaded_data_path>
 | 
			
		||||
 | 
			
		||||
docker run -itd \
 | 
			
		||||
   --net=host \
 | 
			
		||||
   --device=/dev/dri \
 | 
			
		||||
   --memory="32G" \
 | 
			
		||||
   --name=bigdl-llm-fintune-qlora-xpu \
 | 
			
		||||
   -v $BASE_MODE_PATH:/model \
 | 
			
		||||
   -v $DATA_PATH:/data/english_quotes \
 | 
			
		||||
   --shm-size="16g" \
 | 
			
		||||
   intelanalytics/bigdl-llm-finetune-qlora-xpu:2.4.0-SNAPSHOT
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The download and mount of base model and data to a docker container demonstrates a standard fine-tuning process. You can skip this step for a quick start, and in this way, the fine-tuning codes will automatically download the needed files:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
docker run -itd \
 | 
			
		||||
   --net=host \
 | 
			
		||||
   --device=/dev/dri \
 | 
			
		||||
   --memory="32G" \
 | 
			
		||||
   --name=bigdl-llm-fintune-qlora-xpu \
 | 
			
		||||
   --shm-size="16g" \
 | 
			
		||||
   intelanalytics/bigdl-llm-finetune-qlora-xpu:2.4.0-SNAPSHOT
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
However, we do recommend downloading the base model and dataset manually, because the automatic download can be blocked by network restrictions or Hugging Face authentication requirements depending on your environment, and the manual method also allows you to fine-tune in a custom way (with a different base model and dataset).
 | 
			
		||||
 | 
			
		||||
### 3. Start Fine-Tuning
 | 
			
		||||
 | 
			
		||||
Enter the running container:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
docker exec -it bigdl-llm-fintune-qlora-xpu bash
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Then, start QLoRA fine-tuning:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
bash start-qlora-finetuning-on-xpu.sh
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
After minutes, it is expected to get results like:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
{'loss': 2.256, 'learning_rate': 0.0002, 'epoch': 0.03}
 | 
			
		||||
{'loss': 1.8869, 'learning_rate': 0.00017777777777777779, 'epoch': 0.06}
 | 
			
		||||
{'loss': 1.5334, 'learning_rate': 0.00015555555555555556, 'epoch': 0.1}
 | 
			
		||||
{'loss': 1.4975, 'learning_rate': 0.00013333333333333334, 'epoch': 0.13}
 | 
			
		||||
{'loss': 1.3245, 'learning_rate': 0.00011111111111111112, 'epoch': 0.16}
 | 
			
		||||
{'loss': 1.2622, 'learning_rate': 8.888888888888889e-05, 'epoch': 0.19}
 | 
			
		||||
{'loss': 1.3944, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.22}
 | 
			
		||||
{'loss': 1.2481, 'learning_rate': 4.4444444444444447e-05, 'epoch': 0.26}
 | 
			
		||||
{'loss': 1.3442, 'learning_rate': 2.2222222222222223e-05, 'epoch': 0.29}
 | 
			
		||||
{'loss': 1.3256, 'learning_rate': 0.0, 'epoch': 0.32}
 | 
			
		||||
{'train_runtime': 204.4633, 'train_samples_per_second': 3.913, 'train_steps_per_second': 0.978, 'train_loss': 1.5072882556915284, 'epoch': 0.32}
 | 
			
		||||
100%|██████████████████████████████████████████████████████████████████████████████████████| 200/200 [03:24<00:00,  1.02s/it]
 | 
			
		||||
TrainOutput(global_step=200, training_loss=1.5072882556915284, metrics={'train_runtime': 204.4633, 'train_samples_per_second': 3.913, 'train_steps_per_second': 0.978, 'train_loss': 1.5072882556915284, 'epoch': 0.32})
 | 
			
		||||
```
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,18 @@
 | 
			
		|||
#!/bin/bash
# Launch QLoRA fine-tuning on Intel XPU.
# Optional mounts: ./model (base model) and ./data/english_quotes (dataset);
# when absent, qlora_finetuning.py falls back to downloading from Hugging Face.
set -x

# SYCL / Level Zero runtime settings used for XPU execution.
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1

# Bring the oneAPI toolchain into the environment.
source /opt/intel/oneapi/setvars.sh

MODEL_PARAM=""
DATA_PARAM=""

# Prefer the locally mounted model; otherwise the script downloads from the HF repo.
[ -d "./model" ] && MODEL_PARAM="--repo-id-or-model-path ./model"

# Prefer the locally mounted dataset; otherwise the script downloads from the HF dataset.
[ -d "./data/english_quotes" ] && DATA_PARAM="--dataset ./data/english_quotes"

# Deliberately unquoted: each *_PARAM expands to a "flag value" word pair (or nothing).
python qlora_finetuning.py $MODEL_PARAM $DATA_PARAM
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in a new issue