From d30b22a81bf495c425eaa91780843bef18b12dae Mon Sep 17 00:00:00 2001
From: Qiyuan Gong
Date: Tue, 16 Apr 2024 14:47:45 +0800
Subject: [PATCH] Refine axolotl 0.3.0 documents and links (#10764)

* Refine axolotl 0.3 based on comments
* Rename requirements to requirements-xpu
* Add comments for paged_adamw_32bit
* Change lora_r from 8 to 16
---
 .../GPU/LLM-Finetuning/axolotl/README.md      | 31 ++++++----------
 .../GPU/LLM-Finetuning/axolotl/finetune.py    | 36 +++++++++++++++++--
 .../GPU/LLM-Finetuning/axolotl/qlora.yml      |  7 ++--
 ...{requirements.txt => requirements-xpu.txt} |  6 ++--
 4 files changed, 53 insertions(+), 27 deletions(-)
 rename python/llm/example/GPU/LLM-Finetuning/axolotl/{requirements.txt => requirements-xpu.txt} (74%)

diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md b/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md
index da705d1f..ae6c7ca4 100644
--- a/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md
@@ -1,6 +1,6 @@
-# Finetune LLM on Intel GPU using axolotl without writing code
+# Finetune LLM on Intel GPU using axolotl v0.3.0 without writing code
 
-This example demonstrates how to easily run LLM finetuning application using axolotl and IPEX-LLM 4bit optimizations with [Intel GPUs](../../../README.md). By applying IPEX-LLM patch, you could use axolotl on Intel GPUs using IPEX-LLM optimization without writing code.
+This example demonstrates how to easily run an LLM finetuning application using [axolotl v0.3.0](https://github.com/OpenAccess-AI-Collective/axolotl/tree/v0.3.0) and IPEX-LLM 4-bit optimizations on [Intel GPUs](../../../README.md). By applying the IPEX-LLM patch, you can use axolotl on Intel GPUs with IPEX-LLM optimizations without writing code.
 
 Note, this example is just used for illustrating related usage and don't guarantee convergence of training.
 
@@ -15,36 +15,35 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install transformers==4.34.0 datasets
-pip install fire peft==0.5.0
 # install axolotl v0.3.0
 git clone https://github.com/OpenAccess-AI-Collective/axolotl
 cd axolotl
 git checkout v0.3.0
-# replace default requirements.txt in axolotl to avoid conflict
-cp ../requirements.txt .
+cp ../requirements-xpu.txt requirements.txt
 pip install -e .
-# change to transformers 4.34.0
-pip install transformers==4.34.0 datasets
 ```
 
 ### 2. Configures OneAPI environment variables and accelerate
 
+Configure OneAPI environment variables:
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
 
-Config `accelerate`
+Configure `accelerate` interactively in the command line:
 
 ```bash
 accelerate config
 ```
 
-Ensure `use_cpu` is disable in config (`~/.cache/huggingface/accelerate/default_config.yaml`).
+Please answer `NO` to the prompt `Do you want to run your training on CPU only (even if a GPU / Apple Silicon device is available)? [yes/NO]:`.
 
-### 3. Finetune
+After finishing `accelerate config`, check that `use_cpu` is disabled (i.e., `use_cpu: false`) in the accelerate config file (`~/.cache/huggingface/accelerate/default_config.yaml`).
 
-This example shows how to run [Alpaca QLoRA finetune on Llama-2](https://github.com/artidoro/qlora) directly on Intel GPU, based on [axolotl Llama-2 qlora example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/examples/llama-2/qlora.yml).
+### 3. Finetune Llama-2-7B
+
+This example shows how to run [Alpaca QLoRA finetuning on Llama-2](https://github.com/artidoro/qlora) directly on an Intel GPU, based on the [axolotl Llama-2 QLoRA example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/examples/llama-2/qlora.yml). Note that only the Llama-2-7B QLoRA example has been verified, on an Intel Arc A770 with 16GB memory.
 
 Modify parameters in `qlora.yml` based on your requirements.
 
@@ -68,11 +67,3 @@ Output in console
 {'loss': 0.9651, 'learning_rate': 0.00019189578116202307, 'epoch': 0.54}
 {'loss': 0.9067, 'learning_rate': 0.00019107766703887764, 'epoch': 0.56}
 ```
-
-### 4. Other examples
-
-Please refer to [axolotl examples](https://github.com/OpenAccess-AI-Collective/axolotl/tree/v0.3.0/examples) for more models. Download `xxx.yml` and replace `qlora.yml` with new `xxx.yml`.
-
-```
-accelerate launch finetune.py xxx.yml
-```
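The README hunk above ends the accelerate step on a single setting, which can be verified without rerunning the interactive tool. A minimal sketch of that check, assuming `accelerate config` has already been run so the default config file named in the README exists:

```bash
# Print the use_cpu setting from the default accelerate config;
# for GPU finetuning it should read "use_cpu: false".
grep use_cpu ~/.cache/huggingface/accelerate/default_config.yaml
```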
diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/finetune.py b/python/llm/example/GPU/LLM-Finetuning/axolotl/finetune.py
index e434e529..15d9e7fc 100644
--- a/python/llm/example/GPU/LLM-Finetuning/axolotl/finetune.py
+++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/finetune.py
@@ -1,3 +1,37 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This file is copied from
+# https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/scripts/finetune.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ipex_llm import llm_patch
+llm_patch(train=True)
+# The following is the original axolotl finetune code (without IPEX-LLM)
+
 """Prepare and train a model on a dataset. Can also infer from a model or merge lora"""
 
 import importlib
@@ -8,8 +42,6 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
-from ipex_llm import llm_patch
-llm_patch(train=True)
 import fire
 import torch
 import transformers
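The `finetune.py` hunks above move `llm_patch(train=True)` from the import block to the very top of the file, so the IPEX-LLM patch is applied before `transformers` and `peft` are imported, and everything below the marker comment matches the upstream axolotl script. A one-line sanity check of that import order (a sketch, not part of the patch; it assumes the `llm` conda environment from the README is active):

```bash
# Mirror the new import order in finetune.py: apply the IPEX-LLM patch first,
# then import transformers; this should print the pinned version, 4.34.0.
python -c "from ipex_llm import llm_patch; llm_patch(train=True); import transformers; print(transformers.__version__)"
```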
diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml b/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml
index d9f3d86f..7e5c2fbe 100644
--- a/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml
+++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml
@@ -1,3 +1,4 @@
+# This file is copied from https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/examples/llama-2/qlora.yml
 base_model: meta-llama/Llama-2-7b-hf
 base_model_config: meta-llama/Llama-2-7b-hf
 model_type: LlamaForCausalLM
@@ -22,7 +23,7 @@ sequence_len: 4096
 sample_packing: true
 pad_to_sequence_len: true
 
-lora_r: 8
+lora_r: 16
 lora_alpha: 16
 lora_dropout: 0.05
 lora_target_modules:
@@ -38,8 +39,8 @@ wandb_log_model:
 gradient_accumulation_steps: 2
 micro_batch_size: 1
 num_epochs: 3
-# change optimizer from paged_adamw_32bit to adamw_torch
-# due to bitsandbytes issue https://github.com/TimDettmers/bitsandbytes/issues/244
+# paged_adamw_32bit is not supported
+# due to bitsandbytes issue https://github.com/TimDettmers/bitsandbytes/issues/1180
 # optimizer: paged_adamw_32bit
 optimizer: adamw_torch
 lr_scheduler: cosine
diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/requirements.txt b/python/llm/example/GPU/LLM-Finetuning/axolotl/requirements-xpu.txt
similarity index 74%
rename from python/llm/example/GPU/LLM-Finetuning/axolotl/requirements.txt
rename to python/llm/example/GPU/LLM-Finetuning/axolotl/requirements-xpu.txt
index 6b93ba88..942a5ea0 100644
--- a/python/llm/example/GPU/LLM-Finetuning/axolotl/requirements.txt
+++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/requirements-xpu.txt
@@ -1,9 +1,11 @@
+# This file is copied from https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/requirements.txt
 --extra-index-url https://download.pytorch.org/whl/cu118
 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
 # torch==2.1.0
-#auto-gptq
+# auto-gptq
 packaging
 peft==0.5.0
+transformers==4.34.0
 bitsandbytes>=0.41.1
 accelerate==0.23.0
 addict
@@ -15,7 +17,7 @@ flash-attn>=2.2.1
 sentencepiece
 wandb
 einops
-#xformers
+# xformers
 optimum
 hf_transfer
 colorama
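Taken together, the renamed `requirements-xpu.txt`, the patched `finetune.py`, and the updated `qlora.yml` keep the example a single short workflow. A condensed recap of the new flow (a sketch; it assumes the axolotl clone sits inside the example directory as in step 1 of the README, and it reuses the `accelerate launch finetune.py <config>.yml` pattern from the section this patch removes):

```bash
# From python/llm/example/GPU/LLM-Finetuning/axolotl, after step 1's git clone:
cd axolotl
cp ../requirements-xpu.txt requirements.txt  # pin XPU-friendly deps (transformers==4.34.0, peft==0.5.0)
pip install -e .
cd ..
# Launch QLoRA finetuning with the updated config (lora_r: 16, optimizer: adamw_torch)
accelerate launch finetune.py qlora.yml
```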