From d30b22a81bf495c425eaa91780843bef18b12dae Mon Sep 17 00:00:00 2001
From: Qiyuan Gong
Date: Tue, 16 Apr 2024 14:47:45 +0800
Subject: [PATCH] Refine axolotl 0.3.0 documents and links (#10764)

* Refine axolotl 0.3 based on comments
* Rename requirements to requirements-xpu
* Add comments for paged_adamw_32bit
* Change lora_r from 8 to 16
---
 .../GPU/LLM-Finetuning/axolotl/README.md      | 31 ++++++----------
 .../GPU/LLM-Finetuning/axolotl/finetune.py    | 36 +++++++++++++++++--
 .../GPU/LLM-Finetuning/axolotl/qlora.yml      |  7 ++--
 ...{requirements.txt => requirements-xpu.txt} |  6 ++--
 4 files changed, 53 insertions(+), 27 deletions(-)
 rename python/llm/example/GPU/LLM-Finetuning/axolotl/{requirements.txt => requirements-xpu.txt} (74%)

diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md b/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md
index da705d1f..ae6c7ca4 100644
--- a/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md
@@ -1,6 +1,6 @@
-# Finetune LLM on Intel GPU using axolotl without writing code
+# Finetune LLM on Intel GPU using axolotl v0.3.0 without writing code
 
-This example demonstrates how to easily run LLM finetuning application using axolotl and IPEX-LLM 4bit optimizations with [Intel GPUs](../../../README.md). By applying IPEX-LLM patch, you could use axolotl on Intel GPUs using IPEX-LLM optimization without writing code.
+This example demonstrates how to easily run an LLM finetuning application using [axolotl v0.3.0](https://github.com/OpenAccess-AI-Collective/axolotl/tree/v0.3.0) and IPEX-LLM 4-bit optimizations on [Intel GPUs](../../../README.md). By applying the IPEX-LLM patch, you can use axolotl on Intel GPUs with IPEX-LLM optimizations without writing code.
 
 Note, this example is just used for illustrating related usage and don't guarantee convergence of training.
 
@@ -15,36 +15,35 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install transformers==4.34.0 datasets
-pip install fire peft==0.5.0
 # install axolotl v0.3.0
 git clone https://github.com/OpenAccess-AI-Collective/axolotl
 cd axolotl
 git checkout v0.3.0
-# replace default requirements.txt in axolotl to avoid conflict
-cp ../requirements.txt .
+cp ../requirements-xpu.txt requirements.txt
 pip install -e .
-# change to transformers 4.34.0
-pip install transformers==4.34.0 datasets
 ```
 
 ### 2. Configures OneAPI environment variables and accelerate
 
+Configure OneAPI environment variables:
+
 ```bash
 source /opt/intel/oneapi/setvars.sh
 ```
 
-Config `accelerate`
+Configure `accelerate` interactively in the command line:
 
 ```bash
 accelerate config
 ```
 
-Ensure `use_cpu` is disable in config (`~/.cache/huggingface/accelerate/default_config.yaml`).
+Please answer `NO` to the prompt `Do you want to run your training on CPU only (even if a GPU / Apple Silicon device is available)? [yes/NO]:`.
 
-### 3. Finetune
+After finishing `accelerate config`, check that `use_cpu` is disabled (i.e., `use_cpu: false`) in the accelerate config file (`~/.cache/huggingface/accelerate/default_config.yaml`).
 
-This example shows how to run [Alpaca QLoRA finetune on Llama-2](https://github.com/artidoro/qlora) directly on Intel GPU, based on [axolotl Llama-2 qlora example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/examples/llama-2/qlora.yml).
+### 3. Finetune Llama-2-7B
+
+This example shows how to run [Alpaca QLoRA finetuning on Llama-2](https://github.com/artidoro/qlora) directly on an Intel GPU, based on the [axolotl Llama-2 QLoRA example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/examples/llama-2/qlora.yml). Note that only the Llama-2-7B QLoRA example has been verified, on an Intel Arc A770 with 16GB memory.
 
 Modify parameters in `qlora.yml` based on your requirements.
 
@@ -68,11 +67,3 @@ Output in console
 {'loss': 0.9651, 'learning_rate': 0.00019189578116202307, 'epoch': 0.54}
 {'loss': 0.9067, 'learning_rate': 0.00019107766703887764, 'epoch': 0.56}
 ```
-
-### 4. Other examples
-
-Please refer to [axolotl examples](https://github.com/OpenAccess-AI-Collective/axolotl/tree/v0.3.0/examples) for more models. Download `xxx.yml` and replace `qlora.yml` with new `xxx.yml`.
-
-```
-accelerate launch finetune.py xxx.yml
-```
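The README hunk above ends the accelerate step on a single setting, which can be verified without rerunning the interactive tool. A minimal sketch of that check, assuming `accelerate config` has already been run so the default config file named in the README exists:

```bash
# Print the use_cpu setting from the default accelerate config;
# for GPU finetuning it should read "use_cpu: false".
grep use_cpu ~/.cache/huggingface/accelerate/default_config.yaml
```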
diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/finetune.py b/python/llm/example/GPU/LLM-Finetuning/axolotl/finetune.py
index e434e529..15d9e7fc 100644
--- a/python/llm/example/GPU/LLM-Finetuning/axolotl/finetune.py
+++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/finetune.py
@@ -1,3 +1,37 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This file is copied from
+# https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/scripts/finetune.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ipex_llm import llm_patch
+llm_patch(train=True)
+# The following is the original axolotl finetune code (without IPEX-LLM)
+
 """Prepare and train a model on a dataset. Can also infer from a model or merge lora"""
 
 import importlib
@@ -8,8 +42,6 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
-from ipex_llm import llm_patch
-llm_patch(train=True)
 import fire
 import torch
 import transformers
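The `finetune.py` hunks above move `llm_patch(train=True)` from the import block to the very top of the file, so the IPEX-LLM patch is applied before `transformers` and `peft` are imported, and everything below the marker comment matches the upstream axolotl script. A one-line sanity check of that import order (a sketch, not part of the patch; it assumes the `llm` conda environment from the README is active):

```bash
# Mirror the new import order in finetune.py: apply the IPEX-LLM patch first,
# then import transformers; this should print the pinned version, 4.34.0.
python -c "from ipex_llm import llm_patch; llm_patch(train=True); import transformers; print(transformers.__version__)"
```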
diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml b/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml
index d9f3d86f..7e5c2fbe 100644
--- a/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml
+++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml
@@ -1,3 +1,4 @@
+# This file is copied from https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/examples/llama-2/qlora.yml
 base_model: meta-llama/Llama-2-7b-hf
 base_model_config: meta-llama/Llama-2-7b-hf
 model_type: LlamaForCausalLM
@@ -22,7 +23,7 @@ sequence_len: 4096
 sample_packing: true
 pad_to_sequence_len: true
 
-lora_r: 8
+lora_r: 16
 lora_alpha: 16
 lora_dropout: 0.05
 lora_target_modules:
@@ -38,8 +39,8 @@ wandb_log_model:
 gradient_accumulation_steps: 2
 micro_batch_size: 1
 num_epochs: 3
-# change optimizer from paged_adamw_32bit to adamw_torch
-# due to bitsandbytes issue https://github.com/TimDettmers/bitsandbytes/issues/244
+# paged_adamw_32bit is not supported
+# due to bitsandbytes issue https://github.com/TimDettmers/bitsandbytes/issues/1180
 # optimizer: paged_adamw_32bit
 optimizer: adamw_torch
 lr_scheduler: cosine
diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/requirements.txt b/python/llm/example/GPU/LLM-Finetuning/axolotl/requirements-xpu.txt
similarity index 74%
rename from python/llm/example/GPU/LLM-Finetuning/axolotl/requirements.txt
rename to python/llm/example/GPU/LLM-Finetuning/axolotl/requirements-xpu.txt
index 6b93ba88..942a5ea0 100644
--- a/python/llm/example/GPU/LLM-Finetuning/axolotl/requirements.txt
+++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/requirements-xpu.txt
@@ -1,9 +1,11 @@
+# This file is copied from https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.3.0/requirements.txt
 --extra-index-url https://download.pytorch.org/whl/cu118
 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
 # torch==2.1.0
-#auto-gptq
+# auto-gptq
 packaging
 peft==0.5.0
+transformers==4.34.0
 bitsandbytes>=0.41.1
 accelerate==0.23.0
 addict
@@ -15,7 +17,7 @@ flash-attn>=2.2.1
 sentencepiece
 wandb
 einops
-#xformers
+# xformers
 optimum
 hf_transfer
 colorama
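Taken together, the renamed `requirements-xpu.txt`, the patched `finetune.py`, and the updated `qlora.yml` keep the example a single short workflow. A condensed recap of the new flow (a sketch; it assumes the axolotl clone sits inside the example directory as in step 1 of the README, and it reuses the `accelerate launch finetune.py <config>.yml` pattern from the section this patch removes):

```bash
# From python/llm/example/GPU/LLM-Finetuning/axolotl, after step 1's git clone:
cd axolotl
cp ../requirements-xpu.txt requirements.txt  # pin XPU-friendly deps (transformers==4.34.0, peft==0.5.0)
pip install -e .
cd ..
# Launch QLoRA finetuning with the updated config (lora_r: 16, optimizer: adamw_torch)
accelerate launch finetune.py qlora.yml
```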