fix galore and peft finetune example (#12776)
parent c0d6b282b8
commit b4c9e23f73
3 changed files with 10 additions and 13 deletions

GaLore example README (install steps):

````diff
@@ -13,11 +13,8 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 pip install galore-torch
-pip install accelerate==0.28.0
-pip install bitsandbytes==0.43.0
-pip install datasets==2.18.0
-pip install transformers==4.39.1
-pip install trl==0.8.1
+pip install transformers==4.45.0 "trl<0.12.0" datasets
+pip install bitsandbytes==0.45.1
 ```
 
 ### 2. GaLore Finetune
````
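The updated pins track the GaLore integration that transformers gained in 4.39 and later. As a rough illustration of what the example builds on, here is a minimal sketch of enabling GaLore through transformers' `Trainer`; the checkpoint, dataset, hyperparameters, and target-module names below are placeholders of mine, not values from this example.

```python
# Minimal sketch (not taken from this example): enabling the GaLore optimizer
# through transformers' Trainer once galore-torch is installed.
import torch
from datasets import load_dataset
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          DataCollatorForLanguageModeling, Trainer, TrainingArguments)

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"      # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token            # needed for padding in the collator
model = AutoModelForCausalLM.from_pretrained(model_id)

ds = load_dataset("imdb", split="train[:1%]")        # placeholder dataset
ds = ds.map(lambda x: tokenizer(x["text"], truncation=True, max_length=256),
            batched=True, remove_columns=ds.column_names)

args = TrainingArguments(
    output_dir="galore-out",
    per_device_train_batch_size=1,
    max_steps=10,
    optim="galore_adamw",                  # GaLore optimizer backed by galore-torch
    optim_target_modules=["attn", "mlp"],  # modules whose gradients get low-rank projection
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=ds,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
```

Switching `optim` to `"galore_adamw_8bit"` selects the 8-bit GaLore variant, which is presumably why bitsandbytes stays pinned alongside galore-torch.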
PEFT example README (install steps):

````diff
@@ -14,10 +14,10 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install transformers==4.36.0 datasets
+pip install transformers==4.45.0 "trl<0.12.0" datasets
+pip install bitsandbytes==0.45.1 scipy
 pip install fire peft==0.10.0
 pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
-pip install bitsandbytes scipy
 ```
 
 ### 2. Configures OneAPI environment variables
````
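Since the example now depends on exact versions of transformers, trl, bitsandbytes, and peft, a quick version check can save a failed run. This snippet is illustrative only and not part of the README:

```python
# Illustrative only: print the resolved versions of the packages pinned above.
# Expected per the README: transformers 4.45.0, trl < 0.12, bitsandbytes 0.45.1, peft 0.10.0.
import importlib

for name in ("transformers", "trl", "bitsandbytes", "peft", "datasets"):
    try:
        mod = importlib.import_module(name)
        print(f"{name:15s} {getattr(mod, '__version__', 'unknown')}")
    except ImportError:
        print(f"{name:15s} not installed")
```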
PEFT example finetuning script (imports):

````diff
@@ -53,10 +53,10 @@ from peft import (
     LoraConfig,
     get_peft_model,
     get_peft_model_state_dict,
-    prepare_model_for_int8_training,
+    prepare_model_for_kbit_training,
     set_peft_model_state_dict,
 )
-from transformers import LlamaForCausalLM, LlamaTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from utils.prompter import Prompter
 
````
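Moving from the Llama-specific classes to the Auto classes means the concrete model and tokenizer types are resolved from each checkpoint's config, so the script is no longer tied to Llama checkpoints. A small sketch of that behaviour (checkpoint ids are placeholders):

```python
# Placeholder checkpoint ids; the Auto classes pick the concrete implementation
# (e.g. LlamaTokenizerFast) from each checkpoint's config.
from transformers import AutoTokenizer

for ckpt in ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "Qwen/Qwen2-0.5B"):
    tok = AutoTokenizer.from_pretrained(ckpt)
    print(ckpt, "->", type(tok).__name__)  # LlamaTokenizerFast, Qwen2TokenizerFast
```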
Same script, model and tokenizer loading:

````diff
@@ -145,14 +145,14 @@ def train(
     if len(wandb_log_model) > 0:
         os.environ["WANDB_LOG_MODEL"] = wandb_log_model
 
-    model = LlamaForCausalLM.from_pretrained(
+    model = AutoModelForCausalLM.from_pretrained(
         base_model,
-        load_in_8bit=True,
+        load_in_4bit=True,
         torch_dtype=torch.float16,
         device_map=device_map,
     )
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model)
 
     tokenizer.pad_token_id = (
         0  # unk. we want this to be different from the eos token
````
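In current transformers releases, `load_in_4bit=True` in `from_pretrained` is shorthand for attaching a default `BitsAndBytesConfig`. For reference, a sketch of the explicit form, which also exposes the compute dtype and quantization type; the checkpoint and device map are placeholders standing in for the script's `base_model` and `device_map`:

```python
# Illustrative, not from this diff: the explicit quantization_config form of load_in_4bit=True.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # placeholder for the script's base_model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # default is float32
    bnb_4bit_quant_type="nf4",             # default is "fp4"
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",                     # placeholder for the script's device_map
)
```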
Same script, k-bit training preparation:

````diff
@@ -207,7 +207,7 @@ def train(
             ]  # could be sped up, probably
         return tokenized_full_prompt
 
-    model = prepare_model_for_int8_training(model)
+    model = prepare_model_for_kbit_training(model)
 
     config = LoraConfig(
         r=lora_r,
````
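`prepare_model_for_int8_training` has been deprecated in favor of `prepare_model_for_kbit_training` and removed in recent peft releases; the replacement handles both 8-bit and 4-bit quantized models. Below is a sketch of the usual pattern around this call; the checkpoint and the LoRA hyperparameters are illustrative, not the example's actual values.

```python
# Illustrative sketch, not the example's actual hyperparameters.
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM

base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained(
    base_model, load_in_4bit=True, torch_dtype=torch.float16, device_map="auto"
)

# Freezes the base weights, casts the small remaining float parameters (layer
# norms etc.) to fp32, and enables input gradients so a k-bit (8-bit or 4-bit)
# quantized model can be finetuned.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # assumed Llama-style names
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the LoRA matrices remain trainable
```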