bugfix for qlora finetuning on GPU (#12298)

* bugfix for qlora 100 step error
* indent fix
* annotation fix
2024-10-30 16:54:10 +08:00 · 2024-10-30 16:54:10 +08:00 · 46d8300f6b
commit 46d8300f6b
parent 70037ad55f
2 changed files with 8 additions and 2 deletions
--- a/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md
+++ b/python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/README.md
@@ -23,6 +23,8 @@ pip install bitsandbytes scipy
 source /opt/intel/oneapi/setvars.sh # necessary to run before installing deepspeed
 pip install git+https://github.com/microsoft/DeepSpeed.git@78c518e
 pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@ec33277
+# (optional) install mpirun to run multi-card finetuning
+sudo apt install openmpi-bin
 ```

 ### 2. Configures OneAPI environment variables
--- a/python/llm/src/ipex_llm/transformers/low_bit_linear.py
+++ b/python/llm/src/ipex_llm/transformers/low_bit_linear.py
@@ -794,8 +794,12 @@ class LowBitLinear(nn.Linear):
                                                   self.weight.qtype, input_seq_size)
                    result = result.to(x.dtype)
                else:
-                    result = xe_linear.forward_new(x_2d, self.weight.data,
-                                                   self.weight.qtype, input_seq_size)
+                    if self.weight.qtype == NF4:
+                        result = xe_linear.forward_new(x_2d, self.weight.data.view(torch.uint8),
+                                                       self.weight.qtype, input_seq_size)
+                    else:
+                        result = xe_linear.forward_new(x_2d, self.weight.data,
+                                                       self.weight.qtype, input_seq_size)

                if do_empty_cache:
                    torch.xpu.empty_cache()