fix nf4 to cpu (#12722)

2025-01-21 09:23:22 +08:00 · 2025-01-21 09:23:22 +08:00 · 085974e307
commit 085974e307
parent 9aa4be8ced
1 changed files with 7 additions and 4 deletions
--- a/python/llm/src/ipex_llm/transformers/low_bit_linear.py
+++ b/python/llm/src/ipex_llm/transformers/low_bit_linear.py
@@ -204,12 +204,15 @@ def ggml_q_format_convet_cpu2xpu(tensor: torch.Tensor, num_elem: int, qtype: int
 def ggml_q_format_convet_xpu2cpu(tensor: torch.Tensor, num_elem: int, qtype: int):
-
+    if qtype == NF4:
-    invalidInputError(tensor.dtype == torch.uint8,
+        invalidInputError(tensor.dtype == torch.bfloat16,
-                      "Input tensor must be uint8")
+                          "NF4 Input tensor must be bfloat16")
+    else:
+        invalidInputError(tensor.dtype == torch.uint8,
+                          "Input tensor must be uint8")
    invalidInputError(tensor.device == torch.device('cpu'),
-                      "Input tensor must be uint8")
+                      "Input tensor must be on cpu")
    src = ctypes.c_void_p(tensor.data.data_ptr())