From b685cf434910911045923ffde90338262c2dc72a Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Wed, 23 Oct 2024 17:53:54 +0800
Subject: [PATCH] Fix npu group size setting of optimize_model=False (#12256)

---
 python/llm/src/ipex_llm/transformers/npu_model.py         | 3 ++-
 python/llm/src/ipex_llm/transformers/npu_models/linear.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py
index c9936f25..119c31c7 100644
--- a/python/llm/src/ipex_llm/transformers/npu_model.py
+++ b/python/llm/src/ipex_llm/transformers/npu_model.py
@@ -198,7 +198,8 @@ class _BaseAutoModelClass:
             from ipex_llm.transformers.npu_models.convert import optimize_llm
             optimize_llm(model)
             with torch.no_grad():
-                cls.load_convert(qtype, model, "cpu", modules_to_not_convert, *args, **kwargs)
+                cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
+                                 quantization_group_size, *args, **kwargs)
                 if hasattr(model, "llm"):
                     create_npu_kernels(model.llm)
                 else:
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/linear.py b/python/llm/src/ipex_llm/transformers/npu_models/linear.py
index d419da30..9fb5d525 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/linear.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/linear.py
@@ -130,7 +130,7 @@ class QuantizedLinear(torch.nn.Module):
         weight: torch.Tensor,
         scale: torch.Tensor,
         bias: Optional[torch.Tensor] = None,
-        group_size: int = False,
+        group_size: int = 0,
     ):
         """Initialize the QuantizedLinear class.