From b685cf434910911045923ffde90338262c2dc72a Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Wed, 23 Oct 2024 17:53:54 +0800
Subject: [PATCH] Fix npu group size setting of optimize_model=False (#12256)

---
 python/llm/src/ipex_llm/transformers/npu_model.py         | 3 ++-
 python/llm/src/ipex_llm/transformers/npu_models/linear.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py
index c9936f25..119c31c7 100644
--- a/python/llm/src/ipex_llm/transformers/npu_model.py
+++ b/python/llm/src/ipex_llm/transformers/npu_model.py
@@ -198,7 +198,8 @@ class _BaseAutoModelClass:
             from ipex_llm.transformers.npu_models.convert import optimize_llm
             optimize_llm(model)
             with torch.no_grad():
-                cls.load_convert(qtype, model, "cpu", modules_to_not_convert, *args, **kwargs)
+                cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
+                                 quantization_group_size, *args, **kwargs)
                 if hasattr(model, "llm"):
                     create_npu_kernels(model.llm)
                 else:
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/linear.py b/python/llm/src/ipex_llm/transformers/npu_models/linear.py
index d419da30..9fb5d525 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/linear.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/linear.py
@@ -130,7 +130,7 @@ class QuantizedLinear(torch.nn.Module):
         weight: torch.Tensor,
         scale: torch.Tensor,
         bias: Optional[torch.Tensor] = None,
-        group_size: int = False,
+        group_size: int = 0,
     ):
         """Initialize the QuantizedLinear class.