Fix NPU group size setting when optimize_model=False (#12256)

binbin Deng 2024-10-23 17:53:54 +08:00 committed by GitHub
parent 567b77a76b
commit b685cf4349
2 changed files with 3 additions and 2 deletions

@@ -198,7 +198,8 @@ class _BaseAutoModelClass:
         from ipex_llm.transformers.npu_models.convert import optimize_llm
         optimize_llm(model)
         with torch.no_grad():
-            cls.load_convert(qtype, model, "cpu", modules_to_not_convert, *args, **kwargs)
+            cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
+                             quantization_group_size, *args, **kwargs)
             if hasattr(model, "llm"):
                 create_npu_kernels(model.llm)
             else:
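
This hunk threads quantization_group_size through to load_convert, so a group size chosen by the user is honored even when optimize_model=False. A minimal sketch of the failure mode (the names mirror the diff, but the function body below is hypothetical): when a call site omits an optional argument, the callee silently falls back to its default, so the setting appears accepted but never takes effect.

def load_convert(qtype, model, device, modules_to_not_convert,
                 group_size=0, *args, **kwargs):
    # group_size == 0 conventionally means per-channel quantization;
    # a positive value enables per-group quantization.
    print(f"qtype={qtype}, group_size={group_size}")

model = object()
quantization_group_size = 64

# Before the fix: the group size was dropped at the call site,
# so load_convert always saw its default of 0.
load_convert("sym_int4", model, "cpu", [])

# After the fix: the group size is forwarded explicitly.
load_convert("sym_int4", model, "cpu", [],
             quantization_group_size)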

@@ -130,7 +130,7 @@ class QuantizedLinear(torch.nn.Module):
         weight: torch.Tensor,
         scale: torch.Tensor,
         bias: Optional[torch.Tensor] = None,
-        group_size: int = False,
+        group_size: int = 0,
     ):
         """Initialize the QuantizedLinear class.