Fix NPU group size setting when optimize_model=False (#12256)
This commit is contained in:
parent
567b77a76b
commit
b685cf4349
2 changed files with 3 additions and 2 deletions
|
|
@@ -198,7 +198,8 @@ class _BaseAutoModelClass:
|
|||
from ipex_llm.transformers.npu_models.convert import optimize_llm
|
||||
optimize_llm(model)
|
||||
with torch.no_grad():
|
||||
cls.load_convert(qtype, model, "cpu", modules_to_not_convert, *args, **kwargs)
|
||||
cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
|
||||
quantization_group_size, *args, **kwargs)
|
||||
if hasattr(model, "llm"):
|
||||
create_npu_kernels(model.llm)
|
||||
else:
|
||||
|
|
|
|||
|
|
@@ -130,7 +130,7 @@ class QuantizedLinear(torch.nn.Module):
|
|||
weight: torch.Tensor,
|
||||
scale: torch.Tensor,
|
||||
bias: Optional[torch.Tensor] = None,
|
||||
group_size: int = False,
|
||||
group_size: int = 0,
|
||||
):
|
||||
"""Initialize the QuantizedLinear class.
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue