Fix NPU group size setting when optimize_model=False (#12256)
This commit is contained in:
parent
567b77a76b
commit
b685cf4349
2 changed files with 3 additions and 2 deletions
|
|
@@ -198,7 +198,8 @@ class _BaseAutoModelClass:
|
||||||
from ipex_llm.transformers.npu_models.convert import optimize_llm
|
from ipex_llm.transformers.npu_models.convert import optimize_llm
|
||||||
optimize_llm(model)
|
optimize_llm(model)
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
cls.load_convert(qtype, model, "cpu", modules_to_not_convert, *args, **kwargs)
|
cls.load_convert(qtype, model, "cpu", modules_to_not_convert,
|
||||||
|
quantization_group_size, *args, **kwargs)
|
||||||
if hasattr(model, "llm"):
|
if hasattr(model, "llm"):
|
||||||
create_npu_kernels(model.llm)
|
create_npu_kernels(model.llm)
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@@ -130,7 +130,7 @@ class QuantizedLinear(torch.nn.Module):
|
||||||
weight: torch.Tensor,
|
weight: torch.Tensor,
|
||||||
scale: torch.Tensor,
|
scale: torch.Tensor,
|
||||||
bias: Optional[torch.Tensor] = None,
|
bias: Optional[torch.Tensor] = None,
|
||||||
group_size: int = False,
|
group_size: int = 0,
|
||||||
):
|
):
|
||||||
"""Initialize the QuantizedLinear class.
|
"""Initialize the QuantizedLinear class.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue