[NPU] Hot fix of load_low_bit (#12344)
This commit is contained in:
parent
899a30331a
commit
69e3a56943
1 changed file with 1 addition and 1 deletion
|
|
@@ -549,7 +549,7 @@ class _BaseAutoModelClass:
|
||||||
from ipex_llm.transformers.npu_models.convert_mp import optimize_llm
|
from ipex_llm.transformers.npu_models.convert_mp import optimize_llm
|
||||||
optimize_llm(
|
optimize_llm(
|
||||||
llm,
|
llm,
|
||||||
max_output_len=max_context_len,
|
max_context_len=max_context_len,
|
||||||
max_prompt_len=max_prompt_len,
|
max_prompt_len=max_prompt_len,
|
||||||
inter_pp=inter_pp,
|
inter_pp=inter_pp,
|
||||||
intra_pp=intra_pp,
|
intra_pp=intra_pp,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue