LLM: fix MLP check of mixtral (#9891)
parent
4f4ce73f31
commit
d9cf55bce9
1 changed file with 1 addition and 0 deletions
@@ -50,6 +50,7 @@ from bigdl.llm.transformers.models.utils import apply_rotary_pos_emb,\
     apply_rotary_pos_emb_no_cache_xpu, is_enough_kv_cache_room_4_36
 from bigdl.llm.transformers.models.mistral import should_use_fuse_rope, use_decoding_fast_path
 from bigdl.llm.transformers.models.utils import use_flash_attention
+from bigdl.llm.transformers.models.utils import mlp_fusion_check


 KV_CACHE_ALLOC_BLOCK_LENGTH = 256
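For context, the one-line change imports mlp_fusion_check so the Mixtral MLP path can gate whether the fused kernel is usable before falling back to the eager computation. Below is a minimal, hypothetical sketch of the kind of condition such a helper checks and where it would be called; the criteria, the _sketch suffix, and the expert-MLP module layout are assumptions for illustration, not the repository's actual code.

import torch

def mlp_fusion_check_sketch(x: torch.Tensor, training: bool) -> bool:
    # Illustrative stand-in for mlp_fusion_check; the real criteria and
    # signature in bigdl.llm are assumptions here. Fused MLP kernels
    # typically require inference mode, no autograd tracking, and a
    # contiguous activation tensor.
    if training or x.requires_grad:
        return False
    if not x.is_contiguous():
        return False
    # the real check also inspects the weight quantization type (omitted)
    return True

class SketchMixtralExpertMLP(torch.nn.Module):
    # Standard Mixtral expert MLP layout (w1/w3 projections -> w2);
    # the fused branch is only indicated, not implemented.
    def __init__(self, hidden_size: int, intermediate_size: int):
        super().__init__()
        self.w1 = torch.nn.Linear(hidden_size, intermediate_size, bias=False)  # gate proj
        self.w3 = torch.nn.Linear(hidden_size, intermediate_size, bias=False)  # up proj
        self.w2 = torch.nn.Linear(intermediate_size, hidden_size, bias=False)  # down proj
        self.act_fn = torch.nn.SiLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if mlp_fusion_check_sketch(x, self.training):
            # a fused SiLU(w1(x)) * w3(x) -> w2 kernel would be dispatched here
            pass
        return self.w2(self.act_fn(self.w1(x)) * self.w3(x))

# usage
if __name__ == "__main__":
    mlp = SketchMixtralExpertMLP(hidden_size=32, intermediate_size=64).eval()
    with torch.no_grad():
        out = mlp(torch.randn(2, 4, 32))
    print(out.shape)  # torch.Size([2, 4, 32])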