LLM: fix MLP check of mixtral (#9891)

Ruonan Wang 2024-01-11 18:01:59 +08:00 committed by GitHub
parent 4f4ce73f31
commit d9cf55bce9


@@ -50,6 +50,7 @@ from bigdl.llm.transformers.models.utils import apply_rotary_pos_emb,\
     apply_rotary_pos_emb_no_cache_xpu, is_enough_kv_cache_room_4_36
 from bigdl.llm.transformers.models.mistral import should_use_fuse_rope, use_decoding_fast_path
 from bigdl.llm.transformers.models.utils import use_flash_attention
+from bigdl.llm.transformers.models.utils import mlp_fusion_check
 KV_CACHE_ALLOC_BLOCK_LENGTH = 256
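
For context, the newly imported helper is presumably used to gate the fused MLP path in the Mixtral expert forward. The sketch below illustrates that pattern; the (x_2d, qtype, training) argument order of mlp_fusion_check, the `qtype` attribute on the low-bit linear layers, and the expert module layout are assumptions for illustration, not facts established by this diff.

    # Minimal sketch, assuming mlp_fusion_check(x_2d, qtype, training) decides whether
    # the fused low-bit SwiGLU kernel can be used; otherwise the eager path runs.
    import torch
    import torch.nn as nn

    from bigdl.llm.transformers.models.utils import mlp_fusion_check


    class MixtralExpertMLP(nn.Module):
        def __init__(self, hidden_size: int, intermediate_size: int):
            super().__init__()
            self.w1 = nn.Linear(hidden_size, intermediate_size, bias=False)  # gate proj
            self.w2 = nn.Linear(intermediate_size, hidden_size, bias=False)  # down proj
            self.w3 = nn.Linear(hidden_size, intermediate_size, bias=False)  # up proj
            self.act_fn = nn.SiLU()

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            x_2d = x.view(-1, x.shape[-1])
            # qtype is assumed to be set on the layer by bigdl-llm's low-bit conversion.
            qtype = getattr(self.w1, "qtype", None)
            if mlp_fusion_check(x_2d, qtype, self.training):
                # Fused path: a single low-bit kernel would evaluate
                # SiLU(w1(x)) * w3(x) before the down projection (omitted here).
                pass
            # Eager SwiGLU path, numerically equivalent to the fused kernel.
            return self.w2(self.act_fn(self.w1(x)) * self.w3(x))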