diff --git a/python/llm/src/bigdl/llm/transformers/models/mixtral.py b/python/llm/src/bigdl/llm/transformers/models/mixtral.py index 72e4f110..9e6248a4 100644 --- a/python/llm/src/bigdl/llm/transformers/models/mixtral.py +++ b/python/llm/src/bigdl/llm/transformers/models/mixtral.py @@ -50,6 +50,7 @@ from bigdl.llm.transformers.models.utils import apply_rotary_pos_emb,\ apply_rotary_pos_emb_no_cache_xpu, is_enough_kv_cache_room_4_36 from bigdl.llm.transformers.models.mistral import should_use_fuse_rope, use_decoding_fast_path from bigdl.llm.transformers.models.utils import use_flash_attention +from bigdl.llm.transformers.models.utils import mlp_fusion_check KV_CACHE_ALLOC_BLOCK_LENGTH = 256