LLM: fix MLP check of mixtral (#9891)
This commit is contained in:
parent 4f4ce73f31
commit d9cf55bce9
1 changed file with 1 addition and 0 deletions
@@ -50,6 +50,7 @@ from bigdl.llm.transformers.models.utils import apply_rotary_pos_emb,\
     apply_rotary_pos_emb_no_cache_xpu, is_enough_kv_cache_room_4_36
 from bigdl.llm.transformers.models.mistral import should_use_fuse_rope, use_decoding_fast_path
 from bigdl.llm.transformers.models.utils import use_flash_attention
+from bigdl.llm.transformers.models.utils import mlp_fusion_check


 KV_CACHE_ALLOC_BLOCK_LENGTH = 256
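The Mixtral forward path in bigdl-llm references mlp_fusion_check to decide whether the fused MLP kernel can be used; without this import that reference would fail with a NameError at runtime, which the one-line addition above fixes. Below is a minimal sketch of that kind of guard, assuming an mlp_fusion_check(x, qtype, training) signature and Mixtral's w1/w2/w3 expert projections; the actual call site in mixtral.py may differ.

import torch

from bigdl.llm.transformers.models.utils import mlp_fusion_check


def mixtral_mlp_forward_sketch(self, x: torch.Tensor) -> torch.Tensor:
    # Hypothetical guard: take the fused path only when the utility check
    # says this input / quantization combination supports it.
    qtype = getattr(self.w1, "qtype", None)  # assumed attribute on the quantized linear
    if qtype is not None and mlp_fusion_check(x, qtype, self.training):
        ...  # fused SiLU-MLP path (elided in this sketch)
    # Plain fallback: down(act(gate(x)) * up(x)) in Mixtral's w1/w2/w3 naming.
    return self.w2(self.act_fn(self.w1(x)) * self.w3(x))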