Fix mixtral-8x7b with transformers=4.37.0 (#11132)
parent ab476c7fe2
commit 367de141f2

1 changed file with 3 additions and 0 deletions
@@ -414,6 +414,9 @@ def mixtral_model_forward(
         output_router_logits: Optional[bool] = None,
         return_dict: Optional[bool] = None,
 ) -> Union[Tuple, MoeModelOutputWithPast]:
+    # to be compatible with transformers>=4.37.0
+    self._use_flash_attention_2 = self.config._attn_implementation == "flash_attention_2"
+
     output_attentions = output_attentions if output_attentions is not None \
         else self.config.output_attentions
     output_router_logits = (
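Background on the change: newer transformers releases record the chosen attention backend on config._attn_implementation, while the patched mixtral_model_forward still appears to branch on the older self._use_flash_attention_2 flag, so the commit re-derives that flag at the top of the forward. Below is a minimal sketch of the same idea as a standalone helper; the helper name is illustrative and not part of this commit, and the exact flag handling is an assumption based on the diff.

    # Minimal sketch (illustrative, not the actual ipex-llm code).
    # Newer transformers store the attention backend choice on
    # `config._attn_implementation`; older code paths read a boolean
    # `_use_flash_attention_2` attribute on the module instead.
    def ensure_legacy_flash_attn_flag(module):
        # Re-derive the legacy flag from the new-style config field so code
        # that still checks `module._use_flash_attention_2` keeps working
        # with transformers >= 4.37.0.
        attn_impl = getattr(module.config, "_attn_implementation", None)
        module._use_flash_attention_2 = (attn_impl == "flash_attention_2")
        return module._use_flash_attention_2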