fix qwen 14b fp6 abnormal output (#11583)
This commit is contained in:
parent c279849d27
commit 99c22745b2

1 changed file with 5 additions and 4 deletions
@@ -667,7 +667,7 @@ def replace_with_low_bit_linear_for_module(model, qtype, module_name=None,
     return model


-def _optimize_pre(model):
+def _optimize_pre(model, qtype=None):
     try:
         from sentence_transformers.SentenceTransformer import SentenceTransformer
         if isinstance(model, SentenceTransformer):
@@ -743,6 +743,7 @@ def _optimize_pre(model):
         if should_apply_merge_qkv:
             from ipex_llm.transformers.models.qwen2 import merge_qkv
             model.apply(merge_qkv)
-            from ipex_llm.transformers.models.qwen2 import padding_mlp
-            model.apply(padding_mlp)
+            if qtype != ggml_tensor_qtype["fp6"]:
+                from ipex_llm.transformers.models.qwen2 import padding_mlp
+                model.apply(padding_mlp)
     if model.config.model_type == "qwen2_moe":
@@ -795,7 +796,7 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
         return model

     if optimize_model:
-        model = _optimize_pre(model)
+        model = _optimize_pre(model, qtype)

     act_order = False
     if getattr(model, "quantization_method", None) == "gptq":
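For context, a minimal sketch of the gating this commit introduces, using stand-in names rather than the actual ipex_llm implementation: ggml_convert_low_bit now threads qtype into _optimize_pre, and the Qwen2 MLP padding pass is skipped when the target quantization is fp6, while merge_qkv still runs.

# Minimal sketch (hypothetical stand-ins, not the real ipex_llm code):
# the fix forwards qtype so the Qwen2 padding_mlp pass can be skipped for
# fp6, which produced abnormal output on Qwen 14B.

FP6 = "fp6"  # placeholder for ggml_tensor_qtype["fp6"]

def merge_qkv(module):
    """Stand-in for ipex_llm.transformers.models.qwen2.merge_qkv."""

def padding_mlp(module):
    """Stand-in for ipex_llm.transformers.models.qwen2.padding_mlp."""

def optimize_pre_sketch(model, qtype=None):
    model.apply(merge_qkv)       # q/k/v merge is applied for every qtype
    if qtype != FP6:             # fp6 skips the MLP padding pass
        model.apply(padding_mlp)
    return model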