Support fp8_e4m3 scale search (#11339)
* fp8e4m3 switch off
* fix style
parent e50c890e1f
commit 5dad33e5af

1 changed file with 7 additions and 0 deletions
@@ -290,6 +290,13 @@ def convert_gptq(module, awq=False, llm_awq=False, act_order=False):
 
 def use_scale_search(model_config, qtype):
     if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
         return True
+    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
+            model_config.model_type not in ["qwen2", "baichuan"]:
+        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
+                "instruct" in model_config._name_or_path.lower():
+            # Llama-3-instruct
+            return False
+        return True
     return False
 
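In plain terms, the new branch enables scale search for fp8_e4m3 on most architectures but leaves it off for qwen2, baichuan, and Llama-3-instruct (detected via the 128256-token vocabulary plus "instruct" in the model path). Below is a minimal sketch of exercising this gate; the integer qtype values and the SimpleNamespace stand-ins for a Hugging Face config are assumptions for illustration, not the repository's real ggml_tensor_qtype mapping.

# Sketch only: the qtype integers below are hypothetical stand-ins for
# the repository's real ggml_tensor_qtype mapping.
from types import SimpleNamespace

ggml_tensor_qtype = {"fp6": 26, "fp8_e4m3": 27}  # assumed values

def use_scale_search(model_config, qtype):
    # Same logic as the diff above.
    if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
        return True
    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
            model_config.model_type not in ["qwen2", "baichuan"]:
        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
                "instruct" in model_config._name_or_path.lower():
            # Llama-3-instruct: scale search stays switched off
            return False
        return True
    return False

# A Llama-3-instruct-like config: fp8_e4m3 scale search is switched off.
cfg = SimpleNamespace(model_type="llama", vocab_size=128256,
                      _name_or_path="meta-llama/Meta-Llama-3-8B-Instruct")
assert use_scale_search(cfg, ggml_tensor_qtype["fp8_e4m3"]) is False

# A non-excluded model type: fp8_e4m3 scale search is enabled.
cfg2 = SimpleNamespace(model_type="mistral", vocab_size=32000,
                       _name_or_path="mistralai/Mistral-7B-v0.1")
assert use_scale_search(cfg2, ggml_tensor_qtype["fp8_e4m3"]) is True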