Support fp8_e4m3 scale search (#11339)
* fp8e4m3 switch off
* fix style
parent e50c890e1f
commit 5dad33e5af
1 changed file with 7 additions and 0 deletions
@@ -290,6 +290,13 @@ def convert_gptq(module, awq=False, llm_awq=False, act_order=False):
 def use_scale_search(model_config, qtype):
     if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
         return True
+    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
+            model_config.model_type not in ["qwen2", "baichuan"]:
+        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
+                "instruct" in model_config._name_or_path.lower():
+            # Llama-3-instruct
+            return False
+        return True
     return False
 
 
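For context, a runnable sketch of how the updated check behaves; the numeric qtype codes and the stub model configs below are illustrative assumptions, not values taken from this commit.

# Standalone sketch of the use_scale_search logic above; the qtype codes
# and the stub configs are assumptions for illustration only.
from types import SimpleNamespace

ggml_tensor_qtype = {"fp6": 0, "fp8_e4m3": 1}  # placeholder codes, not the real registry

def use_scale_search(model_config, qtype):
    if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
        return True
    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
            model_config.model_type not in ["qwen2", "baichuan"]:
        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
                "instruct" in model_config._name_or_path.lower():
            # Llama-3-instruct
            return False
        return True
    return False

# Llama-3 instruct checkpoints (vocab size 128256) are switched off:
llama3 = SimpleNamespace(model_type="llama", vocab_size=128256,
                         _name_or_path="meta-llama/Meta-Llama-3-8B-Instruct")
assert use_scale_search(llama3, ggml_tensor_qtype["fp8_e4m3"]) is False

# Other non-qwen2, non-baichuan models get scale search for fp8_e4m3:
mistral = SimpleNamespace(model_type="mistral", vocab_size=32000,
                          _name_or_path="mistralai/Mistral-7B-v0.1")
assert use_scale_search(mistral, ggml_tensor_qtype["fp8_e4m3"]) is True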