From 5dad33e5af5890fe7f4b799a99b69c4d09e1cc46 Mon Sep 17 00:00:00 2001
From: Yina Chen <33650826+cyita@users.noreply.github.com>
Date: Tue, 18 Jun 2024 11:47:43 +0800
Subject: [PATCH] Support fp8_e4m3 scale search (#11339)

* fp8e4m3 switch off

* fix style
---
 python/llm/src/ipex_llm/transformers/convert.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py
index 6b577c29..0766244d 100644
--- a/python/llm/src/ipex_llm/transformers/convert.py
+++ b/python/llm/src/ipex_llm/transformers/convert.py
@@ -290,6 +290,13 @@ def convert_gptq(module, awq=False, llm_awq=False, act_order=False):
 def use_scale_search(model_config, qtype):
     if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
         return True
+    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
+            model_config.model_type not in ["qwen2", "baichuan"]:
+        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
+                "instruct" in model_config._name_or_path.lower():
+            # Llama-3-instruct
+            return False
+        return True
     return False
 
 
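
Reviewer note: below is a minimal, self-contained sketch of the decision logic this
patch adds. The use_scale_search body is copied from the diff above; the
ggml_tensor_qtype stub (the keys exist in ipex_llm, the numeric values here are
illustrative only) and the SimpleNamespace configs are hypothetical stand-ins for
ipex_llm's real qtype table and transformers' model config objects.

from types import SimpleNamespace

# Hypothetical stand-in for ipex_llm's ggml_tensor_qtype table; only the
# keys matter for this sketch, the numeric values are placeholders.
ggml_tensor_qtype = {"fp6": 0, "fp8_e4m3": 1}


def use_scale_search(model_config, qtype):
    # Copied from the patch above.
    if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
        return True
    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
            model_config.model_type not in ["qwen2", "baichuan"]:
        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
                "instruct" in model_config._name_or_path.lower():
            # Llama-3-instruct
            return False
        return True
    return False


# Llama-3 instruct models (128256-token vocab, "instruct" in the model name)
# have fp8_e4m3 scale search switched off by the new branch.
llama3 = SimpleNamespace(model_type="llama", vocab_size=128256,
                         _name_or_path="meta-llama/Meta-Llama-3-8B-Instruct")
assert use_scale_search(llama3, ggml_tensor_qtype["fp8_e4m3"]) is False

# Other non-qwen2, non-baichuan models get scale search under fp8_e4m3.
mistral = SimpleNamespace(model_type="mistral", vocab_size=32000,
                          _name_or_path="mistralai/Mistral-7B-v0.1")
assert use_scale_search(mistral, ggml_tensor_qtype["fp8_e4m3"]) is True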