From 5dad33e5af5890fe7f4b799a99b69c4d09e1cc46 Mon Sep 17 00:00:00 2001
From: Yina Chen <33650826+cyita@users.noreply.github.com>
Date: Tue, 18 Jun 2024 11:47:43 +0800
Subject: [PATCH] Support fp8_e4m3 scale search (#11339)

* fp8e4m3 switch off

* fix style
---
 python/llm/src/ipex_llm/transformers/convert.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py
index 6b577c29..0766244d 100644
--- a/python/llm/src/ipex_llm/transformers/convert.py
+++ b/python/llm/src/ipex_llm/transformers/convert.py
@@ -290,6 +290,13 @@ def convert_gptq(module, awq=False, llm_awq=False, act_order=False):
 def use_scale_search(model_config, qtype):
     if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
         return True
+    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
+            model_config.model_type not in ["qwen2", "baichuan"]:
+        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
+                "instruct" in model_config._name_or_path.lower():
+            # Llama-3-instruct
+            return False
+        return True
     return False
 
 
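
Reviewer note: below is a minimal, self-contained sketch of the decision logic this
patch adds. The use_scale_search body is copied from the diff above; the
ggml_tensor_qtype stub (the keys exist in ipex_llm, the numeric values here are
illustrative only) and the SimpleNamespace configs are hypothetical stand-ins for
ipex_llm's real qtype table and transformers' model config objects.

from types import SimpleNamespace

# Hypothetical stand-in for ipex_llm's ggml_tensor_qtype table; only the
# keys matter for this sketch, the numeric values are placeholders.
ggml_tensor_qtype = {"fp6": 0, "fp8_e4m3": 1}


def use_scale_search(model_config, qtype):
    # Copied from the patch above.
    if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
        return True
    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
            model_config.model_type not in ["qwen2", "baichuan"]:
        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
                "instruct" in model_config._name_or_path.lower():
            # Llama-3-instruct
            return False
        return True
    return False


# Llama-3 instruct models (128256-token vocab, "instruct" in the model name)
# have fp8_e4m3 scale search switched off by the new branch.
llama3 = SimpleNamespace(model_type="llama", vocab_size=128256,
                         _name_or_path="meta-llama/Meta-Llama-3-8B-Instruct")
assert use_scale_search(llama3, ggml_tensor_qtype["fp8_e4m3"]) is False

# Other non-qwen2, non-baichuan models get scale search under fp8_e4m3.
mistral = SimpleNamespace(model_type="mistral", vocab_size=32000,
                          _name_or_path="mistralai/Mistral-7B-v0.1")
assert use_scale_search(mistral, ggml_tensor_qtype["fp8_e4m3"]) is True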