From 1b585b0d40c991cad9eb1bf3b38a7856a186840c Mon Sep 17 00:00:00 2001
From: "Chen, Zhentao" <zhentao.chen@intel.com>
Date: Mon, 8 Jan 2024 15:53:57 +0800
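Subject: [PATCH] set fp8 default as e5m2 (#9859)

Map the generic "fp8" entry of ggml_tensor_qtype to qtype 19, the same
value as "fp8_e5m2", instead of qtype 15 (fp8 in e4m3 format). Requests
for "fp8" therefore resolve to the e5m2 format from this change on.
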
---
 python/llm/src/bigdl/llm/ggml/quantize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/src/bigdl/llm/ggml/quantize.py b/python/llm/src/bigdl/llm/ggml/quantize.py
index 185d31a7..11e04ab8 100644
--- a/python/llm/src/bigdl/llm/ggml/quantize.py
+++ b/python/llm/src/bigdl/llm/ggml/quantize.py
@@ -38,7 +38,7 @@ ggml_tensor_qtype = {"sym_int4": 2,   # q4_0 in ggml
                      "mixed_fp4": 17,     # Mixture of Formats Quantization 4 bits
                      "mixed_fp8": 18,     # Mixture of Formats Quantization 8 bits
                      "fp8_e5m2": 19,      # fp8 in e5m2 format
-                     "fp8": 15,           # fp8 in e4m3 format
+                     "fp8": 19,           # fp8 in e5m2 format
                      "bf16": 20}
 
 _llama_quantize_type = {"q4_0": 2,