From 3a72e5df8cb01ca967061c5e1b0055a672f502c3 Mon Sep 17 00:00:00 2001 From: Ruonan Wang Date: Fri, 17 May 2024 10:10:16 +0800 Subject: [PATCH] disable mlp fusion of fp6 on mtl (#11059) --- python/llm/src/ipex_llm/transformers/models/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/llm/src/ipex_llm/transformers/models/utils.py b/python/llm/src/ipex_llm/transformers/models/utils.py index 22320a4b..58af7a9c 100644 --- a/python/llm/src/ipex_llm/transformers/models/utils.py +++ b/python/llm/src/ipex_llm/transformers/models/utils.py @@ -358,6 +358,10 @@ def mlp_fusion_check(x, qtype, training): return False if training or x.requires_grad: return False + if qtype == FP6: + device = get_xpu_device_type(x) + if device == "mtl": + return False return True