From c9ecb7a113a35792d02571805f3675d0de2ef90f Mon Sep 17 00:00:00 2001
From: "Wang, Jian4" <61138589+hzjane@users.noreply.github.com>
Date: Fri, 14 Mar 2025 14:43:54 +0800
Subject: [PATCH] Fix qwen nan value issue on vllm (#12971)

* add to fix qwen nan value issue

* update
---
 python/llm/src/ipex_llm/vllm/xpu/model_convert.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
index 73783c97..1237d2b2 100644
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -107,6 +107,12 @@ def get_load_function(low_bit):
             modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
         else:
             modules = None
+        not_convert_o_proj = os.getenv("IPEX_LLM_NOT_CONVERT_O_PROJ", None)
+        if not_convert_o_proj is not None:
+            # only use to avoid nan value in o_proj running DeepSeek-R1-Distill-Qwen-14B
+            modules = ["o_proj"]
+        else:
+            modules = None
         if "minicpm" in self.vllm_config.model_config.model.lower():
             modules = ["vpm", "resampler"]
         if "internvl2" in self.vllm_config.model_config.model.lower():
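
Below is a minimal usage sketch, not part of the patch, showing how the new environment variable is intended to be consumed: any non-empty value of IPEX_LLM_NOT_CONVERT_O_PROJ makes the patched load function restrict modules to ["o_proj"], which, per the in-diff comment, avoids NaN values in o_proj when running DeepSeek-R1-Distill-Qwen-14B. The IPEXLLMClass import path, model id, and load_in_low_bit value are assumptions drawn from ipex-llm's vLLM-on-XPU examples, not from this patch, and may differ between releases.

# Hedged usage sketch (assumptions labeled; not part of this patch).
import os

# The variable is read with os.getenv at model-load time, so it must be set
# before the engine constructs the model. Any non-empty value enables the
# workaround added in this patch.
os.environ["IPEX_LLM_NOT_CONVERT_O_PROJ"] = "1"

# Assumed entry point: ipex-llm's vLLM wrapper class; the exact import path
# and constructor arguments are assumptions and may differ across releases.
from ipex_llm.vllm.xpu.engine import IPEXLLMClass as LLM
from vllm import SamplingParams

llm = LLM(model="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",  # assumed model id
          device="xpu",
          load_in_low_bit="sym_int4",  # assumed low-bit setting
          enforce_eager=True)

outputs = llm.generate(["What is 1 + 1?"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)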