From 4499d25c26b7f93e82917b526a77cb550df97b0a Mon Sep 17 00:00:00 2001
From: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com>
Date: Thu, 25 Jul 2024 13:07:19 +0800
Subject: [PATCH] LLM: Fix ParallelLMHead convert in vLLM cpu (#11654)

---
 python/llm/src/ipex_llm/transformers/convert.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py
index 789fd67d..aa637577 100644
--- a/python/llm/src/ipex_llm/transformers/convert.py
+++ b/python/llm/src/ipex_llm/transformers/convert.py
@@ -153,8 +153,9 @@ def is_linear_module(module):
         VLLM_LINEAR_LIST = [
             ColumnParallelLinear, RowParallelLinear, QKVParallelLinear,
             MergedColumnParallelLinear,
-            ParallelLMHead
         ]
+        if 'xpu' in _VLLM_VERSION:
+            VLLM_LINEAR_LIST.append(ParallelLMHead)
         if is_module_in_classes(module, VLLM_LINEAR_LIST):
             if 'xpu' in _VLLM_VERSION:
                 # For vllm xpu