From 4499d25c26b7f93e82917b526a77cb550df97b0a Mon Sep 17 00:00:00 2001
From: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com>
Date: Thu, 25 Jul 2024 13:07:19 +0800
Subject: [PATCH] LLM: Fix ParallelLMHead convert in vLLM cpu (#11654)

---
 python/llm/src/ipex_llm/transformers/convert.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py
index 789fd67d..aa637577 100644
--- a/python/llm/src/ipex_llm/transformers/convert.py
+++ b/python/llm/src/ipex_llm/transformers/convert.py
@@ -153,8 +153,9 @@ def is_linear_module(module):
         VLLM_LINEAR_LIST = [
             ColumnParallelLinear, RowParallelLinear, QKVParallelLinear,
             MergedColumnParallelLinear,
-            ParallelLMHead
         ]
+        if 'xpu' in _VLLM_VERSION:
+            VLLM_LINEAR_LIST.append(ParallelLMHead)
         if is_module_in_classes(module, VLLM_LINEAR_LIST):
             if 'xpu' in _VLLM_VERSION:
                 # For vllm xpu