From 531bef28103b76b2af29426e1b8fd13027f31d77 Mon Sep 17 00:00:00 2001
From: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com>
Date: Thu, 22 May 2025 15:44:10 +0800
Subject: [PATCH] vLLM: Fix conver_to_half condition (#13177)

* fix

* format
---
 python/llm/src/ipex_llm/transformers/low_bit_linear.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/low_bit_linear.py b/python/llm/src/ipex_llm/transformers/low_bit_linear.py
index 292c765a..13e27a05 100644
--- a/python/llm/src/ipex_llm/transformers/low_bit_linear.py
+++ b/python/llm/src/ipex_llm/transformers/low_bit_linear.py
@@ -654,7 +654,8 @@ class LowBitLinear(nn.Linear):
                 else:
                     w = self.weight.data
 
-                if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and self.conver_to_half:
+                if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
+                        (x_2d.dtype == torch.half or self.conver_to_half):
                     import xe_batch
                     result = xe_batch.batch_forward(x_2d, w, self.qtype)
                 elif not is_training and self.conver_to_half \