vLLM: Fix conver_to_half condition (#13177)

* fix
* format
2025-05-22 15:44:10 +08:00 · 2025-05-22 15:44:10 +08:00 · 531bef2810
commit 531bef2810
parent e3130a06ed
1 changed file with 2 additions and 1 deletion
--- a/python/llm/src/ipex_llm/transformers/low_bit_linear.py
+++ b/python/llm/src/ipex_llm/transformers/low_bit_linear.py
@@ -654,7 +654,8 @@ class LowBitLinear(nn.Linear):
                else:
                    w = self.weight.data

-                if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and self.conver_to_half:
+                if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
+                        (x_2d.dtype == torch.half or self.conver_to_half):
                    import xe_batch
                    result = xe_batch.batch_forward(x_2d, w, self.qtype)
                elif not is_training and self.conver_to_half \