vLLM: Fix conver_to_half condition (#13177)

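* fix: take the batch-forward path when the input is already torch.half, not only when conver_to_half is set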

* format
This commit is contained in:
Xiangyu Tian 2025-05-22 15:44:10 +08:00 committed by GitHub
parent e3130a06ed
commit 531bef2810
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -654,7 +654,8 @@ class LowBitLinear(nn.Linear):
else:
w = self.weight.data
if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and self.conver_to_half:
if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
(x_2d.dtype == torch.half or self.conver_to_half):
import xe_batch
result = xe_batch.batch_forward(x_2d, w, self.qtype)
elif not is_training and self.conver_to_half \