Add half precision for fastchat models (#11130)
This commit is contained in:
parent
65f4212f89
commit
7f772c5a4f
1 changed file with 2 additions and 0 deletions
|
|
@@ -91,6 +91,8 @@ def load_model(
|
||||||
|
|
||||||
if device == "xpu":
|
if device == "xpu":
|
||||||
import intel_extension_for_pytorch as ipex
|
import intel_extension_for_pytorch as ipex
|
||||||
|
print("Convert model to half precision...")
|
||||||
|
model = model.half()
|
||||||
model = model.to('xpu')
|
model = model.to('xpu')
|
||||||
|
|
||||||
return model, tokenizer
|
return model, tokenizer
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue