From 7f772c5a4f32ee1caa16e53dd587660dce6f5a9d Mon Sep 17 00:00:00 2001
From: Guancheng Fu <110874468+gc-fu@users.noreply.github.com>
Date: Fri, 24 May 2024 15:41:14 +0800
Subject: [PATCH] Add half precision for fastchat models (#11130)

---
 python/llm/src/ipex_llm/transformers/loader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/llm/src/ipex_llm/transformers/loader.py b/python/llm/src/ipex_llm/transformers/loader.py
index acaddb7a..7eaf679c 100644
--- a/python/llm/src/ipex_llm/transformers/loader.py
+++ b/python/llm/src/ipex_llm/transformers/loader.py
@@ -91,6 +91,8 @@ def load_model(
 
     if device == "xpu":
         import intel_extension_for_pytorch as ipex
+        print("Convert model to half precision...")
+        model = model.half()
         model = model.to('xpu')
 
     return model, tokenizer