Fix Loader issue with dtype fp16 (#10907)
parent c9fac8c26b
commit fbcd7bc737
1 changed file with 2 additions and 0 deletions
@@ -59,6 +59,8 @@ def load_model(
     model_kwargs["trust_remote_code"] = True
     if low_bit == "bf16":
         model_kwargs.update({"load_in_low_bit": low_bit, "torch_dtype": torch.bfloat16})
+    elif low_bit == "fp16":
+        model_kwargs.update({"load_in_low_bit": low_bit, "torch_dtype": torch.float16})
     else:
         model_kwargs.update({"load_in_low_bit": low_bit, "torch_dtype": 'auto'})
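For context, below is a minimal sketch of the branching that this commit completes, assuming the surrounding load_model code assembles model_kwargs and later forwards them to a from_pretrained-style loader. The helper name build_model_kwargs and the commented usage at the end are illustrative assumptions, not part of the diff; only the fp16 branch corresponds to the two added lines.

import torch

# Hypothetical helper mirroring the diff context; only the fp16 branch
# reflects the two lines added by this commit.
def build_model_kwargs(low_bit: str) -> dict:
    model_kwargs = {"trust_remote_code": True}
    if low_bit == "bf16":
        model_kwargs.update({"load_in_low_bit": low_bit, "torch_dtype": torch.bfloat16})
    elif low_bit == "fp16":
        # New branch: "fp16" now maps to torch.float16 instead of falling
        # through to the 'auto' dtype in the else branch.
        model_kwargs.update({"load_in_low_bit": low_bit, "torch_dtype": torch.float16})
    else:
        model_kwargs.update({"load_in_low_bit": low_bit, "torch_dtype": "auto"})
    return model_kwargs

# Assumed usage downstream of load_model (not shown in this diff):
# model = AutoModelForCausalLM.from_pretrained(model_path, **build_model_kwargs("fp16"))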