diff --git a/python/llm/src/bigdl/llm/optimize.py b/python/llm/src/bigdl/llm/optimize.py
index f1f12283..3169660c 100644
--- a/python/llm/src/bigdl/llm/optimize.py
+++ b/python/llm/src/bigdl/llm/optimize.py
@@ -138,6 +138,12 @@ def load_low_bit(model, model_path):
                       "model should be a instance of "
                       f"`torch.nn.Module`, but got {type(model)} at last.")
     if low_bit:
+        invalidInputError(isinstance(model, torch.nn.Module),
+                          "model should be an instance of `torch.nn.Module`, "
+                          f"but got {type(model)} at last.")
+        invalidInputError(model.device.type in ('cpu', 'meta'),
+                          "Expect model on device `cpu` or `meta`, "
+                          f"but got device type {model.device.type}")
         qtype = ggml_tensor_qtype[low_bit]
         model = ggml_convert_low_bit(model, qtype=qtype,
                                      convert_shape_only=True)
@@ -173,6 +179,12 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
     invalidInputError(low_bit in ggml_tensor_qtype,
                       f"Unknown load_in_low_bit value: {low_bit}, expected:"
                       f" sym_int4, asym_int4, sym_int5, asym_int5 or sym_int8.")
+    invalidInputError(isinstance(model, torch.nn.Module),
+                      "model should be an instance of "
+                      f"`torch.nn.Module`, but got {type(model)} at last.")
+    invalidInputError(model.device.type == 'cpu',
+                      "Expect model on device `cpu`, "
+                      f"but got device type {model.device.type}")
     qtype = ggml_tensor_qtype[low_bit]
     model = ggml_convert_low_bit(model, qtype=qtype, optimize_model=optimize_llm)
     # add save_low_bit to pretrained model dynamically
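
A quick sketch of how the new guards surface to a caller (illustrative only, not part of the patch): `TinyModel` is a made-up stand-in, and it assumes `invalidInputError` raises when its condition is falsy, as the calls in the diff imply. One thing worth noting for review: both device checks read `model.device.type`, which transformers models expose as a property but a plain `torch.nn.Module` does not, so the sketch defines one explicitly.

```python
# Illustrative sketch, not part of the patch. Assumes `invalidInputError`
# raises when its condition is False, and that `optimize_model` is
# importable from `bigdl.llm` (the package this file belongs to).
import torch
from bigdl.llm import optimize_model


class TinyModel(torch.nn.Module):
    """Toy stand-in for a real LLM. The `device` property matters: the new
    checks read `model.device.type`, which transformers models expose but a
    bare `torch.nn.Module` does not."""

    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(64, 64)

    @property
    def device(self):
        return next(self.parameters()).device


model = TinyModel()                                 # CPU-resident, has .device
model = optimize_model(model, low_bit='sym_int4')   # passes both new checks

try:
    # Fails fast on the isinstance check instead of erroring deep inside
    # ggml_convert_low_bit.
    optimize_model("not a module", low_bit='sym_int4')
except Exception as e:
    print(e)  # "model should be an instance of `torch.nn.Module`, ..."
```

If models without a `device` attribute are in scope for these APIs, the device checks may want a `hasattr(model, 'device')` guard as well; otherwise such callers would see an `AttributeError` rather than the new message.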