parent
2c0c9fecd0
commit
edccfb2ed3
1 changed files with 12 additions and 0 deletions
|
|
@ -138,6 +138,12 @@ def load_low_bit(model, model_path):
|
||||||
"model should be a instance of "
|
"model should be a instance of "
|
||||||
f"`torch.nn.Module`, but got {type(model)} at last.")
|
f"`torch.nn.Module`, but got {type(model)} at last.")
|
||||||
if low_bit:
|
if low_bit:
|
||||||
|
invalidInputError(isinstance(model, torch.nn.Module),
|
||||||
|
"model should be an instance of `torch.nn.Module`, "
|
||||||
|
f"but got {type(model)} at last.")
|
||||||
|
invalidInputError(model.device.type in ('cpu', 'meta'),
|
||||||
|
"Expect model on device `cpu` or `meta`, "
|
||||||
|
f"but got device type {model.device.type}")
|
||||||
qtype = ggml_tensor_qtype[low_bit]
|
qtype = ggml_tensor_qtype[low_bit]
|
||||||
model = ggml_convert_low_bit(model, qtype=qtype, convert_shape_only=True)
|
model = ggml_convert_low_bit(model, qtype=qtype, convert_shape_only=True)
|
||||||
|
|
||||||
|
|
@ -173,6 +179,12 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
|
||||||
invalidInputError(low_bit in ggml_tensor_qtype,
|
invalidInputError(low_bit in ggml_tensor_qtype,
|
||||||
f"Unknown load_in_low_bit value: {low_bit}, expected:"
|
f"Unknown load_in_low_bit value: {low_bit}, expected:"
|
||||||
f" sym_int4, asym_int4, sym_int5, asym_int5 or sym_int8.")
|
f" sym_int4, asym_int4, sym_int5, asym_int5 or sym_int8.")
|
||||||
|
invalidInputError(isinstance(model, torch.nn.Module),
|
||||||
|
"model should be an instance of "
|
||||||
|
f"`torch.nn.Module`, but got {type(model)} at last.")
|
||||||
|
invalidInputError(model.device.type == 'cpu',
|
||||||
|
"Expect model on device `cpu`, "
|
||||||
|
f"but got device type {model.device.type}")
|
||||||
qtype = ggml_tensor_qtype[low_bit]
|
qtype = ggml_tensor_qtype[low_bit]
|
||||||
model = ggml_convert_low_bit(model, qtype=qtype, optimize_model=optimize_llm)
|
model = ggml_convert_low_bit(model, qtype=qtype, optimize_model=optimize_llm)
|
||||||
# add save_low_bit to pretrained model dynamically
|
# add save_low_bit to pretrained model dynamically
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue