diff --git a/python/llm/src/bigdl/llm/transformers/model.py b/python/llm/src/bigdl/llm/transformers/model.py
index 70a353a6..f9424ccc 100644
--- a/python/llm/src/bigdl/llm/transformers/model.py
+++ b/python/llm/src/bigdl/llm/transformers/model.py
@@ -115,6 +115,9 @@ class _BaseAutoModelClass:
         # Speed up when loading model
         kwargs["low_cpu_mem_usage"] = True
 
+        # set default torch_dtype='auto'
+        kwargs["torch_dtype"] = kwargs.get("torch_dtype", 'auto')
+
         qtype = ggml_tensor_qtype[bigdl_transformers_low_bit]
         # Note that the int4 linear layers cannot currently
         # be recorded in huggingface Pretrained Model or AutoConfig,
@@ -125,8 +128,6 @@ class _BaseAutoModelClass:
 
         # Avoid KeyError
         kwargs["ignore_mismatched_sizes"] = True
-        # Avoid reading from local file at the first initialization
-        kwargs["state_dict"] = {}
 
         # Maybe needed when extract_local_archive_file
         subfolder = kwargs.get("subfolder", "")
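
For context, a minimal sketch of how this default behaves from the caller's side. The checkpoint name is a placeholder and the `load_in_4bit` usage follows the public bigdl-llm pattern; neither is part of this patch:

```python
import torch
from bigdl.llm.transformers import AutoModelForCausalLM

# No torch_dtype passed: kwargs.get("torch_dtype", 'auto') fills in
# 'auto', so transformers infers the dtype from the checkpoint rather
# than defaulting to float32.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder model id, for illustration
    load_in_4bit=True,
)

# An explicit dtype is left untouched, since .get() only supplies the
# default when the key is absent.
model_fp16 = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder model id, for illustration
    load_in_4bit=True,
    torch_dtype=torch.float16,
)
```

Dropping the empty `state_dict` shortcut is presumably related: with `torch_dtype='auto'`, transformers may fall back to inspecting the checkpoint's weights to infer a dtype, which an empty state dict would defeat.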