remove load_in_8bit usage as it has not been supported for a long time (#12779)
This commit is contained in:
parent
9e9b6c9f2b
commit
d0d9c9d636
7 changed files with 9 additions and 15 deletions
|
|
@ -56,7 +56,7 @@ to the following codes to load the low bit models.
|
|||
class ModifiedAutoModelForCausalLM(AutoModelForCausalLM):
|
||||
@classmethod
|
||||
def load_low_bit(cls,*args,**kwargs):
|
||||
for k in ['load_in_low_bit', 'device_map', 'max_memory', 'load_in_8bit','load_in_4bit']:
|
||||
for k in ['load_in_low_bit', 'device_map', 'max_memory','load_in_4bit']:
|
||||
kwargs.pop(k)
|
||||
return super().load_low_bit(*args, **kwargs)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ base_model: meta-llama/Meta-Llama-3-8B
|
|||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ base_model: NousResearch/Llama-2-7b-hf
|
|||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
|
|||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
|
|
|
|||
|
|
@ -312,7 +312,6 @@ def get_model_answers(
|
|||
torch_dtype=torch.float16,
|
||||
# torch_dtype=torch.float32,
|
||||
low_cpu_mem_usage=True,
|
||||
# load_in_8bit=True,
|
||||
total_token=args.total_token,
|
||||
depth=args.depth,
|
||||
top_k=args.top_k,
|
||||
|
|
|
|||
|
|
@ -233,7 +233,6 @@ class _BaseAutoModelClass:
|
|||
optimize_model = False
|
||||
kwargs["modules_to_not_convert"] = ["lm_head"]
|
||||
|
||||
load_in_8bit = kwargs.pop("load_in_8bit", False)
|
||||
from ipex_llm.llm_patching import bigdl_patched
|
||||
if bigdl_patched == 'Train':
|
||||
global patched_training_mode
|
||||
|
|
|
|||
|
|
@ -117,7 +117,6 @@ class _BaseAutoModelClass:
|
|||
# ignore following arguments
|
||||
ignore_argument(kwargs, "model_hub")
|
||||
ignore_argument(kwargs, "load_in_4bit")
|
||||
ignore_argument(kwargs, "load_in_8bit")
|
||||
ignore_argument(kwargs, "imatrix")
|
||||
ignore_argument(kwargs, "cpu_embedding")
|
||||
ignore_argument(kwargs, "embedding_qtype")
|
||||
|
|
|
|||
Loading…
Reference in a new issue