remove load_in_8bit usage, as it has not been supported for a long time (#12779)

Yishuo Wang committed on 2025-02-07 11:21:29 +08:00 (via GitHub)
parent 9e9b6c9f2b
commit d0d9c9d636
7 changed files with 9 additions and 15 deletions
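
Since load_in_8bit is no longer honored anywhere in the codebase, 8-bit loading goes through the low-bit path instead. Below is a minimal sketch of the replacement call, assuming the ipex-llm from_pretrained API with load_in_low_bit; the model id is only a placeholder, not part of this commit:

    from ipex_llm.transformers import AutoModelForCausalLM

    # Sketch only: load a model in 8-bit via the low-bit path; "sym_int8"
    # stands in for the removed load_in_8bit=True flag.
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Meta-Llama-3-8B",  # placeholder model id
        load_in_low_bit="sym_int8",
        trust_remote_code=True,
    )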


@@ -56,7 +56,7 @@ to the following codes to load the low bit models.
 class ModifiedAutoModelForCausalLM(AutoModelForCausalLM):
     @classmethod
     def load_low_bit(cls,*args,**kwargs):
-        for k in ['load_in_low_bit', 'device_map', 'max_memory', 'load_in_8bit','load_in_4bit']:
+        for k in ['load_in_low_bit', 'device_map', 'max_memory','load_in_4bit']:
             kwargs.pop(k)
         return super().load_low_bit(*args, **kwargs)
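
For context, the patched subclass in this hunk pops the loading-related kwargs before delegating to the low-bit loader, and since each key is popped without a default, a caller has to pass every listed key. A hedged usage sketch follows; the checkpoint path and argument values are placeholders, not from this commit:

    # Hypothetical call to the subclass shown above; the override pops each
    # listed key unconditionally, so all of them must be present in kwargs.
    model = ModifiedAutoModelForCausalLM.load_low_bit(
        "./llama-3-8b-low-bit",  # placeholder path to a saved low-bit model
        load_in_low_bit="sym_int4",
        device_map="auto",
        max_memory=None,
        load_in_4bit=False,
    )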


@@ -3,7 +3,6 @@ base_model: meta-llama/Meta-Llama-3-8B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-load_in_8bit: false
 load_in_4bit: true
 strict: false


@@ -3,7 +3,6 @@ base_model: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-load_in_8bit: false
 load_in_4bit: true
 strict: false


@@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
-load_in_8bit: false
 load_in_4bit: true
 strict: false


@@ -312,7 +312,6 @@ def get_model_answers(
         torch_dtype=torch.float16,
         # torch_dtype=torch.float32,
         low_cpu_mem_usage=True,
-        # load_in_8bit=True,
         total_token=args.total_token,
         depth=args.depth,
         top_k=args.top_k,


@@ -233,7 +233,6 @@ class _BaseAutoModelClass:
             optimize_model = False
             kwargs["modules_to_not_convert"] = ["lm_head"]
-        load_in_8bit = kwargs.pop("load_in_8bit", False)
         from ipex_llm.llm_patching import bigdl_patched
         if bigdl_patched == 'Train':
             global patched_training_mode


@@ -117,7 +117,6 @@ class _BaseAutoModelClass:
         # ignore following arguments
         ignore_argument(kwargs, "model_hub")
         ignore_argument(kwargs, "load_in_4bit")
-        ignore_argument(kwargs, "load_in_8bit")
         ignore_argument(kwargs, "imatrix")
         ignore_argument(kwargs, "cpu_embedding")
         ignore_argument(kwargs, "embedding_qtype")