remove load_in_8bit usage as it is not supported a long time ago (#12779)

This commit is contained in:
Yishuo Wang 2025-02-07 11:21:29 +08:00 committed by GitHub
parent 9e9b6c9f2b
commit d0d9c9d636
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 9 additions and 15 deletions

View file

@ -1,5 +1,5 @@
# Harness Evaluation
[Harness evaluation](https://github.com/EleutherAI/lm-evaluation-harness) allows users to easily get accuracy on various datasets. Here we have enabled harness evaluation with IPEX-LLM under
[Harness evaluation](https://github.com/EleutherAI/lm-evaluation-harness) allows users to easily get accuracy on various datasets. Here we have enabled harness evaluation with IPEX-LLM under
[Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) settings.
Before running, make sure to have [ipex-llm](../../../README.md) installed.
@ -53,21 +53,21 @@ AutoModelForCausalLM.from_pretrained = partial(AutoModelForCausalLM.from_pretrai
```
to the following codes to load the low bit models.
```python
class ModifiedAutoModelForCausalLM(AutoModelForCausalLM):
class ModifiedAutoModelForCausalLM(AutoModelForCausalLM):
@classmethod
def load_low_bit(cls,*args,**kwargs):
for k in ['load_in_low_bit', 'device_map', 'max_memory', 'load_in_8bit','load_in_4bit']:
for k in ['load_in_low_bit', 'device_map', 'max_memory','load_in_4bit']:
kwargs.pop(k)
return super().load_low_bit(*args, **kwargs)
AutoModelForCausalLM.from_pretrained=partial(ModifiedAutoModelForCausalLM.load_low_bit, *self.bigdl_llm_kwargs)
```
### 2. Please pass the argument `trust_remote_code=True` to allow custom code to be run.
`lm-evaluation-harness` doesn't pass `trust_remote_code=true` argument to datasets. This may cause errors similar to the following one:
`lm-evaluation-harness` doesn't pass `trust_remote_code=true` argument to datasets. This may cause errors similar to the following one:
```
RuntimeError: Job config of task=winogrande, precision=sym_int4 failed.
RuntimeError: Job config of task=winogrande, precision=sym_int4 failed.
Error Message: The repository for winogrande contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/winogrande.
please pass the argument trust_remote_code=True to allow custom code to be run.
please pass the argument trust_remote_code=True to allow custom code to be run.
```
Please refer to these:

View file

@ -3,7 +3,6 @@ base_model: meta-llama/Meta-Llama-3-8B
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
load_in_4bit: true
strict: false

View file

@ -3,7 +3,6 @@ base_model: NousResearch/Llama-2-7b-hf
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
load_in_4bit: true
strict: false

View file

@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true
load_in_8bit: false
load_in_4bit: true
strict: false

View file

@ -312,7 +312,6 @@ def get_model_answers(
torch_dtype=torch.float16,
# torch_dtype=torch.float32,
low_cpu_mem_usage=True,
# load_in_8bit=True,
total_token=args.total_token,
depth=args.depth,
top_k=args.top_k,
@ -384,7 +383,7 @@ def get_model_answers(
]
if len(stop_token_ids_index) > 0:
output_ids = output_ids[: stop_token_ids_index[0]]
output = tokenizer.decode(
output_ids,
spaces_between_special_tokens=False,
@ -572,8 +571,8 @@ if __name__ == "__main__":
)
parser.add_argument(
"--enable-ipex-llm",
action='store_true',
"--enable-ipex-llm",
action='store_true',
help="Enable ipex-llm optimization"
)
args = parser.parse_args()

View file

@ -233,7 +233,6 @@ class _BaseAutoModelClass:
optimize_model = False
kwargs["modules_to_not_convert"] = ["lm_head"]
load_in_8bit = kwargs.pop("load_in_8bit", False)
from ipex_llm.llm_patching import bigdl_patched
if bigdl_patched == 'Train':
global patched_training_mode

View file

@ -117,7 +117,6 @@ class _BaseAutoModelClass:
# ignore following arguments
ignore_argument(kwargs, "model_hub")
ignore_argument(kwargs, "load_in_4bit")
ignore_argument(kwargs, "load_in_8bit")
ignore_argument(kwargs, "imatrix")
ignore_argument(kwargs, "cpu_embedding")
ignore_argument(kwargs, "embedding_qtype")