remove load_in_8bit usage as it has not been supported for a long time (#12779)
This commit is contained in:
parent 9e9b6c9f2b
commit d0d9c9d636
7 changed files with 9 additions and 15 deletions
@@ -1,5 +1,5 @@
# Harness Evaluation
[Harness evaluation](https://github.com/EleutherAI/lm-evaluation-harness) allows users to easily get accuracy on various datasets. Here we have enabled harness evaluation with IPEX-LLM under
[Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) settings.
Before running, make sure to have [ipex-llm](../../../README.md) installed.
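For context, a minimal sketch of what a harness run looks like from Python (assuming lm-evaluation-harness 0.4.x and a generic Hugging Face checkpoint; the IPEX-LLM integration in this repo layers low-bit loading on top, and the model and task names are illustrative):

```python
# Minimal sketch of a harness run, assuming lm-evaluation-harness >= 0.4.
# The model name and task choice are illustrative; the IPEX-LLM integration
# substitutes its own low-bit model loader for the plain "hf" backend.
from lm_eval import simple_evaluate

results = simple_evaluate(
    model="hf",                                   # built-in Hugging Face backend
    model_args="pretrained=meta-llama/Meta-Llama-3-8B",
    tasks=["winogrande"],                         # an Open LLM Leaderboard task
)
print(results["results"])
```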
@@ -53,21 +53,21 @@ AutoModelForCausalLM.from_pretrained = partial(AutoModelForCausalLM.from_pretrained
```
to the following code to load the low-bit models:
```python
class ModifiedAutoModelForCausalLM(AutoModelForCausalLM):
    @classmethod
    def load_low_bit(cls, *args, **kwargs):
        # strip loader-only kwargs that load_low_bit does not accept
-        for k in ['load_in_low_bit', 'device_map', 'max_memory', 'load_in_8bit', 'load_in_4bit']:
+        for k in ['load_in_low_bit', 'device_map', 'max_memory', 'load_in_4bit']:
            kwargs.pop(k, None)
        return super().load_low_bit(*args, **kwargs)

AutoModelForCausalLM.from_pretrained = partial(ModifiedAutoModelForCausalLM.load_low_bit, **self.bigdl_llm_kwargs)
```
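For intuition, a hedged sketch of what the `partial` rebinding does (the class, paths, and kwargs below are illustrative stand-ins, not values from this repo):

```python
# Illustrative: after the patch, any harness call to from_pretrained(...)
# actually invokes load_low_bit with bigdl_llm_kwargs pre-bound, so a
# previously saved low-bit checkpoint is loaded instead of a full model.
from functools import partial

class Loader:
    @classmethod
    def load_low_bit(cls, path, **kwargs):
        print(f"loading low-bit model from {path} with {kwargs}")

bigdl_llm_kwargs = {"load_in_low_bit": "sym_int4"}
from_pretrained = partial(Loader.load_low_bit, **bigdl_llm_kwargs)
from_pretrained("./llama-3-8b-sym-int4")  # kwargs arrive pre-bound
```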
### 2. Pass the argument `trust_remote_code=True` to allow custom code to be run
`lm-evaluation-harness` doesn't pass the `trust_remote_code=True` argument to `datasets`. This may cause errors similar to the following:
```
RuntimeError: Job config of task=winogrande, precision=sym_int4 failed.
Error Message: The repository for winogrande contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/winogrande.
Please pass the argument trust_remote_code=True to allow custom code to be run.
```
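A common workaround, hedged: `HF_DATASETS_TRUST_REMOTE_CODE` is honored by recent `datasets` releases, and the monkey-patch variant mirrors the `partial` trick from the previous section.

```python
# Sketch: make datasets trust remote code before lm-evaluation-harness
# loads any task. HF_DATASETS_TRUST_REMOTE_CODE is honored by recent
# `datasets` releases.
import os
os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = "1"

# Alternative: wrap datasets.load_dataset so every call passes the flag.
import datasets
from functools import partial
datasets.load_dataset = partial(datasets.load_dataset, trust_remote_code=True)
```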
Please refer to these:
@@ -3,7 +3,6 @@ base_model: meta-llama/Meta-Llama-3-8B
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

-load_in_8bit: false
load_in_4bit: true
strict: false
@@ -3,7 +3,6 @@ base_model: NousResearch/Llama-2-7b-hf
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer

-load_in_8bit: false
load_in_4bit: true
strict: false
@@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true

-load_in_8bit: false
load_in_4bit: true
strict: false
@@ -312,7 +312,6 @@ def get_model_answers(
            torch_dtype=torch.float16,
            # torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
-            # load_in_8bit=True,
            total_token=args.total_token,
            depth=args.depth,
            top_k=args.top_k,
@@ -384,7 +383,7 @@ def get_model_answers(
            ]
            if len(stop_token_ids_index) > 0:
                output_ids = output_ids[: stop_token_ids_index[0]]

            output = tokenizer.decode(
                output_ids,
                spaces_between_special_tokens=False,
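For readers skimming the hunk, the truncation logic in isolation (a self-contained sketch; the names mirror the diff, but the function wrapper and sample values are ours):

```python
# Illustrative, self-contained version of the stop-token truncation above:
# find the first position where any stop token appears and cut there.
def truncate_at_stop_tokens(output_ids: list[int], stop_token_ids: list[int]) -> list[int]:
    stop_token_ids_index = [
        i for i, tok in enumerate(output_ids) if tok in stop_token_ids
    ]
    if len(stop_token_ids_index) > 0:
        output_ids = output_ids[: stop_token_ids_index[0]]
    return output_ids

assert truncate_at_stop_tokens([5, 7, 2, 9], stop_token_ids=[2]) == [5, 7]
```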
@@ -572,8 +571,8 @@ if __name__ == "__main__":
    )

    parser.add_argument(
        "--enable-ipex-llm",
        action='store_true',
        help="Enable ipex-llm optimization"
    )
    args = parser.parse_args()
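A quick sketch of what `action='store_true'` gives the script (the simulated argv is illustrative):

```python
# Sketch: how the flag lands in code. args.enable_ipex_llm is False unless
# --enable-ipex-llm is passed on the command line; argparse converts the
# dashes in the flag name to underscores on the namespace.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable-ipex-llm",
    action='store_true',
    help="Enable ipex-llm optimization",
)
args = parser.parse_args(["--enable-ipex-llm"])  # simulate CLI input
assert args.enable_ipex_llm is True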
@@ -233,7 +233,6 @@ class _BaseAutoModelClass:
            optimize_model = False
            kwargs["modules_to_not_convert"] = ["lm_head"]

-        load_in_8bit = kwargs.pop("load_in_8bit", False)
        from ipex_llm.llm_patching import bigdl_patched
        if bigdl_patched == 'Train':
            global patched_training_mode
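The deleted line shows the defensive-pop pattern: strip a kwarg so it never reaches the underlying loader. A self-contained sketch of that pattern (the `pop_unsupported` helper is hypothetical, not ipex-llm API):

```python
# Hypothetical sketch of the pattern this hunk deletes: pop a kwarg so it
# never reaches the underlying loader, warning that it is unsupported.
import warnings

def pop_unsupported(kwargs: dict, name: str, default=None):
    if name in kwargs:
        warnings.warn(f"`{name}` is not supported and will be ignored")
    return kwargs.pop(name, default)

kwargs = {"load_in_8bit": True, "device_map": "auto"}
pop_unsupported(kwargs, "load_in_8bit")  # warns; key is removed
assert "load_in_8bit" not in kwargs
```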
@@ -117,7 +117,6 @@ class _BaseAutoModelClass:
        # ignore following arguments
        ignore_argument(kwargs, "model_hub")
        ignore_argument(kwargs, "load_in_4bit")
-        ignore_argument(kwargs, "load_in_8bit")
        ignore_argument(kwargs, "imatrix")
        ignore_argument(kwargs, "cpu_embedding")
        ignore_argument(kwargs, "embedding_qtype")
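`ignore_argument` itself is outside this hunk; a plausible sketch of such a helper (an assumption, not the actual ipex-llm implementation):

```python
# Hypothetical stand-in for the ignore_argument helper used above: drop a
# kwarg if present and tell the user it has no effect on this code path.
import warnings

def ignore_argument(kwargs: dict, name: str) -> None:
    if name in kwargs:
        kwargs.pop(name)
        warnings.warn(f"argument `{name}` is ignored by this code path")
```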