From ae7302a654a85006974549450bfe51b7275294af Mon Sep 17 00:00:00 2001
From: "Chu,Youcheng"
Date: Fri, 30 Aug 2024 13:43:48 +0800
Subject: [PATCH] add gptq option for ppl test (#11921)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat:add gptq for ppl

* fix: add an empty line

* fix: add an empty line

* fix: remove an empty line

* Resolve comments

* Resolve comments

* Resolve comments
---
 .../dev/benchmark/perplexity/run_wikitext.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/python/llm/dev/benchmark/perplexity/run_wikitext.py b/python/llm/dev/benchmark/perplexity/run_wikitext.py
index 92426a86..061c87ba 100644
--- a/python/llm/dev/benchmark/perplexity/run_wikitext.py
+++ b/python/llm/dev/benchmark/perplexity/run_wikitext.py
@@ -38,12 +38,24 @@ args = parser.parse_args()
 
 if args.precision == "fp16": # ipex fp16
     from transformers import AutoModelForCausalLM
-    model = AutoModelForCausalLM.from_pretrained(args.model_path, use_cache=args.use_cache, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(args.model_path,
+                                                 use_cache=args.use_cache,
+                                                 trust_remote_code=True)
     model = model.half()
+elif 'gptq' in args.model_path.lower(): # ipex-llm gptq
+    from ipex_llm.transformers import AutoModelForCausalLM
+    model = AutoModelForCausalLM.from_pretrained(args.model_path,
+                                                 load_in_4bit=True,
+                                                 torch_dtype=torch.float,
+                                                 use_cache=args.use_cache,
+                                                 trust_remote_code=True)
 else: # ipex-llm
     from ipex_llm.transformers import AutoModelForCausalLM
-    model = AutoModelForCausalLM.from_pretrained(args.model_path, load_in_low_bit=args.precision,
-                                                 use_cache=args.use_cache, trust_remote_code=True, mixed_precision= args.mixed_precision)
+    model = AutoModelForCausalLM.from_pretrained(args.model_path,
+                                                 load_in_low_bit=args.precision,
+                                                 use_cache=args.use_cache,
+                                                 trust_remote_code=True,
+                                                 mixed_precision=args.mixed_precision)
     model = model.half()
 model = model.to(args.device)
 model = model.eval()
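
Reference note (not part of the patch): a minimal standalone sketch of how the new GPTQ branch loads a checkpoint. Only the from_pretrained arguments are taken from the diff above; the model path is a placeholder, and use_cache=True stands in for the script's --use-cache argument.

    # Sketch: load a GPTQ checkpoint through ipex-llm, mirroring the new elif branch.
    import torch
    from ipex_llm.transformers import AutoModelForCausalLM

    model_path = "path/to/some-gptq-model"  # placeholder: any directory whose name contains "gptq"

    if 'gptq' in model_path.lower():
        # GPTQ weights are loaded as 4-bit with fp32 compute (torch_dtype=torch.float);
        # note that, unlike the other branches, model.half() is not called afterwards.
        model = AutoModelForCausalLM.from_pretrained(model_path,
                                                     load_in_4bit=True,
                                                     torch_dtype=torch.float,
                                                     use_cache=True,
                                                     trust_remote_code=True)
        model = model.eval()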