Update npu example and all in one benchmark (#11766)

Jin, Qiao 2024-08-12 16:46:46 +08:00 committed by GitHub
parent 57d177738d
commit 05989ad0f9
3 changed files with 10 additions and 8 deletions


@@ -580,15 +580,16 @@ def transformers_int4_npu_win(repo_id,
     # which convert the relevant layers in the model into INT4 format
     st = time.perf_counter()
     if repo_id in CHATGLM_IDS:
-        model = AutoModel.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True, torch_dtype='auto').eval()
+        model = AutoModel.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True,
+                                          torch_dtype='auto', attn_implementation="eager").eval()
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     elif repo_id in LLAMA_IDS:
         model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True,
-                                                     use_cache=True).eval()
+                                                     use_cache=True, attn_implementation="eager").eval()
         tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
     else:
         model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True,
-                                                     use_cache=True).eval()
+                                                     use_cache=True, attn_implementation="eager").eval()
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     end = time.perf_counter()
     load_time = end - st
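Every NPU load path in the benchmark now passes `attn_implementation="eager"`, pinning the model to eager attention rather than letting transformers pick an SDPA kernel. A minimal sketch of the resulting load-and-time pattern; the `ipex_llm.transformers.npu_model` import path, the model id, and the `sym_int4` low-bit value are illustrative assumptions, not taken from this diff:

```python
# Sketch of the benchmark's NPU loading pattern after this change.
# Assumptions: ipex-llm's transformers-compatible NPU AutoModel API;
# model path and low_bit value are placeholders.
import time

from ipex_llm.transformers.npu_model import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "meta-llama/Llama-2-7b-chat-hf"  # repo id or local checkpoint folder
low_bit = "sym_int4"

st = time.perf_counter()
# attn_implementation="eager" forces the eager attention code path,
# matching what the benchmark now requests for all NPU model loads
model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit,
                                             trust_remote_code=True, use_cache=True,
                                             attn_implementation="eager").eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
load_time = time.perf_counter() - st
print(f"load time: {load_time:.2f}s")
```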


@@ -29,11 +29,11 @@ In the example [generate.py](./generate.py), we show a basic use case for a Llam
 #### 1.1 Installation on Windows
 We suggest using conda to manage environment:
 ```bash
-conda create -n llm python=3.10 libuv
+conda create -n llm python=3.10
 conda activate llm
-# below command will install intel_extension_for_pytorch==2.1.10+xpu as default
-pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+# install ipex-llm with 'all' option
+pip install --pre --upgrade ipex-llm[all]
 # below command will install intel_npu_acceleration_library
 pip install intel-npu-acceleration-library==1.3
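The README switches from the XPU wheel (and its extra index URL) to the plain PyPI `ipex-llm[all]` package, alongside the pinned `intel-npu-acceleration-library`. As an optional sanity check after installation (a hypothetical helper, not part of the example), one can confirm both distributions resolved:

```python
# Optional post-install check (not part of the example): verify that the two
# packages installed by the README are visible to pip metadata.
import importlib.metadata as md

for dist in ("ipex-llm", "intel-npu-acceleration-library"):
    try:
        print(f"{dist}=={md.version(dist)}")
    except md.PackageNotFoundError:
        print(f"{dist} is NOT installed")
```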


@@ -24,7 +24,7 @@ from transformers import AutoTokenizer
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Predict Tokens using `generate()` API for npu model')
-    parser.add_argument('--repo-id-or-model-path', type=str, default="D:\llm-models\Llama-2-7b-chat-hf",
+    parser.add_argument('--repo-id-or-model-path', type=str, default="meta-llama/Llama-2-7b-chat-hf",
                         help='The huggingface repo id for the Llama2 model to be downloaded'
                              ', or the path to the huggingface checkpoint folder')
     parser.add_argument('--prompt', type=str, default="Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun",
@@ -40,7 +40,8 @@ if __name__ == '__main__':
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True,
-                                                 load_in_low_bit=args.load_in_low_bit)
+                                                 load_in_low_bit=args.load_in_low_bit,
+                                                 attn_implementation="eager")
     print(model)
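The default now points at the Hugging Face repo id instead of a hard-coded Windows path (whose unescaped backslashes were also fragile inside a Python string literal), and eager attention is requested at load time. The example's end-to-end flow then looks roughly like the sketch below; the `npu_model` import path and the `sym_int4` default for `--load-in-low-bit` are assumptions for illustration:

```python
# Sketch of generate.py's load-and-generate flow after this change.
# Assumptions: the ipex_llm.transformers.npu_model import path and a
# "sym_int4" low-bit default; the gated Llama-2 repo requires HF access approval.
import torch
from ipex_llm.transformers.npu_model import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "meta-llama/Llama-2-7b-chat-hf"  # repo id or local checkpoint folder

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True,
                                             load_in_low_bit="sym_int4",
                                             attn_implementation="eager")

prompt = "Once upon a time, there existed a little girl who liked to have adventures."
with torch.inference_mode():
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```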