Fix the non-stop generation issue in the llama3 examples (#10860)

* fix the non-stop generation issue in GPU/HF-Transformers-AutoModels

* fix the non-stop generation issue in GPU/PyTorch-Models/Model/llama3

* fix the non-stop generation issue in CPU/HF-Transformers-AutoModels/Model/llama3

* fix the non-stop generation issue in CPU/PyTorch-Models/Model/llama3

* update the output in readme

* update format

* add reference

* update prompt format

* update output format in readme

* update example output in readme
hxsz1997 2024-04-23 19:10:09 +08:00 committed by GitHub
parent 5c9eb5d0f5
commit 328b1a1de9
8 changed files with 70 additions and 28 deletions

CPU/HF-Transformers-AutoModels/Model/llama3/README.md

@@ -57,12 +57,16 @@ numactl -C 0-47 -m 0 python ./generate.py
 Inference time: xxxx s
 -------------------- Prompt --------------------
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>
 What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 -------------------- Output (skip_special_tokens=False) --------------------
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>
-What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-A question that gets to the heart of the 21st century!
-Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that
+What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that would typically require human intelligence, such as:
+1. Learning: AI
 ```

CPU/HF-Transformers-AutoModels/Model/llama3/generate.py

@@ -31,13 +31,13 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
     prompt_texts = [f'<|begin_of_text|>']
 
     if system_prompt != '':
-        prompt_texts.append(f'<|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>')
 
     for history_input, history_response in chat_history:
-        prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n{history_input.strip()}<|eot_id|>')
-        prompt_texts.append(f'<|start_header_id|>assistant<|end_header_id|>\n{history_response.strip()}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{history_input.strip()}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>assistant<|end_header_id|>\n\n{history_response.strip()}<|eot_id|>')
 
-    prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n{user_input.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n')
+    prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{user_input.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n')
     return ''.join(prompt_texts)
 
 if __name__ == '__main__':
@@ -63,6 +63,12 @@ if __name__ == '__main__':
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
+    # here the terminators refer to https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct#transformers-automodelforcausallm
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+    ]
+
     # Generate predicted tokens
     with torch.inference_mode():
@@ -70,6 +76,7 @@ if __name__ == '__main__':
         input_ids = tokenizer.encode(prompt, return_tensors="pt")
         st = time.time()
         output = model.generate(input_ids,
+                                eos_token_id=terminators,
                                 max_new_tokens=args.n_predict)
         end = time.time()
         output_str = tokenizer.decode(output[0], skip_special_tokens=False)
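Why the examples previously failed to stop: Llama 3 ends each assistant turn with `<|eot_id|>`, while the tokenizer's default eos token is `<|end_of_text|>`, so `generate()` never saw a stop signal and ran until `max_new_tokens`. Passing both token ids through `eos_token_id`, as the model card's transformers example does, lets generation halt at the turn boundary. Below is a minimal, self-contained sketch of the combined fix using plain transformers (no ipex-llm specifics); the model path is an assumption, substitute any local Llama 3 Instruct checkpoint:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "meta-llama/Meta-Llama-3-8B-Instruct"  # assumption: any local Llama 3 Instruct path
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16)

# Stop on either the default eos (<|end_of_text|>) or the turn terminator (<|eot_id|>),
# per https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct#transformers-automodelforcausallm
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

# Note the blank line (\n\n) after <|end_header_id|>: the official chat format
# separates each header from the message body with two newlines.
prompt = ("<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
          "What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n")

input_ids = tokenizer.encode(prompt, return_tensors="pt")
with torch.inference_mode():
    output = model.generate(input_ids, eos_token_id=terminators, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=False))
```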

CPU/PyTorch-Models/Model/llama3/README.md

@@ -57,12 +57,16 @@ In the example, several arguments can be passed to satisfy your requirements:
 Inference time: xxxx s
 -------------------- Prompt --------------------
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>
 What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 -------------------- Output (skip_special_tokens=False) --------------------
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>
-What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-A question that gets to the heart of the 21st century!
-Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that
+What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that would typically require human intelligence, such as:
+1. Learning: AI
 ```

CPU/PyTorch-Models/Model/llama3/generate.py

@@ -31,13 +31,13 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
     prompt_texts = [f'<|begin_of_text|>']
 
     if system_prompt != '':
-        prompt_texts.append(f'<|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>')
 
     for history_input, history_response in chat_history:
-        prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n{history_input.strip()}<|eot_id|>')
-        prompt_texts.append(f'<|start_header_id|>assistant<|end_header_id|>\n{history_response.strip()}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{history_input.strip()}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>assistant<|end_header_id|>\n\n{history_response.strip()}<|eot_id|>')
 
-    prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n{user_input.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n')
+    prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{user_input.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n')
     return ''.join(prompt_texts)
 
 if __name__ == '__main__':
@@ -65,6 +65,12 @@ if __name__ == '__main__':
     # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
+    # here the terminators refer to https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct#transformers-automodelforcausallm
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+    ]
+
     # Generate predicted tokens
     with torch.inference_mode():
@@ -72,6 +78,7 @@ if __name__ == '__main__':
         input_ids = tokenizer.encode(prompt, return_tensors="pt")
         st = time.time()
         output = model.generate(input_ids,
+                                eos_token_id=terminators,
                                 max_new_tokens=args.n_predict)
         end = time.time()
         output_str = tokenizer.decode(output[0], skip_special_tokens=False)

GPU/HF-Transformers-AutoModels/Model/llama3/README.md

@@ -125,12 +125,14 @@ Arguments info:
 Inference time: xxxx s
 -------------------- Prompt --------------------
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>
 What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 -------------------- Output (skip_special_tokens=False) --------------------
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>
-What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-A question that gets to the heart of the 21st century!
-Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that
+What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that would typically require human intelligence, such as learning, problem-solving, decision
 ```

GPU/HF-Transformers-AutoModels/Model/llama3/generate.py

@@ -31,13 +31,13 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
     prompt_texts = [f'<|begin_of_text|>']
 
     if system_prompt != '':
-        prompt_texts.append(f'<|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>')
 
     for history_input, history_response in chat_history:
-        prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n{history_input.strip()}<|eot_id|>')
-        prompt_texts.append(f'<|start_header_id|>assistant<|end_header_id|>\n{history_response.strip()}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{history_input.strip()}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>assistant<|end_header_id|>\n\n{history_response.strip()}<|eot_id|>')
 
-    prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n{user_input.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n')
+    prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{user_input.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n')
     return ''.join(prompt_texts)
 
 if __name__ == '__main__':
@@ -67,17 +67,25 @@ if __name__ == '__main__':
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
+    # here the terminators refer to https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct#transformers-automodelforcausallm
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+    ]
+
     # Generate predicted tokens
     with torch.inference_mode():
         prompt = get_prompt(args.prompt, [], system_prompt=DEFAULT_SYSTEM_PROMPT)
         input_ids = tokenizer.encode(prompt, return_tensors="pt").to('xpu')
         # ipex_llm model needs a warmup, then inference time can be accurate
         output = model.generate(input_ids,
+                                eos_token_id=terminators,
                                 max_new_tokens=args.n_predict)
 
         # start inference
         st = time.time()
         output = model.generate(input_ids,
+                                eos_token_id=terminators,
                                 max_new_tokens=args.n_predict)
         torch.xpu.synchronize()
         end = time.time()
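The GPU variants wrap the same `eos_token_id` change in the existing warmup-then-time pattern. A small sketch of that timing idiom, assuming an XPU-enabled PyTorch build (e.g. with intel_extension_for_pytorch) where `torch.xpu.synchronize()` is available; `model`, `input_ids`, and `terminators` are defined as in the diff above:

```python
import time
import torch

def timed_generate(model, input_ids, terminators, n_predict):
    # Warmup: the first generate() call pays one-time compilation/caching
    # costs, so it would distort the measurement if it were timed.
    model.generate(input_ids, eos_token_id=terminators, max_new_tokens=n_predict)

    st = time.time()
    output = model.generate(input_ids, eos_token_id=terminators, max_new_tokens=n_predict)
    # XPU kernels are queued asynchronously; synchronize so the clock reading
    # reflects the finished device work rather than just kernel submission.
    torch.xpu.synchronize()
    return output, time.time() - st
```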

GPU/PyTorch-Models/Model/llama3/README.md

@@ -126,12 +126,14 @@ In the example, several arguments can be passed to satisfy your requirements:
 Inference time: xxxx s
 -------------------- Prompt --------------------
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>
 What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 -------------------- Output (skip_special_tokens=False) --------------------
 <|begin_of_text|><|start_header_id|>user<|end_header_id|>
-What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-A question that gets to the heart of the 21st century!
-Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that
+What is AI?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that would typically require human intelligence, such as learning, problem-solving, decision
 ```

GPU/PyTorch-Models/Model/llama3/generate.py

@@ -31,13 +31,13 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
     prompt_texts = [f'<|begin_of_text|>']
 
     if system_prompt != '':
-        prompt_texts.append(f'<|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>')
 
     for history_input, history_response in chat_history:
-        prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n{history_input.strip()}<|eot_id|>')
-        prompt_texts.append(f'<|start_header_id|>assistant<|end_header_id|>\n{history_response.strip()}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{history_input.strip()}<|eot_id|>')
+        prompt_texts.append(f'<|start_header_id|>assistant<|end_header_id|>\n\n{history_response.strip()}<|eot_id|>')
 
-    prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n{user_input.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n')
+    prompt_texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{user_input.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n')
     return ''.join(prompt_texts)
 
 if __name__ == '__main__':
@@ -69,6 +69,12 @@ if __name__ == '__main__':
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
+    # here the terminators refer to https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct#transformers-automodelforcausallm
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+    ]
+
     # Generate predicted tokens
     with torch.inference_mode():
@@ -76,11 +82,13 @@ if __name__ == '__main__':
         input_ids = tokenizer.encode(prompt, return_tensors="pt").to('xpu')
         # ipex_llm model needs a warmup, then inference time can be accurate
        output = model.generate(input_ids,
+                                eos_token_id=terminators,
                                 max_new_tokens=args.n_predict)
 
         # start inference
         st = time.time()
         output = model.generate(input_ids,
+                                eos_token_id=terminators,
                                 max_new_tokens=args.n_predict)
         torch.xpu.synchronize()
         end = time.time()
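As a sanity check for the prompt-format half of the fix, one can render the same single-turn conversation with `tokenizer.apply_chat_template`, which carries the official Llama 3 chat format, and confirm it uses the `\n\n` separator this commit adds to `get_prompt` (a sketch; assumes the tokenizer ships the Llama 3 chat template):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
reference = tokenizer.apply_chat_template(
    [{"role": "user", "content": "What is AI?"}],
    tokenize=False,
    add_generation_prompt=True,  # append the assistant header so the model replies next
)
print(repr(reference))
# Expected to contain '<|end_header_id|>\n\n' after each header, matching the
# \n\n that the fixed get_prompt now emits.
```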