[NPU] acclib llama3.2 support groupwise (#12355)

* change inter_pp

* add comment
Author: Yina Chen (committed by GitHub)
Date:   2024-11-07 05:19:55 +02:00
Commit: d880e534d2
Parent: 79f2877413


@@ -337,7 +337,17 @@ def optimize_llm(
         if intra_pp is None:
             intra_pp = 2
         if inter_pp is None:
-            inter_pp = 2 if group_size == 0 else 8
+            if group_size == 0:
+                inter_pp = 2
+            # llama3.2
+            elif model.config.intermediate_size == 8192:
+                # llama3.2 1b
+                if model.config.hidden_size == 2048:
+                    inter_pp = 1
+                else:
+                    inter_pp = 2
+            else:
+                inter_pp = 8
         convert_llama(model,
                       max_output_len=max_context_len,
                       max_prompt_len=max_prompt_len,
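
For reference, a minimal standalone sketch of the inter_pp selection introduced above. The helper name pick_inter_pp and the example config values are illustrative only; they assume Llama 3.2 1B/3B checkpoints use intermediate_size 8192 with hidden_size 2048/3072 respectively, and that a non-zero group_size means groupwise quantization is enabled.

    # Illustrative sketch, not part of the commit. Mirrors the new
    # inter_pp branch in optimize_llm() for the llama path.
    def pick_inter_pp(group_size: int, intermediate_size: int, hidden_size: int) -> int:
        if group_size == 0:
            # channelwise quantization keeps the previous default
            return 2
        # llama3.2 family (assumed intermediate_size == 8192)
        elif intermediate_size == 8192:
            # llama3.2 1b (assumed hidden_size == 2048) gets inter_pp = 1
            return 1 if hidden_size == 2048 else 2
        else:
            # other groupwise models keep the previous value of 8
            return 8

    # Hypothetical examples with group_size > 0 (groupwise):
    print(pick_inter_pp(64, 8192, 2048))    # 1  -> assumed Llama 3.2 1B
    print(pick_inter_pp(64, 8192, 3072))    # 2  -> assumed Llama 3.2 3B
    print(pick_inter_pp(64, 14336, 4096))   # 8  -> other llama models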