[NPU] acclib llama3.2 support groupwise (#12355)
* change inter_pp * add comment
This commit is contained in:
parent
79f2877413
commit
d880e534d2
1 changed file with 11 additions and 1 deletion
|
|
@ -337,7 +337,17 @@ def optimize_llm(
|
||||||
if intra_pp is None:
|
if intra_pp is None:
|
||||||
intra_pp = 2
|
intra_pp = 2
|
||||||
if inter_pp is None:
|
if inter_pp is None:
|
||||||
inter_pp = 2 if group_size == 0 else 8
|
if group_size == 0:
|
||||||
|
inter_pp = 2
|
||||||
|
# llama3.2
|
||||||
|
elif model.config.intermediate_size == 8192:
|
||||||
|
# llama3.2 1b
|
||||||
|
if model.config.hidden_size == 2048:
|
||||||
|
inter_pp = 1
|
||||||
|
else:
|
||||||
|
inter_pp = 2
|
||||||
|
else:
|
||||||
|
inter_pp = 8
|
||||||
convert_llama(model,
|
convert_llama(model,
|
||||||
max_output_len=max_context_len,
|
max_output_len=max_context_len,
|
||||||
max_prompt_len=max_prompt_len,
|
max_prompt_len=max_prompt_len,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue