[NPU] acclib llama3.2 support groupwise (#12355)
* change inter_pp * add comment
This commit is contained in:
parent
79f2877413
commit
d880e534d2
1 changed file with 11 additions and 1 deletion
|
|
@ -337,7 +337,17 @@ def optimize_llm(
|
|||
if intra_pp is None:
|
||||
intra_pp = 2
|
||||
if inter_pp is None:
|
||||
inter_pp = 2 if group_size == 0 else 8
|
||||
if group_size == 0:
|
||||
inter_pp = 2
|
||||
# llama3.2
|
||||
elif model.config.intermediate_size == 8192:
|
||||
# llama3.2 1b
|
||||
if model.config.hidden_size == 2048:
|
||||
inter_pp = 1
|
||||
else:
|
||||
inter_pp = 2
|
||||
else:
|
||||
inter_pp = 8
|
||||
convert_llama(model,
|
||||
max_output_len=max_context_len,
|
||||
max_prompt_len=max_prompt_len,
|
||||
|
|
|
|||
Loading…
Reference in a new issue