[NPU]Qwen2 groupwise performance opt (#12299)
* qwen2 gw performance opt * remove debug
This commit is contained in:
parent
41b8064554
commit
0763268e4c
1 changed file with 4 additions and 1 deletion
|
|
@@ -229,6 +229,9 @@ class LowBitQwenMultiDecoderlayer(LLMBaseNNFactory):
|
||||||
new_value_states = self.convert_to_fp16(curr_key_values[i][1])
|
new_value_states = self.convert_to_fp16(curr_key_values[i][1])
|
||||||
|
|
||||||
print(f"{mode} start compiling")
|
print(f"{mode} start compiling")
|
||||||
|
if group_size != 0 and (mode == "prefill" or num_layers == 2):
|
||||||
|
self.compile(npu_dpu_groups=6)
|
||||||
|
else:
|
||||||
self.compile()
|
self.compile()
|
||||||
print(f"{mode} end compiling")
|
print(f"{mode} end compiling")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue