[NPU]Qwen2 groupwise performance opt (#12299)

* qwen2 gw performance opt

* remove debug
This commit is contained in:
Yina Chen 2024-10-30 11:40:21 +02:00 committed by GitHub
parent 41b8064554
commit 0763268e4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -229,6 +229,9 @@ class LowBitQwenMultiDecoderlayer(LLMBaseNNFactory):
 new_value_states = self.convert_to_fp16(curr_key_values[i][1])
 print(f"{mode} start compiling")
+if group_size != 0 and (mode == "prefill" or num_layers == 2):
+    self.compile(npu_dpu_groups=6)
+else:
-self.compile()
+    self.compile()
 print(f"{mode} end compiling")