Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								872a74481a 
								
							 
						 
						
							
							
								
								Small optimization to glm4 models ( #12351 )  
							
							 
							
							
							
						 
						
							2024-11-06 19:16:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c267355b35 
								
							 
						 
						
							
							
								
								fix three NPU benchmark issues ( #12350 )  
							
							 
							
							... 
							
							
							
							* fix three issues
* limit mixed_precision for CW only 
							
						 
						
							2024-11-06 19:01:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f24352aef9 
								
							 
						 
						
							
							
								
								llama 3.1/3.2 support compresskv ( #12347 )  
							
							 
							
							... 
							
							
							
							* llama 3.1/3.2 support compresskv
* update
* fix transformers 4.45 error
* fix style
* fix typo
* disable llama3.2 1b compresskv 
							
						 
						
							2024-11-06 17:33:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d984c0672a 
								
							 
						 
						
							
							
								
								Add MiniCPM-V-2_6 to arc perf test ( #12349 )  
							
							 
							
							
							
						 
						
							2024-11-06 16:32:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e23ef7d088 
								
							 
						 
						
							
							
								
								optimize glm4v's vision part ( #12346 )  
							
							 
							
							
							
						 
						
							2024-11-06 15:43:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c8b7265359 
								
							 
						 
						
							
							
								
								Add basic glm4v support ( #12345 )  
							
							 
							
							
							
						 
						
							2024-11-06 13:50:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69e3a56943 
								
							 
						 
						
							
							
								
								[NPU] Hot fix of load_low_bit ( #12344 )  
							
							 
							
							
							
						 
						
							2024-11-06 10:07:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								899a30331a 
								
							 
						 
						
							
							
								
								Replace gradio_web_server.patch to adjust webui ( #12329 )  
							
							 
							
							... 
							
							
							
							* replace gradio_web_server.patch to adjust webui
* fix patch problem
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-11-06 09:16:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7240c283a3 
								
							 
						 
						
							
							
								
								Add dummy model in iGPU perf ( #12341 )  
							
							 
							
							... 
							
							
							
							* Add dummy model in iGPU perf
* Add dummy model in iGPU perf
* Fix 
							
						 
						
							2024-11-05 17:56:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8e9a3a1158 
								
							 
						 
						
							
							
								
								fix chatglm2 cpu ut ( #12336 )  
							
							 
							
							
							
						 
						
							2024-11-05 16:43:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d872639395 
								
							 
						 
						
							
							
								
								[NPU] Llama3, Qwen2 1.5b, MiniCPM 1/2B groupwise support ( #12327 )  
							
							 
							
							... 
							
							
							
							* support minicpm 1b & qwen 1.5b gw
* support minicpm 1b
* support minicpm 2b
* fix style & error
* fix style & update
* remove print 
							
						 
						
							2024-11-05 15:51:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								82a61b5cf3 
								
							 
						 
						
							
							
								
								Limit trl version in example ( #12332 )  
							
							 
							
							... 
							
							
							
							* Limit trl version in example
* Limit trl version in example 
							
						 
						
							2024-11-05 14:50:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								923d696854 
								
							 
						 
						
							
							
								
								Small fix to LNL performance tests ( #12333 )  
							
							 
							
							
							
						 
						
							2024-11-05 13:24:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								45b0d371aa 
								
							 
						 
						
							
							
								
								update benchmark readme ( #12323 )  
							
							 
							
							... 
							
							
							
							* update benchmark readme
update new comment with memory usage included
* Update README.md 
							
						 
						
							2024-11-05 08:19:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e2adc974fd 
								
							 
						 
						
							
							
								
								Small fix to LNL performance tests ( #12331 )  
							
							 
							
							
							
						 
						
							2024-11-04 19:22:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								522cdf8e9d 
								
							 
						 
						
							
							
								
								Add initial support for LNL nightly performance tests ( #12326 )  
							
							 
							
							... 
							
							
							
							* Add initial support for LNL nightly performance tests
* Small fix 
							
						 
						
							2024-11-04 18:53:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1b637e4477 
								
							 
						 
						
							
							
								
								Add chatglm2&3 fuse mlp ( #12328 )  
							
							 
							
							... 
							
							
							
							* add chatglm fuse mlp 
							
						 
						
							2024-11-04 18:04:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								94c4ce389f 
								
							 
						 
						
							
							
								
								[NPU] Add env to disable compile opt ( #12330 )  
							
							 
							
							... 
							
							
							
							* add env to disable compile opt
* fix style
* fix style 
							
						 
						
							2024-11-04 17:46:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e54af44ed6 
								
							 
						 
						
							
							
								
								Add transformers_int4_npu_pipeline_win in all-in-one benchmark ( #12325 )  
							
							 
							
							... 
							
							
							
							* add transformers_int4_npu_pipeline_win
* bugfix
* bugfix: wrong actual_output_len
* fix format
* bugfix & update `README.md` 
							
						 
						
							2024-11-04 16:00:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5ee6f97d6f 
								
							 
						 
						
							
							
								
								[NPU L0] Add layernorm weight as const / input setting ( #12322 )  
							
							 
							
							
							
						 
						
							2024-11-04 15:46:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a01371f90b 
								
							 
						 
						
							
							
								
								Doc: update harness readme ( #12324 )  
							
							 
							
							
							
						 
						
							2024-11-04 14:58:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4644cb640c 
								
							 
						 
						
							
							
								
								Perf test further fix regarding trl version ( #12321 )  
							
							 
							
							
							
						 
						
							2024-11-04 11:01:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8fe01c9e4d 
								
							 
						 
						
							
							
								
								[NPU pipeline] update cmake usage of pipeline ( #12320 )  
							
							 
							
							
							
						 
						
							2024-11-04 10:30:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c8679ad592 
								
							 
						 
						
							
							
								
								Qwen layernorm as input ( #12309 )  
							
							 
							
							... 
							
							
							
							* qwen layernorm as input
* add group size 
							
						 
						
							2024-11-04 09:51:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								94ce447794 
								
							 
						 
						
							
							
								
								Fix performance tests regarding trl version ( #12319 )  
							
							 
							
							... 
							
							
							
							* Fix performance tests regarding trl version
* Small fix 
							
						 
						
							2024-11-04 09:42:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								20755e8077 
								
							 
						 
						
							
							
								
								Small fix to all-in-one benchmark scripts ( #12317 )  
							
							 
							
							
							
						 
						
							2024-11-01 19:16:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								48123af463 
								
							 
						 
						
							
							
								
								add npu_group_size for transformers_int4_npu_win in all-in-one benchmark api ( #12316 )  
							
							 
							
							... 
							
							
							
							* add `npu_group_size` for `transformers_int4_npu_win`
small bugfix
* update 
							
						 
						
							2024-11-01 18:44:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cd5e22cee5 
								
							 
						 
						
							
							
								
								Update Llava GPU Example ( #12311 )  
							
							 
							
							... 
							
							
							
							* update-llava-example
* add warmup
* small fix on llava example
* remove space & extra print prompt
* renew example
* small fix
---------
Co-authored-by: Jinhe Tang <jin.tang1337@gmail.com> 
							
						 
						
							2024-11-01 17:06:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f53bb4ea0b 
								
							 
						 
						
							
							
								
								[NPU L0] Update 1st token generation ( #12314 )  
							
							 
							
							
							
						 
						
							2024-11-01 17:02:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d409d9d0eb 
								
							 
						 
						
							
							
								
								[NPU L0] Update streaming mode of example ( #12312 )  
							
							 
							
							
							
						 
						
							2024-11-01 15:38:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								126f95be80 
								
							 
						 
						
							
							
								
								Fix DPO finetuning example ( #12313 )  
							
							 
							
							
							
						 
						
							2024-11-01 13:29:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								05c5d0267a 
								
							 
						 
						
							
							
								
								[NPU] Llama2 prefill use ov sdp ( #12310 )  
							
							 
							
							... 
							
							
							
							* prefill use sdp
* add param
* update
* fix style
* fix style
* meet comments 
							
						 
						
							2024-11-01 11:05:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								eda764909c 
								
							 
						 
						
							
							
								
								Add minicpm-2b in L0 pipeline ( #12308 )  
							
							 
							
							
							
						 
						
							2024-11-01 09:30:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b9853f98b3 
								
							 
						 
						
							
							
								
								fix qwen2 attention_mask slice ( #12307 )  
							
							 
							
							
							
						 
						
							2024-10-31 17:00:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3df6195cb0 
								
							 
						 
						
							
							
								
								Fix application quickstart ( #12305 )  
							
							 
							
							... 
							
							
							
							* fix graphrag quickstart
* fix axolotl quickstart
* fix ragflow quickstart
* fix ragflow quickstart
* fix graphrag toc
* fix comments
* fix comment
* fix comments 
							
						 
						
							2024-10-31 16:57:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4892df61c9 
								
							 
						 
						
							
							
								
								Add qwen2-1.5b in l0 pipeline example ( #12306 )  
							
							 
							
							
							
						 
						
							2024-10-31 16:44:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								30f668c206 
								
							 
						 
						
							
							
								
								updated transformers & accelerate requirements ( #12301 )  
							
							 
							
							
							
						 
						
							2024-10-31 15:59:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								97a0f7fd35 
								
							 
						 
						
							
							
								
								Codegeex support ( #12303 )  
							
							 
							
							... 
							
							
							
							* new codegeex attn
* use kv cache
* add compress/quantize kv
* remove compress/quantize kv
* fix style check
* fix style
* fix codegeex 
							
						 
						
							2024-10-31 15:28:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								72605c7016 
								
							 
						 
						
							
							
								
								fix llama3.1/3.2 quantize kv check ( #12302 )  
							
							 
							
							
							
						 
						
							2024-10-31 11:55:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								416c19165c 
								
							 
						 
						
							
							
								
								Add Qwen pipeline and example ( #12292 )  
							
							 
							
							... 
							
							
							
							* support qwen pipeline
* update error msg
* style
* meet review
* minor 
							
						 
						
							2024-10-31 11:25:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Rahul Nair 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4cf1ccc43a 
								
							 
						 
						
							
							
								
								Update DPO README.md ( #12162 )  
							
							 
							
							... 
							
							
							
							bitsandbytes multi-backend is now available and is required; otherwise it would error out saying that no CUDA is available 
							
						 
						
							2024-10-31 10:56:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								29400e2e75 
								
							 
						 
						
							
							
								
								feat: change oneccl to internal ( #12296 )  
							
							 
							
							... 
							
							
							
							* feat: change oneccl
* fix: restore llama-70b
* fix: remove tab
* fix: remove extra blank
* small fix
* add comments
* fix: add a blank space 
							
						 
						
							2024-10-31 09:51:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6f22133efc 
								
							 
						 
						
							
							
								
								Update AWQ and GPTQ GPU example ( #12300 )  
							
							 
							
							
							
						 
						
							2024-10-31 09:35:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0763268e4c 
								
							 
						 
						
							
							
								
								[NPU]Qwen2 groupwise performance opt ( #12299 )  
							
							 
							
							... 
							
							
							
							* qwen2 gw performance opt
* remove debug 
							
						 
						
							2024-10-30 17:40:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								41b8064554 
								
							 
						 
						
							
							
								
								Support minicpm-1B in level0 pipeline ( #12297 )  
							
							 
							
							
							
						 
						
							2024-10-30 17:21:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								46d8300f6b 
								
							 
						 
						
							
							
								
								bugfix for qlora finetuning on GPU ( #12298 )  
							
							 
							
							... 
							
							
							
							* bugfix for qlora 100 step error
* indent fix
* annotation fix 
							
						 
						
							2024-10-30 16:54:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								70037ad55f 
								
							 
						 
						
							
							
								
								Groupwise prefill optimization ( #12291 )  
							
							 
							
							... 
							
							
							
							* except lm_head
* remove
* support gw lm_head
* update
* fix
* remove run.bat
* fix style
* support llama3
* slice -> split
* remove debug
* fix style
* add dpu 
							
						 
						
							2024-10-30 14:59:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								540eaeb12c 
								
							 
						 
						
							
							
								
								refactor attention_softmax ( #12295 )  
							
							 
							
							
							
						 
						
							2024-10-30 13:20:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2b2cb9c693 
								
							 
						 
						
							
							
								
								[NPU pipeline] Support save & load and update examples ( #12293 )  
							
							 
							
							... 
							
							
							
							* support save & load, update llama examples
* update baichuan2 example
* update readme 
							
						 
						
							2024-10-30 10:02:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5a15098835 
								
							 
						 
						
							
							
								
								Initial support for quantized forward on CPU when quantization_group_size=0 ( #12282 )  
							
							 
							
							... 
							
							
							
							* Initial support for quantized forward on CPU when quantization_group_size=0
* Style fix
* Style fix
* Small fix
* Small fix 
							
						 
						
							2024-10-29 19:40:17 +08:00