Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b9c66994a5 
								
							 
						 
						
							
							
								
								add npu sdp ( #11562 )  
							
							 
							
							
							
						 
						
							2024-07-11 16:57:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2b8ad8731e 
								
							 
						 
						
							
							
								
								Support pipeline parallel for glm-4v ( #11545 )  
							
							 
							
							
							
						 
						
							2024-07-11 16:06:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7f5111a998 
								
							 
						 
						
							
							
								
								LLM: Refine start script for Pipeline Parallel Serving ( #11557 )  
							
							 
							
							... 
							
							
							
							Refine start script and readme for Pipeline Parallel Serving 
							
						 
						
							2024-07-11 15:45:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1355b2ce06 
								
							 
						 
						
							
							
								
								Add model Qwen-VL-Chat to iGPU-perf ( #11558 )  
							
							 
							
							... 
							
							
							
							* Add model Qwen-VL-Chat to iGPU-perf
* small fix
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-07-11 15:39:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								105e124752 
								
							 
						 
						
							
							
								
								optimize phi3-v encoder npu performance and add multimodal example ( #11553 )  
							
							 
							
							... 
							
							
							
							* phi3-v
* readme 
							
						 
						
							2024-07-11 13:59:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								70ab1a6f1a 
								
							 
						 
						
							
							
								
								LLM: unify memory optimization env variables. ( #11549 )  
							
							 
							
							... 
							
							
							
							* LLM: unify memory optimization env variables.
* fix comments. 
							
						 
						
							2024-07-11 11:01:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								028ad4f63c 
								
							 
						 
						
							
							
								
								Add model phi-3-vision-128k-instruct to iGPU-perf benchmark ( #11554 )  
							
							 
							
							... 
							
							
							
							* try to improve MiniCPM performance
* Add model phi-3-vision-128k-instruct to iGPU-perf benchmark
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-07-10 17:26:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								994e49a510 
								
							 
						 
						
							
							
								
								optimize internlm xcomposer performance again ( #11551 )  
							
							 
							
							
							
						 
						
							2024-07-10 17:08:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								61613b210c 
								
							 
						 
						
							
							
								
								try to improve MiniCPM performance ( #11552 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-07-10 16:58:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								82f9514303 
								
							 
						 
						
							
							
								
								optimize internlm xcomposer2 performance ( #11550 )  
							
							 
							
							
							
						 
						
							2024-07-10 15:57:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3c16c9f725 
								
							 
						 
						
							
							
								
								Optimize baichuan on NPU ( #11548 )  
							
							 
							
							... 
							
							
							
							* baichuan_npu 
							
						 
						
							2024-07-10 13:18:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8982ab73d5 
								
							 
						 
						
							
							
								
								Add Yi-6B and StableLM to iGPU perf test ( #11546 )  
							
							 
							
							... 
							
							
							
							* Add transformer4.38.2 test to igpu benchmark (#11529 )
* add transformer4.38.1 test to igpu benchmark
* use transformers4.38.2 & fix csv name error in 4.38 workflow
* add model Yi-6B-Chat & remove temporarily most models
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com>
* filter some errorlevel (#11541 )
Co-authored-by: ATMxsp01 <shou.xu@intel.com>
* Restore the temporarily removed models in iGPU-perf (#11544 )
* filter some errorlevel
* restore the temporarily removed models in iGPU-perf
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com>
---------
Co-authored-by: Xu, Shuo <100334393+ATMxsp01@users.noreply.github.com>
Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-07-09 18:51:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7dc6756d86 
								
							 
						 
						
							
							
								
								add disk embedding ( #11543 )  
							
							 
							
							
							
						 
						
							2024-07-09 17:38:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								76a5802acf 
								
							 
						 
						
							
							
								
								update NPU examples ( #11540 )  
							
							 
							
							... 
							
							
							
							* update NPU examples 
							
						 
						
							2024-07-09 17:19:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								99b2802d3b 
								
							 
						 
						
							
							
								
								optimize qwen2 memory ( #11535 )  
							
							 
							
							
							
						 
						
							2024-07-09 17:14:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2929eb262e 
								
							 
						 
						
							
							
								
								support npu glm4 ( #11539 )  
							
							 
							
							
							
						 
						
							2024-07-09 15:46:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1cede926d 
								
							 
						 
						
							
							
								
								Fix update_kv_cache in Pipeline-Parallel-Serving for glm4-9b model ( #11537 )  
							
							 
							
							
							
						 
						
							2024-07-09 14:08:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fa81dbefd3 
								
							 
						 
						
							
							
								
								LLM: update multi gpu write csv in all-in-one benchmark. ( #11538 )  
							
							 
							
							
							
						 
						
							2024-07-09 11:14:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69701b3ec8 
								
							 
						 
						
							
							
								
								fix typo in python/llm/scripts/README.md ( #11536 )  
							
							 
							
							
							
						 
						
							2024-07-09 09:53:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								099486afb7 
								
							 
						 
						
							
							
								
								Update README.md ( #11530 )  
							
							 
							
							
							
						 
						
							2024-07-08 20:18:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								66f6ffe4b2 
								
							 
						 
						
							
							
								
								Update GPU HF-Transformers example structure ( #11526 )  
							
							 
							
							
							
						 
						
							2024-07-08 17:58:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f9a199900d 
								
							 
						 
						
							
							
								
								add model RWKV/v5-Eagle-7B-HF to igpu benchmark ( #11528 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-07-08 15:50:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9b37ca6027 
								
							 
						 
						
							
							
								
								remove ( #11527 )  
							
							 
							
							
							
						 
						
							2024-07-08 15:49:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c26651f91f 
								
							 
						 
						
							
							
								
								add mistral npu support ( #11523 )  
							
							 
							
							
							
						 
						
							2024-07-08 13:17:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jun Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5a57e54400 
								
							 
						 
						
							
							
								
								[ADD] add 5 new models for igpu-perf ( #11524 )  
							
							 
							
							
							
						 
						
							2024-07-08 11:12:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								64cfed602d 
								
							 
						 
						
							
							
								
								Add new models to benchmark ( #11505 )  
							
							 
							
							... 
							
							
							
							* Add new models to benchmark
* remove Qwen/Qwen-VL-Chat to pass the validation
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-07-08 10:35:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								252426793b 
								
							 
						 
						
							
							
								
								Fix setting of use_quantize_kv_cache on different GPU in pipeline parallel ( #11516 )  
							
							 
							
							
							
						 
						
							2024-07-08 09:27:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7cb09a8eac 
								
							 
						 
						
							
							
								
								optimize qwen2 memory usage again ( #11520 )  
							
							 
							
							
							
						 
						
							2024-07-05 17:32:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8f376e5192 
								
							 
						 
						
							
							
								
								Change igpu perf to mainly test int4+fp16 ( #11513 )  
							
							 
							
							
							
						 
						
							2024-07-05 17:12:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jun Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1efb6ebe93 
								
							 
						 
						
							
							
								
								[ADD] add transformer_int4_fp16_loadlowbit_gpu_win api ( #11511 )  
							
							 
							
							... 
							
							
							
							* [ADD] add transformer_int4_fp16_loadlowbit_gpu_win api
* [UPDATE] add int4_fp16_lowbit config and description
* [FIX] fix run.py mistake
* [FIX] fix run.py mistake
* [FIX] fix indent; change dtype=float16 to model.half() 
							
						 
						
							2024-07-05 16:38:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f7e957aaf9 
								
							 
						 
						
							
							
								
								Clean npu dtype branch ( #11515 )  
							
							 
							
							... 
							
							
							
							* clean branch
* create_npu_kernels 
							
						 
						
							2024-07-05 15:45:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								14ce058004 
								
							 
						 
						
							
							
								
								add chatglm3 npu support ( #11518 )  
							
							 
							
							
							
						 
						
							2024-07-05 15:31:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a31f2cbe13 
								
							 
						 
						
							
							
								
								update minicpm.py ( #11517 )  
							
							 
							
							... 
							
							
							
							* update minicpm
* meet code review 
							
						 
						
							2024-07-05 15:25:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								24de13fc45 
								
							 
						 
						
							
							
								
								Optimize stablelm on NPU ( #11512 )  
							
							 
							
							... 
							
							
							
							* stablelm_optimize 
							
						 
						
							2024-07-05 14:21:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7d8bc83415 
								
							 
						 
						
							
							
								
								LLM: Partial Prefilling for Pipeline Parallel Serving ( #11457 )  
							
							 
							
							... 
							
							
							
							LLM: Partial Prefilling for Pipeline Parallel Serving 
							
						 
						
							2024-07-05 13:10:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								60de428b37 
								
							 
						 
						
							
							
								
								Support pipeline parallel for qwen-vl ( #11503 )  
							
							 
							
							
							
						 
						
							2024-07-04 18:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								57b8adb189 
								
							 
						 
						
							
							
								
								[WIP] Support npu load_low_bit method ( #11502 )  
							
							 
							
							... 
							
							
							
							* npu_load_low_bit 
							
						 
						
							2024-07-04 17:15:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jun Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f07937945f 
								
							 
						 
						
							
							
								
								[REMOVE] remove all useless repo-id in benchmark/igpu-perf ( #11508 )  
							
							 
							
							
							
						 
						
							2024-07-04 16:38:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1a8bab172e 
								
							 
						 
						
							
							
								
								add minicpm 1B/2B npu support ( #11507 )  
							
							 
							
							
							
						 
						
							2024-07-04 16:31:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bb0a84044b 
								
							 
						 
						
							
							
								
								add qwen2 npu support ( #11504 )  
							
							 
							
							
							
						 
						
							2024-07-04 11:01:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f84ca99b9f 
								
							 
						 
						
							
							
								
								optimize gemma2 rmsnorm ( #11500 )  
							
							 
							
							
							
						 
						
							2024-07-03 15:21:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								61c36ba085 
								
							 
						 
						
							
							
								
								Add pp_serving verified models ( #11498 )  
							
							 
							
							... 
							
							
							
							* add verified models
* update
* verify large model
* update commend 
							
						 
						
							2024-07-03 14:57:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9274282ef7 
								
							 
						 
						
							
							
								
								Support pipeline parallel for glm-4-9b-chat ( #11463 )  
							
							 
							
							
							
						 
						
							2024-07-03 14:25:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d97c2664ce 
								
							 
						 
						
							
							
								
								use new fuse rope in stablelm family ( #11497 )  
							
							 
							
							
							
						 
						
							2024-07-03 11:08:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								52519e07df 
								
							 
						 
						
							
							
								
								remove models we no longer need in benchmark. ( #11492 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-07-02 17:20:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6a0134a9b2 
								
							 
						 
						
							
							
								
								support q4_0_rtn  ( #11477 )  
							
							 
							
							... 
							
							
							
							* q4_0_rtn 
							
						 
						
							2024-07-02 16:57:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5e967205ac 
								
							 
						 
						
							
							
								
								remove the code converts input to fp16 before calling batch forward kernel ( #11489 )  
							
							 
							
							
							
						 
						
							2024-07-02 16:23:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4390e7dc49 
								
							 
						 
						
							
							
								
								Fix codegeex2 transformers version ( #11487 )  
							
							 
							
							
							
						 
						
							2024-07-02 15:09:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ec3a912ab6 
								
							 
						 
						
							
							
								
								optimize npu llama long context performance ( #11478 )  
							
							 
							
							
							
						 
						
							2024-07-01 16:49:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								913e750b01 
								
							 
						 
						
							
							
								
								fix non-string deepspeed config path bug ( #11476 )  
							
							 
							
							... 
							
							
							
							* fix non-string deepspeed config path bug
* Update lora_finetune_chatglm.py 
							
						 
						
							2024-07-01 15:53:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								48ad482d3d 
								
							 
						 
						
							
							
								
								Fix import error caused by pydantic on cpu ( #11474 )  
							
							 
							
							
							
						 
						
							2024-07-01 15:49:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								39bcb33a67 
								
							 
						 
						
							
							
								
								add sdp support for stablelm 3b ( #11473 )  
							
							 
							
							
							
						 
						
							2024-07-01 14:56:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cf8eb7b128 
								
							 
						 
						
							
							
								
								Init NPU quantize method and support q8_0_rtn ( #11452 )  
							
							 
							
							... 
							
							
							
							* q8_0_rtn
* fix float point 
							
						 
						
							2024-07-01 13:45:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								319a3b36b2 
								
							 
						 
						
							
							
								
								fix npu llama2 ( #11471 )  
							
							 
							
							
							
						 
						
							2024-07-01 10:14:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								07362ffffc 
								
							 
						 
						
							
							
								
								ChatGLM3-6B LoRA Fine-tuning Demo ( #11450 )  
							
							 
							
							... 
							
							
							
							* ChatGLM3-6B LoRA Fine-tuning Demo
* refine
* refine
* add 2-card deepspeed
* refine format
* add mpi4py and deepspeed install 
							
						 
						
							2024-07-01 09:18:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fd933c92d8 
								
							 
						 
						
							
							
								
								Fix: Correct num_requests in benchmark for Pipeline Parallel Serving ( #11462 )  
							
							 
							
							
							
						 
						
							2024-06-28 16:10:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a414e3ff8a 
								
							 
						 
						
							
							
								
								add pipeline parallel support with load_low_bit ( #11414 )  
							
							 
							
							
							
						 
						
							2024-06-28 10:17:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d0b801d7bc 
								
							 
						 
						
							
							
								
								LLM: change write mode in all-in-one benchmark. ( #11444 )  
							
							 
							
							... 
							
							
							
							* LLM: change write mode in all-in-one benchmark.
* update output style. 
							
						 
						
							2024-06-27 19:36:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								987017ef47 
								
							 
						 
						
							
							
								
								Update pipeline parallel serving for more model support ( #11428 )  
							
							 
							
							
							
						 
						
							2024-06-27 18:21:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								029ff15d28 
								
							 
						 
						
							
							
								
								optimize npu llama2 first token performance ( #11451 )  
							
							 
							
							
							
						 
						
							2024-06-27 17:37:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4e4ecd5095 
								
							 
						 
						
							
							
								
								Control sys.modules ipex duplicate check with BIGDL_CHECK_DUPLICATE_IMPORT ( #11453 )  
							
							 
							
							... 
							
							
							
							* Control sys.modules ipex duplicate check with BIGDL_CHECK_DUPLICATE_IMPORT. 
							
						 
						
							2024-06-27 17:21:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c6e5ad668d 
								
							 
						 
						
							
							
								
								fix internlm xcomposer meta-instruction typo ( #11448 )  
							
							 
							
							
							
						 
						
							2024-06-27 15:29:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f89ca23748 
								
							 
						 
						
							
							
								
								optimize npu llama2 perf again ( #11445 )  
							
							 
							
							
							
						 
						
							2024-06-27 15:13:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cf0f5c4322 
								
							 
						 
						
							
							
								
								change npu document ( #11446 )  
							
							 
							
							
							
						 
						
							2024-06-27 13:59:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								508c364a79 
								
							 
						 
						
							
							
								
								Add precision option in PP inference examples ( #11440 )  
							
							 
							
							
							
						 
						
							2024-06-27 09:24:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2a0f8087e3 
								
							 
						 
						
							
							
								
								optimize qwen2 gpu memory usage again ( #11435 )  
							
							 
							
							
							
						 
						
							2024-06-26 16:52:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ab9f7f3ac5 
								
							 
						 
						
							
							
								
								FIX: Qwen1.5-GPTQ-Int4 inference error ( #11432 )  
							
							 
							
							... 
							
							
							
							* merge_qkv if quant_method is 'gptq'
* fix python style checks
* refactor
* update GPU example 
							
						 
						
							2024-06-26 15:36:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								99cd16ef9f 
								
							 
						 
						
							
							
								
								Fix error while using pipeline parallelism ( #11434 )  
							
							 
							
							
							
						 
						
							2024-06-26 15:33:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								40fa23560e 
								
							 
						 
						
							
							
								
								Fix LLAVA example on CPU ( #11271 )  
							
							 
							
							... 
							
							
							
							* update
* update
* update
* update 
							
						 
						
							2024-06-25 20:04:59 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ca0e69c3a7 
								
							 
						 
						
							
							
								
								optimize npu llama perf again ( #11431 )  
							
							 
							
							
							
						 
						
							2024-06-26 10:52:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9f6e5b4fba 
								
							 
						 
						
							
							
								
								optimize llama npu perf ( #11426 )  
							
							 
							
							
							
						 
						
							2024-06-25 17:43:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e473b8d946 
								
							 
						 
						
							
							
								
								Add more qwen1.5 and qwen2 support for pipeline parallel inference ( #11423 )  
							
							 
							
							
							
						 
						
							2024-06-25 15:49:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aacc1fd8c0 
								
							 
						 
						
							
							
								
								Fix shape error when run qwen1.5-14b using deepspeed autotp ( #11420 )  
							
							 
							
							
							
						 
						
							2024-06-25 13:48:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3b23de684a 
								
							 
						 
						
							
							
								
								update npu examples ( #11422 )  
							
							 
							
							
							
						 
						
							2024-06-25 13:32:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8ddae22cfb 
								
							 
						 
						
							
							
								
								LLM: Refactor Pipeline-Parallel-FastAPI example ( #11319 )  
							
							 
							
							... 
							
							
							
							Initially Refactor for Pipeline-Parallel-FastAPI example 
							
						 
						
							2024-06-25 13:30:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								34c15d3a10 
								
							 
						 
						
							
							
								
								update pp document ( #11421 )  
							
							 
							
							
							
						 
						
							2024-06-25 10:17:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9e4ee61737 
								
							 
						 
						
							
							
								
								rename BIGDL_OPTIMIZE_LM_HEAD to IPEX_LLM_LAST_LM_HEAD and add qwen2  ( #11418 )  
							
							 
							
							
							
						 
						
							2024-06-24 18:42:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c985912ee3 
								
							 
						 
						
							
							
								
								Add Deepspeed LoRA dependencies in document ( #11410 )  
							
							 
							
							
							
						 
						
							2024-06-24 15:29:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								abe53eaa4f 
								
							 
						 
						
							
							
								
								optimize qwen1.5/2 memory usage when running long input with fp16 ( #11403 )  
							
							 
							
							
							
						 
						
							2024-06-24 13:43:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7507000ef2 
								
							 
						 
						
							
							
								
								Fix 1383 Llama model on transformers=4.41[WIP] ( #11280 )  
							
							 
							
							
							
						 
						
							2024-06-21 11:24:10 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0c67639539 
								
							 
						 
						
							
							
								
								Add more examples for pipeline parallel inference ( #11372 )  
							
							 
							
							... 
							
							
							
							* add more model examples for pipeline parallel inference
* add mixtral and vicuna models
* add yi model and past_kv support for chatglm family
* add docs
* doc update
* add license
* update 
							
						 
						
							2024-06-21 17:55:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b30bf7648e 
								
							 
						 
						
							
							
								
								Fix vLLM CPU api_server params ( #11384 )  
							
							 
							
							
							
						 
						
							2024-06-21 13:00:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ivy-lv11 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								21fc781fce 
								
							 
						 
						
							
							
								
								Add GLM-4V example ( #11343 )  
							
							 
							
							... 
							
							
							
							* add example
* modify
* modify
* add line
* add
* add link and replace with phi-3-vision template
* fix generate options
* fix
* fix
---------
Co-authored-by: jinbridge <2635480475@qq.com> 
							
						 
						
							2024-06-21 12:54:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4ba82191f2 
								
							 
						 
						
							
							
								
								Support PP inference for chatglm3 ( #11375 )  
							
							 
							
							
							
						 
						
							2024-06-21 09:59:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f0fdfa081b 
								
							 
						 
						
							
							
								
								Optimize qwen 1.5 14B batch performance ( #11370 )  
							
							 
							
							
							
						 
						
							2024-06-20 17:23:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c0e86c523a 
								
							 
						 
						
							
							
								
								Add qwen-moe batch1 to nightly perf ( #11369 )  
							
							 
							
							... 
							
							
							
							* add moe
* reduce 437 models
* rename
* fix syntax
* add moe check result
* add 430 + 437
* all modes
* 4-37-4 exclud
* revert & comment
---------
Co-authored-by: Yishuo Wang <yishuo.wang@intel.com> 
							
						 
						
							2024-06-20 14:17:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a5e7d93242 
								
							 
						 
						
							
							
								
								Add initial save/load low bit support for NPU(now only fp16 is supported) ( #11359 )  
							
							 
							
							
							
						 
						
							2024-06-20 10:49:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									RyuKosei 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								05a8d051f6 
								
							 
						 
						
							
							
								
								Fix run.py run_ipex_fp16_gpu ( #11361 )  
							
							 
							
							... 
							
							
							
							* fix a bug on run.py
* Update run.py
fixed the format problem
---------
Co-authored-by: sgwhat <ge.song@intel.com> 
							
						 
						
							2024-06-20 10:29:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b2f62a8561 
								
							 
						 
						
							
							
								
								Add batch 4 perf test  ( #11355 )  
							
							 
							
							... 
							
							
							
							* copy files to this branch
* add tasks
* comment one model
* change the model to test the 4.36
* only test batch-4
* typo
* typo
* typo
* typo
* typo
* typo
* add 4.37-batch4
* change the file name
* revert yaml file
* no print
* add batch4 task
* revert
---------
Co-authored-by: Yishuo Wang <yishuo.wang@intel.com> 
							
						 
						
							2024-06-20 09:48:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ae452688c2 
								
							 
						 
						
							
							
								
								Add NPU HF example ( #11358 )  
							
							 
							
							
							
						 
						
							2024-06-19 18:07:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1eb884a249 
								
							 
						 
						
							
							
								
								IPEX Duplicate importer V2 ( #11310 )  
							
							 
							
							... 
							
							
							
							* Add gguf support.
* Avoid error when import ipex-llm for multiple times.
* Add check to avoid duplicate replace and revert.
* Add calling from check to avoid raising exceptions in the submodule.
* Add BIGDL_CHECK_DUPLICATE_IMPORT for controlling duplicate checker. Default is true. 
							
						 
						
							2024-06-19 16:29:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ae7b662ed2 
								
							 
						 
						
							
							
								
								add fp16 NPU Linear support and fix intel_npu_acceleration_library version 1.0 support ( #11352 )  
							
							 
							
							
							
						 
						
							2024-06-19 09:14:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c44b1942ed 
								
							 
						 
						
							
							
								
								fix mistral for transformers>=4.39 ( #11191 )  
							
							 
							
							... 
							
							
							
							* fix mistral for transformers>=4.39 
							
						 
						
							2024-06-18 13:39:35 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								67a1e05876 
								
							 
						 
						
							
							
								
								Remove zero3 context manager from LoRA ( #11346 )  
							
							 
							
							
							
						 
						
							2024-06-18 17:24:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								83082e5cc7 
								
							 
						 
						
							
							
								
								add initial support for intel npu acceleration library ( #11347 )  
							
							 
							
							
							
						 
						
							2024-06-18 16:07:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								694912698e 
								
							 
						 
						
							
							
								
								Upgrade scikit-learn to 1.5.0 to fix dependabot issue ( #11349 )  
							
							 
							
							
							
						 
						
							2024-06-18 15:47:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								44f22cba70 
								
							 
						 
						
							
							
								
								add config and default value ( #11344 )  
							
							 
							
							... 
							
							
							
							* add config and default value
* add config in yaml
* remove lookahead and max_matching_ngram_size in config
* remove streaming and use_fp16_torch_dtype in test yaml
* update task in readme
* update commit of task 
							
						 
						
							2024-06-18 15:28:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								00f322d8ee 
								
							 
						 
						
							
							
								
								Finetune ChatGLM with Deepspeed Zero3 LoRA ( #11314 )  
							
							 
							
							... 
							
							
							
							* Finetune ChatGLM with Deepspeed Zero3 LoRA
* add deepspeed zero3 config
* rename config
* remove offload_param
* add save_checkpoint parameter
* Update lora_deepspeed_zero3_finetune_chatglm3_6b_arc_2_card.sh
* refine 
							
						 
						
							2024-06-18 12:31:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5dad33e5af 
								
							 
						 
						
							
							
								
								Support fp8_e4m3 scale search ( #11339 )  
							
							 
							
							... 
							
							
							
							* fp8e4m3 switch off
* fix style 
							
						 
						
							2024-06-18 11:47:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e50c890e1f 
								
							 
						 
						
							
							
								
								Support finishing PP inference once eos_token_id is found ( #11336 )  
							
							 
							
							
							
						 
						
							2024-06-18 09:55:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								de4bb97b4f 
								
							 
						 
						
							
							
								
								Remove accelerate 0.23.0 install command in readme and docker ( #11333 )  
							
							 
							
							... 
							
							
							
							* ipex-llm's accelerate has been upgraded to 0.23.0. Remove accelerate 0.23.0 install command in README and docker. 
							
						 
						
							2024-06-17 17:52:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ef4b6519fb 
								
							 
						 
						
							
							
								
								Add phi-3 model support for pipeline parallel inference ( #11334 )  
							
							 
							
							... 
							
							
							
							* add phi-3 model support
* add phi3 example 
							
						 
						
							2024-06-17 17:44:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								99b309928b 
								
							 
						 
						
							
							
								
								Add lookahead in test_api: transformer_int4_fp16_gpu ( #11337 )  
							
							 
							
							... 
							
							
							
							* add lookahead in test_api:transformer_int4_fp16_gpu
* change the short prompt of summarize
* change short prompt to cnn_64
* change short prompt of summarize 
							
						 
						
							2024-06-17 17:41:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5d7c9bf901 
								
							 
						 
						
							
							
								
								Upgrade accelerate to 0.23.0 ( #11331 )  
							
							 
							
							... 
							
							
							
							* Upgrade accelerate to 0.23.0 
							
						 
						
							2024-06-17 15:03:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								183e0c6cf5 
								
							 
						 
						
							
							
								
								glm-4v-9b support ( #11327 )  
							
							 
							
							... 
							
							
							
							* chatglm4v support
* fix style check
* update glm4v 
							
						 
						
							2024-06-17 13:52:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bca5cbd96c 
								
							 
						 
						
							
							
								
								Modify arc nightly perf to fp16 ( #11275 )  
							
							 
							
							... 
							
							
							
							* change api
* move to pr mode and remove the build
* add batch4 yaml and remove the bigcode
* remove batch4
* revert the starcode
* remove the exclude
* revert
---------
Co-authored-by: Yishuo Wang <yishuo.wang@intel.com> 
							
						 
						
							2024-06-17 13:47:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6ea1e71af0 
								
							 
						 
						
							
							
								
								Update PP inference benchmark script ( #11323 )  
							
							 
							
							
							
						 
						
							2024-06-17 09:59:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								be00380f1a 
								
							 
						 
						
							
							
								
								Fix pipeline parallel inference past_key_value error in Baichuan ( #11318 )  
							
							 
							
							... 
							
							
							
							* fix past_key_value error
* add baichuan2 example
* fix style
* update doc
* add script link in doc
* fix import error
* update 
							
						 
						
							2024-06-17 09:29:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0af0102e61 
								
							 
						 
						
							
							
								
								Add quantization scale search switch ( #11326 )  
							
							 
							
							... 
							
							
							
							* add scale_search switch
* remove llama3 instruct
* remove print 
							
						 
						
							2024-06-14 18:46:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8a3247ac71 
								
							 
						 
						
							
							
								
								support batch forward for q4_k, q6_k ( #11325 )  
							
							 
							
							
							
						 
						
							2024-06-14 18:25:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e8dd8e97ef 
								
							 
						 
						
							
							
								
								fix chatglm lookahead on ARC ( #11320 )  
							
							 
							
							
							
						 
						
							2024-06-14 16:26:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f5ef94046e 
								
							 
						 
						
							
							
								
								exclude dolly-v2-12b for arc perf test ( #11315 )  
							
							 
							
							... 
							
							
							
							* test arc perf
* test
* test
* exclude dolly-v2-12b:2048
* revert changes 
							
						 
						
							2024-06-14 15:35:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4359ab3172 
								
							 
						 
						
							
							
								
								LLM: Add /generate_stream endpoint for Pipeline-Parallel-FastAPI example ( #11187 )  
							
							 
							
							... 
							
							
							
							Add /generate_stream and OpenAI-formatted endpoint for Pipeline-Parallel-FastAPI example 
							
						 
						
							2024-06-14 15:15:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0e7a31a09c 
								
							 
						 
						
							
							
								
								ChatGLM Examples Restructure regarding Installation Steps  ( #11285 )  
							
							 
							
							... 
							
							
							
							* merge install step in glm examples
* fix section
* fix section
* fix tiktoken 
							
						 
						
							2024-06-14 12:37:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								91965b5d05 
								
							 
						 
						
							
							
								
								add glm_sdpa back to fix chatglm-6b ( #11313 )  
							
							 
							
							
							
						 
						
							2024-06-14 10:31:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7f65836cb9 
								
							 
						 
						
							
							
								
								fix chatglm2/3-32k/128k fp16 ( #11311 )  
							
							 
							
							
							
						 
						
							2024-06-14 09:58:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1b0c4c8cb8 
								
							 
						 
						
							
							
								
								use new rotary two in chatglm4 ( #11312 )  
							
							 
							
							... 
							
							
							
							* use new rotary two in chatglm4
* remove 
							
						 
						
							2024-06-13 19:02:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f1410d6823 
								
							 
						 
						
							
							
								
								refactor chatglm4  ( #11301 )  
							
							 
							
							... 
							
							
							
							* glm4
* remove useless code
* style
* add rope_ratio
* update
* fix fp16
* fix style 
							
						 
						
							2024-06-13 18:06:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5e25766855 
								
							 
						 
						
							
							
								
								fix and optimize chatglm2-32k and chatglm3-128k ( #11306 )  
							
							 
							
							
							
						 
						
							2024-06-13 17:37:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								60cb1dac7c 
								
							 
						 
						
							
							
								
								Support PP for qwen1.5  ( #11300 )  
							
							 
							
							
							
						 
						
							2024-06-13 17:35:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f97cce2642 
								
							 
						 
						
							
							
								
								Fix import error of ds autotp ( #11307 )  
							
							 
							
							
							
						 
						
							2024-06-13 16:22:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3682c6a979 
								
							 
						 
						
							
							
								
								add glm4 and qwen2 to igpu perf ( #11304 )  
							
							 
							
							
							
						 
						
							2024-06-13 16:16:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a24666b8f3 
								
							 
						 
						
							
							
								
								fix chatglm3-6b-32k ( #11303 )  
							
							 
							
							
							
						 
						
							2024-06-13 16:01:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								01fe0fc1a2 
								
							 
						 
						
							
							
								
								refactor chatglm2/3 ( #11290 )  
							
							 
							
							
							
						 
						
							2024-06-13 12:22:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								57a023aadc 
								
							 
						 
						
							
							
								
								Fix vllm tp ( #11297 )  
							
							 
							
							
							
						 
						
							2024-06-13 10:47:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								986af21896 
								
							 
						 
						
							
							
								
								fix perf test ( #11295 )  
							
							 
							
							
							
						 
						
							2024-06-13 10:35:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								220151e2a1 
								
							 
						 
						
							
							
								
								Refactor pipeline parallel multi-stage implementation ( #11286 )  
							
							 
							
							
							
						 
						
							2024-06-13 10:00:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								14b1e6b699 
								
							 
						 
						
							
							
								
								Fix gguf_q4k ( #11293 )  
							
							 
							
							... 
							
							
							
							* update embedding parameter
* update benchmark 
							
						 
						
							2024-06-12 20:43:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8edcdeb0e7 
								
							 
						 
						
							
							
								
								Fix bug that torch.ops.torch_ipex.matmul_bias_out cannot work on Linux MTL for short input ( #11292 )  
							
							 
							
							
							
						 
						
							2024-06-12 19:12:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b61f6e3ab1 
								
							 
						 
						
							
							
								
								Add update_parent_folder for nightly_perf_test ( #11287 )  
							
							 
							
							... 
							
							
							
							* add update_parent_folder and change the workflow file
* add update_parent_folder and change the workflow file
* move to pr mode and comment the test
* use one model per config
* revert
---------
Co-authored-by: Yishuo Wang <yishuo.wang@intel.com> 
							
						 
						
							2024-06-12 17:58:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								592f7aa61e 
								
							 
						 
						
							
							
								
								Refine glm1-4 sdp ( #11276 )  
							
							 
							
							... 
							
							
							
							* chatglm
* update
* update
* change chatglm
* update sdpa
* update
* fix style
* fix
* fix glm
* update glm2-32k
* update glm2-32k
* fix cpu
* update
* change lower_bound 
							
						 
						
							2024-06-12 17:11:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cffb932f05 
								
							 
						 
						
							
							
								
								Expose timeout for streamer for fastchat worker ( #11288 )  
							
							 
							
							... 
							
							
							
							* Expose timeout for streamer for fastchat worker
* Change to read from env variables 
							
						 
						
							2024-06-12 17:02:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ivy-lv11 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e7a4e2296f 
								
							 
						 
						
							
							
								
								Add Stable Diffusion examples on GPU and CPU ( #11166 )  
							
							 
							
							... 
							
							
							
							* add sdxl and lcm-lora
* readme
* modify
* add cpu
* add license
* modify
* add file 
							
						 
						
							2024-06-12 16:33:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f224e98297 
								
							 
						 
						
							
							
								
								Add GLM-4 CPU example ( #11223 )  
							
							 
							
							... 
							
							
							
							* Add GLM-4 example
* add tiktoken dependency
* fix
* fix 
							
						 
						
							2024-06-12 15:30:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								40fc8704c4 
								
							 
						 
						
							
							
								
								Add GPU example for GLM-4 ( #11267 )  
							
							 
							
							... 
							
							
							
							* Add GPU example for GLM-4
* Update streamchat.py
* Fix pretrained arguments
Fix pretrained arguments in generate and streamchat.py
* Update Readme
Update install tiktoken required for GLM-4
* Update comments in generate.py 
							
						 
						
							2024-06-12 14:29:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0d9cc9c106 
								
							 
						 
						
							
							
								
								Remove duplicate check for ipex ( #11281 )  
							
							 
							
							... 
							
							
							
							* Replacing builtin.import is causing lots of unpredicted problems. Remove this function. 
							
						 
						
							2024-06-12 13:52:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								10e480ee96 
								
							 
						 
						
							
							
								
								refactor internlm and internlm2 ( #11274 )  
							
							 
							
							
							
						 
						
							2024-06-11 14:19:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fac49f15e3 
								
							 
						 
						
							
							
								
								Remove manual importing ipex in all-in-one benchmark ( #11272 )  
							
							 
							
							
							
						 
						
							2024-06-11 09:32:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								70b17c87be 
								
							 
						 
						
							
							
								
								Merge multiple batches ( #11264 )  
							
							 
							
							... 
							
							
							
							* add merge steps
* move to pr mode
* remove build + add merge.py
* add tohtml and change cp
* change test_batch folder path
* change merge_temp path
* change to html folder
* revert
* change place
* revert 437
* revert space
---------
Co-authored-by: Yishuo Wang <yishuo.wang@intel.com> 
							
						 
						
							2024-06-07 18:38:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4b07712fd8 
								
							 
						 
						
							
							
								
								LLM: Fix vLLM CPU model convert mismatch ( #11254 )  
							
							 
							
							... 
							
							
							
							Fix vLLM CPU model convert mismatch. 
							
						 
						
							2024-06-07 15:54:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								42fab480ea 
								
							 
						 
						
							
							
								
								support stablm2 12b ( #11265 )  
							
							 
							
							
							
						 
						
							2024-06-07 15:46:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dbc3c2d72d 
								
							 
						 
						
							
							
								
								glm4 sdp ( #11253 )  
							
							 
							
							... 
							
							
							
							* glm4 sdp
* fix style
* update comment 
							
						 
						
							2024-06-07 15:42:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								151fcf37bb 
								
							 
						 
						
							
							
								
								check device name in use_flash_attention ( #11263 )  
							
							 
							
							
							
						 
						
							2024-06-07 15:07:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2623944604 
								
							 
						 
						
							
							
								
								qwen2 sdpa small fix ( #11261 )  
							
							 
							
							
							
						 
						
							2024-06-07 14:42:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ea0d03fd28 
								
							 
						 
						
							
							
								
								Refactor baichuan1 7B and 13B ( #11258 )  
							
							 
							
							
							
						 
						
							2024-06-07 14:29:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1aa9c9597a 
								
							 
						 
						
							
							
								
								Avoid duplicate import in IPEX auto importer ( #11227 )  
							
							 
							
							... 
							
							
							
							* Add custom import to avoid ipex duplicate importing
* Add scope limitation 
							
						 
						
							2024-06-07 14:08:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6f2684e5c9 
								
							 
						 
						
							
							
								
								Update pp llama.py to save memory ( #11233 )  
							
							 
							
							
							
						 
						
							2024-06-07 13:18:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ef8e9b2ecd 
								
							 
						 
						
							
							
								
								Refactor qwen2 moe ( #11244 )  
							
							 
							
							
							
						 
						
							2024-06-07 13:14:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7b753dc8ca 
								
							 
						 
						
							
							
								
								Update sample output for HF Qwen2 GPU and CPU ( #11257 )  
							
							 
							
							
							
						 
						
							2024-06-07 11:36:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b7948671de 
								
							 
						 
						
							
							
								
								[WIP] Add look up table in 1st token stage ( #11193 )  
							
							 
							
							... 
							
							
							
							* lookuptb 
							
						 
						
							2024-06-07 10:51:05 +08:00