Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								15219944b8 
								
							 
						 
						
							
							
								
								optimize glm edge again ( #12539 )  
							
							 
							
							
							
						 
						
							2024-12-13 13:52:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6596c18489 
								
							 
						 
						
							
							
								
								[NPU] Modify IPEX_LLM_NPU_DISABLE_COMPILE_OPT setting for long input ( #12537 )  
							
							 
							
							
							
						 
						
							2024-12-13 13:49:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7cc01fdc86 
								
							 
						 
						
							
							
								
								[NPU] further fix of new_value_states ( #12538 )  
							
							 
							
							
							
						 
						
							2024-12-13 13:42:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fa261b8af1 
								
							 
						 
						
							
							
								
								torch 2.3 inference docker ( #12517 )  
							
							 
							
							... 
							
							
							
							* torch 2.3 inference docker
* Update README.md
* add convert code
* rename image
* remove 2.1 and add graph example
* Update README.md 
							
						 
						
							2024-12-13 10:47:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f36c23664f 
								
							 
						 
						
							
							
								
								[NPU] Fix abnormal output with latest driver ( #12530 )  
							
							 
							
							
							
						 
						
							2024-12-12 17:56:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ffce86d69f 
								
							 
						 
						
							
							
								
								add basic glm-edge-v support ( #12533 )  
							
							 
							
							
							
						 
						
							2024-12-12 17:25:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3e0823d2ae 
								
							 
						 
						
							
							
								
								add basic glm-edge support ( #12531 )  
							
							 
							
							
							
						 
						
							2024-12-12 16:02:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dbaf4abcb3 
								
							 
						 
						
							
							
								
								[NPU] Update C++ example with repetition_penalty & update Python code accordingly ( #12528 )  
							
							 
							
							... 
							
							
							
							* Update c++ npu examples with repetition penalty
* Fit python with updated C++ API
* Style fix
* Small fix
* Small fix 
							
						 
						
							2024-12-12 13:42:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2cce89691a 
								
							 
						 
						
							
							
								
								Enable use_batch_forward Optimization on Battlemage GPU ( #12516 )  
							
							 
							
							... 
							
							
							
							* Update get_xpu_device_type() to support bmg
* enable use_batch_forward for bmg
* Update low_bit_linear.py
* Update utils.py
* use batch kernel for fp8e5 
							
						 
						
							2024-12-12 12:44:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6fc27da9c1 
								
							 
						 
						
							
							
								
								[NPU] Update glm-edge support in docs ( #12529 )  
							
							 
							
							
							
						 
						
							2024-12-12 11:14:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								509bdb4661 
								
							 
						 
						
							
							
								
								[NPU] Fix minicpm-2B error ( #12527 )  
							
							 
							
							
							
						 
						
							2024-12-11 16:49:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fd9cf767ed 
								
							 
						 
						
							
							
								
								All-in-one Benchmark run.py: Ignore error if import BenchmarkWrapper failed. ( #12526 )  
							
							 
							
							
							
						 
						
							2024-12-11 16:20:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								41ef4974ab 
								
							 
						 
						
							
							
								
								[NPU] fix transpose_value = False for NPU optimize_model=True ( #12525 )  
							
							 
							
							
							
						 
						
							2024-12-11 15:51:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								588bfa24dc 
								
							 
						 
						
							
							
								
								support hqq ( #12518 )  
							
							 
							
							... 
							
							
							
							* support
* fix 
							
						 
						
							2024-12-11 15:43:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								68f2873bd3 
								
							 
						 
						
							
							
								
								[NPU] Support repetition penalty for simple generate, Python (cpp backend) ( #12522 )  
							
							 
							
							... 
							
							
							
							* Initial support of repetition penalty on NPU (cpp backend) for simple generate
* Bug fix for generation config and others
* Remove unnecessary print and style fix
* Remove unnecessary print
* Fix based on comments 
							
						 
						
							2024-12-11 14:55:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								77404d2a63 
								
							 
						 
						
							
							
								
								support new model ( #12523 )  
							
							 
							
							
							
						 
						
							2024-12-11 13:41:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ea55235cbd 
								
							 
						 
						
							
							
								
								[NPU] Support glm-edge models ( #12511 )  
							
							 
							
							
							
						 
						
							2024-12-09 14:06:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								12c78978dd 
								
							 
						 
						
							
							
								
								[NPU C++] Update example with conversation mode support ( #12510 )  
							
							 
							
							
							
						 
						
							2024-12-06 12:46:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0918d3baca 
								
							 
						 
						
							
							
								
								[NPU] Fix hf generate with save/load generation config for Python (cpp backend) ( #12509 )  
							
							 
							
							... 
							
							
							
							* Fix hf generate with save/load generation config
* Small fix
* Fix based on comments 
							
						 
						
							2024-12-05 19:19:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								49ab8974fa 
								
							 
						 
						
							
							
								
								[NPU] initial support of asym_int4_rtn ( #12484 )  
							
							 
							
							... 
							
							
							
							* initial support of q4_1
* fix
* fix
* update
* update min to Z1
* update
* fix
* update
* fix style
* fix
* support qwen2 optimize_model=True mp version
* temp save
* fix
* fix style
* replace min with zero
* support split linear for q4_1
* fix lm_head with mixed_precision=True
* fix style
* revert test code
* add down proj back for q4_0
* remove print 
							
						 
						
							2024-12-05 17:40:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5e1416c9aa 
								
							 
						 
						
							
							
								
								fix readme for npu cpp examples and llama.cpp ( #12505 )  
							
							 
							
							... 
							
							
							
							* fix cpp readme
* fix cpp readme
* fix cpp readme 
							
						 
						
							2024-12-05 12:32:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f56a111aa2 
								
							 
						 
						
							
							
								
								[NPU] Fix load-low-bit benchmark script ( #12502 )  
							
							 
							
							
							
						 
						
							2024-12-05 10:01:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								84f1c4ad57 
								
							 
						 
						
							
							
								
								Small fix for NPU Python cpp simple generate regarding eos tokens ( #12501 )  
							
							 
							
							
							
						 
						
							2024-12-04 18:54:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d8b14a6305 
								
							 
						 
						
							
							
								
								Update save/load comments ( #12500 )  
							
							 
							
							
							
						 
						
							2024-12-04 18:51:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b89ea1b0cf 
								
							 
						 
						
							
							
								
								Support save/load model for hf generate ( #12499 )  
							
							 
							
							... 
							
							
							
							* change dummy model
* style
* meet review 
							
						 
						
							2024-12-04 18:26:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7d27f134dd 
								
							 
						 
						
							
							
								
								Fix hf generate for llama3.2 ( #12497 )  
							
							 
							
							... 
							
							
							
							* fix kv condition
* meet review 
							
						 
						
							2024-12-04 17:54:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ffa9a9e1b3 
								
							 
						 
						
							
							
								
								Update streaming in npu examples ( #12495 )  
							
							 
							
							... 
							
							
							
							* feat: add streaming
* Update readme accordingly
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2024-12-04 17:51:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a9e3f7f14c 
								
							 
						 
						
							
							
								
								optimize minicpm ( #12496 )  
							
							 
							
							
							
						 
						
							2024-12-04 17:14:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e0bf0054e1 
								
							 
						 
						
							
							
								
								small fix ( #12493 )  
							
							 
							
							
							
						 
						
							2024-12-04 16:37:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7ff4533b39 
								
							 
						 
						
							
							
								
								Support hf generate ( #12477 )  
							
							 
							
							... 
							
							
							
							* generate
* style
* update
* remove timing
* style
* style
* combine generate api
* simple in kwargs 
							
						 
						
							2024-12-04 16:31:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ef4028ac2d 
								
							 
						 
						
							
							
								
								[NPU] Support split lm_head for Qwen2 with CPP ( #12491 )  
							
							 
							
							... 
							
							
							
							* Use split for Qwen2 lm_head instead of slice in optimize_pre
* Support split lm_head in Qwen2 python cpp backend
* Fit with Python acc lib pipeline
* Removed default mixed_precision=True in all-in-one and related examples
* Small fix
* Style fix
* Fix based on comments
* Fix based on comments
* Style fix 
							
						 
						
							2024-12-04 14:41:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5629fdd518 
								
							 
						 
						
							
							
								
								optimize qwen2_vl multiple image input or video input ( #12487 )  
							
							 
							
							
							
						 
						
							2024-12-04 09:24:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c59284418c 
								
							 
						 
						
							
							
								
								Hotfix of BCE-Embedding model ( #12490 )  
							
							 
							
							
							
						 
						
							2024-12-03 18:16:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4ac66db034 
								
							 
						 
						
							
							
								
								[NPU] Support streaming in Python (cpp backend) ( #12488 )  
							
							 
							
							... 
							
							
							
							* Support streaming in NPU Python (cpp backend)
* Small fix 
							
						 
						
							2024-12-03 17:17:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7082844f3f 
								
							 
						 
						
							
							
								
								Fix NPU LLM example save/load tokenizer ( #12485 )  
							
							 
							
							
							
						 
						
							2024-12-03 16:30:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5fe766788e 
								
							 
						 
						
							
							
								
								Fix MiniCPM-V-2_6 running on NPU ( #12486 )  
							
							 
							
							
							
						 
						
							2024-12-03 16:16:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								598603bea6 
								
							 
						 
						
							
							
								
								small fix of imatrix ( #12480 )  
							
							 
							
							
							
						 
						
							2024-12-03 10:46:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ab01753b1c 
								
							 
						 
						
							
							
								
								[NPU] update save-load API usage ( #12473 )  
							
							 
							
							
							
						 
						
							2024-12-03 09:46:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								26adb82ee3 
								
							 
						 
						
							
							
								
								[NPU] Remove hard code ( #12479 )  
							
							 
							
							
							
						 
						
							2024-12-02 18:26:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b2e56a2e03 
								
							 
						 
						
							
							
								
								Add release support for option xpu_arc ( #12422 )  
							
							 
							
							... 
							
							
							
							* Add release support for xpu-arc
* Dependency update 
							
						 
						
							2024-12-02 17:16:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aee9acb303 
								
							 
						 
						
							
							
								
								Add NPU QuickStart & update example links ( #12470 )  
							
							 
							
							... 
							
							
							
							* Add initial NPU quickstart (c++ part unfinished)
* Small update
* Update based on comments
* Update main readme
* Remove LLaMA description
* Small fix
* Small fix
* Remove subsection link in main README
* Small fix
* Update based on comments
* Small fix
* TOC update and other small fixes
* Update for Chinese main readme
* Update based on comments and other small fixes
* Change order 
							
						 
						
							2024-12-02 17:03:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								31c69a8d31 
								
							 
						 
						
							
							
								
								Fix MiniCPM-V models running on NPU ( #12478 )  
							
							 
							
							
							
						 
						
							2024-12-02 16:29:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								54d9a590d4 
								
							 
						 
						
							
							
								
								[NPU]Fix eos_token setting ( #12475 )  
							
							 
							
							
							
						 
						
							2024-12-02 14:18:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								59bd4a214f 
								
							 
						 
						
							
							
								
								add vLLM glm4 fix ( #12474 )  
							
							 
							
							
							
						 
						
							2024-12-02 14:05:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4b6c3160be 
								
							 
						 
						
							
							
								
								Support imatrix-guided quantization for NPU CW ( #12468 )  
							
							 
							
							... 
							
							
							
							* init commit
* remove print
* add interface
* fix
* fix
* fix style 
							
						 
						
							2024-12-02 11:31:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f99f188023 
								
							 
						 
						
							
							
								
								Hotfix of benchmark script ( #12467 )  
							
							 
							
							
							
						 
						
							2024-11-29 14:00:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c911026f03 
								
							 
						 
						
							
							
								
								[NPU C++] Update model support & examples & benchmark  ( #12466 )  
							
							 
							
							
							
						 
						
							2024-11-29 13:35:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								14d8d3d8af 
								
							 
						 
						
							
							
								
								Integrate NPU C++ imple into ipex-llm ( #12461 )  
							
							 
							
							
							
						 
						
							2024-11-29 09:25:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								490bb0ca53 
								
							 
						 
						
							
							
								
								[NPU] update fused layers for GW ( #12459 )  
							
							 
							
							... 
							
							
							
							* update fused layers for GW
* fix
* fix llama condition for glm model
* update 
							
						 
						
							2024-11-28 17:14:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1b533a105c 
								
							 
						 
						
							
							
								
								[NPU] Add env to enable scale search ( #12462 )  
							
							 
							
							... 
							
							
							
							* add env enable scale search
* address comment
* move logic 
							
						 
						
							2024-11-28 17:06:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d272f6b471 
								
							 
						 
						
							
							
								
								remove nf4 unsupport comment in cpu finetuning ( #12460 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-11-28 13:26:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b29da30205 
								
							 
						 
						
							
							
								
								[NPU] Update C++ L0 ( #12458 )  
							
							 
							
							... 
							
							
							
							* update
* fix style 
							
						 
						
							2024-11-27 22:08:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6f3441ba4c 
								
							 
						 
						
							
							
								
								fix glm4-9b overflow ( #12455 )  
							
							 
							
							
							
						 
						
							2024-11-27 17:39:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								281c9b0bb9 
								
							 
						 
						
							
							
								
								[NPU] Add L0 support for NPU C++ ( #12454 )  
							
							 
							
							... 
							
							
							
							* add L0 models support
* meet review
* fix style 
							
						 
						
							2024-11-27 17:04:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ce6fcaa9ba 
								
							 
						 
						
							
							
								
								update transformers version in example of glm4 ( #12453 )  
							
							 
							
							... 
							
							
							
							* fix: update transformers version in example of glm4
* fix: textual adjustments
* fix: textual adjustment 
							
						 
						
							2024-11-27 15:02:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								effb9bb41c 
								
							 
						 
						
							
							
								
								Small update to LangChain examples readme ( #12452 )  
							
							 
							
							
							
						 
						
							2024-11-27 14:02:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								acd77d9e87 
								
							 
						 
						
							
							
								
								Remove env variable BIGDL_LLM_XMX_DISABLED in documentation ( #12445 )  
							
							 
							
							... 
							
							
							
							* fix: remove BIGDL_LLM_XMX_DISABLED in mddocs
* fix: remove set SYCL_CACHE_PERSISTENT=1 in example
* fix: remove BIGDL_LLM_XMX_DISABLED in workflows
* fix: merge igpu and A-series Graphics
* fix: remove set BIGDL_LLM_XMX_DISABLED=1 in example
* fix: remove BIGDL_LLM_XMX_DISABLED in workflows
* fix: merge igpu and A-series Graphics
* fix: textual adjustment
* fix: textual adjustment
* fix: textual adjustment 
							
						 
						
							2024-11-27 11:16:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f8c2bb2943 
								
							 
						 
						
							
							
								
								[NPU] optimize qwen2 prefill performance for C++ ( #12451 )  
							
							 
							
							
							
						 
						
							2024-11-27 10:46:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7b40f9b372 
								
							 
						 
						
							
							
								
								[NPU] Support GW for NPU C++ ( #12450 )  
							
							 
							
							
							
						 
						
							2024-11-26 17:46:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c2efa264d9 
								
							 
						 
						
							
							
								
								Update LangChain examples to use upstream ( #12388 )  
							
							 
							
							... 
							
							
							
							* Update LangChain examples to use upstream
* Update README and fix links
* Update LangChain CPU examples to use upstream
* Update LangChain CPU voice_assistant example
* Update CPU README
* Update GPU README
* Remove GPU Langchain vLLM example and fix comments
* Change langchain -> LangChain
* Add reference for both upstream llms and embeddings
* Fix comments
* Fix comments
* Fix comments
* Fix comments
* Fix comment 
							
						 
						
							2024-11-26 16:43:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								24b46b2b19 
								
							 
						 
						
							
							
								
								[NPU] further fix  of qwen2 int8 pipeline & C++ ( #12449 )  
							
							 
							
							... 
							
							
							
							* fix
* fix style 
							
						 
						
							2024-11-26 16:39:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								303b104c10 
								
							 
						 
						
							
							
								
								Fix abnormal output for Qwen2-7B when sym_int8 ( #12446 )  
							
							 
							
							
							
						 
						
							2024-11-26 15:53:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								52c17fe104 
								
							 
						 
						
							
							
								
								Optimize first token of C++ NPU by adding npu_dpu_groups ( #12443 )  
							
							 
							
							... 
							
							
							
							* add npu_dpu_groups
* add check for env
* fix style 
							
						 
						
							2024-11-26 11:41:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								66bd7abae4 
								
							 
						 
						
							
							
								
								add sdxl and lora-lcm optimization ( #12444 )  
							
							 
							
							... 
							
							
							
							* add sdxl and lora-lcm optimization
* fix openjourney speed drop 
							
						 
						
							2024-11-26 11:38:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0e23bd779f 
								
							 
						 
						
							
							
								
								Add support of llama3.2 for NPU C++ ( #12442 )  
							
							 
							
							... 
							
							
							
							* initial support of  llama3.2
* update
* update
* fix style
* fix style
* fix
* small fix 
							
						 
						
							2024-11-26 09:26:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cdd41f5e4c 
								
							 
						 
						
							
							
								
								optimize sdxl again ( #12441 )  
							
							 
							
							
							
						 
						
							2024-11-25 17:46:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b9abb8a285 
								
							 
						 
						
							
							
								
								Support qwen2.5 3B for NPU & update related examples ( #12438 )  
							
							 
							
							... 
							
							
							
							* update qwen2.5-3B
* update convert
* small fix
* replace load_in_low_bit with low_bit
* small fix 
							
						 
						
							2024-11-25 16:38:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b633fbf26c 
								
							 
						 
						
							
							
								
								add chinese prompt troubleshooting for npu cpp examples ( #12437 )  
							
							 
							
							... 
							
							
							
							* add chinese prompt troubleshooting
* add chinese prompt troubleshooting 
							
						 
						
							2024-11-25 15:28:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8164aed802 
								
							 
						 
						
							
							
								
								small change ( #12439 )  
							
							 
							
							
							
						 
						
							2024-11-25 14:35:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								be132c4209 
								
							 
						 
						
							
							
								
								fix and optimize sd ( #12436 )  
							
							 
							
							
							
						 
						
							2024-11-25 14:09:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f41405368a 
								
							 
						 
						
							
							
								
								Support minicpm for NPU C++ ( #12434 )  
							
							 
							
							... 
							
							
							
							* support minicpm-1b
* update
* tune fused_layers
* update readme.md 
							
						 
						
							2024-11-25 10:42:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0819fad34e 
								
							 
						 
						
							
							
								
								support Llama2-7B / Llama3-8B for NPU C++ ( #12431 )  
							
							 
							
							... 
							
							
							
							* support llama2
* update
* support fused_layers=4 for Llama2-7B 
							
						 
						
							2024-11-22 18:47:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4ffa6c752c 
								
							 
						 
						
							
							
								
								New convert support for C++ NPU ( #12430 )  
							
							 
							
							... 
							
							
							
							* initial commit
* fix
* fix style
* fix style
* fix
* fix 
							
						 
						
							2024-11-22 14:28:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e61ae88c5b 
								
							 
						 
						
							
							
								
								Upgrade dependency for xpu_lnl and xpu_arl option ( #12424 )  
							
							 
							
							
							
						 
						
							2024-11-21 18:37:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2935e97610 
								
							 
						 
						
							
							
								
								small fix of cpp readme ( #12425 )  
							
							 
							
							
							
						 
						
							2024-11-21 18:21:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8fdc36c140 
								
							 
						 
						
							
							
								
								Optimize with new batch kernel when batch_size=1 on LNL ( #12419 )  
							
							 
							
							... 
							
							
							
							* Add use batch kernel condition for LNL
* Fix for other device judgement
* Fix based on comment 
							
						 
						
							2024-11-21 16:21:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7e0a840f74 
								
							 
						 
						
							
							
								
								add optimization to openjourney ( #12423 )  
							
							 
							
							... 
							
							
							
							* add optimization to openjourney
* add optimization to openjourney 
							
						 
						
							2024-11-21 15:23:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								145e8b480f 
								
							 
						 
						
							
							
								
								update batch kernel condition ( #12421 )  
							
							 
							
							
							
						 
						
							2024-11-21 10:12:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7288c759ce 
								
							 
						 
						
							
							
								
								Initial NPU C++ Example ( #12417 )  
							
							 
							
							... 
							
							
							
							* temp save
* meet review, update
* update
* meet review, add license
* typo 
							
						 
						
							2024-11-21 10:09:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d2a37b6ab2 
								
							 
						 
						
							
							
								
								add Stable diffusion examples ( #12418 )  
							
							 
							
							... 
							
							
							
							* add openjourney example
* add timing
* add stable diffusion to model page
* 4.1 fix
* small fix 
							
						 
						
							2024-11-20 17:18:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								54c62feb74 
								
							 
						 
						
							
							
								
								[NPU] dump prefill IR for further C++ solution ( #12402 )  
							
							 
							
							... 
							
							
							
							* save prefill ir
* fix
* shorten convert time
* fix
* fix
* fix
* fix
* fix style
* dump config.json
* meet review
* small fix 
							
						 
						
							2024-11-20 15:20:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ff3f7cb25f 
								
							 
						 
						
							
							
								
								Fix speech_paraformer issue with unexpected changes ( #12416 )  
							
							 
							
							... 
							
							
							
							* Fix speech_paraformer issue with unexpected changes
* Add paraformer version specified 
							
						 
						
							2024-11-19 15:01:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a69395f31f 
								
							 
						 
						
							
							
								
								Support performance mode of GLM4 model ( #12401 )  
							
							 
							
							... 
							
							
							
							* Initial support of prepare generation args for transformers 445
* Small fix to chatglm4 model optimization
* Small fix
* fix glm4 position id
* fix glm4 error
* Small change in condition & fix based on comments
* Style fixes
---------
Co-authored-by: cyita <yitastudy@gmail.com> 
							
						 
						
							2024-11-18 18:46:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Song Fuchang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d2c821d458 
								
							 
						 
						
							
							
								
								Add missing arguments in pipeline parallel generate method ( #12142 )  
							
							 
							
							... 
							
							
							
							Add two arguments: negative_prompt_ids and negative_prompt_attention_mask to generate method in pipeline_parallel.py. 
							
						 
						
							2024-11-18 13:50:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3d5fbf2069 
								
							 
						 
						
							
							
								
								update batch kernel condition ( #12408 )  
							
							 
							
							
							
						 
						
							2024-11-15 13:47:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d4d949443f 
								
							 
						 
						
							
							
								
								[NPU] change attention_mask to fp16 ( #12400 )  
							
							 
							
							
							
						 
						
							2024-11-14 17:20:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7e50ff113c 
								
							 
						 
						
							
							
								
								Add padding_token=eos_token for GPU trl QLora example ( #12398 )  
							
							 
							
							... 
							
							
							
							* Avoid the "tokenizer doesn't have a padding token" error. 
							
						 
						
							2024-11-14 10:51:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d2cbcb060c 
								
							 
						 
						
							
							
								
								Add initial support for modeling_xlm encoder on NPU ( #12393 )  
							
							 
							
							... 
							
							
							
							* Add initial support for modeling_xlm encoder on NPU
* Add EmbeddingModel class to keep the same usage with bce and npu fp16 linear convert
* Optimize current implementation to support EmbeddingModel.encode API and convert other torch modules to NPU
* Add related example and documents 
							
						 
						
							2024-11-14 10:50:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								59b01fa7d2 
								
							 
						 
						
							
							
								
								small fix ( #12397 )  
							
							 
							
							
							
						 
						
							2024-11-14 10:03:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								00fce5c940 
								
							 
						 
						
							
							
								
								use new q4_0 batch kernel ( #12396 )  
							
							 
							
							
							
						 
						
							2024-11-13 18:37:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d6d63d6b84 
								
							 
						 
						
							
							
								
								[NPU] Qwen prefill attn_mask type hotfix ( #12395 )  
							
							 
							
							... 
							
							
							
							* qwen prefill attn_mask type fp16
* update 
							
						 
						
							2024-11-13 17:51:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9220babaab 
								
							 
						 
						
							
							
								
								qwen prefill attn_mask type fp16 ( #12394 )  
							
							 
							
							
							
						 
						
							2024-11-13 17:45:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1158f91648 
								
							 
						 
						
							
							
								
								Fix llava with multi-image inputs ( #12384 )  
							
							 
							
							
							
						 
						
							2024-11-13 09:27:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0ee54fc55f 
								
							 
						 
						
							
							
								
								Upgrade to vllm 0.6.2 ( #12338 )  
							
							 
							
							... 
							
							
							
							* Initial updates for vllm 0.6.2
* fix
* Change Dockerfile to support v062
* Fix
* fix examples
* Fix
* done
* fix
* Update engine.py
* Fix Dockerfile to original path
* fix
* add option
* fix
* fix
* fix
* fix
---------
Co-authored-by: xiangyuT <xiangyu.tian@intel.com> 
							
						 
						
							2024-11-12 20:35:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6bf5a8c230 
								
							 
						 
						
							
							
								
								[NPU] Update qwen2 compile config ( #12383 )  
							
							 
							
							... 
							
							
							
							* update
* fix 
							
						 
						
							2024-11-12 16:59:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7a97fbb779 
								
							 
						 
						
							
							
								
								Support vpm and resampler module of minicpm-v on NPU ( #12375 )  
							
							 
							
							
							
						 
						
							2024-11-12 15:59:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e0918934c8 
								
							 
						 
						
							
							
								
								Add fused_mlp to glm4v models ( #12378 )  
							
							 
							
							
							
						 
						
							2024-11-11 17:10:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dc34e8c51f 
								
							 
						 
						
							
							
								
								optimize glm4v vision attention ( #12369 )  
							
							 
							
							
							
						 
						
							2024-11-08 17:01:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2dfcc36825 
								
							 
						 
						
							
							
								
								Fix trl version and padding in trl qlora example ( #12368 )  
							
							 
							
							... 
							
							
							
							* Change trl to 0.9.6
* Enable padding to avoid padding related errors. 
							
						 
						
							2024-11-08 16:05:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								51f7f87768 
								
							 
						 
						
							
							
								
								fix ipex 2.3 bug ( #12366 )  
							
							 
							
							
							
						 
						
							2024-11-08 13:29:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b2e69a896c 
								
							 
						 
						
							
							
								
								[NPU] Support Baichuan groupwise & gw code refactor ( #12337 )  
							
							 
							
							... 
							
							
							
							* support minicpm 1b & qwen 1.5b gw
* support minicpm 1b
* baichuan part
* update
* support minicpm 1b & qwen 1.5b gw
* support minicpm 1b
* baichuan part
* update
* update
* update
* baichuan support
* code refactor
* remove code
* fix style
* address comments
* revert 
							
						 
						
							2024-11-08 11:42:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								812d5cc32e 
								
							 
						 
						
							
							
								
								[NPU L0] Support llama3.2 in L0 pipeline ( #12361 )  
							
							 
							
							
							
						 
						
							2024-11-08 10:01:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8fe294e01f 
								
							 
						 
						
							
							
								
								Small fix to all-in-one benchmark ( #12362 )  
							
							 
							
							
							
						 
						
							2024-11-07 18:56:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1a6cbc473f 
								
							 
						 
						
							
							
								
								Add fused mlp optimizations to glm4 models ( #12360 )  
							
							 
							
							... 
							
							
							
							* Add fused mlp to glm4 models
* Small fix 
							
						 
						
							2024-11-07 18:52:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ad68c56573 
								
							 
						 
						
							
							
								
								small improvement ( #12359 )  
							
							 
							
							
							
						 
						
							2024-11-07 15:57:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d880e534d2 
								
							 
						 
						
							
							
								
								[NPU] acclib llama3.2 support groupwise ( #12355 )  
							
							 
							
							... 
							
							
							
							* change inter_pp
* add comment 
							
						 
						
							2024-11-07 11:19:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								79f2877413 
								
							 
						 
						
							
							
								
								add minicpm-v models to transformers_int4_npu_win api ( #12352 )  
							
							 
							
							... 
							
							
							
							* add minicpm npu
* optimize model 
							
						 
						
							2024-11-07 10:05:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a7b66683f1 
								
							 
						 
						
							
							
								
								[NPU] Add Optimized Support for Llama3.2-1B/3B on NPU ( #12339 )  
							
							 
							
							... 
							
							
							
							* Add initial support for llama3.2-1b/3b
* move llama3.2 support into current llama_mp impl 
							
						 
						
							2024-11-06 19:21:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								872a74481a 
								
							 
						 
						
							
							
								
								Small optimization to glm4 models ( #12351 )  
							
							 
							
							
							
						 
						
							2024-11-06 19:16:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c267355b35 
								
							 
						 
						
							
							
								
								fix three NPU benchmark issues ( #12350 )  
							
							 
							
							... 
							
							
							
							* fix three issues
* limit mixed_precision for CW only 
							
						 
						
							2024-11-06 19:01:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f24352aef9 
								
							 
						 
						
							
							
								
								llama 3.1/3.2 support compresskv ( #12347 )  
							
							 
							
							... 
							
							
							
							* llama 3.1/3.2 support compresskv
* update
* fix transformers 4.45 error
* fix style
* fix typo
* disable llama3.2 1b compresskv 
							
						 
						
							2024-11-06 17:33:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d984c0672a 
								
							 
						 
						
							
							
								
								Add MiniCPM-V-2_6 to arc perf test ( #12349 )  
							
							 
							
							
							
						 
						
							2024-11-06 16:32:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e23ef7d088 
								
							 
						 
						
							
							
								
								optimize glm4v's vision part ( #12346 )  
							
							 
							
							
							
						 
						
							2024-11-06 15:43:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c8b7265359 
								
							 
						 
						
							
							
								
								Add basic glm4v support ( #12345 )  
							
							 
							
							
							
						 
						
							2024-11-06 13:50:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69e3a56943 
								
							 
						 
						
							
							
								
								[NPU] Hot fix of load_low_bit ( #12344 )  
							
							 
							
							
							
						 
						
							2024-11-06 10:07:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7240c283a3 
								
							 
						 
						
							
							
								
								Add dummy model in iGPU perf ( #12341 )  
							
							 
							
							... 
							
							
							
							* Add dummy model in iGPU perf
* Add dummy model in iGPU perf
* Fix 
							
						 
						
							2024-11-05 17:56:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8e9a3a1158 
								
							 
						 
						
							
							
								
								fix chatglm2 cpu ut ( #12336 )  
							
							 
							
							
							
						 
						
							2024-11-05 16:43:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d872639395 
								
							 
						 
						
							
							
								
								[NPU] Llama3, Qwen2 1.5b, MiniCPM 1/2B groupwise support ( #12327 )  
							
							 
							
							... 
							
							
							
							* support minicpm 1b & qwen 1.5b gw
* support minicpm 1b
* support minicpm 2b
* fix style & error
* fix style & update
* remove print 
							
						 
						
							2024-11-05 15:51:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								82a61b5cf3 
								
							 
						 
						
							
							
								
								Limit trl version in example ( #12332 )  
							
							 
							
							... 
							
							
							
							* Limit trl version in example
* Limit trl version in example 
							
						 
						
							2024-11-05 14:50:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								45b0d371aa 
								
							 
						 
						
							
							
								
								update benchmark readme ( #12323 )  
							
							 
							
							... 
							
							
							
							* update benchmark readme
update new comment with memory usage included
* Update README.md 
							
						 
						
							2024-11-05 08:19:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1b637e4477 
								
							 
						 
						
							
							
								
								Add chatglm2&3 fuse mlp ( #12328 )  
							
							 
							
							... 
							
							
							
							* add chatglm fuse mlp 
							
						 
						
							2024-11-04 18:04:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								94c4ce389f 
								
							 
						 
						
							
							
								
								[NPU] Add env to disable compile opt ( #12330 )  
							
							 
							
							... 
							
							
							
							* add env to disable compile opt
* fix style
* fix style 
							
						 
						
							2024-11-04 17:46:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e54af44ed6 
								
							 
						 
						
							
							
								
								Add transformers_int4_npu_pipeline_win in all-in-one benchmark ( #12325 )  
							
							 
							
							... 
							
							
							
							* add transformers_int4_npu_pipeline_win
* bugfix
* bugfix: wrong actual_output_len
* fix format
* bugfix & update `README.md` 
							
						 
						
							2024-11-04 16:00:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5ee6f97d6f 
								
							 
						 
						
							
							
								
								[NPU L0] Add layernorm weight as const / input setting ( #12322 )  
							
							 
							
							
							
						 
						
							2024-11-04 15:46:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a01371f90b 
								
							 
						 
						
							
							
								
								Doc: update harness readme ( #12324 )  
							
							 
							
							
							
						 
						
							2024-11-04 14:58:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c8679ad592 
								
							 
						 
						
							
							
								
								Qwen layernorm as input ( #12309 )  
							
							 
							
							... 
							
							
							
							* qwen layernorm as input
* add group size 
							
						 
						
							2024-11-04 09:51:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								20755e8077 
								
							 
						 
						
							
							
								
								Small fix to all-in-one benchmark scripts ( #12317 )  
							
							 
							
							
							
						 
						
							2024-11-01 19:16:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								48123af463 
								
							 
						 
						
							
							
								
								add npu_group_size for transformers_int4_npu_win in all-in-one benchmark api ( #12316 )  
							
							 
							
							... 
							
							
							
							* add `npu_group_size` for `transformers_int4_npu_win`
small bugfix
* update 
							
						 
						
							2024-11-01 18:44:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cd5e22cee5 
								
							 
						 
						
							
							
								
								Update Llava GPU Example ( #12311 )  
							
							 
							
							... 
							
							
							
							* update-llava-example
* add warmup
* small fix on llava example
* remove space & extra print prompt
* renew example
* small fix
---------
Co-authored-by: Jinhe Tang <jin.tang1337@gmail.com> 
							
						 
						
							2024-11-01 17:06:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f53bb4ea0b 
								
							 
						 
						
							
							
								
								[NPU L0] Update 1st token generation ( #12314 )  
							
							 
							
							
							
						 
						
							2024-11-01 17:02:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d409d9d0eb 
								
							 
						 
						
							
							
								
								[NPU L0] Update streaming mode of example ( #12312 )  
							
							 
							
							
							
						 
						
							2024-11-01 15:38:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								126f95be80 
								
							 
						 
						
							
							
								
								Fix DPO finetuning example ( #12313 )  
							
							 
							
							
							
						 
						
							2024-11-01 13:29:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								05c5d0267a 
								
							 
						 
						
							
							
								
								[NPU] Llama2 prefill use ov sdp ( #12310 )  
							
							 
							
							... 
							
							
							
							* prefill use sdp
* add param
* update
* fix style
* fix style
* meet comments 
							
						 
						
							2024-11-01 11:05:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								eda764909c 
								
							 
						 
						
							
							
								
								Add minicpm-2b in L0 pipeline ( #12308 )  
							
							 
							
							
							
						 
						
							2024-11-01 09:30:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b9853f98b3 
								
							 
						 
						
							
							
								
								fix qwen2 attention_mask slice ( #12307 )  
							
							 
							
							
							
						 
						
							2024-10-31 17:00:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3df6195cb0 
								
							 
						 
						
							
							
								
								Fix application quickstart ( #12305 )  
							
							 
							
							... 
							
							
							
							* fix graphrag quickstart
* fix axolotl quickstart
* fix ragflow quickstart
* fix ragflow quickstart
* fix graphrag toc
* fix comments
* fix comment
* fix comments 
							
						 
						
							2024-10-31 16:57:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4892df61c9 
								
							 
						 
						
							
							
								
								Add qwen2-1.5b in l0 pipeline example ( #12306 )  
							
							 
							
							
							
						 
						
							2024-10-31 16:44:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								30f668c206 
								
							 
						 
						
							
							
								
								updated transformers & accelerate requirements ( #12301 )  
							
							 
							
							
							
						 
						
							2024-10-31 15:59:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								97a0f7fd35 
								
							 
						 
						
							
							
								
								Codegeex support ( #12303 )  
							
							 
							
							... 
							
							
							
							* new codegeex attn
* use kv cache
* add compress/quantize kv
* remove compress/quantize kv
* fix style check
* fix style
* fix codegeex 
							
						 
						
							2024-10-31 15:28:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								72605c7016 
								
							 
						 
						
							
							
								
								fix llama3.1/3.2 quantize kv check ( #12302 )  
							
							 
							
							
							
						 
						
							2024-10-31 11:55:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								416c19165c 
								
							 
						 
						
							
							
								
								Add Qwen pipeline and example ( #12292 )  
							
							 
							
							... 
							
							
							
							* support qwen pipeline
* update error msg
* style
* meet review
* minor 
							
						 
						
							2024-10-31 11:25:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Rahul Nair 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4cf1ccc43a 
								
							 
						 
						
							
							
								
								Update DPO README.md ( #12162 )  
							
							 
							
							... 
							
							
							
							bitsandbytes multi-backend is now available and is required; otherwise it would error out saying that no CUDA is available 
							
						 
						
							2024-10-31 10:56:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								29400e2e75 
								
							 
						 
						
							
							
								
								feat: change oneccl to internal ( #12296 )  
							
							 
							
							... 
							
							
							
							* feat: change oneccl
* fix: restore llama-70b
* fix: remove tab
* fix: remove extra blank
* small fix
* add comments
* fix: add a blank space 
							
						 
						
							2024-10-31 09:51:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6f22133efc 
								
							 
						 
						
							
							
								
								Update AWQ and GPTQ GPU example ( #12300 )  
							
							 
							
							
							
						 
						
							2024-10-31 09:35:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0763268e4c 
								
							 
						 
						
							
							
								
								[NPU]Qwen2 groupwise performance opt ( #12299 )  
							
							 
							
							... 
							
							
							
							* qwen2 gw performance opt
* remove debug 
							
						 
						
							2024-10-30 17:40:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								41b8064554 
								
							 
						 
						
							
							
								
								Support minicpm-1B in level0 pipeline ( #12297 )  
							
							 
							
							
							
						 
						
							2024-10-30 17:21:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								46d8300f6b 
								
							 
						 
						
							
							
								
								bugfix for qlora finetuning on GPU ( #12298 )  
							
							 
							
							... 
							
							
							
							* bugfix for qlora 100 step error
* indent fix
* annotation fix 
							
						 
						
							2024-10-30 16:54:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								70037ad55f 
								
							 
						 
						
							
							
								
								Groupwise prefill optimization ( #12291 )  
							
							 
							
							... 
							
							
							
							* except lm_head
* remove
* support gw lm_head
* update
* fix
* remove run.bat
* fix style
* support llama3
* slice -> split
* remove debug
* fix style
* add dpu 
							
						 
						
							2024-10-30 14:59:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								540eaeb12c 
								
							 
						 
						
							
							
								
								refactor attention_softmax ( #12295 )  
							
							 
							
							
							
						 
						
							2024-10-30 13:20:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2b2cb9c693 
								
							 
						 
						
							
							
								
								[NPU pipeline] Support save & load and update examples ( #12293 )  
							
							 
							
							... 
							
							
							
							* support save & load, update llama examples
* update baichuan2 example
* update readme 
							
						 
						
							2024-10-30 10:02:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5a15098835 
								
							 
						 
						
							
							
								
								Initial support for quantized forward on CPU when quantization_group_size=0 ( #12282 )  
							
							 
							
							... 
							
							
							
							* Initial support for quantized forward on CPU when quantization_group_size=0
* Style fix
* Style fix
* Small fix
* Small fix 
							
						 
						
							2024-10-29 19:40:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3feb58d1e4 
								
							 
						 
						
							
							
								
								Support baichuan2 for level0 pipeline ( #12289 )  
							
							 
							
							
							
						 
						
							2024-10-29 19:24:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								546f455e8e 
								
							 
						 
						
							
							
								
								Patch sdpa check function in specific module attributes table ( #12285 )  
							
							 
							
							
							
						 
						
							2024-10-29 18:41:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								821b0033ed 
								
							 
						 
						
							
							
								
								[NPU L0] update layernorm & code refactor ( #12287 )  
							
							 
							
							... 
							
							
							
							* update layernorm & code refactor
* fix style
* add common utils
* change to Pool()
* remove print 
							
						 
						
							2024-10-29 15:01:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4467645088 
								
							 
						 
						
							
							
								
								[NPU] Support l0 Llama groupwise ( #12276 )  
							
							 
							
							... 
							
							
							
							* except lm_head
* remove
* support gw lm_head
* update
* fix
* remove run.bat
* fix style
* support llama3 
							
						 
						
							2024-10-28 17:06:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3fe2ea3081 
								
							 
						 
						
							
							
								
								[NPU] Reuse prefill of acc lib for pipeline ( #12279 )  
							
							 
							
							... 
							
							
							
							* first commit
* update example
* fix style
* update example
* embedding as const
* fix generate
* code refactor
* meet code review
* fix style
* change max_output_len to max_context_len
* fix all-in-one
* fix example
* add check for new tokens 
							
						 
						
							2024-10-28 16:05:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ec362e6133 
								
							 
						 
						
							
							
								
								Add llama3 level0 example ( #12275 )  
							
							 
							
							
							
						 
						
							2024-10-28 09:24:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								08cb065370 
								
							 
						 
						
							
							
								
								hot-fix redundant import funasr ( #12277 )  
							
							 
							
							
							
						 
						
							2024-10-25 19:40:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a0c6432899 
								
							 
						 
						
							
							
								
								[NPU] Add support for loading a FunASR model ( #12073 )  
							
							 
							
							... 
							
							
							
							* add support for loading funasr model
* add initial support for paraformer-encoder
* add npu ops impl
* add encoder-decoder npu pipeline
* move the first 30 layers of the paraformer encoder to NPU and keep the remaining layers on CPU 
							
						 
						
							2024-10-25 17:22:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								854398f6e0 
								
							 
						 
						
							
							
								
								update example to reduce peak memory usage ( #12274 )  
							
							 
							
							
							
						 
						
							2024-10-25 17:09:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e713296090 
								
							 
						 
						
							
							
								
								Update all-in-one benchmark ( #12272 )  
							
							 
							
							... 
							
							
							
							* Update all-in-one benchmark
* Small fix
* Small fix
* Small fix 
							
						 
						
							2024-10-25 16:52:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								43b25a2fe7 
								
							 
						 
						
							
							
								
								Fix llama 3.2 vision on LNL ( #12264 )  
							
							 
							
							... 
							
							
							
							* Fix llama 3.2 vision on LNL
* Small fix 
							
						 
						
							2024-10-25 16:23:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								93895b2ac2 
								
							 
						 
						
							
							
								
								Openvino all in one benchmark small fix ( #12269 )  
							
							 
							
							... 
							
							
							
							* Small update for all-in-one benchmark readme to support OpenVINO tests
* Small fix 
							
						 
						
							2024-10-25 14:13:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f7f62a3fef 
								
							 
						 
						
							
							
								
								Add OpenVINO performance tests to all-in-one benchmark ( #12238 )  
							
							 
							
							... 
							
							
							
							* add-openvino-to-all-in-one
* update on openvino API
* Update save_openvino.py
* Update save_openvino.py
* Update save_openvino.py
* update on run.py and save_openvino
* update references
* Create openvino-requirements.txt
* fix on comments
* Small updates
* Small fix
* Fix
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2024-10-25 13:53:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ae57e23e4f 
								
							 
						 
						
							
							
								
								fix incompatibility between llama GW & llama pipeline ( #12267 )  
							
							 
							
							... 
							
							
							
							* fix
* fix 
							
						 
						
							2024-10-25 10:31:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b5e663854b 
								
							 
						 
						
							
							
								
								[NPU] Support llama groupwise ( #12260 )  
							
							 
							
							... 
							
							
							
							* support llama gw
* support llama gw lm_head
* fix style
* remove unused code 
							
						 
						
							2024-10-24 18:06:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								39c9d1de52 
								
							 
						 
						
							
							
								
								fix code geex ( #12261 )  
							
							 
							
							
							
						 
						
							2024-10-24 14:34:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f3a2b20e6b 
								
							 
						 
						
							
							
								
								Optimize gpt2 ( #12259 )  
							
							 
							
							
							
						 
						
							2024-10-24 13:44:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								821fd96367 
								
							 
						 
						
							
							
								
								Initial integrate our L0 Llama impl into ipex-llm ( #12255 )  
							
							 
							
							... 
							
							
							
							* temp save
* initial support
* fix
* simplify code
* fix style
* fix example
* make default value of pipeline as False 
							
						 
						
							2024-10-24 09:49:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cacc891962 
								
							 
						 
						
							
							
								
								Fix PR validation ( #12253 )  
							
							 
							
							
							
						 
						
							2024-10-23 18:10:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b685cf4349 
								
							 
						 
						
							
							
								
								Fix npu group size setting of optimize_model=False ( #12256 )  
							
							 
							
							
							
						 
						
							2024-10-23 17:53:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								567b77a76b 
								
							 
						 
						
							
							
								
								Support IR and blob format for llama level0 pipeline ( #12251 )  
							
							 
							
							
							
						 
						
							2024-10-23 16:02:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								578aef245d 
								
							 
						 
						
							
							
								
								Fix models auto choose SdpaAttention with ipex 2.3 ( #12252 )  
							
							 
							
							
							
						 
						
							2024-10-23 15:33:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								88dc120a4c 
								
							 
						 
						
							
							
								
								fix fp16 linear ( #12250 )  
							
							 
							
							
							
						 
						
							2024-10-23 14:35:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e8cf7f32f5 
								
							 
						 
						
							
							
								
								npu gw small fix ( #12249 )  
							
							 
							
							
							
						 
						
							2024-10-23 14:26:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aae2490cb8 
								
							 
						 
						
							
							
								
								fix UT ( #12247 )  
							
							 
							
							... 
							
							
							
							* fix ut
* Update test_transformers_api_attention.py
* Update test_transformers_api_mlp.py 
							
						 
						
							2024-10-23 14:13:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e37f951cce 
								
							 
						 
						
							
							
								
								[NPU] Groupwise ( #12241 )  
							
							 
							
							... 
							
							
							
							* dq divide
* fix
* support attn divide
* update qwen2 7b
* divide down_proj & other linear
* use concat & reduce sum
* support scale after
* support qwen2
* w/ mm
* update reshape
* spda
* split
* split 2+
* update
* lm head-> 28
* no scale
* update
* update
* update
* fix style
* fix style
* to split linear
* update
* update code
* address comments
* fix style & remove redundant code & revert benchmark scripts
* fix style & remove code
* update save & load
---------
Co-authored-by: Yang Wang <yang3.wang@intel.com> 
							
						 
						
							2024-10-23 14:10:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8fa98e2742 
								
							 
						 
						
							
							
								
								Remove Qwen2-7b from NPU example for "Run Optimized Models (Experimental)" ( #12245 )  
							
							 
							
							... 
							
							
							
							* Remove qwen2-7b from npu example readme
* fix 
							
						 
						
							2024-10-22 17:07:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ec465fbcd7 
								
							 
						 
						
							
							
								
								Add lookup generate in load_low_bit ( #12243 )  
							
							 
							
							... 
							
							
							
							* add lookup generate in load_low_bit
* update comment 
							
						 
						
							2024-10-22 15:51:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b3df47486d 
								
							 
						 
						
							
							
								
								Fix Gemma 2 on LNL ( #12240 )  
							
							 
							
							... 
							
							
							
							* Fix gemma 2 on LNL
* Python style fix 
							
						 
						
							2024-10-21 18:25:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5935b25622 
								
							 
						 
						
							
							
								
								Further update windows gpu perf test regarding results integrity check ( #12232 )  
							
							 
							
							
							
						 
						
							2024-10-18 18:15:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b88c1df324 
								
							 
						 
						
							
							
								
								Add Llama 3.1 & 3.2 to Arc Performance test ( #12225 )  
							
							 
							
							... 
							
							
							
							* Add llama3.1 and llama3.2 in arc perf (#12202 )
* Add llama3.1 and llama3.2 in arc perf
* Uninstall trl after arc test on transformers>=4.40
* Fix arc llama3 perf (#12212 )
* Fix pip uninstall
* Uninstall trl after test on transformers==4.43.1
* Fix llama3 arc perf (#12218 )
---------
Co-authored-by: Jin, Qiao <89779290+JinBridger@users.noreply.github.com> 
							
						 
						
							2024-10-17 21:12:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9ea694484d 
								
							 
						 
						
							
							
								
								refactor to remove old rope usage ( #12224 )  
							
							 
							
							
							
						 
						
							2024-10-17 17:06:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								324bcb057e 
								
							 
						 
						
							
							
								
								refactor to reduce old rope usage ( #12219 )  
							
							 
							
							
							
						 
						
							2024-10-17 14:45:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								667f0db466 
								
							 
						 
						
							
							
								
								Update Eagle example to Eagle2+ipex-llm integration ( #11717 )  
							
							 
							
							... 
							
							
							
							* update to e2 example
* update
* update 
							
						 
						
							2024-10-16 23:16:14 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a4a758656a 
								
							 
						 
						
							
							
								
								refactor gemma to reduce old fuse rope usage ( #12215 )  
							
							 
							
							
							
						 
						
							2024-10-16 17:40:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9104a168f6 
								
							 
						 
						
							
							
								
								refactor phi-2 to reduce old fuse rope usage ( #12214 )  
							
							 
							
							
							
						 
						
							2024-10-16 17:08:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bb247e991b 
								
							 
						 
						
							
							
								
								refactor merge_qkv and attention_softmax ( #12213 )  
							
							 
							
							
							
						 
						
							2024-10-16 15:58:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e279148aa0 
								
							 
						 
						
							
							
								
								optimize llama3.2 vision again ( #12211 )  
							
							 
							
							
							
						 
						
							2024-10-16 14:29:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f17cc4fdee 
								
							 
						 
						
							
							
								
								feat: add llama3.2-11b-vision in all in one ( #12207 )  
							
							 
							
							... 
							
							
							
							* feat: add llama3.2-11b-vision in all in one
* fix: change model
* fix: change name
* fix: add a space
* fix: switch import 
							
						 
						
							2024-10-16 10:32:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c9ac39fc1e 
								
							 
						 
						
							
							
								
								Add Llama 3.2 to iGPU performance test (transformers 4.45) ( #12209 )  
							
							 
							
							... 
							
							
							
							* Add Llama 3.2 to iGPU Perf (#12200 )
* Add Llama 3.2 to iGPU Perf
* Downgrade accelerate after step
* Temporarily disable model for test
* Temporarily change ERRORLEVEL check (#12201 )
* Restore llama3.2 perf (#12206 )
* Revert "Temporarily change ERRORLEVEL check"
This reverts commit 909dbbc930ab4283737161a55bb32006e6ca1991.
* Revert "Temporarily disable model for test"
This reverts commit 95322dc3c6429aa836f21bda0b5ba8d9b48592f8.
---------
Co-authored-by: Jin, Qiao <89779290+JinBridger@users.noreply.github.com> 
							
						 
						
							2024-10-15 17:44:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f6611f9d3a 
								
							 
						 
						
							
							
								
								optimize llama3.2 vision attention again ( #12204 )  
							
							 
							
							
							
						 
						
							2024-10-15 16:08:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9b81236a2e 
								
							 
						 
						
							
							
								
								optimize qwen2-vl vision ( #12203 )  
							
							 
							
							
							
						 
						
							2024-10-15 15:54:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d5344587ab 
								
							 
						 
						
							
							
								
								optimize internvl2 vision model's attention ( #12198 )  
							
							 
							
							
							
						 
						
							2024-10-15 10:51:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f8d1adc573 
								
							 
						 
						
							
							
								
								Fix Llama 3.2 & 3.1 on LNL ( #12196 )  
							
							 
							
							
							
						 
						
							2024-10-14 17:39:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								516b578104 
								
							 
						 
						
							
							
								
								Support cpp release for ARL on Windows ( #12189 )  
							
							 
							
							... 
							
							
							
							* Support cpp Windows release for ARL
* Temp commit for test
* Remove temp commit 
							
						 
						
							2024-10-14 17:20:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7d80db710e 
								
							 
						 
						
							
							
								
								Add benchmark_util for transformers >= 4.44.0 ( #12171 )  
							
							 
							
							... 
							
							
							
							* Create benchmark_util_4_45.py
* Update __init__.py
* Update lint-python
* Update benchmark_util_4_45.py
* Update benchmark_util_4_45.py
* Create benchmark_util_4_44.py 
							
						 
						
							2024-10-14 15:40:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8e35800abe 
								
							 
						 
						
							
							
								
								Add llama 3.1 in igpu perf ( #12194 )  
							
							 
							
							
							
						 
						
							2024-10-14 15:14:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ddcdf47539 
								
							 
						 
						
							
							
								
								Support Windows ARL release ( #12183 )  
							
							 
							
							... 
							
							
							
							* Support release for ARL
* Small fix
* Small fix to doc
* Temp for test
* Remove temp commit for test 
							
						 
						
							2024-10-11 18:30:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f983f1a8f4 
								
							 
						 
						
							
							
								
								Add Qwen2-VL gpu example ( #12135 )  
							
							 
							
							... 
							
							
							
							* qwen2-vl readme
* add qwen2-vl example
* fix
* fix
* fix
* add link
* Update regarding modules_to_not_convert and readme
* Further fix
* Small fix
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2024-10-11 18:25:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								310f18c8af 
								
							 
						 
						
							
							
								
								update NPU pipeline generate ( #12182 )  
							
							 
							
							... 
							
							
							
							* update
* fix style 
							
						 
						
							2024-10-11 17:39:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								724b2ae66d 
								
							 
						 
						
							
							
								
								add npu-level0 pipeline.dll to ipex-llm ( #12181 )  
							
							 
							
							... 
							
							
							
							* add npu-level0 pipeline.dll to ipex-llm
* test
* update runner label
* fix
* update
* fix
* fix 
							
						 
						
							2024-10-11 16:05:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4d93bb81fe 
								
							 
						 
						
							
							
								
								Initial support of NPU level0 Model ( #12177 )  
							
							 
							
							... 
							
							
							
							* first commit to support load dll and init llm pipeline
* add init generate
* fix style
* small updates
* fix style and check tokens number 
							
						 
						
							2024-10-11 09:45:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								890662610b 
								
							 
						 
						
							
							
								
								Fix auto importer for LNL release ( #12175 )  
							
							 
							
							
							
						 
						
							2024-10-10 15:17:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								535bee5381 
								
							 
						 
						
							
							
								
								fix qwen2 vl again ( #12174 )  
							
							 
							
							
							
						 
						
							2024-10-10 13:50:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aef1f671bd 
								
							 
						 
						
							
							
								
								Support LNL Windows release ( #12169 )  
							
							 
							
							... 
							
							
							
							* Release for LNL on Windows
* Temp commit for release test
* Change option name
* Remove temp commit and change option name
* temp commit for test again
* Remove temp commit 
							
						 
						
							2024-10-09 17:41:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								78d253165d 
								
							 
						 
						
							
							
								
								optimize qwen2 vl perf again ( #12167 )  
							
							 
							
							
							
						 
						
							2024-10-09 16:43:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3d044dbf53 
								
							 
						 
						
							
							
								
								add llama3.2-vision Pytorch example ( #12165 )  
							
							 
							
							
							
						 
						
							2024-10-09 09:20:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								644af2a76e 
								
							 
						 
						
							
							
								
								add basic llama 3.2 vision support ( #12163 )  
							
							 
							
							
							
						 
						
							2024-10-08 10:46:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								17c23cd759 
								
							 
						 
						
							
							
								
								add llama3.2 GPU example ( #12137 )  
							
							 
							
							... 
							
							
							
							* add llama3.2 GPU example
* change prompt format reference url
* update
* add Meta-Llama-3.2-1B-Instruct sample output
* update wording 
							
						 
						
							2024-09-29 14:41:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f71b38a994 
								
							 
						 
						
							
							
								
								Update MiniCPM_V_26 GPU example with save & load ( #12127 )  
							
							 
							
							
							
						 
						
							2024-09-26 17:40:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								669ff1a97b 
								
							 
						 
						
							
							
								
								fix sd1.5 ( #12129 )  
							
							 
							
							
							
						 
						
							2024-09-26 17:15:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a266528719 
								
							 
						 
						
							
							
								
								optimize llama 3.2 rope ( #12128 )  
							
							 
							
							
							
						 
						
							2024-09-26 16:08:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								584c3489e7 
								
							 
						 
						
							
							
								
								add basic support for llama3.2 ( #12125 )  
							
							 
							
							
							
						 
						
							2024-09-26 15:46:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								66f419f8b7 
								
							 
						 
						
							
							
								
								fix qwen2 vl ( #12126 )  
							
							 
							
							
							
						 
						
							2024-09-26 15:44:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2ea13d502f 
								
							 
						 
						
							
							
								
								Add minicpm3 gpu example ( #12114 )  
							
							 
							
							... 
							
							
							
							* add minicpm3 gpu example
* update GPU example
* update
---------
Co-authored-by: Huang, Xinshengzi <xinshengzi.huang@intel.com> 
							
						 
						
							2024-09-26 13:51:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								77af9bc5fa 
								
							 
						 
						
							
							
								
								support passing None to low_bit in optimize_model ( #12121 )  
							
							 
							
							
							
						 
						
							2024-09-26 11:09:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								47e0b83cbf 
								
							 
						 
						
							
							
								
								optimize sd 1.5 ( #12119 )  
							
							 
							
							
							
						 
						
							2024-09-25 15:45:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2bedb17be7 
								
							 
						 
						
							
							
								
								Add Qwen2.5 NPU Example ( #12110 )  
							
							 
							
							... 
							
							
							
							* Add Qwen2.5 NPU Example
* fix
* Merge qwen2.py and qwen2.5.py into qwen.py
* Fix description 
							
						 
						
							2024-09-25 15:20:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5d63aef60b 
								
							 
						 
						
							
							
								
								optimize qwen2 vl again ( #12109 )  
							
							 
							
							
							
						 
						
							2024-09-23 13:22:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								03bd01c99c 
								
							 
						 
						
							
							
								
								optimize npu qwen2 ( #12107 )  
							
							 
							
							
							
						 
						
							2024-09-20 19:46:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								02399021d6 
								
							 
						 
						
							
							
								
								add npu load_low_bit api in all-in-one benchmark ( #12103 )  
							
							 
							
							
							
						 
						
							2024-09-20 17:56:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9239fd4f12 
								
							 
						 
						
							
							
								
								add basic support and optimization for qwen2-vl ( #12104 )  
							
							 
							
							
							
						 
						
							2024-09-20 17:23:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								828fa01ad3 
								
							 
						 
						
							
							
								
								[NPU] Add mixed_precision for Qwen2 7B ( #12098 )  
							
							 
							
							... 
							
							
							
							* Add mix_precision argument to control whether to use INT8 lm_head for Qwen2-7B-Instruct
* Small fix
* Fixed on load low bit with mixed precision
* Small fix
* Update example accordingly
* Update for default prompt
* Update based on comments
* Final fix 
							
						 
						
							2024-09-20 16:36:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2269768e71 
								
							 
						 
						
							
							
								
								add internvl2 example ( #12102 )  
							
							 
							
							... 
							
							
							
							* add internvl2 example
* add to README.md
* update
* add link to zh-CN readme 
							
						 
						
							2024-09-20 16:31:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								09b8c80d9d 
								
							 
						 
						
							
							
								
								update code for NPU qwen2 ( #12094 )  
							
							 
							
							... 
							
							
							
							* update code
* fix 
							
						 
						
							2024-09-20 15:58:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								db7500bfd4 
								
							 
						 
						
							
							
								
								Add Qwen2.5 GPU example ( #12101 )  
							
							 
							
							... 
							
							
							
							* Add Qwen2.5 GPU example
* fix end line
* fix description 
							
						 
						
							2024-09-20 15:55:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								54b973c744 
								
							 
						 
						
							
							
								
								fix ipex_llm import in transformers 4.45 ( #12099 )  
							
							 
							
							
							
						 
						
							2024-09-20 15:24:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9650bf616a 
								
							 
						 
						
							
							
								
								add transpose_value_cache for NPU benchmark ( #12092 )  
							
							 
							
							... 
							
							
							
							* add `transpose_value_cache`
* update
* update 
							
						 
						
							2024-09-19 18:45:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f7fb3c896c 
								
							 
						 
						
							
							
								
								Update lm_head optimization for Qwen2 7B ( #12090 )  
							
							 
							
							
							
						 
						
							2024-09-18 17:02:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ee33b93464 
								
							 
						 
						
							
							
								
								Longbench: NV code to ipex-llm ( #11662 )  
							
							 
							
							... 
							
							
							
							* add nv longbench
* LongBench: NV code to ipex-llm
* amend
* add more models support
* amend
* optimize LongBench's user experience
* amend
* amend
* fix typo
* amend
* remove cuda related information & add a readme
* add license to python scripts & polish the readme
* amend
* amend
---------
Co-authored-by: cyita <yitastudy@gmail.com>
Co-authored-by: ATMxsp01 <shou.xu@intel.com>
Co-authored-by: leonardozcm <leonardo1997zcm@gmail.com> 
							
						 
						
							2024-09-18 15:55:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								40e463c66b 
								
							 
						 
						
							
							
								
								Enable vllm load gptq model ( #12083 )  
							
							 
							
							... 
							
							
							
							* enable vllm load gptq model
* update
* update
* update
* update style 
							
						 
						
							2024-09-18 14:41:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								081af41def 
								
							 
						 
						
							
							
								
								[NPU] Optimize Qwen2 lm_head to use INT4 ( #12072 )  
							
							 
							
							... 
							
							
							
							* temp save
* update
* fix
* fix
* Split lm_head into 7 parts & remove int8 for lm_head when sym_int4
* Simplify and add condition to code
* Small fix
* refactor some code
* fix style
* fix style
* fix style
* fix
* fix
* temp save
* refactor
* fix style
* further refactor
* simplify code
* meet code review
* fix style
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2024-09-14 15:26:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b4b8c3e495 
								
							 
						 
						
							
							
								
								add lowbit_path for generate.py, fix npu_model ( #12077 )  
							
							 
							
							... 
							
							
							
							* add `lowbit_path` for `generate.py`, fix `npu_model`
* update `README.md` 
							
						 
						
							2024-09-13 17:28:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d703e4f127 
								
							 
						 
						
							
							
								
								Enable vllm multimodal minicpm-v-2-6 ( #12074 )  
							
							 
							
							... 
							
							
							
							* enable minicpm-v-2-6
* add image_url readme 
							
						 
						
							2024-09-13 13:28:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								48d9092b5a 
								
							 
						 
						
							
							
								
								upgrade OneAPI version for cpp Windows ( #12063 )  
							
							 
							
							... 
							
							
							
							* update version
* update quickstart 
							
						 
						
							2024-09-12 11:12:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e78e45ee01 
								
							 
						 
						
							
							
								
								update NPU readme: run conhost as administrator ( #12066 )  
							
							 
							
							
							
						 
						
							2024-09-11 17:54:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4ca330da15 
								
							 
						 
						
							
							
								
								Fix NPU load error message and add minicpm npu lowbit feat ( #12064 )  
							
							 
							
							... 
							
							
							
							* fix npu_model raise sym_int4 error
* add load_lowbit
* remove print&perf 
							
						 
						
							2024-09-11 16:56:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								32e8362da7 
								
							 
						 
						
							
							
								
								added minicpm cpu examples ( #12027 )  
							
							 
							
							... 
							
							
							
							* minicpm cpu examples
* add link for minicpm-2 
							
						 
						
							2024-09-11 15:51:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a0c73c26d8 
								
							 
						 
						
							
							
								
								clean NPU code ( #12060 )  
							
							 
							
							... 
							
							
							
							* clean code
* remove time.perf_counter() 
							
						 
						
							2024-09-11 15:10:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c75f3dd874 
								
							 
						 
						
							
							
								
								vllm no padding glm4 to avoid nan error ( #12062 )  
							
							 
							
							... 
							
							
							
							* no padding glm4
* add codegeex 
							
						 
						
							2024-09-11 13:44:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chu,Youcheng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								649390c464 
								
							 
						 
						
							
							
								
								fix: textual and env variable adjustment ( #12038 )  
							
							 
							
							
							
						 
						
							2024-09-11 13:38:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								30a8680645 
								
							 
						 
						
							
							
								
								Update for vllm one card padding ( #12058 )  
							
							 
							
							
							
						 
						
							2024-09-11 10:52:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c5fdfde1bd 
								
							 
						 
						
							
							
								
								fix npu-model prompt ( #12057 )  
							
							 
							
							
							
						 
						
							2024-09-11 10:06:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d8c044e79d 
								
							 
						 
						
							
							
								
								optimize minicpm3 kv cache ( #12052 )  
							
							 
							
							
							
						 
						
							2024-09-10 16:51:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5d3ab16a80 
								
							 
						 
						
							
							
								
								Add vllm glm and baichuan padding ( #12053 )  
							
							 
							
							
							
						 
						
							2024-09-10 15:57:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69c8d36f16 
								
							 
						 
						
							
							
								
								Switching from vLLM v0.3.3 to vLLM 0.5.4 ( #12042 )  
							
							 
							
							... 
							
							
							
							* Enable single card sync engine
* enable ipex-llm optimizations for vllm
* enable optimizations for lm_head
* Fix chatglm multi-reference problem
* Remove duplicate layer
* LLM: Update vLLM to v0.5.4 (#11746 )
* Enable single card sync engine
* enable ipex-llm optimizations for vllm
* enable optimizations for lm_head
* Fix chatglm multi-reference problem
* update 0.5.4 api_server
* add dockerfile
* fix
* fix
* refine
* fix
---------
Co-authored-by: gc-fu <guancheng.fu@intel.com>
* Add vllm-0.5.4 Dockerfile (#11838 )
* Update BIGDL_LLM_SDP_IGNORE_MASK in start-vllm-service.sh (#11957 )
* Fix vLLM not convert issues (#11817 ) (#11918 )
* Fix not convert issues
* refine
Co-authored-by: Guancheng Fu <110874468+gc-fu@users.noreply.github.com>
* Fix glm4-9b-chat nan error on vllm 0.5.4 (#11969 )
* init
* update mlp forward
* fix minicpm error in vllm 0.5.4
* fix dependabot alerts (#12008 )
* Update 0.5.4 dockerfile (#12021 )
* Add vllm awq loading logic (#11987 )
* [ADD] Add vllm awq loading logic
* [FIX] fix the module.linear_method path
* [FIX] fix quant_config path error
* Enable Qwen padding mlp to 256 to support batch_forward (#12030 )
* Enable padding mlp
* padding to 256
* update style
* Install 27191 runtime in 0.5.4 docker image (#12040 )
* fix rebase error
* fix rebase error
* vLLM: format for 0.5.4 rebase (#12043 )
* format
* Update model_convert.py
* Fix serving docker related modifications (#12046 )
* Fix undesired modifications (#12048 )
* fix
* Refine offline_inference arguments
---------
Co-authored-by: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com>
Co-authored-by: Jun Wang <thoughts.times@gmail.com>
Co-authored-by: Wang, Jian4 <61138589+hzjane@users.noreply.github.com>
Co-authored-by: liu-shaojun <johnssalyn@outlook.com>
Co-authored-by: Shaojun Liu <61072813+liu-shaojun@users.noreply.github.com> 
							
						 
						
							2024-09-10 15:37:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								73a4360f3f 
								
							 
						 
						
							
							
								
								update lowbit path for baichuan2, qwen2, generate.py ( #12051 )  
							
							 
							
							... 
							
							
							
							* update lowbit path for baichuan2, qwen2, `generate.py`
* update readme 
							
						 
						
							2024-09-10 15:35:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dc4af02b2a 
								
							 
						 
						
							
							
								
								Fix qwen2 1.5B NPU load error ( #12049 )  
							
							 
							
							
							
						 
						
							2024-09-10 14:41:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								abc370728c 
								
							 
						 
						
							
							
								
								optimize minicpm3 again ( #12047 )  
							
							 
							
							
							
						 
						
							2024-09-10 14:19:57 +08:00