Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e90a9ad196 
								
							 
						 
						
							
							
								
								[NPU] Support non-const parameter for decoder layers when keep_ir=True ( #12789 )  
							
							 
							
							... 
							
							
							
							* support layernorm=False for decoder layers
* rename to meet review
* fix style
* rename to const_parameter
* fix rebase error
* fix rebase error 
							
						 
						
							2025-02-08 09:58:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8aea5319bb 
								
							 
						 
						
							
							
								
								update more lora example ( #12785 )  
							
							 
							
							
							
						 
						
							2025-02-08 09:46:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fd28cf1672 
								
							 
						 
						
							
							
								
								Upgrade ipex-llm[cpp] to oneAPI 2025.0 on Windows ( #12778 )  
							
							 
							
							... 
							
							
							
							* Upgrade ipex-llm[cpp] to oneAPI 2025.0
* Fit oneapi pypi dependency on Windows for now 
							
						 
						
							2025-02-07 18:29:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ca1d7b7c2c 
								
							 
						 
						
							
							
								
								[NPU] Support qwen models with cos_sin_input=True ( #12788 )  
							
							 
							
							
							
						 
						
							2025-02-07 16:41:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6ff7faa781 
								
							 
						 
						
							
							
								
								[NPU] Update deepseek support in python examples and quickstart ( #12786 )  
							
							 
							
							
							
						 
						
							2025-02-07 11:25:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b4f2be2b09 
								
							 
						 
						
							
							
								
								[NPU] Update C++ example to add DeepSeek-R1 ( #12787 )  
							
							 
							
							
							
						 
						
							2025-02-07 11:23:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d0d9c9d636 
								
							 
						 
						
							
							
								
								remove load_in_8bit usage as it has not been supported for a long time ( #12779 )  
							
							 
							
							
							
						 
						
							2025-02-07 11:21:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9e9b6c9f2b 
								
							 
						 
						
							
							
								
								Fix cpu serving docker image ( #12783 )  
							
							 
							
							
							
						 
						
							2025-02-07 11:12:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b4c9e23f73 
								
							 
						 
						
							
							
								
								fix galore and peft finetune example ( #12776 )  
							
							 
							
							
							
						 
						
							2025-02-06 16:36:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c0d6b282b8 
								
							 
						 
						
							
							
								
								fix lisa finetune example ( #12775 )  
							
							 
							
							
							
						 
						
							2025-02-06 16:35:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2e5f2e5dda 
								
							 
						 
						
							
							
								
								fix dpo finetune ( #12774 )  
							
							 
							
							
							
						 
						
							2025-02-06 16:35:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9697197f3e 
								
							 
						 
						
							
							
								
								fix qlora finetune example ( #12769 )  
							
							 
							
							
							
						 
						
							2025-02-06 11:18:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								094a25b740 
								
							 
						 
						
							
							
								
								[NPU] Expose parameter to control blob / IR save logic ( #12767 )  
							
							 
							
							... 
							
							
							
							* update api
* fix convert.py
* fix style
* remove unnecessary bin file
* fix style 
							
						 
						
							2025-02-06 10:07:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9c0daf6396 
								
							 
						 
						
							
							
								
								Fix readme links ( #12771 )  
							
							 
							
							
							
						 
						
							2025-02-05 19:24:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1e7bfc638 
								
							 
						 
						
							
							
								
								Update Readme ( #12770 )  
							
							 
							
							
							
						 
						
							2025-02-05 19:19:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0237ffb302 
								
							 
						 
						
							
							
								
								refactor xpu linear forward ( #12768 )  
							
							 
							
							
							
						 
						
							2025-02-05 17:40:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Danciu Georgian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								413d6c2b66 
								
							 
						 
						
							
							
								
								Update check.py removing a twice defined function ( #12760 )  
							
							 
							
							... 
							
							
							
							Remove duplicate function 
							
						 
						
							2025-02-05 11:37:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								184adb2653 
								
							 
						 
						
							
							
								
								Small fix to MiniCPM-o-2_6 GPU example ( #12766 )  
							
							 
							
							
							
						 
						
							2025-02-05 11:32:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ee809e71df 
								
							 
						 
						
							
							
								
								add troubleshooting section ( #12755 )  
							
							 
							
							
							
						 
						
							2025-01-26 11:03:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5fb87d7486 
								
							 
						 
						
							
							
								
								remove ${HF_TOKEN} ( #12742 )  
							
							 
							
							
							
						 
						
							2025-01-26 10:31:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f924880694 
								
							 
						 
						
							
							
								
								vLLM: Fix vLLM-CPU docker image ( #12741 )  
							
							 
							
							
							
						 
						
							2025-01-24 10:00:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69f13c78b8 
								
							 
						 
						
							
							
								
								[NPU] Update layernorm node on MTL/ARL ( #12738 )  
							
							 
							
							... 
							
							
							
							* Update layernorm node on MTL/ARL
* Fix on style 
							
						 
						
							2025-01-23 17:25:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d11f257ee7 
								
							 
						 
						
							
							
								
								Add GPU example for MiniCPM-o-2_6 ( #12735 )  
							
							 
							
							... 
							
							
							
							* Add init example for omni mode
* Small fix
* Small fix
* Add chat example
* Remove legacy link
* Further update link
* Add readme
* Small fix
* Update main readme link
* Update based on comments
* Small fix
* Small fix
* Small fix 
							
						 
						
							2025-01-23 16:10:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dcca522618 
								
							 
						 
						
							
							
								
								Remove sdpa available patch ( #12734 )  
							
							 
							
							
							
						 
						
							2025-01-22 17:22:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c9b6c94a59 
								
							 
						 
						
							
							
								
								vLLM: Update vLLM-cpu to v0.6.6-post1 ( #12728 )  
							
							 
							
							... 
							
							
							
							Update vLLM-cpu to v0.6.6-post1 
							
						 
						
							2025-01-22 15:03:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								78cca0a68c 
								
							 
						 
						
							
							
								
								[NPU] update llm-npu-cli example ( #12729 )  
							
							 
							
							... 
							
							
							
							* update cli example
* add license
* rename
* update readme sample output 
							
						 
						
							2025-01-22 09:59:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7e29edcc4b 
								
							 
						 
						
							
							
								
								Update Readme ( #12730 )  
							
							 
							
							
							
						 
						
							2025-01-22 08:43:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6789e5d92f 
								
							 
						 
						
							
							
								
								small fix ( #12727 )  
							
							 
							
							
							
						 
						
							2025-01-21 17:27:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								412bfd6644 
								
							 
						 
						
							
							
								
								Update readme ( #12724 )  
							
							 
							
							
							
						 
						
							2025-01-21 10:59:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								716d4fe563 
								
							 
						 
						
							
							
								
								Add vllm 0.6.2 vision offline example ( #12721 )  
							
							 
							
							... 
							
							
							
							* add vision offline example
* add to docker 
							
						 
						
							2025-01-21 09:58:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								085974e307 
								
							 
						 
						
							
							
								
								fix nf4 to cpu ( #12722 )  
							
							 
							
							
							
						 
						
							2025-01-21 09:23:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9aa4be8ced 
								
							 
						 
						
							
							
								
								Update runtime configuration on MTL ( #12720 )  
							
							 
							
							
							
						 
						
							2025-01-20 11:06:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bda87c21eb 
								
							 
						 
						
							
							
								
								add support and optimization for minicpmo audio part ( #12716 )  
							
							 
							
							
							
						 
						
							2025-01-16 16:39:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								53aae24616 
								
							 
						 
						
							
							
								
								Add note about enabling Resizable BAR in BIOS for GPU setup ( #12715 )  
							
							 
							
							
							
						 
						
							2025-01-16 16:22:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								534e0e6774 
								
							 
						 
						
							
							
								
								Update dependency for PyTorch 2.6 RC support for woq int4 ( #12714 )  
							
							 
							
							
							
						 
						
							2025-01-16 15:51:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								54d6328b3c 
								
							 
						 
						
							
							
								
								woq int4 fwd ( #12711 )  
							
							 
							
							
							
						 
						
							2025-01-16 15:48:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b62734748f 
								
							 
						 
						
							
							
								
								add support and optimization for minicpmo vision part ( #12713 )  
							
							 
							
							
							
						 
						
							2025-01-16 14:51:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c52bdff76b 
								
							 
						 
						
							
							
								
								Update Deepseek coder GPU example ( #12712 )  
							
							 
							
							... 
							
							
							
							* Update Deepseek coder GPU example
* Fix based on comment 
							
						 
						
							2025-01-16 14:05:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9d65dcd7ef 
								
							 
						 
						
							
							
								
								Fix deepseek coder with linear rope type support on GPU ( #12709 )  
							
							 
							
							... 
							
							
							
							* Fix deepseek coder with linear rope type
* Style fix
* Move to optimize_pre
* Small fix
* Small fix
* Small fix to not affect other cases
* Style fixes
* Update function name
* Small fix
* Small fix
* Small fix
* Fix for low transformers version first
* Style fix
* Small fix 
							
						 
						
							2025-01-15 21:12:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								36bf3d8e29 
								
							 
						 
						
							
							
								
								[NPU doc] Update ARL product in QuickStart ( #12708 )  
							
							 
							
							
							
						 
						
							2025-01-15 15:57:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9930351112 
								
							 
						 
						
							
							
								
								LLM: add new qtype woq_int4 to support gemm int4 temporarily. ( #12706 )  
							
							 
							
							... 
							
							
							
							This PR adds a temporary qtype woq_int4 to avoid affecting other qtypes and models.
Co-authored-by: leonardozcm <leonardo1997zcm@gmail.com> 
							
						 
						
							2025-01-15 14:41:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6d03d06ebb 
								
							 
						 
						
							
							
								
								Change runtime configurations for perf test on Windows ( #12705 )  
							
							 
							
							... 
							
							
							
							* Change runtime configurations for perf test on Windows
* Small fix 
							
						 
						
							2025-01-14 17:54:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								350fae285d 
								
							 
						 
						
							
							
								
								Add Qwen2-VL HF GPU example with ModelScope Support ( #12606 )  
							
							 
							
							... 
							
							
							
							* Add qwen2-vl example
* complete generate.py & readme
* improve lint style
* update 1-6
* update main readme
* Format and other small fixes
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2025-01-13 15:42:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1da7908b9 
								
							 
						 
						
							
							
								
								Fix name device is not found bug ( #12703 )  
							
							 
							
							
							
						 
						
							2025-01-13 10:11:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e2d58f733e 
								
							 
						 
						
							
							
								
								Update ollama v0.5.1 document ( #12699 )  
							
							 
							
							... 
							
							
							
							* Update ollama document version and known issue 
							
						 
						
							2025-01-10 18:04:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								db9db51e2c 
								
							 
						 
						
							
							
								
								fix lnl perf ( #12700 )  
							
							 
							
							
							
						 
						
							2025-01-10 18:00:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4bf93c66e8 
								
							 
						 
						
							
							
								
								Support install from source for PyTorch 2.6 RC in UT ( #12697 )  
							
							 
							
							... 
							
							
							
							* Support install from source for PyTorch 2.6 RC in UT
* Remove expecttest 
							
						 
						
							2025-01-10 16:44:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								da8bcb7db1 
								
							 
						 
						
							
							
								
								[NPU] fix load logic of glm-edge models ( #12698 )  
							
							 
							
							
							
						 
						
							2025-01-10 16:08:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									joan726 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								584c1c5373 
								
							 
						 
						
							
							
								
								Update B580 CN doc ( #12695 )  
							
							 
							
							
							
						 
						
							2025-01-10 11:20:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cbb8e2a2d5 
								
							 
						 
						
							
							
								
								Update documents ( #12693 )  
							
							 
							
							
							
						 
						
							2025-01-10 10:47:11 +08:00