Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7c0c77cce3 
								
							 
						 
						
							
							
								
								Tiny fixes ( #12936 )  
							
							 
							
							
							
						 
						
							2025-03-05 14:55:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								68a770745b 
								
							 
						 
						
							
							
								
								Add moonlight GPU example ( #12929 )  
							
							 
							
							... 
							
							
							
							* Add moonlight GPU example and update table
* Small fix
* Fix based on comments
* Small fix 
							
						 
						
							2025-03-05 11:31:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f81d89d908 
								
							 
						 
						
							
							
								
								Remove Unnecessary --privileged Flag While Keeping It for WSL Users ( #12920 )  
							
							 
							
							
							
						 
						
							2025-03-03 11:11:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b6f33d5c4d 
								
							 
						 
						
							
							
								
								optimize moonlight again ( #12909 )  
							
							 
							
							
							
						 
						
							2025-03-03 09:21:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								443cb5d4e0 
								
							 
						 
						
							
							
								
								Update Janus-Pro GPU example ( #12906 )  
							
							 
							
							
							
						 
						
							2025-02-28 15:39:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								39e360fe9d 
								
							 
						 
						
							
							
								
								add grouped topk optimization for moonlight ( #12903 )  
							
							 
							
							
							
						 
						
							2025-02-28 13:25:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e946127613 
								
							 
						 
						
							
							
								
								glm 4v 1st sdp for vision ( #12904 )  
							
							 
							
							... 
							
							
							
							* glm4v 1st sdp
* update glm4v example
* meet code review
* fix style 
							
						 
						
							2025-02-28 13:23:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								be1f073866 
								
							 
						 
						
							
							
								
								add fuse moe optimization for moonlight ( #12898 )  
							
							 
							
							
							
						 
						
							2025-02-27 09:15:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5faba06409 
								
							 
						 
						
							
							
								
								simple optimization for moonlight moe decoding forward ( #12891 )  
							
							 
							
							
							
						 
						
							2025-02-25 16:18:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ab3fc66eb7 
								
							 
						 
						
							
							
								
								optimize attention part of moonlight-14B-A3B ( #12886 )  
							
							 
							
							
							
						 
						
							2025-02-25 09:38:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3f6ecce508 
								
							 
						 
						
							
							
								
								support using xgrammar to get json output ( #12870 )  
							
							 
							
							
							
						 
						
							2025-02-24 14:10:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								02ec313eab 
								
							 
						 
						
							
							
								
								Update README.md ( #12877 )  
							
							 
							
							
							
						 
						
							2025-02-24 09:59:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1e00bed001 
								
							 
						 
						
							
							
								
								Add GPU example for Janus-Pro ( #12869 )  
							
							 
							
							... 
							
							
							
							* Add example for Janus-Pro
* Update model link
* Fixes
* Fixes
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com>
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2025-02-21 18:36:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3ea5389a99 
								
							 
						 
						
							
							
								
								Fix vllm api_server v1/models error ( #12867 )  
							
							 
							
							
							
						 
						
							2025-02-21 11:08:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8077850452 
								
							 
						 
						
							
							
								
								[NPU GGUF] Add simple example ( #12853 )  
							
							 
							
							
							
						 
						
							2025-02-21 09:58:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								348dc8056d 
								
							 
						 
						
							
							
								
								Fix vllm gptq awq error ( #12863 )  
							
							 
							
							... 
							
							
							
							* fix gptq awq error
* fix python style 
							
						 
						
							2025-02-20 16:27:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4eed0c7d99 
								
							 
						 
						
							
							
								
								initial implementation for low_bit_loader vLLM ( #12838 )  
							
							 
							
							... 
							
							
							
							* initial
* add logic for handling tensor parallel models
* fix
* Add some comments
* add doc
* fix done 
							
						 
						
							2025-02-19 19:45:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b26409d53f 
								
							 
						 
						
							
							
								
								R1 Hybrid: Add Benchmark for DeepSeek R1 transformers example ( #12854 )  
							
							 
							
							... 
							
							
							
							* init
* fix
* update
* update
* fix
* fix 
							
						 
						
							2025-02-19 18:33:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aee2db30f9 
								
							 
						 
						
							
							
								
								update sdp support ( #12847 )  
							
							 
							
							
							
						 
						
							2025-02-19 12:07:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								93c10be762 
								
							 
						 
						
							
							
								
								LLM: Support hybrid convert for DeepSeek V3/R1 ( #12834 )  
							
							 
							
							... 
							
							
							
							LLM: Support hybrid convert for DeepSeek V3/R1 
							
						 
						
							2025-02-19 11:31:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e1809a6295 
								
							 
						 
						
							
							
								
								Update multimodal on vllm 0.6.6 ( #12816 )  
							
							 
							
							... 
							
							
							
							* add glm4v and minicpmv example
* fix 
							
						 
						
							2025-02-19 10:04:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								09150b6058 
								
							 
						 
						
							
							
								
								Initiate CPU-XPU Hybrid Inference for DeepSeek-R1 ( #12832 )  
							
							 
							
							... 
							
							
							
							Initiate CPU-XPU Hybrid Inference for DeepSeek-R1 with DeepseekV3Attention
and DeepseekV3MLP to XPU 
							
						 
						
							2025-02-18 13:34:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								09ed96082b 
								
							 
						 
						
							
							
								
								Add DeepSeek V3/R1 CPU example ( #12836 )  
							
							 
							
							... 
							
							
							
							Add DeepSeek V3/R1 CPU example for bf16 model 
							
						 
						
							2025-02-18 12:45:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8418450300 
								
							 
						 
						
							
							
								
								optimize minicpm-o's tts part ( #12833 )  
							
							 
							
							
							
						 
						
							2025-02-17 14:53:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1083fe5508 
								
							 
						 
						
							
							
								
								Reenable pp and lightweight-serving serving on 0.6.6 ( #12814 )  
							
							 
							
							... 
							
							
							
							* reenable pp ang lightweight serving on 066
* update readme
* updat
* update tag 
							
						 
						
							2025-02-13 10:16:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								af693425f1 
								
							 
						 
						
							
							
								
								Upgrade to vLLM 0.6.6 ( #12796 )  
							
							 
							
							... 
							
							
							
							* init
* update engine init
* fix serving load_in_low_bit problem
* temp
* temp
* temp
* temp
* temp
* fix
* fixed
* done
* fix
* fix all arguments
* fix
* fix throughput script
* fix
* fix
* use official ipex-llm
* Fix readme
* fix
---------
Co-authored-by: hzjane <a1015616934@qq.com> 
							
						 
						
							2025-02-12 16:47:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f8ab833f74 
								
							 
						 
						
							
							
								
								support and optimize janus pro ( #12813 )  
							
							 
							
							
							
						 
						
							2025-02-12 15:07:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								73cfe293fa 
								
							 
						 
						
							
							
								
								add basic support for Baichuan-M1-14B-Instruct ( #12808 )  
							
							 
							
							
							
						 
						
							2025-02-11 17:27:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b70ad902b4 
								
							 
						 
						
							
							
								
								Fix ipex-llm CPU linear dtype not match ( #12805 )  
							
							 
							
							
							
						 
						
							2025-02-11 10:34:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								eb2df5ed70 
								
							 
						 
						
							
							
								
								common.h -> npu/npu_common.h ( #12800 )  
							
							 
							
							
							
						 
						
							2025-02-10 14:38:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e4ceb722b6 
								
							 
						 
						
							
							
								
								fix qwen2 vl ( #12798 )  
							
							 
							
							
							
						 
						
							2025-02-10 13:25:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3fee838b14 
								
							 
						 
						
							
							
								
								[NPU] Fix of c++ convert example ( #12797 )  
							
							 
							
							
							
						 
						
							2025-02-10 11:17:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								468d3f22fc 
								
							 
						 
						
							
							
								
								Rename NPU public example to llm-cli ( #12790 )  
							
							 
							
							... 
							
							
							
							* rename to llm-cli
* update readme 
							
						 
						
							2025-02-08 10:19:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e90a9ad196 
								
							 
						 
						
							
							
								
								[NPU] Support non-const parameter for decoder layers when keep_ir=True ( #12789 )  
							
							 
							
							... 
							
							
							
							* support layernorm=False for decoder layers
* renbame to meet review
* fix style
* rename to const_parameter
* fix rebase error
* fix rebase error 
							
						 
						
							2025-02-08 09:58:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8aea5319bb 
								
							 
						 
						
							
							
								
								update more lora example ( #12785 )  
							
							 
							
							
							
						 
						
							2025-02-08 09:46:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fd28cf1672 
								
							 
						 
						
							
							
								
								Upgrade ipex-llm[cpp] to oneAPI 2025.0 on Windows ( #12778 )  
							
							 
							
							... 
							
							
							
							* Upgrade ipex-llm[cpp] to oneAPI 2025.0
* Fit oneapi pypi dependency on Windows for now 
							
						 
						
							2025-02-07 18:29:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ca1d7b7c2c 
								
							 
						 
						
							
							
								
								[NPU] Support qwen models with cos_sin_input=True ( #12788 )  
							
							 
							
							
							
						 
						
							2025-02-07 16:41:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6ff7faa781 
								
							 
						 
						
							
							
								
								[NPU] Update deepseek support in python examples and quickstart ( #12786 )  
							
							 
							
							
							
						 
						
							2025-02-07 11:25:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b4f2be2b09 
								
							 
						 
						
							
							
								
								[NPU] Update C++ example to add DeepSeek-R1 ( #12787 )  
							
							 
							
							
							
						 
						
							2025-02-07 11:23:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d0d9c9d636 
								
							 
						 
						
							
							
								
								remove load_in_8bit usage as it is not supported a long time ago ( #12779 )  
							
							 
							
							
							
						 
						
							2025-02-07 11:21:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b4c9e23f73 
								
							 
						 
						
							
							
								
								fix galore and peft finetune example ( #12776 )  
							
							 
							
							
							
						 
						
							2025-02-06 16:36:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c0d6b282b8 
								
							 
						 
						
							
							
								
								fix lisa finetune example ( #12775 )  
							
							 
							
							
							
						 
						
							2025-02-06 16:35:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2e5f2e5dda 
								
							 
						 
						
							
							
								
								fix dpo finetune ( #12774 )  
							
							 
							
							
							
						 
						
							2025-02-06 16:35:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9697197f3e 
								
							 
						 
						
							
							
								
								fix qlora finetune example ( #12769 )  
							
							 
							
							
							
						 
						
							2025-02-06 11:18:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								094a25b740 
								
							 
						 
						
							
							
								
								[NPU] Expose parameter to control blob / IR save logic ( #12767 )  
							
							 
							
							... 
							
							
							
							* update api
* fix convert.py
* fix style
* remove unnecessary bin file
* fix style 
							
						 
						
							2025-02-06 10:07:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0237ffb302 
								
							 
						 
						
							
							
								
								refactor xpu linear forward ( #12768 )  
							
							 
							
							
							
						 
						
							2025-02-05 17:40:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Danciu Georgian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								413d6c2b66 
								
							 
						 
						
							
							
								
								Update check.py removing a twice defined function ( #12760 )  
							
							 
							
							... 
							
							
							
							Remove duplicate function 
							
						 
						
							2025-02-05 11:37:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								184adb2653 
								
							 
						 
						
							
							
								
								Small fix to MiniCPM-o-2_6 GPU example ( #12766 )  
							
							 
							
							
							
						 
						
							2025-02-05 11:32:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5fb87d7486 
								
							 
						 
						
							
							
								
								remove ${HF_TOKEN} ( #12742 )  
							
							 
							
							
							
						 
						
							2025-01-26 10:31:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69f13c78b8 
								
							 
						 
						
							
							
								
								[NPU] Update layernorm node on MTL/ARL ( #12738 )  
							
							 
							
							... 
							
							
							
							* Update layernorm node on MTL/ARL
* Fix on style 
							
						 
						
							2025-01-23 17:25:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d11f257ee7 
								
							 
						 
						
							
							
								
								Add GPU example for MiniCPM-o-2_6 ( #12735 )  
							
							 
							
							... 
							
							
							
							* Add init example for omni mode
* Small fix
* Small fix
* Add chat example
* Remove lagecy link
* Further update link
* Add readme
* Small fix
* Update main readme link
* Update based on comments
* Small fix
* Small fix
* Small fix 
							
						 
						
							2025-01-23 16:10:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dcca522618 
								
							 
						 
						
							
							
								
								Remove sdpa available patch ( #12734 )  
							
							 
							
							
							
						 
						
							2025-01-22 17:22:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c9b6c94a59 
								
							 
						 
						
							
							
								
								vLLM: Update vLLM-cpu to v0.6.6-post1 ( #12728 )  
							
							 
							
							... 
							
							
							
							Update vLLM-cpu to v0.6.6-post1 
							
						 
						
							2025-01-22 15:03:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								78cca0a68c 
								
							 
						 
						
							
							
								
								[NPU] update llm-npu-cli example ( #12729 )  
							
							 
							
							... 
							
							
							
							* update cli example
* add license
* rename
* update readme sample output 
							
						 
						
							2025-01-22 09:59:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6789e5d92f 
								
							 
						 
						
							
							
								
								small fix ( #12727 )  
							
							 
							
							
							
						 
						
							2025-01-21 17:27:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								085974e307 
								
							 
						 
						
							
							
								
								fix nf4 to cpu ( #12722 )  
							
							 
							
							
							
						 
						
							2025-01-21 09:23:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9aa4be8ced 
								
							 
						 
						
							
							
								
								Update runtime configuration on MTL ( #12720 )  
							
							 
							
							
							
						 
						
							2025-01-20 11:06:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bda87c21eb 
								
							 
						 
						
							
							
								
								add support and optimization for minicpmo audio part ( #12716 )  
							
							 
							
							
							
						 
						
							2025-01-16 16:39:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								534e0e6774 
								
							 
						 
						
							
							
								
								Update dependency for PyTorch 2.6 RC support for woq int4 ( #12714 )  
							
							 
							
							
							
						 
						
							2025-01-16 15:51:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								54d6328b3c 
								
							 
						 
						
							
							
								
								woq int4 fwd ( #12711 )  
							
							 
							
							
							
						 
						
							2025-01-16 15:48:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b62734748f 
								
							 
						 
						
							
							
								
								add support and optimization for minicpmo vision part ( #12713 )  
							
							 
							
							
							
						 
						
							2025-01-16 14:51:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c52bdff76b 
								
							 
						 
						
							
							
								
								Update Deepseek coder GPU example ( #12712 )  
							
							 
							
							... 
							
							
							
							* Update Deepseek coder GPU example
* Fix based on comment 
							
						 
						
							2025-01-16 14:05:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9d65dcd7ef 
								
							 
						 
						
							
							
								
								Fix deepseek coder with linear rope type support on GPU ( #12709 )  
							
							 
							
							... 
							
							
							
							* Fix deepseek coder with linear rope type
* Style fix
* Move to optimize_pre
* Small fix
* Small fix
* Small fix to not affect other cases
* Style fixes
* Update function name
* Small fix
* Small fix
* Small fix
* Fix for low transformers version first
* Style fix
* Small fix 
							
						 
						
							2025-01-15 21:12:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9930351112 
								
							 
						 
						
							
							
								
								LLM: add new qtype woq_int4 to support gemm int4 temporarily. ( #12706 )  
							
							 
							
							... 
							
							
							
							This PR adds a temporary qtype woq_int4 to avoid affecting other qtypes and models.
Co-authored-by: leonardozcm <leonardo1997zcm@gmail.com> 
							
						 
						
							2025-01-15 14:41:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								350fae285d 
								
							 
						 
						
							
							
								
								Add Qwen2-VL HF GPU example with ModelScope Support ( #12606 )  
							
							 
							
							... 
							
							
							
							* Add qwen2-vl example
* complete generate.py & readme
* improve lint style
* update 1-6
* update main readme
* Format and other small fixes
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2025-01-13 15:42:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1da7908b9 
								
							 
						 
						
							
							
								
								Fix name device is not found bug ( #12703 )  
							
							 
							
							
							
						 
						
							2025-01-13 10:11:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								db9db51e2c 
								
							 
						 
						
							
							
								
								fix lnl perf ( #12700 )  
							
							 
							
							
							
						 
						
							2025-01-10 18:00:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								da8bcb7db1 
								
							 
						 
						
							
							
								
								[NPU] fix load logic of glm-edge models ( #12698 )  
							
							 
							
							
							
						 
						
							2025-01-10 16:08:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f8dc408888 
								
							 
						 
						
							
							
								
								fix user issue ( #12692 )  
							
							 
							
							
							
						 
						
							2025-01-10 10:18:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								68857494a5 
								
							 
						 
						
							
							
								
								refactor to simplify following upgrade 2 ( #12685 )  
							
							 
							
							
							
						 
						
							2025-01-10 09:29:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7234c9b27b 
								
							 
						 
						
							
							
								
								update quantize kv cache condition ( #12681 )  
							
							 
							
							
							
						 
						
							2025-01-09 15:23:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5d8081afbc 
								
							 
						 
						
							
							
								
								Remove dummy model from performance tests ( #12682 )  
							
							 
							
							
							
						 
						
							2025-01-09 14:50:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1ec40cd09e 
								
							 
						 
						
							
							
								
								refactor to simplify following upgrade ( #12680 )  
							
							 
							
							
							
						 
						
							2025-01-09 13:34:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5c24276fc4 
								
							 
						 
						
							
							
								
								fix custom kernel registration ( #12674 )  
							
							 
							
							
							
						 
						
							2025-01-08 17:39:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a22a8c21bb 
								
							 
						 
						
							
							
								
								small fix and remove unused code about ipex ( #12671 )  
							
							 
							
							
							
						 
						
							2025-01-08 17:39:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c11f5f0fcd 
								
							 
						 
						
							
							
								
								also convert SdpaAttention in optimize_model ( #12673 )  
							
							 
							
							
							
						 
						
							2025-01-08 16:48:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7dd156d292 
								
							 
						 
						
							
							
								
								small fix and add comment ( #12670 )  
							
							 
							
							
							
						 
						
							2025-01-08 10:56:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ccf618ff4a 
								
							 
						 
						
							
							
								
								Remove all ipex usage ( #12666 )  
							
							 
							
							
							
						 
						
							2025-01-08 10:31:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5db6f9dcde 
								
							 
						 
						
							
							
								
								Add option with PyTorch 2.6 RC version for testing purposes ( #12668 )  
							
							 
							
							... 
							
							
							
							* Add option with PyTorch 2.6 RC version for testing purposes
* Small update 
							
						 
						
							2025-01-07 18:28:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f9ee7898c8 
								
							 
						 
						
							
							
								
								fix onednn dependency bug ( #12665 )  
							
							 
							
							
							
						 
						
							2025-01-07 16:26:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								29ad5c449e 
								
							 
						 
						
							
							
								
								refactor codegeex to remove ipex kernel usage ( #12664 )  
							
							 
							
							
							
						 
						
							2025-01-07 16:17:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								525b0ee991 
								
							 
						 
						
							
							
								
								[NPU] Tiny fixes on examples ( #12661 )  
							
							 
							
							
							
						 
						
							2025-01-07 14:30:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ebdf19fa7e 
								
							 
						 
						
							
							
								
								[NPU] Further fix saving of generation config ( #12657 )  
							
							 
							
							... 
							
							
							
							* Further fix saving of generation config
* Fix based on comments
* Small fix 
							
						 
						
							2025-01-07 13:53:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								381d448ee2 
								
							 
						 
						
							
							
								
								[NPU] Example & Quickstart updates ( #12650 )  
							
							 
							
							... 
							
							
							
							* Remove model with optimize_model=False in NPU verified models tables, and remove related example
* Remove experimental in run optimized model section title
* Unify model table order & example cmd
* Move embedding example to separate folder & update quickstart example link
* Add Quickstart reference in main NPU readme
* Small fix
* Small fix
* Move save/load examples under NPU/HF-Transformers-AutoModels
* Add low-bit and polish arguments for LLM Python examples
* Small fix
* Add low-bit and polish arguments for Multi-Model  examples
* Polish argument for Embedding models
* Polish argument for LLM CPP examples
* Add low-bit and polish argument for Save-Load examples
* Add accuracy tuning tips for examples
* Update NPU quickstart accuracy tuning with low-bit optimizations
* Add save/load section to quickstart
* Update CPP example sample output to EN
* Add installation regarding cmake for CPP examples
* Small fix
* Small fix
* Small fix
* Small fix
* Small fix
* Small fix
* Unify max prompt length to 512
* Change recommended low-bit for Qwen2.5-3B-Instruct to asym_int4
* Update based on comments
* Small fix 
							
						 
						
							2025-01-07 13:52:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ddc0ef3993 
								
							 
						 
						
							
							
								
								refactor device check and remove cohere/mixtral support ( #12659 )  
							
							 
							
							
							
						 
						
							2025-01-07 11:15:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ea65e4fecc 
								
							 
						 
						
							
							
								
								remove falcon support and related UT ( #12656 )  
							
							 
							
							
							
						 
						
							2025-01-07 09:26:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fae73eee79 
								
							 
						 
						
							
							
								
								[NPU] Support save npu quantized model without npu dependency ( #12647 )  
							
							 
							
							... 
							
							
							
							* support save awq
* load quantized model & save npu compiled model
* fix style
* update
* fix dll load issue
* update error message
* fix style 
							
						 
						
							2025-01-06 18:06:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								502461d836 
								
							 
						 
						
							
							
								
								remove unnecessary ipex kernel usage ( #12649 )  
							
							 
							
							
							
						 
						
							2025-01-03 16:45:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9f8b134889 
								
							 
						 
						
							
							
								
								add ipex-llm custom kernel registration ( #12648 )  
							
							 
							
							
							
						 
						
							2025-01-03 16:45:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0b377100c5 
								
							 
						 
						
							
							
								
								Add guide for save-load usage ( #12498 )  
							
							 
							
							
							
						 
						
							2025-01-03 16:30:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6711a48a36 
								
							 
						 
						
							
							
								
								Enable internvl2-8b on vllm ( #12645 )  
							
							 
							
							
							
						 
						
							2025-01-03 14:49:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8fd2dcba86 
								
							 
						 
						
							
							
								
								Add benchmark_util for transformers >= 4.47.0 ( #12644 )  
							
							 
							
							
							
						 
						
							2025-01-03 10:48:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8e5328e9b4 
								
							 
						 
						
							
							
								
								add disable opts for awq ( #12641 )  
							
							 
							
							
							
						 
						
							2025-01-02 15:45:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								62318964fa 
								
							 
						 
						
							
							
								
								Update llama example information ( #12640 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2025-01-02 13:48:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								81211fd010 
								
							 
						 
						
							
							
								
								remove unused code ( #12635 )  
							
							 
							
							
							
						 
						
							2025-01-02 13:31:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								534566e290 
								
							 
						 
						
							
							
								
								[NPU] Support minicpm-v with python cpp backend ( #12637 )  
							
							 
							
							
							
						 
						
							2025-01-02 11:13:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f289f68d57 
								
							 
						 
						
							
							
								
								small fix ( #12634 )  
							
							 
							
							
							
						 
						
							2024-12-30 17:14:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2d08155513 
								
							 
						 
						
							
							
								
								remove bmm, which is only required in ipex 2.0 ( #12630 )  
							
							 
							
							
							
						 
						
							2024-12-27 17:28:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f17ccfa61a 
								
							 
						 
						
							
							
								
								[NPU] Fix save-load usage of minicpm models ( #12628 )  
							
							 
							
							
							
						 
						
							2024-12-27 15:56:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c72a5db757 
								
							 
						 
						
							
							
								
								remove unused code again ( #12624 )  
							
							 
							
							
							
						 
						
							2024-12-27 14:17:11 +08:00