Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e3130a06ed 
								
							 
						 
						
							
							
								
								Fix multimodal errors ( #13178 )  
							
							 
							
							... 
							
							
							
							* fix glm4v int4 output error
* fix glm-4v qwen2.5-vl fp16 error
* update 
							
						 
						
							2025-05-22 15:39:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								154af7d7f7 
								
							 
						 
						
							
							
								
								vLLM: set convert_to_half to False by default ( #13172 )  
							
							 
							
							... 
							
							
							
							* init
* remove
* fix 
							
						 
						
							2025-05-21 18:41:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d83e5068d2 
								
							 
						 
						
							
							
								
								Enable whisper ( #13162 )  
							
							 
							
							... 
							
							
							
							* fix error
* update dockerfile 
							
						 
						
							2025-05-19 14:07:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8ba57b41cd 
								
							 
						 
						
							
							
								
								Add merge quantized qkv ( #13160 )  
							
							 
							
							... 
							
							
							
							* add merge quantized qkv
* fix style & device
* add check 
							
						 
						
							2025-05-16 15:46:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Emmanuel Ferdman 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1e4e1353a0 
								
							 
						 
						
							
							
								
								Resolve messages formatting issues ( #13095 )  
							
							 
							
							... 
							
							
							
							Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com> 
							
						 
						
							2025-05-15 16:46:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								35b49e4d91 
								
							 
						 
						
							
							
								
								Add trl version in error message ( #13049 )  
							
							 
							
							... 
							
							
							
							* add version in error msg
* fix style 
							
						 
						
							2025-05-15 09:16:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f6441b4e3d 
								
							 
						 
						
							
							
								
								Add moe_softmax_topk ( #13157 )  
							
							 
							
							... 
							
							
							
							* add moe_softmax_topk
* address comments
* update 
							
						 
						
							2025-05-13 14:50:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								45f7bf6688 
								
							 
						 
						
							
							
								
								Refactor vLLM Documentation: Centralize Benchmarking and Improve Readability ( #13141 )  
							
							 
							
							... 
							
							
							
							* update vllm doc
* update image name
* update
* update
* update
* update 
							
						 
						
							2025-05-09 10:19:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f5d9c49a2a 
								
							 
						 
						
							
							
								
								add rotary_half_with_cache_inplaced to ipex_llm.transformers.models.common ( #13143 )  
							
							 
							
							... 
							
							
							
							* update
* small fix 
							
						 
						
							2025-05-09 09:20:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f2598b119e 
								
							 
						 
						
							
							
								
								update for bge-m3 ( #13138 )  
							
							 
							
							
							
						 
						
							2025-05-07 16:59:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3a28b69202 
								
							 
						 
						
							
							
								
								Add qwen3 support ( #13137 )  
							
							 
							
							
							
						 
						
							2025-05-07 14:03:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								01bc7e9eb9 
								
							 
						 
						
							
							
								
								Fix 083 lm_head error ( #13132 )  
							
							 
							
							... 
							
							
							
							* fix no quantize error
* update
* update style 
							
						 
						
							2025-05-06 15:47:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								51b41faad7 
								
							 
						 
						
							
							
								
								vLLM: update vLLM XPU to 0.8.3 version ( #13118 )  
							
							 
							
							... 
							
							
							
							vLLM: update vLLM XPU to 0.8.3 version 
							
						 
						
							2025-04-30 14:40:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d222eaffd7 
								
							 
						 
						
							
							
								
								Update README.md ( #13113 )  
							
							 
							
							
							
						 
						
							2025-04-27 17:13:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								16fa778e65 
								
							 
						 
						
							
							
								
								enable glm4v and gemma-3 on vllm 083 ( #13114 )  
							
							 
							
							... 
							
							
							
							* enable glm4v and gemma-3
* update
* add qwen2.5-vl 
							
						 
						
							2025-04-27 17:10:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0cfdd399e7 
								
							 
						 
						
							
							
								
								Update README.md ( #13104 )  
							
							 
							
							
							
						 
						
							2025-04-24 10:21:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								908fdb982e 
								
							 
						 
						
							
							
								
								small refactor and fix ( #13101 )  
							
							 
							
							
							
						 
						
							2025-04-22 14:45:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								14cd613fe1 
								
							 
						 
						
							
							
								
								Update vLLM docs with some new features ( #13092 )  
							
							 
							
							... 
							
							
							
							* done
* fix
* done
* Update README.md 
							
						 
						
							2025-04-22 14:39:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0801d27a6f 
								
							 
						 
						
							
							
								
								Remove PyTorch 2.3 support for Intel GPU ( #13097 )  
							
							 
							
							... 
							
							
							
							* Remove PyTorch 2.3 installation option for GPU
* Remove xpu_lnl option in installation guides for docs
* Update BMG quickstart
* Remove PyTorch 2.3 dependencies for GPU examples
* Update the graphmode example to use stable version 2.2.0
* Fix based on comments 
							
						 
						
							2025-04-22 10:26:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2f78afcd2a 
								
							 
						 
						
							
							
								
								Refactor some functions to ipex_llm.transformers.models.common ( #13091 )  
							
							 
							
							... 
							
							
							
							* add quantize_linear & linear_forward
* add moe_group_topk
* rotary_two_with_cache_inplaced
* fix code style
* update related models 
							
						 
						
							2025-04-18 11:15:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e08c6bd018 
								
							 
						 
						
							
							
								
								Fix several models based on sdp api change ( #13075 )  
							
							 
							
							... 
							
							
							
							* fix baichuan based on sdp api change
* fix several models based on api change
* fix style 
							
						 
						
							2025-04-15 11:13:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								10c30cdba9 
								
							 
						 
						
							
							
								
								set woq_int4 as default int4 ( #13021 )  
							
							 
							
							
							
						 
						
							2025-04-14 14:10:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6693e8ab04 
								
							 
						 
						
							
							
								
								Deepseek kv / sdp support ( #13068 )  
							
							 
							
							... 
							
							
							
							* update kv
* fix
* fix style 
							
						 
						
							2025-04-11 11:26:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cd0d4857b8 
								
							 
						 
						
							
							
								
								ipex-llm 2.2.0 post-release update (#13053 )  
							
							 
							
							... 
							
							
							
							* Update ollama/llama.cpp release link to 2.2.0 (#13052 )
* Post-update for releasing ipex-llm 2.2.0 
							
						 
						
							2025-04-07 17:41:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ef852dcb4a 
								
							 
						 
						
							
							
								
								add audio optimization for qwen2.5-omni ( #13037 )  
							
							 
							
							
							
						 
						
							2025-04-07 17:20:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								300eb01d98 
								
							 
						 
						
							
							
								
								Add basic optimization for Qwen2.5 omni ( #13022 )  
							
							 
							
							
							
						 
						
							2025-03-28 17:21:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7809ca9864 
								
							 
						 
						
							
							
								
								Reuse --privileged ( #13015 )  
							
							 
							
							... 
							
							
							
							* fix
* add 
							
						 
						
							2025-03-27 10:00:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f437b36678 
								
							 
						 
						
							
							
								
								Fix vllm glm edge model ( #13007 )  
							
							 
							
							... 
							
							
							
							* fix done
* fix 
							
						 
						
							2025-03-26 09:25:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								374747b492 
								
							 
						 
						
							
							
								
								Update bert optimization to fit higher transformers/torch version ( #13006 )  
							
							 
							
							
							
						 
						
							2025-03-25 16:12:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								27d669210f 
								
							 
						 
						
							
							
								
								remove fschat in EAGLE example ( #13005 )  
							
							 
							
							... 
							
							
							
							* update fschat version
* fix 
							
						 
						
							2025-03-25 15:48:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								08f96a5139 
								
							 
						 
						
							
							
								
								Rename LICENSE-Intel®-OpenMP*-Runtime-Library.txt to LICENSE-Intel®-OpenMP-Runtime-Library.txt ( #13002 )  
							
							 
							
							
							
						 
						
							2025-03-25 10:07:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								46a4f53967 
								
							 
						 
						
							
							
								
								OSPDT: add tpp licenses for release 2.2.0 ( #12840 )  
							
							 
							
							... 
							
							
							
							* Create LICENSE-zstd.txt
* Create LICENSE-libcxx.txt
* Create LICENSE-libcxxabi.txt
* Create LICENSE-safestring.txt
* Create LICENSE-stb-image.txt
* Create LICENSE-cluster-agent.txt
* Create LICENSE-hd-agent.txt
* Create LICENSE-platform-telemetry-agent.txt
* Create LICENSE-platform-update-agent.txt
* Create LICENSE-OpenCL-ICD-Loader.txt
* Create LICENSE-xptifw.txt
* Create LICENSE-intel-openmp.txt
* Create LICENSE-Intel®-OpenMP*-Runtime-Library.txt
* Create LICENSE-Intel®-C-C++-Fortran-Compiler-Mainline.txt
* add TPP files
* Add TPP files
* add tpp
* add tpp
* update
* update 
							
						 
						
							2025-03-21 15:52:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5bdf57327d 
								
							 
						 
						
							
							
								
								Remove ipex import in fastchat loader ( #12984 )  
							
							 
							
							
							
						 
						
							2025-03-20 18:29:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c9ecb7a113 
								
							 
						 
						
							
							
								
								Fix qwen nan value issue on vllm ( #12971 )  
							
							 
							
							... 
							
							
							
							* add to fix qwen nan value issue
* update 
							
						 
						
							2025-03-14 14:43:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cd109bb061 
								
							 
						 
						
							
							
								
								Gemma QLoRA example ( #12969 )  
							
							 
							
							... 
							
							
							
							* Gemma QLoRA example
* Update README.md
* Update README.md
---------
Co-authored-by: sgwhat <ge.song@intel.com> 
							
						 
						
							2025-03-14 14:27:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8bc41c13ab 
								
							 
						 
						
							
							
								
								Support PyTorch 2.6 with Arrow Lake-H AOT on Windows ( #12967 )  
							
							 
							
							
							
						 
						
							2025-03-13 15:29:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c8a0462507 
								
							 
						 
						
							
							
								
								Add vllm api_server input output log ( #12962 )  
							
							 
							
							
							
						 
						
							2025-03-12 20:58:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6a2d87e40f 
								
							 
						 
						
							
							
								
								add --entrypoint /bin/bash ( #12957 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: gc-fu <guancheng.fu@intel.com> 
							
						 
						
							2025-03-10 10:10:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7c0c77cce3 
								
							 
						 
						
							
							
								
								Tiny fixes ( #12936 )  
							
							 
							
							
							
						 
						
							2025-03-05 14:55:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								68a770745b 
								
							 
						 
						
							
							
								
								Add moonlight GPU example ( #12929 )  
							
							 
							
							... 
							
							
							
							* Add moonlight GPU example and update table
* Small fix
* Fix based on comments
* Small fix 
							
						 
						
							2025-03-05 11:31:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f81d89d908 
								
							 
						 
						
							
							
								
								Remove Unnecessary --privileged Flag While Keeping It for WSL Users ( #12920 )  
							
							 
							
							
							
						 
						
							2025-03-03 11:11:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b6f33d5c4d 
								
							 
						 
						
							
							
								
								optimize moonlight again ( #12909 )  
							
							 
							
							
							
						 
						
							2025-03-03 09:21:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								443cb5d4e0 
								
							 
						 
						
							
							
								
								Update Janus-Pro GPU example ( #12906 )  
							
							 
							
							
							
						 
						
							2025-02-28 15:39:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								39e360fe9d 
								
							 
						 
						
							
							
								
								add grouped topk optimization for moonlight ( #12903 )  
							
							 
							
							
							
						 
						
							2025-02-28 13:25:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e946127613 
								
							 
						 
						
							
							
								
								glm 4v 1st sdp for vision ( #12904 )  
							
							 
							
							... 
							
							
							
							* glm4v 1st sdp
* update glm4v example
* meet code review
* fix style 
							
						 
						
							2025-02-28 13:23:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								be1f073866 
								
							 
						 
						
							
							
								
								add fuse moe optimization for moonlight ( #12898 )  
							
							 
							
							
							
						 
						
							2025-02-27 09:15:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5faba06409 
								
							 
						 
						
							
							
								
								simple optimization for moonlight moe decoding forward ( #12891 )  
							
							 
							
							
							
						 
						
							2025-02-25 16:18:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ab3fc66eb7 
								
							 
						 
						
							
							
								
								optimize attention part of moonlight-14B-A3B ( #12886 )  
							
							 
							
							
							
						 
						
							2025-02-25 09:38:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3f6ecce508 
								
							 
						 
						
							
							
								
								support using xgrammar to get json output ( #12870 )  
							
							 
							
							
							
						 
						
							2025-02-24 14:10:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								02ec313eab 
								
							 
						 
						
							
							
								
								Update README.md ( #12877 )  
							
							 
							
							
							
						 
						
							2025-02-24 09:59:17 +08:00