Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aa12f69bbf 
								
							 
						 
						
							
							
								
								Update Ollama portable zip QuickStart regarding saving VRAM ( #13155 )  
							
							 
							
							... 
							
							
							
							* Update Ollama portable zip quickstart regarding saving VRAM
* Small fix 
							
						 
						
							2025-05-13 13:25:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								086a8b3ab9 
								
							 
						 
						
							
							
								
								Update flashmoe_quickstart ( #13154 )  
							
							 
							
							
							
						 
						
							2025-05-13 07:56:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								886c7632b2 
								
							 
						 
						
							
							
								
								Add IPEX_LLM_FORCE_BATCH_FORWARD for vLLM docker image ( #13151 )  
							
							 
							
							
							
						 
						
							2025-05-12 13:44:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5df03ced2c 
								
							 
						 
						
							
							
								
								Update vllm patch to fix telechat2 and baichuan2 error ( #13150 )  
							
							 
							
							
							
						 
						
							2025-05-12 10:54:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9da1c56fa8 
								
							 
						 
						
							
							
								
								Create flashmoe quickstart ( #13147 )  
							
							 
							
							
							
						 
						
							2025-05-12 10:11:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								da08c9ca60 
								
							 
						 
						
							
							
								
								Update Dockerfile ( #13148 )  
							
							 
							
							
							
						 
						
							2025-05-12 09:19:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0438e39f3e 
								
							 
						 
						
							
							
								
								Add PyTorch 2.6 support in Latest Update ( #13144 )  
							
							 
							
							
							
						 
						
							2025-05-09 13:26:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								45f7bf6688 
								
							 
						 
						
							
							
								
								Refactor vLLM Documentation: Centralize Benchmarking and Improve Readability ( #13141 )  
							
							 
							
							... 
							
							
							
							* update vllm doc
* update image name
* update
* update
* update
* update 
							
						 
						
							2025-05-09 10:19:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f5d9c49a2a 
								
							 
						 
						
							
							
								
								add rotary_half_with_cache_inplaced to ipex_llm.transformers.models.common ( #13143 )  
							
							 
							
							... 
							
							
							
							* update
* small fix 
							
						 
						
							2025-05-09 09:20:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f2598b119e 
								
							 
						 
						
							
							
								
								update for bge-m3 ( #13138 )  
							
							 
							
							
							
						 
						
							2025-05-07 16:59:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e88a2aa65b 
								
							 
						 
						
							
							
								
								Modify ollama num_ctx related doc ( #13139 )  
							
							 
							
							... 
							
							
							
							* Modify ollama num_ctx related doc
* meet comments 
							
						 
						
							2025-05-07 16:44:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3a28b69202 
								
							 
						 
						
							
							
								
								Add qwen3 support ( #13137 )  
							
							 
							
							
							
						 
						
							2025-05-07 14:03:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								be76918b61 
								
							 
						 
						
							
							
								
								Update 083 multimodal benchmark ( #13135 )  
							
							 
							
							... 
							
							
							
							* update multimodal benchmark
* update 
							
						 
						
							2025-05-07 09:35:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								01bc7e9eb9 
								
							 
						 
						
							
							
								
								Fix 083 lm_head error ( #13132 )  
							
							 
							
							... 
							
							
							
							* fix no quantize error
* update
* update style 
							
						 
						
							2025-05-06 15:47:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								685a749adb 
								
							 
						 
						
							
							
								
								Update ollama-release doc into v0.6.2 ( #13094 )  
							
							 
							
							... 
							
							
							
							* Update ollama-release doc into v0.6.2
* update
* revert signature changes 
							
						 
						
							2025-04-30 16:22:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								51b41faad7 
								
							 
						 
						
							
							
								
								vLLM: update vLLM XPU to 0.8.3 version ( #13118 )  
							
							 
							
							... 
							
							
							
							vLLM: update vLLM XPU to 0.8.3 version 
							
						 
						
							2025-04-30 14:40:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f66eee1d1d 
								
							 
						 
						
							
							
								
								Update BMG troubleshooting guides regarding PPA installation ( #13119 )  
							
							 
							
							... 
							
							
							
							* Update bmg troubleshooting guides regarding PPA installation
* Small fix
* Update based on comments
* Small fix 
							
						 
						
							2025-04-28 15:48:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ad741503a9 
								
							 
						 
						
							
							
								
								Update bmg_quickstart.md ( #13117 )  
							
							 
							
							
							
						 
						
							2025-04-27 22:03:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6b033f8982 
								
							 
						 
						
							
							
								
								Update readme ( #13116 )  
							
							 
							
							
							
						 
						
							2025-04-27 18:18:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d222eaffd7 
								
							 
						 
						
							
							
								
								Update README.md ( #13113 )  
							
							 
							
							
							
						 
						
							2025-04-27 17:13:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								16fa778e65 
								
							 
						 
						
							
							
								
								enable glm4v and gemma-3 on vllm 083 ( #13114 )  
							
							 
							
							... 
							
							
							
							* enable glm4v and gemma-3
* update
* add qwen2.5-vl 
							
						 
						
							2025-04-27 17:10:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cf97d8f1d7 
								
							 
						 
						
							
							
								
								Update start-vllm-service.sh ( #13109 )  
							
							 
							
							
							
						 
						
							2025-04-25 15:42:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9808fb1ac2 
								
							 
						 
						
							
							
								
								update doc about flash-moe ( #13103 )  
							
							 
							
							... 
							
							
							
							* update doc about flashmoe
* revert toc
* meet review, add version note
* small fix 
							
						 
						
							2025-04-24 17:53:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0cfdd399e7 
								
							 
						 
						
							
							
								
								Update README.md ( #13104 )  
							
							 
							
							
							
						 
						
							2025-04-24 10:21:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								908fdb982e 
								
							 
						 
						
							
							
								
								small refactor and fix ( #13101 )  
							
							 
							
							
							
						 
						
							2025-04-22 14:45:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								14cd613fe1 
								
							 
						 
						
							
							
								
								Update vLLM docs with some new features ( #13092 )  
							
							 
							
							... 
							
							
							
							* done
* fix
* done
* Update README.md 
							
						 
						
							2025-04-22 14:39:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0801d27a6f 
								
							 
						 
						
							
							
								
								Remove PyTorch 2.3 support for Intel GPU ( #13097 )  
							
							 
							
							... 
							
							
							
							* Remove PyTorch 2.3 installation option for GPU
* Remove xpu_lnl option in installation guides for docs
* Update BMG quickstart
* Remove PyTorch 2.3 dependencies for GPU examples
* Update the graphmode example to use stable version 2.2.0
* Fix based on comments 
							
						 
						
							2025-04-22 10:26:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a2a35fdfad 
								
							 
						 
						
							
							
								
								Update portable zip link ( #13098 )  
							
							 
							
							... 
							
							
							
							* update portable zip link
* update CN
* address comments
* update latest updates
* revert 
							
						 
						
							2025-04-21 17:25:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2f78afcd2a 
								
							 
						 
						
							
							
								
								Refactor some functions to ipex_llm.transformers.models.common ( #13091 )  
							
							 
							
							... 
							
							
							
							* add quantize_linear & linear_forward
* add moe_group_topk
* rotary_two_with_cache_inplaced
* fix code style
* update related models 
							
						 
						
							2025-04-18 11:15:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								73198d5b80 
								
							 
						 
						
							
							
								
								Update to b17 image ( #13085 )  
							
							 
							
							... 
							
							
							
							* update vllm patch
* fix
* fix triton
---------
Co-authored-by: gc-fu <guancheng.fu@intel.com> 
							
						 
						
							2025-04-17 16:18:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								db5edba786 
								
							 
						 
						
							
							
								
								Update Dockerfile ( #13081 )  
							
							 
							
							
							
						 
						
							2025-04-16 09:18:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fa56212bb3 
								
							 
						 
						
							
							
								
								Update vLLM patch ( #13079 )  
							
							 
							
							... 
							
							
							
							* update vllm patch
* Update Dockerfile 
							
						 
						
							2025-04-15 16:55:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f5aaa83649 
								
							 
						 
						
							
							
								
								Update serving-xpu Dockerfile ( #13077 )  
							
							 
							
							... 
							
							
							
							* Update Dockerfile
* Update Dockerfile 
							
						 
						
							2025-04-15 13:34:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cfadf3f2f7 
								
							 
						 
						
							
							
								
								upgrade linux-libc-dev to fix CVEs ( #13076 )  
							
							 
							
							
							
						 
						
							2025-04-15 11:43:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e08c6bd018 
								
							 
						 
						
							
							
								
								Fix several models based on sdp api change ( #13075 )  
							
							 
							
							... 
							
							
							
							* fix baichuan based on sdp api change
* fix several models based on api change
* fix style 
							
						 
						
							2025-04-15 11:13:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7826152f5a 
								
							 
						 
						
							
							
								
								update vllm patch ( #13072 )  
							
							 
							
							
							
						 
						
							2025-04-14 14:56:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								10c30cdba9 
								
							 
						 
						
							
							
								
								set woq_int4 as default int4 ( #13021 )  
							
							 
							
							
							
						 
						
							2025-04-14 14:10:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6693e8ab04 
								
							 
						 
						
							
							
								
								Deepseek kv / sdp support ( #13068 )  
							
							 
							
							... 
							
							
							
							* update kv
* fix
* fix style 
							
						 
						
							2025-04-11 11:26:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3ee6dec0f8 
								
							 
						 
						
							
							
								
								update vllm patch ( #13064 )  
							
							 
							
							
							
						 
						
							2025-04-10 15:03:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1d7f4a83ac 
								
							 
						 
						
							
							
								
								Update documentation to build Docker image from Dockerfile instead of pulling from registry ( #13057 )  
							
							 
							
							... 
							
							
							
							* Update README.md
* Update README.md
* Update README.md
* Update README.md
* Update README.md
* Update docker_cpp_xpu_quickstart.md
* Update vllm_cpu_docker_quickstart.md
* Update docker_cpp_xpu_quickstart.md
* Update vllm_docker_quickstart.md
* Update fastchat_docker_quickstart.md
* Update docker_pytorch_inference_gpu.md 
							
						 
						
							2025-04-09 16:40:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cd0d4857b8 
								
							 
						 
						
							
							
								
								ipex-llm 2.2.0 post-release update (#13053 )  
							
							 
							
							... 
							
							
							
							* Update ollama/llama.cpp release link to 2.2.0 (#13052 )
* Post-update for releasing ipex-llm 2.2.0 
							
						 
						
							2025-04-07 17:41:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ef852dcb4a 
								
							 
						 
						
							
							
								
								add audio optimization for qwen2.5-omni ( #13037 )  
							
							 
							
							
							
						 
						
							2025-04-07 17:20:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7548c12b2c 
								
							 
						 
						
							
							
								
								Update portable zip QuickStart regarding signature verification ( #13050 )  
							
							 
							
							... 
							
							
							
							* Update portable zip QuickStart regarding signature verification
* Small fix
* Small fix 
							
						 
						
							2025-04-07 13:34:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								33ae52d083 
								
							 
						 
						
							
							
								
								Small doc fix ( #13045 )  
							
							 
							
							
							
						 
						
							2025-04-03 17:35:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3cb718d715 
								
							 
						 
						
							
							
								
								Small updates to Ollama portable zip quickstart ( #13043 )  
							
							 
							
							
							
						 
						
							2025-04-03 17:18:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b73728c7ce 
								
							 
						 
						
							
							
								
								Small updates to Ollama portable zip Quickstart ( #13040 )  
							
							 
							
							
							
						 
						
							2025-04-02 18:44:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4427012672 
								
							 
						 
						
							
							
								
								Link updates to pytorch 2.6 quickstart ( #13032 )  
							
							 
							
							
							
						 
						
							2025-04-01 10:35:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								633d1c72e7 
								
							 
						 
						
							
							
								
								Add PyTorch 2.6 QuickStart for Intel GPU ( #13024 )  
							
							 
							
							... 
							
							
							
							* Add quickstart for install IPEX-LLM with PyTorch 2.6 on Intel GPUs
* Add jump links
* Rename
* Small fix
* Small fix
* Update based on comments
* Small fix 
							
						 
						
							2025-04-01 10:21:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								34b1b14225 
								
							 
						 
						
							
							
								
								vLLM: Fix vLLM CPU dockerfile to resolve cmake deprecated issue ( #13026 )  
							
							 
							
							
							
						 
						
							2025-03-31 16:09:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								300eb01d98 
								
							 
						 
						
							
							
								
								Add basic optimization for Qwen2.5 omni ( #13022 )  
							
							 
							
							
							
						 
						
							2025-03-28 17:21:52 +08:00