Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								37820e1d86 
								
							 
						 
						
							
							
								
								Add privateGPT quickstart ( #10932 )  
							
							 
							
							... 
							
							
							
							* Add privateGPT quickstart
* Update privateGPT_quickstart.md
* Update _toc.yml
* Update _toc.yml
---------
Co-authored-by: Shengsheng Huang <shengsheng.huang@intel.com> 
							
						 
						
							2024-05-08 20:48:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f4c615b1ee 
								
							 
						 
						
							
							
								
								Add cohere example ( #10954 )  
							
							 
							
							... 
							
							
							
							* add link first
* add_cpu_example
* add GPU example 
							
						 
						
							2024-05-08 17:19:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zephyr1101 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7e7d969dcb 
								
							 
						 
						
							
							
								
								a experimental for workflow abuse step1 fix a typo ( #10965 )  
							
							 
							
							... 
							
							
							
							* Update llm_unit_tests.yml
* Update README.md
* Update llm_unit_tests.yml
* Update llm_unit_tests.yml 
							
						 
						
							2024-05-08 17:12:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3209d6b057 
								
							 
						 
						
							
							
								
								Fix speculative llama3 no stop error ( #10963 )  
							
							 
							
							... 
							
							
							
							* fix normal
* add eos_tokens_id on sp and add list if
* update
* no none 
							
						 
						
							2024-05-08 17:09:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								02870dc385 
								
							 
						 
						
							
							
								
								LLM: Refine README of AutoTP-FastAPI example ( #10960 )  
							
							 
							
							
							
						 
						
							2024-05-08 16:55:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2ebec0395c 
								
							 
						 
						
							
							
								
								optimize phi-3-mini-128 ( #10959 )  
							
							 
							
							
							
						 
						
							2024-05-08 16:33:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dfa3147278 
								
							 
						 
						
							
							
								
								update ( #10944 )  
							
							 
							
							
							
						 
						
							2024-05-08 14:28:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5973d6c753 
								
							 
						 
						
							
							
								
								make gemma's output better ( #10943 )  
							
							 
							
							
							
						 
						
							2024-05-08 14:27:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								15ee3fd542 
								
							 
						 
						
							
							
								
								Update igpu perf internlm ( #10958 )  
							
							 
							
							
							
						 
						
							2024-05-08 14:16:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0d6e12036f 
								
							 
						 
						
							
							
								
								Disable fast_init_ in load_low_bit ( #10945 )  
							
							 
							
							... 
							
							
							
							* fast_init_ disable 
							
						 
						
							2024-05-08 10:46:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								164e6957af 
								
							 
						 
						
							
							
								
								Refine axolotl quickstart ( #10957 )  
							
							 
							
							... 
							
							
							
							* Add default accelerate config for axolotl quickstart.
* Fix requirement link.
* Upgrade peft to 0.10.0 in requirement. 
							
						 
						
							2024-05-08 09:34:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c801c37bc6 
								
							 
						 
						
							
							
								
								optimize phi3 again: use quantize kv if possible ( #10953 )  
							
							 
							
							
							
						 
						
							2024-05-07 17:26:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aa2fa9fde1 
								
							 
						 
						
							
							
								
								optimize phi3 again: use sdp if possible ( #10951 )  
							
							 
							
							
							
						 
						
							2024-05-07 15:53:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c11170b96f 
								
							 
						 
						
							
							
								
								Upgrade Peft to 0.10.0 in finetune examples and docker ( #10930 )  
							
							 
							
							... 
							
							
							
							* Upgrade Peft to 0.10.0 in finetune examples.
* Upgrade Peft to 0.10.0 in docker. 
							
						 
						
							2024-05-07 15:12:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d7ca5d935b 
								
							 
						 
						
							
							
								
								Upgrade Peft version to 0.10.0 for LLM finetune ( #10886 )  
							
							 
							
							... 
							
							
							
							* Upgrade Peft version to 0.10.0
* Upgrade Peft version in ARC unit test and HF-Peft example. 
							
						 
						
							2024-05-07 15:09:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0efe26c3b6 
								
							 
						 
						
							
							
								
								Change order of chatglm2-6b and chatglm3-6b in iGPU perf test for more stable performance ( #10948 )  
							
							 
							
							
							
						 
						
							2024-05-07 13:48:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								245c7348bc 
								
							 
						 
						
							
							
								
								Add codegemma example ( #10884 )  
							
							 
							
							... 
							
							
							
							* add codegemma example in GPU/HF-Transformers-AutoModels/
* add README of codegemma example in GPU/HF-Transformers-AutoModels/
* add codegemma example in GPU/PyTorch-Models/
* add readme of codegemma example in GPU/PyTorch-Models/
* add codegemma example in CPU/HF-Transformers-AutoModels/
* add readme of codegemma example in CPU/HF-Transformers-AutoModels/
* add codegemma example in CPU/PyTorch-Models/
* add readme of codegemma example in CPU/PyTorch-Models/
* fix typos
* fix filename typo
* add codegemma in tables
* add comments of lm_head
* remove comments of use_cache 
							
						 
						
							2024-05-07 13:35:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								08ad40b251 
								
							 
						 
						
							
							
								
								improve ipex-llm-init for Linux ( #10928 )  
							
							 
							
							... 
							
							
							
							* refine ipex-llm-init
* install libtcmalloc.so for Max
* update based on comment
* remove unneeded code 
							
						 
						
							2024-05-07 12:55:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								33b8f524c2 
								
							 
						 
						
							
							
								
								Add cpp docker manually_test ( #10946 )  
							
							 
							
							... 
							
							
							
							* add cpp docker
* update 
							
						 
						
							2024-05-07 11:23:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								191b184341 
								
							 
						 
						
							
							
								
								LLM: Optimize cohere model ( #10878 )  
							
							 
							
							... 
							
							
							
							* use mlp and rms
* optimize kv_cache
* add fuse qkv
* add flash attention and fp16 sdp
* error fp8 sdp
* fix optimized
* fix style
* update
* add for pp 
							
						 
						
							2024-05-07 10:19:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								13a44cdacb 
								
							 
						 
						
							
							
								
								LLM: Refine Deepspeed-AutoTP-FastAPI example ( #10916 )  
							
							 
							
							
							
						 
						
							2024-05-07 09:37:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1de878bee1 
								
							 
						 
						
							
							
								
								LLM: Fix speculative llama3 long input error ( #10934 )  
							
							 
							
							
							
						 
						
							2024-05-07 09:25:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								49ab5a2b0e 
								
							 
						 
						
							
							
								
								Add embeddings ( #10931 )  
							
							 
							
							
							
						 
						
							2024-05-07 09:07:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d649236321 
								
							 
						 
						
							
							
								
								make images clickable ( #10939 )  
							
							 
							
							
							
						 
						
							2024-05-06 20:24:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								64938c2ca7 
								
							 
						 
						
							
							
								
								Dify quickstart revision ( #10938 )  
							
							 
							
							... 
							
							
							
							* revise dify quickstart guide
* update quick links and a small typo 
							
						 
						
							2024-05-06 19:59:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3f438495e4 
								
							 
						 
						
							
							
								
								update llama.cpp and ollama quickstart ( #10929 )  
							
							 
							
							
							
						 
						
							2024-05-06 15:01:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								41ffe1526c 
								
							 
						 
						
							
							
								
								Modify CPU finetune docker for bz2 error ( #10919 )  
							
							 
							
							... 
							
							
							
							* Avoid bz2 error
* change to cpu torch 
							
						 
						
							2024-05-06 10:41:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0e0bd309e2 
								
							 
						 
						
							
							
								
								LLM: Enable Speculative on Fastchat ( #10909 )  
							
							 
							
							... 
							
							
							
							* init
* enable streamer
* update
* update
* remove deprecated
* update
* update
* add gpu example 
							
						 
						
							2024-05-06 10:06:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8379f02a74 
								
							 
						 
						
							
							
								
								Add Dify quickstart ( #10903 )  
							
							 
							
							... 
							
							
							
							* add quick start
* modify
* modify
* add
* add
* resize
* add mp4
* add video
* add video
* video
* add
* modify
* add
* modify 
							
						 
						
							2024-05-06 10:01:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0edef1f94c 
								
							 
						 
						
							
							
								
								LLM: add min_new_tokens to all in one benchmark. ( #10911 )  
							
							 
							
							
							
						 
						
							2024-05-06 09:32:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c78a8e3677 
								
							 
						 
						
							
							
								
								update quickstart ( #10923 )  
							
							 
							
							
							
						 
						
							2024-04-30 18:19:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								282d676561 
								
							 
						 
						
							
							
								
								update continue quickstart ( #10922 )  
							
							 
							
							
							
						 
						
							2024-04-30 17:51:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								75dbf240ec 
								
							 
						 
						
							
							
								
								LLM: update split tensor conditions. ( #10872 )  
							
							 
							
							... 
							
							
							
							* LLM: update split tensor condition.
* add cond for split tensor.
* update priority of env.
* fix style.
* update env name. 
							
						 
						
							2024-04-30 17:07:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								71f51ce589 
								
							 
						 
						
							
							
								
								Initial Update for Continue Quickstart with Ollama backend ( #10918 )  
							
							 
							
							... 
							
							
							
							* Initial continue quickstart with ollama backend updates
* Small fix
* Small fix 
							
						 
						
							2024-04-30 15:10:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2c64754eb0 
								
							 
						 
						
							
							
								
								Add vLLM to ipex-llm serving image ( #10807 )  
							
							 
							
							... 
							
							
							
							* add vllm
* done
* doc work
* fix done
* temp
* add docs
* format
* add start-fastchat-service.sh
* fix 
							
						 
						
							2024-04-29 17:25:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1f876fd837 
								
							 
						 
						
							
							
								
								Add example for phi-3 ( #10881 )  
							
							 
							
							... 
							
							
							
							* Add example for phi-3
* add in readme and index
* fix
* fix
* fix
* fix indent
* fix 
							
						 
						
							2024-04-29 16:43:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c936ba3b64 
								
							 
						 
						
							
							
								
								Small fix for supporting workflow dispatch in nightly perf ( #10908 )  
							
							 
							
							
							
						 
						
							2024-04-29 13:25:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d884c62dc4 
								
							 
						 
						
							
							
								
								remove new_layout parameter ( #10906 )  
							
							 
							
							
							
						 
						
							2024-04-29 10:31:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fbcd7bc737 
								
							 
						 
						
							
							
								
								Fix Loader issue with dtype fp16 ( #10907 )  
							
							 
							
							
							
						 
						
							2024-04-29 10:16:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c9fac8c26b 
								
							 
						 
						
							
							
								
								Fix sdp logic ( #10896 )  
							
							 
							
							... 
							
							
							
							* fix
* fix 
							
						 
						
							2024-04-28 22:02:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								015d07a58f 
								
							 
						 
						
							
							
								
								Fix lookahead sample error & add update strategy ( #10894 )  
							
							 
							
							... 
							
							
							
							* Fix sample error & add update strategy
* add mtl config
* fix style
* remove print 
							
						 
						
							2024-04-28 17:21:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								94b4e96fa6 
								
							 
						 
						
							
							
								
								Small updates for workflow-dispatch triggered nightly perf ( #10902 )  
							
							 
							
							... 
							
							
							
							* Small fix for workflow-dispatch triggered nightly perf
* Small fix 
							
						 
						
							2024-04-28 11:27:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1a8a93d5e0 
								
							 
						 
						
							
							
								
								Further fix nightly perf ( #10901 )  
							
							 
							
							
							
						 
						
							2024-04-28 10:18:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7c290d3f92 
								
							 
						 
						
							
							
								
								Add workflow dispatch trigger to nightly perf ( #10900 )  
							
							 
							
							
							
						 
						
							2024-04-28 09:54:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ddfdaec137 
								
							 
						 
						
							
							
								
								Fix nightly perf ( #10899 )  
							
							 
							
							... 
							
							
							
							* Fix nightly perf by adding default value in benchmark for use_fp16_torch_dtype
* further fixes 
							
						 
						
							2024-04-28 09:39:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ea035f5e15 
								
							 
						 
						
							
							
								
								Update README.md ( #10898 )  
							
							 
							
							
							
						 
						
							2024-04-26 22:32:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9752ffe979 
								
							 
						 
						
							
							
								
								LLM: update split qkv native sdp. ( #10895 )  
							
							 
							
							... 
							
							
							
							* LLM: update split qkv native sdp.
* fix typo. 
							
						 
						
							2024-04-26 18:47:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								990535b1cf 
								
							 
						 
						
							
							
								
								Add tensor parallel for vLLM ( #10879 )  
							
							 
							
							... 
							
							
							
							* initial
* test initial tp
* initial sup
* fix format
* fix
* fix 
							
						 
						
							2024-04-26 17:10:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d058f2b403 
								
							 
						 
						
							
							
								
								Fix apt install oneapi scripts ( #10891 )  
							
							 
							
							... 
							
							
							
							* Fix apt install oneapi scripts
* add intel-oneapi-mkl-devel
* add apt pkgs 
							
						 
						
							2024-04-26 16:39:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f51bf018eb 
								
							 
						 
						
							
							
								
								Add benchmark script for pipeline parallel inference ( #10873 )  
							
							 
							
							
							
						 
						
							2024-04-26 15:28:11 +08:00