Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								71bcd18f44 
								
							 
						 
						
							
							
								
								fix qwen vl ( #11090 )  
							
							 
							
							
							
						 
						
							2024-05-21 18:40:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f654f7e08c 
								
							 
						 
						
							
							
								
								Add serving docker quickstart ( #11072 )  
							
							 
							
							... 
							
							
							
							* add temp file
* add initial docker readme
* temp
* done
* add fastchat service
* fix
* fix
* fix
* fix
* remove stale file 
							
						 
						
							2024-05-21 17:00:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f00625f9a4 
								
							 
						 
						
							
							
								
								refactor qwen2 ( #11087 )  
							
							 
							
							
							
						 
						
							2024-05-21 16:53:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								492ed3fd41 
								
							 
						 
						
							
							
								
								Add verified models to GPU finetune README ( #11088 )  
							
							 
							
							... 
							
							
							
							* Add verified models to GPU finetune README 
							
						 
						
							2024-05-21 15:49:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1210491748 
								
							 
						 
						
							
							
								
								ChatGLM3, Baichuan2 and Qwen1.5 QLoRA example ( #11078 )  
							
							 
							
							... 
							
							
							
							* Add chatglm3, qwen15-7b and baichuan-7b QLoRA alpaca example
* Remove unnecessary tokenization setting. 
							
						 
						
							2024-05-21 15:29:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ecb16dcf14 
								
							 
						 
						
							
							
								
								Add deepspeed autotp support for xpu docker ( #11077 )  
							
							 
							
							
							
						 
						
							2024-05-21 14:49:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								842d6dfc2d 
								
							 
						 
						
							
							
								
								Further Modify CPU example ( #11081 )  
							
							 
							
							... 
							
							
							
							* modify CPU example
* update 
							
						 
						
							2024-05-21 13:55:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d830a63bb7 
								
							 
						 
						
							
							
								
								refactor qwen ( #11074 )  
							
							 
							
							
							
						 
						
							2024-05-20 18:08:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								74950a152a 
								
							 
						 
						
							
							
								
								Fix tgi_api_server error file name ( #11075 )  
							
							 
							
							
							
						 
						
							2024-05-20 16:48:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4e97047d70 
								
							 
						 
						
							
							
								
								fix baichuan2 13b fp16 ( #11071 )  
							
							 
							
							
							
						 
						
							2024-05-20 11:21:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7170dd9192 
								
							 
						 
						
							
							
								
								Update guide for running qwen with AutoTP ( #11065 )  
							
							 
							
							
							
						 
						
							2024-05-20 10:53:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a2e1578fd9 
								
							 
						 
						
							
							
								
								Merge tgi_api_server to main ( #11036 )  
							
							 
							
							... 
							
							
							
							* init
* fix style
* speculative can not use benchmark
* add tgi server readme 
							
						 
						
							2024-05-20 09:15:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f60565adc7 
								
							 
						 
						
							
							
								
								Fix toc for vllm serving quickstart ( #11068 )  
							
							 
							
							
							
						 
						
							2024-05-17 17:12:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dfac168d5f 
								
							 
						 
						
							
							
								
								fix format/typo ( #11067 )  
							
							 
							
							
							
						 
						
							2024-05-17 16:52:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								31ce3e0c13 
								
							 
						 
						
							
							
								
								refactor baichuan2-13b ( #11064 )  
							
							 
							
							
							
						 
						
							2024-05-17 16:25:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								67db925112 
								
							 
						 
						
							
							
								
								Add vllm quickstart ( #10978 )  
							
							 
							
							... 
							
							
							
							* temp
* add doc
* finish
* done
* fix
* add initial docker readme
* temp
* done fixing vllm_quickstart
* done
* remove not used file
* add
* fix 
							
						 
						
							2024-05-17 16:16:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								56cb992497 
								
							 
						 
						
							
							
								
								LLM: Modify CPU Installation Command for most examples ( #11049 )  
							
							 
							
							... 
							
							
							
							* init
* refine
* refine
* refine
* modify hf-agent example
* modify all CPU model example
* remove readthedoc modify
* replace powershell with cmd
* fix repo
* fix repo
* update
* remove comment on windows code block
* update
* update
* update
* update
---------
Co-authored-by: xiangyuT <xiangyu.tian@intel.com> 
							
						 
						
							2024-05-17 15:52:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f1156e6b20 
								
							 
						 
						
							
							
								
								support gguf_q4k_m / gguf_q4k_s ( #10887 )  
							
							 
							
							... 
							
							
							
							* initial commit
* UPDATE
* fix style
* fix style
* add gguf_q4k_s
* update comment
* fix 
							
						 
						
							2024-05-17 14:30:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								981d668be6 
								
							 
						 
						
							
							
								
								refactor baichuan2-7b ( #11062 )  
							
							 
							
							
							
						 
						
							2024-05-17 13:01:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								84239d0bd3 
								
							 
						 
						
							
							
								
								Update docker image tags in Docker Quickstart ( #11061 )  
							
							 
							
							... 
							
							
							
							* update docker image tag to latest
* add note
* simplify note
* add link in reStructuredText
* minor fix
* update tag 
							
						 
						
							2024-05-17 11:06:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b3027e2d60 
								
							 
						 
						
							
							
								
								Update for cpu install option in performance tests ( #11060 )  
							
							 
							
							
							
						 
						
							2024-05-17 10:33:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d963e95363 
								
							 
						 
						
							
							
								
								LLM: Modify CPU Installation Command for documentation ( #11042 )  
							
							 
							
							... 
							
							
							
							* init
* refine
* refine
* refine
* refine comments 
							
						 
						
							2024-05-17 10:14:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fff067d240 
								
							 
						 
						
							
							
								
								Make install ut for cpu exactly the same as what we want for users ( #11051 )  
							
							 
							
							
							
						 
						
							2024-05-17 10:11:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3a72e5df8c 
								
							 
						 
						
							
							
								
								disable mlp fusion of fp6 on mtl ( #11059 )  
							
							 
							
							
							
						 
						
							2024-05-17 10:10:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								192ae35012 
								
							 
						 
						
							
							
								
								Add support for llama2 quantize_kv with transformers 4.38.0 ( #11054 )  
							
							 
							
							... 
							
							
							
							* add support for llama2 quantize_kv with transformers 4.38.0
* fix code style
* fix code style 
							
						 
						
							2024-05-16 22:23:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								16b2a418be 
								
							 
						 
						
							
							
								
								hotfix native_sdp ut ( #11046 )  
							
							 
							
							... 
							
							
							
							* hotfix native_sdp
* update 
							
						 
						
							2024-05-16 17:15:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6be70283b7 
								
							 
						 
						
							
							
								
								fix chatglm run error ( #11045 )  
							
							 
							
							... 
							
							
							
							* fix chatglm
* update
* fix style 
							
						 
						
							2024-05-16 15:39:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8cae897643 
								
							 
						 
						
							
							
								
								use new rope in phi3 ( #11047 )  
							
							 
							
							
							
						 
						
							2024-05-16 15:12:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								00d4410746 
								
							 
						 
						
							
							
								
								Update cpp docker quickstart ( #11040 )  
							
							 
							
							... 
							
							
							
							* add sample output
* update link
* update
* update header
* update 
							
						 
						
							2024-05-16 14:55:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c62e828281 
								
							 
						 
						
							
							
								
								Create release-ipex-llm.yaml ( #11039 )  
							
							 
							
							
							
						 
						
							2024-05-16 11:10:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4638682140 
								
							 
						 
						
							
							
								
								Fix xpu finetune image path in action ( #11037 )  
							
							 
							
							... 
							
							
							
							* Fix xpu finetune image path in action 
							
						 
						
							2024-05-16 10:48:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9a96af4232 
								
							 
						 
						
							
							
								
								Remove oneAPI pip install command in related examples ( #11030 )  
							
							 
							
							... 
							
							
							
							* Remove pip install command in windows installation guide
* fix chatglm3 installation guide
* Fix gemma cpu example
* Apply on other examples
* fix 
							
						 
						
							2024-05-16 10:46:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								612a365479 
								
							 
						 
						
							
							
								
								LLM: Install CPU version torch with extras [all] ( #10868 )  
							
							 
							
							... 
							
							
							
							Modify setup.py to install CPU version torch with extras [all] 
							
						 
						
							2024-05-16 10:39:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								59df750326 
								
							 
						 
						
							
							
								
								Use new sdp again ( #11025 )  
							
							 
							
							
							
						 
						
							2024-05-16 09:33:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7e29928865 
								
							 
						 
						
							
							
								
								refactor serving docker image ( #11028 )  
							
							 
							
							
							
						 
						
							2024-05-16 09:30:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9942a4ba69 
								
							 
						 
						
							
							
								
								[WIP] Support llama2 with transformers==4.38.0 ( #11024 )  
							
							 
							
							... 
							
							
							
							* support llama2 with transformers==4.38.0
* add support for quantize_qkv
* add original support for 4.38.0 now
* code style fix 
							
						 
						
							2024-05-15 18:07:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								686f6038a8 
								
							 
						 
						
							
							
								
								Support fp6 save & load ( #11034 )  
							
							 
							
							
							
						 
						
							2024-05-15 17:52:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ac384e0f45 
								
							 
						 
						
							
							
								
								add fp6 mlp fusion ( #11032 )  
							
							 
							
							... 
							
							
							
							* add fp6 fusion
* add qkv fusion for fp6
* remove qkv first 
							
						 
						
							2024-05-15 17:42:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2084ebe4ee 
								
							 
						 
						
							
							
								
								Enable fastchat benchmark latency ( #11017 )  
							
							 
							
							... 
							
							
							
							* enable fastchat benchmark
* add readme
* update readme
* update 
							
						 
						
							2024-05-15 14:52:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								93d40ab127 
								
							 
						 
						
							
							
								
								Update lookahead strategy ( #11021 )  
							
							 
							
							... 
							
							
							
							* update lookahead strategy
* remove lines
* fix python style check 
							
						 
						
							2024-05-15 14:48:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1d73fc8106 
								
							 
						 
						
							
							
								
								update cpp quickstart ( #11031 )  
							
							 
							
							
							
						 
						
							2024-05-15 14:33:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d9f71f1f53 
								
							 
						 
						
							
							
								
								Update benchmark util for example using ( #11027 )  
							
							 
							
							... 
							
							
							
							* mv benchmark_util.py to utils/
* remove
* update 
							
						 
						
							2024-05-15 14:16:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								86cec80b51 
								
							 
						 
						
							
							
								
								LLM: Add llm inference_cpp_xpu_docker ( #10933 )  
							
							 
							
							... 
							
							
							
							* test_cpp_docker
* update
* update
* update
* update
* add sudo
* update nodejs version
* no need npm
* remove blinker
* new cpp docker
* restore
* add line
* add manually_build
* update and add mtl
* update for workdir llm
* add benchmark part
* update readme
* update 1024-128
* update readme
* update
* fix
* update
* update
* update readme too
* update readme
* no change
* update dir_name
* update readme 
							
						 
						
							2024-05-15 11:10:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4053a6ef94 
								
							 
						 
						
							
							
								
								Update environment variable setting in AutoTP with arc ( #11018 )  
							
							 
							
							
							
						 
						
							2024-05-15 10:23:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fad1dbaf60 
								
							 
						 
						
							
							
								
								use sdp fp8 causal kernel ( #11023 )  
							
							 
							
							
							
						 
						
							2024-05-15 10:22:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c34f85e7d0 
								
							 
						 
						
							
							
								
								[Doc] Simplify installation on Windows for Intel GPU ( #11004 )  
							
							 
							
							... 
							
							
							
							* Simplify GPU installation guide regarding windows Prerequisites
* Update Windows install quickstart on Intel GPU
* Update for llama.cpp quickstart
* Update regarding minimum driver version
* Small fix
* Update based on comments
* Small fix 
							
						 
						
							2024-05-15 09:55:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1e00bd7bbe 
								
							 
						 
						
							
							
								
								Re-org XPU finetune images ( #10971 )  
							
							 
							
							... 
							
							
							
							* Rename xpu finetune image from `ipex-llm-finetune-qlora-xpu` to `ipex-llm-finetune-xpu`.
* Add axolotl to xpu finetune image.
* Upgrade peft to 0.10.0, transformers to 4.36.0.
* Add accelerate default config to home. 
							
						 
						
							2024-05-15 09:42:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ee325e9cc9 
								
							 
						 
						
							
							
								
								fix phi3 ( #11022 )  
							
							 
							
							
							
						 
						
							2024-05-15 09:32:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7d3791c819 
								
							 
						 
						
							
							
								
								[LLM] Add llama3 alpaca qlora example ( #11011 )  
							
							 
							
							... 
							
							
							
							* Add llama3 finetune example based on alpaca qlora example 
							
						 
						
							2024-05-15 09:17:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0a732bebe7 
								
							 
						 
						
							
							
								
								Add phi3 cached RotaryEmbedding ( #11013 )  
							
							 
							
							... 
							
							
							
							* phi3cachedrotaryembed
* pep8 
							
						 
						
							2024-05-15 08:16:43 +08:00