011b9faa5c  Cengguang Zhang  2024-05-24 10:27:53 +08:00
    LLM: unify baichuan2-13b alibi mask dtype with model dtype. (#11107)
    * LLM: unify alibi mask dtype.
    * fix comments.
0a06a6e1d4  Jiao Wang  2024-05-24 10:26:38 +08:00
    Update tests for transformers 4.36 (#10858)
    * update unit test
    * update
    * fix gpu attention test
    * update
    * update example test
    * replace replit code
    * update
    * set safe_serialization false
    * perf test
    * update
    * delete
    * update
    * revert
    * update
b3f6faa038  Xiangyu Tian  2024-05-24 09:16:59 +08:00
    LLM: Add CPU vLLM entrypoint (#11083)
    Add CPU vLLM entrypoint and update CPU vLLM serving example.

797dbc48b8  Yishuo Wang  2024-05-23 17:37:37 +08:00
    fix phi-2 and phi-3 convert (#11116)

37b98a531f  Yishuo Wang  2024-05-23 17:26:24 +08:00
    support running internlm xcomposer2 on gpu and add sdp optimization (#11115)
c5e8b90c8d  Zhao Changmin  2024-05-23 17:17:45 +08:00
    Add Qwen register attention implementation (#11110)
    * qwen_register
0e53f20edb  Yishuo Wang  2024-05-23 16:36:09 +08:00
    support running internlm-xcomposer2 on cpu (#11111)

d36b41d59e  Yuwen Hu  2024-05-22 18:20:30 +08:00
    Add setuptools limitation for ipex-llm[xpu] (#11102)
    * Add setuptool limitation for ipex-llm[xpu]
    * llamaindex option update

cd4dff09ee  Yishuo Wang  2024-05-22 17:43:50 +08:00
    support phi-3 vision (#11101)

15d906a97b  Zhao Changmin  2024-05-22 17:18:07 +08:00
    Update linux igpu run script (#11098)
    * update run script
f63172ef63  Kai Huang  2024-05-22 16:43:11 +08:00
    Align ppl with llama.cpp (#11055)
    * update script
    * remove
    * add header
    * update readme

f6c9ffe4dc  Qiyuan Gong  2024-05-22 15:20:53 +08:00
    Add WANDB_MODE and HF_HUB_OFFLINE to XPU finetune README (#11097)
    * Add WANDB_MODE=offline to avoid multi-GPUs finetune errors.
    * Add HF_HUB_OFFLINE=1 to avoid Hugging Face related errors.

584439e498  Shaojun Liu  2024-05-22 11:10:44 +08:00
    update homepage url for ipex-llm (#11094)
    * update homepage url
    * Update python version to 3.11
    * Update long description

71bcd18f44  Xin Qiu  2024-05-21 18:40:29 +08:00
    fix qwen vl (#11090)

f00625f9a4  Yishuo Wang  2024-05-21 16:53:42 +08:00
    refactor qwen2 (#11087)

492ed3fd41  Qiyuan Gong  2024-05-21 15:49:15 +08:00
    Add verified models to GPU finetune README (#11088)

1210491748  Qiyuan Gong  2024-05-21 15:29:43 +08:00
    ChatGLM3, Baichuan2 and Qwen1.5 QLoRA example (#11078)
    * Add chatglm3, qwen15-7b and baichuan-7b QLoRA alpaca example
    * Remove unnecessary tokenization setting.

842d6dfc2d  ZehuaCao  2024-05-21 13:55:47 +08:00
    Further Modify CPU example (#11081)
    * modify CPU example
    * update
d830a63bb7  Yishuo Wang  2024-05-20 18:08:37 +08:00
    refactor qwen (#11074)

74950a152a  Wang, Jian4  2024-05-20 16:48:40 +08:00
    Fix tgi_api_server error file name (#11075)

4e97047d70  Yishuo Wang  2024-05-20 11:21:20 +08:00
    fix baichuan2 13b fp16 (#11071)

7170dd9192  binbin Deng  2024-05-20 10:53:17 +08:00
    Update guide for running qwen with AutoTP (#11065)

a2e1578fd9  Wang, Jian4  2024-05-20 09:15:03 +08:00
    Merge tgi_api_server to main (#11036)
    * init
    * fix style
    * speculative can not use benchmark
    * add tgi server readme

31ce3e0c13  Yishuo Wang  2024-05-17 16:25:30 +08:00
    refactor baichuan2-13b (#11064)

56cb992497  ZehuaCao  2024-05-17 15:52:20 +08:00
    LLM: Modify CPU Installation Command for most examples (#11049)
    * init
    * refine
    * refine
    * refine
    * modify hf-agent example
    * modify all CPU model example
    * remove readthedoc modify
    * replace powershell with cmd
    * fix repo
    * fix repo
    * update
    * remove comment on windows code block
    * update
    * update
    * update
    * update
    Co-authored-by: xiangyuT <xiangyu.tian@intel.com>

f1156e6b20  Ruonan Wang  2024-05-17 14:30:09 +08:00
    support gguf_q4k_m / gguf_q4k_s (#10887)
    * initial commit
    * UPDATE
    * fix style
    * fix style
    * add gguf_q4k_s
    * update comment
    * fix

981d668be6  Yishuo Wang  2024-05-17 13:01:34 +08:00
    refactor baichuan2-7b (#11062)

d963e95363  Xiangyu Tian  2024-05-17 10:14:00 +08:00
    LLM: Modify CPU Installation Command for documentation (#11042)
    * init
    * refine
    * refine
    * refine
    * refine comments

3a72e5df8c  Ruonan Wang  2024-05-17 10:10:16 +08:00
    disable mlp fusion of fp6 on mtl (#11059)
192ae35012  SONG Ge  2024-05-16 22:23:39 +08:00
    Add support for llama2 quantize_kv with transformers 4.38.0 (#11054)
    * add support for llama2 quantize_kv with transformers 4.38.0
    * fix code style
    * fix code style

16b2a418be  SONG Ge  2024-05-16 17:15:37 +08:00
    hotfix native_sdp ut (#11046)
    * hotfix native_sdp
    * update

6be70283b7  Xin Qiu  2024-05-16 15:39:18 +08:00
    fix chatglm run error (#11045)
    * fix chatglm
    * update
    * fix style

8cae897643  Yishuo Wang  2024-05-16 15:12:35 +08:00
    use new rope in phi3 (#11047)

9a96af4232  Jin Qiao  2024-05-16 10:46:29 +08:00
    Remove oneAPI pip install command in related examples (#11030)
    * Remove pip install command in windows installation guide
    * fix chatglm3 installation guide
    * Fix gemma cpu example
    * Apply on other examples
    * fix

612a365479  Xiangyu Tian  2024-05-16 10:39:55 +08:00
    LLM: Install CPU version torch with extras [all] (#10868)
    Modify setup.py to install CPU version torch with extras [all]

59df750326  Yishuo Wang  2024-05-16 09:33:34 +08:00
    Use new sdp again (#11025)
9942a4ba69  SONG Ge  2024-05-15 18:07:00 +08:00
    [WIP] Support llama2 with transformers==4.38.0 (#11024)
    * support llama2 with transformers==4.38.0
    * add support for quantize_qkv
    * add original support for 4.38.0 now
    * code style fix
686f6038a8  Yina Chen  2024-05-15 17:52:02 +08:00
    Support fp6 save & load (#11034)

ac384e0f45  Ruonan Wang  2024-05-15 17:42:50 +08:00
    add fp6 mlp fusion (#11032)
    * add fp6 fusion
    * add qkv fusion for fp6
    * remove qkv first

2084ebe4ee  Wang, Jian4  2024-05-15 14:52:09 +08:00
    Enable fastchat benchmark latency (#11017)
    * enable fastchat benchmark
    * add readme
    * update readme
    * update

93d40ab127  hxsz1997  2024-05-15 14:48:05 +08:00
    Update lookahead strategy (#11021)
    * update lookahead strategy
    * remove lines
    * fix python style check

d9f71f1f53  Wang, Jian4  2024-05-15 14:16:35 +08:00
    Update benchmark util for example using (#11027)
    * mv benchmark_util.py to utils/
    * remove
    * update

4053a6ef94  binbin Deng  2024-05-15 10:23:58 +08:00
    Update environment variable setting in AutoTP with arc (#11018)

fad1dbaf60  Yishuo Wang  2024-05-15 10:22:35 +08:00
    use sdp fp8 causal kernel (#11023)

ee325e9cc9  Yishuo Wang  2024-05-15 09:32:12 +08:00
    fix phi3 (#11022)

7d3791c819  Ziteng Zhang  2024-05-15 09:17:32 +08:00
    [LLM] Add llama3 alpaca qlora example (#11011)
    * Add llama3 finetune example based on alpaca qlora example

0a732bebe7  Zhao Changmin  2024-05-15 08:16:43 +08:00
    Add phi3 cached RotaryEmbedding (#11013)
    * phi3cachedrotaryembed
    * pep8

893197434d  Yina Chen  2024-05-14 16:31:44 +08:00
    Add fp6 support on gpu (#11008)
    * add fp6 support
    * fix style

b03c859278  Zhao Changmin  2024-05-14 15:16:27 +08:00
    Add phi3RMS (#10988)
    * phi3RMS

170e3d65e0  Yishuo Wang  2024-05-14 14:29:18 +08:00
    use new sdp and fp32 sdp (#11007)

c957ea3831  Qiyuan Gong  2024-05-14 13:43:59 +08:00
    Add axolotl main support and axolotl Llama-3-8B QLoRA example (#10984)
    * Support axolotl main (796a085).
    * Add axolotl Llama-3-8B QLoRA example.
    * Change `sequence_len` to 256 for alpaca, and revert `lora_r` value.
    * Add example to quick_start.
fb656fbf74  Yuwen Hu  2024-05-14 13:40:54 +08:00
    Add requirements for oneAPI pypi packages for windows Intel GPU users (#11009)

7f8c5b410b  Shaojun Liu  2024-05-14 12:58:31 +08:00
    Quickstart: Run PyTorch Inference on Intel GPU using Docker (on Linux or WSL) (#10970)
    * add entrypoint.sh
    * add quickstart
    * remove entrypoint
    * update
    * Install related library of benchmarking
    * update
    * print out results
    * update docs
    * minor update
    * update
    * update quickstart
    * update
    * update
    * update
    * update
    * update
    * update
    * add chat & example section
    * add more details
    * minor update
    * rename quickstart
    * update
    * minor update
    * update
    * update config.yaml
    * update readme
    * use --gpu
    * add tips
    * minor update
    * update
a465111cf4  Guancheng Fu  2024-05-13 16:44:48 +08:00
    Update README.md (#11003)

74997a3ed1  Guancheng Fu  2024-05-13 15:30:19 +08:00
    Adding load_low_bit interface for ipex_llm_worker (#11000)
    * initial implementation, need tests
    * fix
    * fix baichuan issue
    * fix typo

1b3c7a6928  Yishuo Wang  2024-05-13 14:09:55 +08:00
    remove phi3 empty cache (#10997)

99255fe36e  ZehuaCao  2024-05-13 13:57:19 +08:00
    fix ppl (#10996)

f8dd2e52ad  Kai Huang  2024-05-11 14:40:37 +08:00
    Fix Langchain upstream ut (#10985)
    * Fix Langchain upstream ut
    * Small fix
    * Install bigdl-llm
    * Update run-langchain-upstream-tests.sh
    * Update run-langchain-upstream-tests.sh
    * Update llm_unit_tests.yml
    * Update run-langchain-upstream-tests.sh
    * Update llm_unit_tests.yml
    * Update run-langchain-upstream-tests.sh
    * fix git checkout
    * fix
    Co-authored-by: Zhangky11 <2321096202@qq.com>
    Co-authored-by: Keyan (Kyrie) Zhang <79576162+Zhangky11@users.noreply.github.com>

9f6358e4c2  Yuwen Hu  2024-05-11 12:33:35 +08:00
    Deprecate support for pytorch 2.0 on Linux for ipex-llm >= 2.1.0b20240511 (#10986)
    * Remove xpu_2.0 option in setup.py
    * Disable xpu_2.0 test in UT and nightly
    * Update docs for deprecated pytorch 2.0
    * Small doc update

ad96f32ce0  Yishuo Wang  2024-05-10 17:33:46 +08:00
    optimize phi3 1st token performance (#10981)

cfed76b2ed  Cengguang Zhang  2024-05-10 16:40:15 +08:00
    LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937)
    * LLM: add split tensor support for baichuan2-7b and qwen1.5-7b.
    * fix style.
    * fix style.
    * fix style.
    * add support for mistral and fix condition threshold.
    * fix style.
    * fix comments.

f9615f12d1  binbin Deng  2024-05-10 15:02:58 +08:00
    Add driver related packages version check in env script (#10977)

a6342cc068  Kai Huang  2024-05-09 19:50:04 +08:00
    Empty cache after phi first attention to support 4k input (#10972)
    * empty cache
    * fix style

e753125880  Yishuo Wang  2024-05-09 17:02:59 +08:00
    use fp16_sdp when head_dim=96 (#10976)

697ca79eca  Yishuo Wang  2024-05-09 15:16:18 +08:00
    use quantize kv and sdp in phi3-mini (#10973)
f4c615b1ee  Wang, Jian4  2024-05-08 17:19:59 +08:00
    Add cohere example (#10954)
    * add link first
    * add_cpu_example
    * add GPU example

3209d6b057  Wang, Jian4  2024-05-08 17:09:47 +08:00
    Fix speculative llama3 no stop error (#10963)
    * fix normal
    * add eos_tokens_id on sp and add list if
    * update
    * no none
02870dc385  Xiangyu Tian  2024-05-08 16:55:23 +08:00
    LLM: Refine README of AutoTP-FastAPI example (#10960)

2ebec0395c  Yishuo Wang  2024-05-08 16:33:17 +08:00
    optimize phi-3-mini-128 (#10959)

dfa3147278  Xin Qiu  2024-05-08 14:28:05 +08:00
    update (#10944)

5973d6c753  Xin Qiu  2024-05-08 14:27:51 +08:00
    make gemma's output better (#10943)

15ee3fd542  Jin Qiao  2024-05-08 14:16:43 +08:00
    Update igpu perf internlm (#10958)

0d6e12036f  Zhao Changmin  2024-05-08 10:46:19 +08:00
    Disable fast_init_ in load_low_bit (#10945)
    * fast_init_ disable

164e6957af  Qiyuan Gong  2024-05-08 09:34:02 +08:00
    Refine axolotl quickstart (#10957)
    * Add default accelerate config for axolotl quickstart.
    * Fix requirement link.
    * Upgrade peft to 0.10.0 in requirement.

c801c37bc6  Yishuo Wang  2024-05-07 17:26:19 +08:00
    optimize phi3 again: use quantize kv if possible (#10953)

aa2fa9fde1  Yishuo Wang  2024-05-07 15:53:08 +08:00
    optimize phi3 again: use sdp if possible (#10951)

c11170b96f  Qiyuan Gong  2024-05-07 15:12:26 +08:00
    Upgrade Peft to 0.10.0 in finetune examples and docker (#10930)
    * Upgrade Peft to 0.10.0 in finetune examples.
    * Upgrade Peft to 0.10.0 in docker.

d7ca5d935b  Qiyuan Gong  2024-05-07 15:09:14 +08:00
    Upgrade Peft version to 0.10.0 for LLM finetune (#10886)
    * Upgrade Peft version to 0.10.0
    * Upgrade Peft version in ARC unit test and HF-Peft example.

0efe26c3b6  Yuwen Hu  2024-05-07 13:48:39 +08:00
    Change order of chatglm2-6b and chatglm3-6b in iGPU perf test for more stable performance (#10948)

245c7348bc  hxsz1997  2024-05-07 13:35:42 +08:00
    Add codegemma example (#10884)
    * add codegemma example in GPU/HF-Transformers-AutoModels/
    * add README of codegemma example in GPU/HF-Transformers-AutoModels/
    * add codegemma example in GPU/PyTorch-Models/
    * add readme of codegemma example in GPU/PyTorch-Models/
    * add codegemma example in CPU/HF-Transformers-AutoModels/
    * add readme of codegemma example in CPU/HF-Transformers-AutoModels/
    * add codegemma example in CPU/PyTorch-Models/
    * add readme of codegemma example in CPU/PyTorch-Models/
    * fix typos
    * fix filename typo
    * add codegemma in tables
    * add comments of lm_head
    * remove comments of use_cache

08ad40b251  Shaojun Liu  2024-05-07 12:55:14 +08:00
    improve ipex-llm-init for Linux (#10928)
    * refine ipex-llm-init
    * install libtcmalloc.so for Max
    * update based on comment
    * remove unneeded code

191b184341  Wang, Jian4  2024-05-07 10:19:50 +08:00
    LLM: Optimize cohere model (#10878)
    * use mlp and rms
    * optimize kv_cache
    * add fuse qkv
    * add flash attention and fp16 sdp
    * error fp8 sdp
    * fix optimized
    * fix style
    * update
    * add for pp
13a44cdacb  Xiangyu Tian  2024-05-07 09:37:31 +08:00
    LLM: Refine Deepspeed-AutoTP-FastAPI example (#10916)
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1de878bee1 
								
							 
						 
						
							
							
								
								LLM: Fix speculative llama3 long input error ( #10934 )  
							
							 
							
							
							
						 
						
							2024-05-07 09:25:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								49ab5a2b0e 
								
							 
						 
						
							
							
								
								Add embeddings ( #10931 )  
							
							 
							
							
							
						 
						
							2024-05-07 09:07:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0e0bd309e2 
								
							 
						 
						
							
							
								
								LLM: Enable Speculative on Fastchat ( #10909 )  
							
							 
							
							... 
							
							
							
							* init
* enable streamer
* update
* update
* remove deprecated
* update
* update
* add gpu example 
							
						 
						
							2024-05-06 10:06:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0edef1f94c 
								
							 
						 
						
							
							
								
								LLM: add min_new_tokens to all in one benchmark. ( #10911 )  
							
							 
							
							
							
						 
						
							2024-05-06 09:32:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								75dbf240ec 
								
							 
						 
						
							
							
								
								LLM: update split tensor conditions. ( #10872 )  
							
							 
							
							... 
							
							
							
							* LLM: update split tensor condition.
* add cond for split tensor.
* update priority of env.
* fix style.
* update env name. 
							
						 
						
							2024-04-30 17:07:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2c64754eb0 
								
							 
						 
						
							
							
								
								Add vLLM to ipex-llm serving image ( #10807 )  
							
							 
							
							... 
							
							
							
							* add vllm
* done
* doc work
* fix done
* temp
* add docs
* format
* add start-fastchat-service.sh
* fix 
							
						 
						
							2024-04-29 17:25:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1f876fd837 
								
							 
						 
						
							
							
								
								Add example for phi-3 ( #10881 )  
							
							 
							
							... 
							
							
							
							* Add example for phi-3
* add in readme and index
* fix
* fix
* fix
* fix indent
* fix 
							
						 
						
							2024-04-29 16:43:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d884c62dc4 
								
							 
						 
						
							
							
								
								remove new_layout parameter ( #10906 )  
							
							 
							
							
							
						 
						
							2024-04-29 10:31:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
fbcd7bc737  2024-04-29 10:16:02 +08:00
    Fix Loader issue with dtype fp16 (#10907)

c9fac8c26b  Guancheng Fu  2024-04-28 22:02:14 +08:00
    Fix sdp logic (#10896)
    * fix
    * fix

015d07a58f  Yina Chen  2024-04-28 17:21:00 +08:00
    Fix lookahead sample error & add update strategy (#10894)
    * Fix sample error & add update strategy
    * add mtl config
    * fix style
    * remove print

1a8a93d5e0  Yuwen Hu  2024-04-28 10:18:58 +08:00
    Further fix nightly perf (#10901)

ddfdaec137  Yuwen Hu  2024-04-28 09:39:29 +08:00
    Fix nightly perf (#10899)
    * Fix nightly perf by adding default value in benchmark for use_fp16_torch_dtype
    * further fixes

9752ffe979  Cengguang Zhang  2024-04-26 18:47:35 +08:00
    LLM: update split qkv native sdp. (#10895)
    * LLM: update split qkv native sdp.
    * fix typo.

990535b1cf  Guancheng Fu  2024-04-26 17:10:49 +08:00
    Add tensor parallel for vLLM (#10879)
    * initial
    * test initial tp
    * initial sup
    * fix format
    * fix
    * fix

f51bf018eb  binbin Deng  2024-04-26 15:28:11 +08:00
    Add benchmark script for pipeline parallel inference (#10873)

46ba962168  Yishuo Wang  2024-04-26 14:42:17 +08:00
    use new quantize kv (#10888)

3d4950b0f0  Xiangyu Tian  2024-04-26 13:24:28 +08:00
    LLM: Enable batch generate (world_size>1) in Deepspeed-AutoTP-FastAPI example (#10876)
    Enable batch generate (world_size>1) in Deepspeed-AutoTP-FastAPI example.

3e8ed54270  Wang, Jian4  2024-04-26 11:18:44 +08:00
    LLM: Fix bigdl_ipex_int8 warning (#10890)
fb3c268d13  Jin Qiao  2024-04-25 20:21:56 +08:00
    Add phi-3 to perf (#10883)

8811f268ff  Yina Chen  2024-04-25 19:23:37 +08:00
    Use new fp16 sdp in Qwen and modify the constraint (#10882)

0213c1c1da  Yuxuan Xia  2024-04-25 17:39:12 +08:00
    Add phi3 to the nightly test (#10885)
    * Add llama3 and phi2 nightly test
    * Change llama3-8b to llama3-8b-instruct
    * Add phi3 to nightly test
    * Add phi3 to nightly test
    ---------
    Co-authored-by: Yishuo Wang <yishuo.wang@intel.com>

ca2479be87  Yuxuan Xia  2024-04-25 17:24:37 +08:00
    Update scripts readme (#10725)
    * Update scripts readme
    * Update scripts readme
    * Update README
    * Update readme
    * Update readme
    * Update windows env check readme
    * Adjust env check readme
    * Update windows env check
    * Update env check readme
    * Adjust the env-check README
    * Modify the env-check README

cd369c2715  Cengguang Zhang  2024-04-25 14:01:51 +08:00
    LLM: add device id to benchmark utils. (#10877)

1ce8d7bcd9  Yang Wang  2024-04-24 10:17:13 -07:00
    Support the desc_act feature in GPTQ model (#10851)
    * support act_order
    * update versions
    * fix style
    * fix bug
    * clean up

dc27b3bc35  Yina Chen  2024-04-24 17:24:01 +08:00
    Use sdp when rest token seq_len > 1 in llama & mistral (for lookup & spec) (#10790)
    * update sdp condition
    * update
    * fix
    * update & test llama
    * mistral
    * fix style
    * update
    * fix style
    * remove pvc constrain
    * update ds on arc
    * fix style

844e18b1db  Yuxuan Xia  2024-04-24 16:58:56 +08:00
    Add llama3 and phi2 nightly test (#10874)
    * Add llama3 and phi2 nightly test
    * Change llama3-8b to llama3-8b-instruct
    ---------
    Co-authored-by: Yishuo Wang <yishuo.wang@intel.com>

c9feffff9a  binbin Deng  2024-04-24 16:02:27 +08:00
    LLM: support Qwen1.5-MoE-A2.7B-Chat pipeline parallel inference (#10864)

2d210817ff  Yishuo Wang  2024-04-24 15:17:40 +08:00
    add phi3 optimization (#10871)

eb39c61607  Cengguang Zhang  2024-04-24 14:32:02 +08:00
    LLM: add min new token to perf test. (#10869)
fb2a160af3  Yuwen Hu  2024-04-24 10:00:25 +08:00
    Add phi-2 to 2048-256 test for fixes (#10867)

fabf54e052  binbin Deng  2024-04-24 09:28:52 +08:00
    LLM: make pipeline parallel inference example more common (#10786)

328b1a1de9  hxsz1997  2024-04-23 19:10:09 +08:00
    Fix the not stop issue of llama3 examples (#10860)
    * fix not stop issue in GPU/HF-Transformers-AutoModels
    * fix not stop issue in GPU/PyTorch-Models/Model/llama3
    * fix not stop issue in CPU/HF-Transformers-AutoModels/Model/llama3
    * fix not stop issue in CPU/PyTorch-Models/Model/llama3
    * update the output in readme
    * update format
    * add reference
    * update prompt format
    * update output format in readme
    * update example output in readme

5c9eb5d0f5  Yuwen Hu  2024-04-23 19:08:29 +08:00
    Support llama-index install option for upstreaming purposes (#10866)
    * Support llama-index install option for upstreaming purposes
    * Small fix
    * Small fix

21bb8bd164  Yuwen Hu  2024-04-23 18:13:14 +08:00
    Add phi-2 to igpu performance test (#10865)

36eb8b2e96  ZehuaCao  2024-04-23 17:03:54 +08:00
    Add llama3 speculative example (#10856)
    * Initial llama3 speculative example
    * update README
    * update README
    * update README

763413b7e1  Cengguang Zhang  2024-04-23 16:13:25 +08:00
    LLM: support llama split tensor for long context in transformers>=4.36. (#10844)
    * LLm: support llama split tensor for long context in transformers>=4.36.
    * fix dtype.
    * fix style.
    * fix style.
    * fix style.
    * fix style.
    * fix dtype.
    * fix style.

92ea54b512  ZehuaCao  2024-04-23 14:28:31 +08:00
    Fix speculative decoding bug (#10855)

c9dee6cd0e  yb-peng  2024-04-23 14:02:09 +08:00
    Update 8192.txt (#10824)
    * Update 8192.txt
    * Update 8192.txt with original text

18c032652d  Wang, Jian4  2024-04-23 10:05:51 +08:00
    LLM: Add mixtral speculative CPU example (#10830)
    * init mixtral sp example
    * use different prompt_format
    * update output
    * update

5494aa55f6  Qiyuan Gong  2024-04-23 09:41:58 +08:00
    Downgrade datasets in axolotl example (#10849)
    * Downgrade datasets to 2.15.0 to address axolotl prepare issue https://github.com/OpenAccess-AI-Collective/axolotl/issues/1544
    Tks to @kwaa for providing the solution in https://github.com/intel-analytics/ipex-llm/issues/10821#issuecomment-2068861571
fe5a082b84  Yishuo Wang  2024-04-22 18:56:47 +08:00
    add phi-2 optimization (#10843)

47bd5f504c  Guancheng Fu  2024-04-22 17:51:32 +08:00
    [vLLM]Remove vllm-v1, refactor v2 (#10842)
    * remove vllm-v1
    * fix format

23c6a52fb0  Wang, Jian4  2024-04-22 15:53:09 +08:00
    LLM: Fix ipex torchscript=True error (#10832)
    * remove
    * update
    * remove torchscript

fc33aa3721  Heyang Sun  2024-04-22 14:34:52 +08:00
    fix missing import (#10839)

3daad242b8  Yina Chen  2024-04-22 14:05:50 +08:00
    Fix No module named 'transformers.cache_utils' with transformers < 4.36 (#10835)
    * update sdp condition
    * update
    * fix
    * fix 431 error
    * revert sdp & style fix
    * fix
    * meet comments
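Note on #10835: transformers.cache_utils only exists in transformers >= 4.36, so any optional use of it has to be guarded. A minimal sketch of such a guard (the fallback behaviour shown here is assumed, not taken from the actual patch):

    # transformers >= 4.36 ships the new KV-cache classes in transformers.cache_utils.
    try:
        from transformers.cache_utils import DynamicCache
    except ImportError:
        # transformers < 4.36: the module is absent; callers fall back to the legacy tuple cache.
        DynamicCache = None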
ae3b577537  Guancheng Fu  2024-04-22 11:07:10 +08:00
    Update README.md (#10833)

5f95054f97  Wang, Jian4  2024-04-22 10:03:19 +08:00
    LLM:Add qwen moe example libs md (#10828)

61c67af386  Guancheng Fu  2024-04-22 09:02:48 +08:00
    Fix vLLM-v2 install instructions (#10822)

caf75beef8  Guancheng Fu  2024-04-19 17:33:18 +08:00
    Disable sdpa (#10814)

57edf2033c  Yishuo Wang  2024-04-19 16:24:56 +08:00
    fix lookahead with transformers >= 4.36 (#10808)

1a885020ee  Ovo233  2024-04-19 15:34:39 +08:00
    Updated importing of top_k_top_p_filtering for transformers>=4.39.0 (#10794)
    * In transformers>=4.39.0, the top_k_top_p_filtering function has been deprecated and moved to the hugging face package trl. Thus, for versions >= 4.39.0, import this function from trl.
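Note on #10794: the change amounts to a version-dependent import. A hedged sketch of that guard, assuming the helper lives in trl.core as the commit message suggests:

    from packaging import version
    import transformers

    if version.parse(transformers.__version__) >= version.parse("4.39.0"):
        # transformers >= 4.39.0 no longer exports the helper; trl provides it.
        from trl.core import top_k_top_p_filtering
    else:
        # Older transformers still export it directly.
        from transformers import top_k_top_p_filtering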
07e8b045a9  Yuwen Hu  2024-04-19 15:09:58 +08:00
    Add Meta-llama-3-8B-Instruct and Yi-6B-Chat to igpu nightly perf (#10810)

08458b4f74  Yishuo Wang  2024-04-19 13:57:48 +08:00
    remove rms norm copy (#10793)

8153c3008e  Yang Wang  2024-04-18 11:01:33 -07:00
    Initial llama3 example (#10799)
    * Add initial hf huggingface GPU example
    * Small fix
    * Add llama3 gpu pytorch model example
    * Add llama 3 hf transformers CPU example
    * Add llama 3 pytorch model CPU example
    * Fixes
    * Small fix
    * Small fixes
    * Small fix
    * Small fix
    * Add links
    * update repo id
    * change prompt tuning url
    * remove system header if there is no system prompt
    ---------
    Co-authored-by: Yuwen Hu <yuwen.hu@intel.com>
    Co-authored-by: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com>

754b0ffecf  Ruonan Wang  2024-04-18 10:44:57 -07:00
    Fix pvc llama (#10798)
    * ifx
    * update

439c834ed3  Ruonan Wang  2024-04-18 19:11:31 +08:00
    LLM: add mixed precision for lm_head (#10795)
    * add mixed_quantization
    * meet code review
    * update
    * fix style
    * meet review

8796401b08  Yina Chen  2024-04-18 18:55:28 +08:00
    Support q4k in ipex-llm (#10796)
    * support q4k
    * update

0e8aac19e3  Ruonan Wang  2024-04-18 16:52:09 +08:00
    add q6k precision in ipex-llm (#10792)
    * add q6k
    * add initial 16k
    * update
    * fix style

e90e31719f  Qiyuan Gong  2024-04-18 16:38:32 +08:00
    axolotl lora example (#10789)
    * Add axolotl lora example
    * Modify readme
    * Add comments in yml
14ca42a048  Wang, Jian4  2024-04-18 15:56:52 +08:00
    LLM:Fix moe indexs error on cpu (#10791)

cbe7b5753f  Guancheng Fu  2024-04-18 15:29:20 +08:00
    Add vLLM[xpu] related code (#10779)
    * Add ipex-llm side change
    * add runable offline_inference
    * refactor to call vllm2
    * Verified async server
    * add new v2 example
    * add README
    * fix
    * change dir
    * refactor readme.md
    * add experimental
    * fix

053ec30737  Kai Huang  2024-04-18 15:27:18 +08:00
    Transformers ppl evaluation on wikitext (#10784)
    * tranformers code
    * cache

209c3501e6  Wang, Jian4  2024-04-18 14:54:05 +08:00
    LLM: Optimize qwen1.5 moe model (#10706)
    * update moe block
    * fix style
    * enable optmize MLP
    * enabel kv_cache
    * enable fuse rope
    * enable fused qkv
    * enable flash_attention
    * error sdp quantize
    * use old api
    * use fuse
    * use xetla
    * fix python style
    * update moe_blocks num
    * fix output error
    * add cpu sdpa
    * update
    * update
    * update

ff040c8f01  Ziteng Zhang  2024-04-18 13:48:10 +08:00
    LISA Finetuning Example (#10743)
    * enabling xetla only supports qtype=SYM_INT4 or FP8E5
    * LISA Finetuning Example on gpu
    * update readme
    * add licence
    * Explain parameters of lisa & Move backend codes to src dir
    * fix style
    * fix style
    * update readme
    * support chatglm
    * fix style
    * fix style
    * update readme
    * fix

581ebf6104  Heyang Sun  2024-04-18 13:47:41 +08:00
    GaLore Finetuning Example (#10722)
    * GaLore Finetuning Example
    * Update README.md
    * Update README.md
    * change data to HuggingFaceH4/helpful_instructions
    * Update README.md
    * Update README.md
    * shrink train size and delete cache before starting training to save memory
    * Update README.md
    * Update galore_finetuning.py
    * change model to llama2 3b
    * Update README.md

952e517db9  Yang Wang  2024-04-17 20:39:11 -07:00
    use config rope_theta (#10787)
    * use config rope_theta
    * fix style
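Note on #10787: the point is to take the RoPE base from the model config rather than hard-coding it. A minimal illustrative sketch (the function name is hypothetical; rope_theta is the standard attribute on Llama-style Hugging Face configs, and 10000.0 is the usual default):

    import torch

    def build_inv_freq(config, head_dim: int) -> torch.Tensor:
        # Prefer the RoPE base stored on the config; fall back to the common default.
        rope_theta = getattr(config, "rope_theta", 10000.0)
        return 1.0 / (rope_theta ** (torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim))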
31ea2f9a9f  Guancheng Fu  2024-04-18 11:07:27 +08:00
    Fix wrong output for Llama models on CPU (#10742)

e764f9b1b1  Xin Qiu  2024-04-18 10:03:53 +08:00
    Disable fast fused rope on UHD (#10780)
    * use decoding fast path
    * update
    * update
    * cleanup

ea5b373a97  Yina Chen  2024-04-17 17:41:55 +08:00
    Add lookahead GPU example (#10785)
    * Add lookahead example
    * fix style & attn mask
    * fix typo
    * address comments

a20271ffe4  Wang, Jian4  2024-04-17 16:49:59 +08:00
    LLM: Fix yi-6b fp16 error on pvc (#10781)
    * updat for yi fp16
    * update
    * update

0646e2c062  ZehuaCao  2024-04-17 16:19:57 +08:00
    Fix short prompt for IPEX_CPU speculative decoding cause no_attr error (#10783)

7ec82c6042  Cengguang Zhang  2024-04-17 15:34:59 +08:00
    LLM: add README.md for Long-Context examples. (#10765)
    * LLM: add readme to long-context examples.
    * add precision.
    * update wording.
    * add GPU type.
    * add Long-Context example to GPU examples.
    * fix comments.
    * update max input length.
    * update max length.
    * add output length.
    * fix wording.

766fe45222  Yina Chen  2024-04-17 11:27:35 +08:00
    Fix spec error caused by lookup pr (#10777)
    * Fix spec error
    * remove
    * fix style

9e5069437f  Qiyuan Gong  2024-04-17 10:23:43 +08:00
    Fix gradio version in axolotl example (#10776)
    * Change to gradio>=4.19.2

f2e923b3ca  Qiyuan Gong  2024-04-17 09:49:11 +08:00
    Axolotl v0.4.0 support (#10773)
    * Add Axolotl 0.4.0, remove legacy 0.3.0 support.
    * replace is_torch_bf16_gpu_available
    * Add HF_HUB_OFFLINE=1
    * Move transformers out of requirement
    * Refine readme and qlora.yml

26cae0a39c  Heyang Sun  2024-04-17 09:28:24 +08:00
    Update FLEX in Deepspeed README (#10774)
    * Update FLEX in Deepspeed README
    * Update README.md

c41730e024  Wenjing Margaret Mao  2024-04-16 18:11:56 +08:00
    edit 'ppl_result does not exist' issue, delete useless code (#10767)
    * edit ppl_result not exist issue, delete useless code
    * delete nonzero_min function
    ---------
    Co-authored-by: jenniew <jenniewang123@gmail.com>
899d392e2f  Yina Chen  2024-04-16 16:52:38 +08:00
    Support prompt lookup in ipex-llm (#10768)
    * lookup init
    * add lookup
    * fix style
    * remove redundant code
    * change param name
    * fix style

d30b22a81b  Qiyuan Gong  2024-04-16 14:47:45 +08:00
    Refine axolotl 0.3.0 documents and links (#10764)
    * Refine axolotl 0.3 based on comments
    * Rename requirements to requirement-xpu
    * Add comments for paged_adamw_32bit
    * change lora_r from 8 to 16

599a88db53  ZehuaCao  2024-04-16 14:03:23 +08:00
    Add deepsped-autoTP-Fastapi serving (#10748)
    * add deepsped-autoTP-Fastapi serving
    * add readme
    * add license
    * update
    * update
    * fix

0a62933d36  binbin Deng  2024-04-16 09:56:17 +08:00
    LLM: fix qwen AutoTP (#10766)

3e2662c87e  Cengguang Zhang  2024-04-16 09:32:30 +08:00
    LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)

73a67804a4  Jin Qiao  2024-04-15 17:42:52 +08:00
    GPU configuration update for examples (windows pip installer, etc.) (#10762)
    * renew chatglm3-6b gpu example readme
    fix
    fix
    fix
    * fix for comments
    * fix
    * fix
    * fix
    * fix
    * fix
    * apply on HF-Transformers-AutoModels
    * apply on PyTorch-Models
    * fix
    * fix

b5209d3ec1  yb-peng  2024-04-15 13:01:37 +08:00
    Update example/GPU/PyTorch-Models/Model/llava/README.md (#10757)
    * Update example/GPU/PyTorch-Models/Model/llava/README.md
    * Update README.md
    fix path in windows installation

3d561b60ac  binbin Deng  2024-04-15 12:18:25 +08:00
    LLM: add enable_xetla parameter for optimize_model API (#10753)

a9a6b6b7af  Jiao Wang  2024-04-12 16:27:01 -07:00
    Fix baichuan-13b issue on portable zip under transformers 4.36 (#10746)
    * fix baichuan-13b issue
    * update
    * update

9e668a5bf0  Jiao Wang  2024-04-12 10:15:48 -07:00
    fix_internlm-chat-7b-8k repo name in examples (#10747)

c3fc8f4b90  binbin Deng  2024-04-12 15:40:25 +08:00
    LLM: add bs limitation for llama softmax upcast to fp32 (#10752)
0d518aab8d  hxsz1997  2024-04-12 14:37:47 +08:00
    Merge pull request #10697 from MargarettMao/ceval
    combine english and chinese, remove nan

dd0d2df5af  jenniew  2024-04-12 14:28:46 +08:00
    Change fp16.csv mistral-7b-v0.1 into Mistral-7B-v0.1

7309f1ddf9  jenniew  2024-04-12 14:23:13 +08:00
    Mofidy Typos

cb594e1fc5  jenniew  2024-04-12 14:22:09 +08:00
    Mofidy Typos

382c18e600  jenniew  2024-04-12 14:15:48 +08:00
    Mofidy Typos

1a360823ce  jenniew  2024-04-12 14:13:21 +08:00
    Mofidy Typos

cdbb1de972  jenniew  2024-04-12 14:00:50 +08:00
    Mark Color Modification

9bbfcaf736  jenniew  2024-04-12 13:30:16 +08:00
    Mark Color Modification

bb34c6e325  jenniew  2024-04-12 13:26:36 +08:00
    Mark Color Modification

8086554d33  Yishuo Wang  2024-04-12 10:49:02 +08:00
    use new fp16 sdp in llama and mistral (#10734)

019293e1b9  Yang Wang  2024-04-11 10:12:55 -07:00
    Fuse MOE indexes computation (#10716)
    * try moe
    * use c++ cpu to compute indexes
    * fix style
b151a9b672  jenniew  2024-04-11 17:35:36 +08:00
    edit csv_to_html to combine en & zh

70ed9397f9  binbin Deng  2024-04-11 17:03:56 +08:00
    LLM: fix AttributeError of FP16Linear (#10740)

1256a2cc4e  Keyan (Kyrie) Zhang  2024-04-11 16:33:43 +08:00
    Add chatglm3 long input example (#10739)
    * Add long context input example for chatglm3
    * Small fix
    * Small fix
    * Small fix

fd473ddb1b  hxsz1997  2024-04-11 15:45:24 +08:00
    Merge pull request #10730 from MargarettMao/MargarettMao-parent_folder
    Edit ppl update_HTML_parent_folder

2d64630757  Qiyuan Gong  2024-04-11 14:02:31 +08:00
    Remove transformers version in axolotl example (#10736)
    * Remove transformers version in axolotl requirements.txt

2685c41318  yb-peng  2024-04-11 13:38:50 +08:00
    Modify all-in-one benchmark (#10726)
    * Update 8192 prompt in all-in-one
    * Add cpu_embedding param for linux api
    * Update run.py
    * Update README.md

301504aa8d  Xiangyu Tian  2024-04-11 13:12:49 +08:00
    Fix transformers version warning (#10732)

9bec233e4d  Wenjing Margaret Mao  2024-04-11 07:21:12 +08:00
    Delete python/llm/test/benchmark/perplexity/update_html_in_parent_folder.py
    Delete due to repetition

4b024b7aac  Cengguang Zhang  2024-04-10 16:59:06 +08:00
    LLM: optimize chatglm2 8k input. (#10723)
    * LLM: optimize chatglm2 8k input.
    * rename.
cd22cb8257  Yuxuan Xia  2024-04-10 15:06:00 +08:00
    Update Env check Script (#10709)
    * Update env check bash file
    * Update env-check

29bf28bd6f  Shaojun Liu  2024-04-10 14:41:27 +08:00
    Upgrade python to 3.11 in Docker Image (#10718)
    * install python 3.11 for cpu-inference docker image
    * update xpu-inference dockerfile
    * update cpu-serving image
    * update qlora image
    * update lora image
    * update document

b727767f00  Qiyuan Gong  2024-04-10 14:38:29 +08:00
    Add axolotl v0.3.0 with ipex-llm on Intel GPU (#10717)
    * Add axolotl v0.3.0 support on Intel GPU.
    * Add finetune example on llama-2-7B with Alpaca dataset.

c9e6d42ad1  Wang, Jian4  2024-04-10 11:24:06 +08:00
    LLM: Fix chatglm3-6b-32k error (#10719)
    * fix chatglm3-6b-32k
    * update style

585c174e92  Keyan (Kyrie) Zhang  2024-04-10 10:48:46 +08:00
    Read the value of KV_CACHE_ALLOC_BLOCK_LENGTH from the environment variables (#10707)
    * Read the value of KV_CACHE_ALLOC_BLOCK_LENGTH from the environment variables.
    * Fix style
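Note on #10707: reading the block length from the environment looks roughly like the sketch below (the default of 256 is illustrative only, not the value used in the repository). Casting to int matters because environment values are strings; the later #10771, listed above in this newest-first log, fixed exactly that kind of type handling.

    import os

    # Environment override with an illustrative fallback; os.environ returns strings, so cast to int.
    KV_CACHE_ALLOC_BLOCK_LENGTH = int(os.environ.get("KV_CACHE_ALLOC_BLOCK_LENGTH", 256))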
d1eaea509f  Jiao Wang  2024-04-09 14:24:46 -07:00
    update chatglm readme (#10659)

878a97077b  Jiao Wang  2024-04-09 13:47:07 -07:00
    Fix llava example to support transformerds 4.36 (#10614)
    * fix llava example
    * update

1e817926ba  Jiao Wang  2024-04-09 09:56:52 -07:00
    Fix low memory generation example issue in transformers 4.36 (#10702)
    * update cache in low memory generate
    * update