Cengguang Zhang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								011b9faa5c
								
							
						 | 
						
							
							
								
								LLM: unify baichuan2-13b alibi mask dtype with model dtype. (#11107)
							
							
							
							
							
							
							
							* LLM: unify alibi mask dtype.
* fix comments. 
							
						 | 
						
							2024-05-24 10:27:53 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Jiao Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								0a06a6e1d4
								
							
						 | 
						
							
							
								
								Update tests for transformers 4.36 (#10858)
							
							
							
							
							
							
							
							* update unit test
* update
* update
* update
* update
* update
* fix gpu attention test
* update
* update
* update
* update
* update
* update
* update example test
* replace replit code
* update
* update
* update
* update
* set safe_serialization false
* perf test
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* delete
* update
* update
* update
* update
* update
* update
* revert
* update 
							
						 | 
						
							2024-05-24 10:26:38 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Xiangyu Tian
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								b3f6faa038
								
							
						 | 
						
							
							
								
								LLM: Add CPU vLLM entrypoint (#11083)
							
							
							
							
							
							
							
							Add CPU vLLM entrypoint and update CPU vLLM serving example. 
							
						 | 
						
							2024-05-24 09:16:59 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								797dbc48b8
								
							
						 | 
						
							
							
								
								fix phi-2 and phi-3 convert (#11116)
							
							
							
							
							
						 | 
						
							2024-05-23 17:37:37 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								37b98a531f
								
							
						 | 
						
							
							
								
								support running internlm xcomposer2 on gpu and add sdp optimization (#11115)
							
							
							
							
							
						 | 
						
							2024-05-23 17:26:24 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Zhao Changmin
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								c5e8b90c8d
								
							
						 | 
						
							
							
								
								Add Qwen register attention implementation (#11110)
							
							
							
							
							
							
							
							* qwen_register 
							
						 | 
						
							2024-05-23 17:17:45 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								0e53f20edb
								
							
						 | 
						
							
							
								
								support running internlm-xcomposer2 on cpu (#11111)
							
							
							
							
							
						 | 
						
							2024-05-23 16:36:09 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yuwen Hu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								d36b41d59e
								
							
						 | 
						
							
							
								
								Add setuptools limitation for ipex-llm[xpu] (#11102)
							
							
							
							
							
							
							
							* Add setuptools limitation for ipex-llm[xpu]
* llamaindex option update 
							
						 | 
						
							2024-05-22 18:20:30 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								cd4dff09ee
								
							
						 | 
						
							
							
								
								support phi-3 vision (#11101)
							
							
							
							
							
						 | 
						
							2024-05-22 17:43:50 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Zhao Changmin
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								15d906a97b
								
							
						 | 
						
							
							
								
								Update linux igpu run script (#11098)
							
							
							
							
							
							
							
							* update run script 
							
						 | 
						
							2024-05-22 17:18:07 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Kai Huang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								f63172ef63
								
							
						 | 
						
							
							
								
								Align ppl with llama.cpp (#11055)
							
							
							
							
							
							
							
							* update script
* remove
* add header
* update readme 
							
						 | 
						
							2024-05-22 16:43:11 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Qiyuan Gong
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								f6c9ffe4dc
								
							
						 | 
						
							
							
								
								Add WANDB_MODE and HF_HUB_OFFLINE to XPU finetune README (#11097)
							
							
							
							
							
							
							
							* Add WANDB_MODE=offline to avoid multi-GPUs finetune errors.
* Add HF_HUB_OFFLINE=1 to avoid Hugging Face related errors. 
							
						 | 
						
							2024-05-22 15:20:53 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Shaojun Liu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								584439e498
								
							
						 | 
						
							
							
								
								update homepage url for ipex-llm (#11094)
							
							
							
							
							
							
							
							* update homepage url
* Update python version to 3.11
* Update long description 
							
						 | 
						
							2024-05-22 11:10:44 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Xin Qiu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								71bcd18f44
								
							
						 | 
						
							
							
								
								fix qwen vl (#11090)
							
							
							
							
							
						 | 
						
							2024-05-21 18:40:29 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								f00625f9a4
								
							
						 | 
						
							
							
								
								refactor qwen2 (#11087)
							
							
							
							
							
						 | 
						
							2024-05-21 16:53:42 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Qiyuan Gong
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								492ed3fd41
								
							
						 | 
						
							
							
								
								Add verified models to GPU finetune README (#11088)
							
							
							
							
							
							
							
							* Add verified models to GPU finetune README 
							
						 | 
						
							2024-05-21 15:49:15 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Qiyuan Gong
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								1210491748
								
							
						 | 
						
							
							
								
								ChatGLM3, Baichuan2 and Qwen1.5 QLoRA example (#11078)
							
							
							
							
							
							
							
							* Add chatglm3, qwen15-7b and baichuan-7b QLoRA alpaca example
* Remove unnecessary tokenization setting. 
							
						 | 
						
							2024-05-21 15:29:43 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									ZehuaCao
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								842d6dfc2d
								
							
						 | 
						
							
							
								
								Further Modify CPU example (#11081)
							
							
							
							
							
							
							
							* modify CPU example
* update 
							
						 | 
						
							2024-05-21 13:55:47 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								d830a63bb7
								
							
						 | 
						
							
							
								
								refactor qwen (#11074)
							
							
							
							
							
						 | 
						
							2024-05-20 18:08:37 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								74950a152a
								
							
						 | 
						
							
							
								
								Fix tgi_api_server error file name (#11075)
							
							
							
							
							
						 | 
						
							2024-05-20 16:48:40 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								4e97047d70
								
							
						 | 
						
							
							
								
								fix baichuan2 13b fp16 (#11071)
							
							
							
							
							
						 | 
						
							2024-05-20 11:21:20 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									binbin Deng
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								7170dd9192
								
							
						 | 
						
							
							
								
								Update guide for running qwen with AutoTP (#11065)
							
							
							
							
							
						 | 
						
							2024-05-20 10:53:17 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								a2e1578fd9
								
							
						 | 
						
							
							
								
								Merge tgi_api_server to main (#11036)
							
							
							
							
							
							
							
							* init
* fix style
* speculative can not use benchmark
* add tgi server readme 
							
						 | 
						
							2024-05-20 09:15:03 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								31ce3e0c13
								
							
						 | 
						
							
							
								
								refactor baichuan2-13b (#11064)
							
							
							
							
							
						 | 
						
							2024-05-17 16:25:30 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									ZehuaCao
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								56cb992497
								
							
						 | 
						
							
							
								
								LLM: Modify CPU Installation Command for most examples (#11049)
							
							
							
							
							
							
							
							* init
* refine
* refine
* refine
* modify hf-agent example
* modify all CPU model example
* remove readthedoc modify
* replace powershell with cmd
* fix repo
* fix repo
* update
* remove comment on windows code block
* update
* update
* update
* update
---------
Co-authored-by: xiangyuT <xiangyu.tian@intel.com> 
							
						 | 
						
							2024-05-17 15:52:20 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Ruonan Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								f1156e6b20
								
							
						 | 
						
							
							
								
								support gguf_q4k_m / gguf_q4k_s (#10887)
							
							
							
							
							
							
							
							* initial commit
* UPDATE
* fix style
* fix style
* add gguf_q4k_s
* update comment
* fix 
							
						 | 
						
							2024-05-17 14:30:09 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								981d668be6
								
							
						 | 
						
							
							
								
								refactor baichuan2-7b (#11062)
							
							
							
							
							
						 | 
						
							2024-05-17 13:01:34 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Xiangyu Tian
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								d963e95363
								
							
						 | 
						
							
							
								
								LLM: Modify CPU Installation Command for documentation (#11042)
							
							
							
							
							
							
							
							* init
* refine
* refine
* refine
* refine comments 
							
						 | 
						
							2024-05-17 10:14:00 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Ruonan Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								3a72e5df8c
								
							
						 | 
						
							
							
								
								disable mlp fusion of fp6 on mtl (#11059)
							
							
							
							
							
						 | 
						
							2024-05-17 10:10:16 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									SONG Ge
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								192ae35012
								
							
						 | 
						
							
							
								
								Add support for llama2 quantize_kv with transformers 4.38.0 (#11054)
							
							
							
							
							
							
							
							* add support for llama2 quantize_kv with transformers 4.38.0
* fix code style
* fix code style 
							
						 | 
						
							2024-05-16 22:23:39 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									SONG Ge
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								16b2a418be
								
							
						 | 
						
							
							
								
								hotfix native_sdp ut (#11046)
							
							
							
							
							
							
							
							* hotfix native_sdp
* update 
							
						 | 
						
							2024-05-16 17:15:37 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Xin Qiu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								6be70283b7
								
							
						 | 
						
							
							
								
								fix chatglm run error (#11045)
							
							
							
							
							
							
							
							* fix chatglm
* update
* fix style 
							
						 | 
						
							2024-05-16 15:39:18 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								8cae897643
								
							
						 | 
						
							
							
								
								use new rope in phi3 (#11047)
							
							
							
							
							
						 | 
						
							2024-05-16 15:12:35 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Jin Qiao
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								9a96af4232
								
							
						 | 
						
							
							
								
								Remove oneAPI pip install command in related examples (#11030)
							
							
							
							
							
							
							
							* Remove pip install command in windows installation guide
* fix chatglm3 installation guide
* Fix gemma cpu example
* Apply on other examples
* fix 
							
						 | 
						
							2024-05-16 10:46:29 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Xiangyu Tian
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								612a365479
								
							
						 | 
						
							
							
								
								LLM: Install CPU version torch with extras [all] (#10868)
							
							
							
							
							
							
							
							Modify setup.py to install CPU version torch with extras [all] 
							
						 | 
						
							2024-05-16 10:39:55 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								59df750326
								
							
						 | 
						
							
							
								
								Use new sdp again (#11025)
							
							
							
							
							
						 | 
						
							2024-05-16 09:33:34 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									SONG Ge
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								9942a4ba69
								
							
						 | 
						
							
							
								
								[WIP] Support llama2 with transformers==4.38.0 (#11024)
							
							
							
							
							
							
							
							* support llama2 with transformers==4.38.0
* add support for quantize_qkv
* add original support for 4.38.0 now
* code style fix 
							
						 | 
						
							2024-05-15 18:07:00 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yina Chen
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								686f6038a8
								
							
						 | 
						
							
							
								
								Support fp6 save & load (#11034)
							
							
							
							
							
						 | 
						
							2024-05-15 17:52:02 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Ruonan Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								ac384e0f45
								
							
						 | 
						
							
							
								
								add fp6 mlp fusion (#11032)
							
							
							
							
							
							
							
							* add fp6 fusion
* add qkv fusion for fp6
* remove qkv first 
							
						 | 
						
							2024-05-15 17:42:50 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								2084ebe4ee
								
							
						 | 
						
							
							
								
								Enable fastchat benchmark latency (#11017)
							
							
							
							
							
							
							
							* enable fastchat benchmark
* add readme
* update readme
* update 
							
						 | 
						
							2024-05-15 14:52:09 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									hxsz1997
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								93d40ab127
								
							
						 | 
						
							
							
								
								Update lookahead strategy (#11021)
							
							
							
							
							
							
							
							* update lookahead strategy
* remove lines
* fix python style check 
							
						 | 
						
							2024-05-15 14:48:05 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								d9f71f1f53
								
							
						 | 
						
							
							
								
								Update benchmark util for example using (#11027)
							
							
							
							
							
							
							
							* mv benchmark_util.py to utils/
* remove
* update 
							
						 | 
						
							2024-05-15 14:16:35 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									binbin Deng
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								4053a6ef94
								
							
						 | 
						
							
							
								
								Update environment variable setting in AutoTP with arc (#11018)
							
							
							
							
							
						 | 
						
							2024-05-15 10:23:58 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								fad1dbaf60
								
							
						 | 
						
							
							
								
								use sdp fp8 causal kernel (#11023)
							
							
							
							
							
						 | 
						
							2024-05-15 10:22:35 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								ee325e9cc9
								
							
						 | 
						
							
							
								
								fix phi3 (#11022)
							
							
							
							
							
						 | 
						
							2024-05-15 09:32:12 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Ziteng Zhang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								7d3791c819
								
							
						 | 
						
							
							
								
								[LLM] Add llama3 alpaca qlora example (#11011)
							
							
							
							
							
							
							
							* Add llama3 finetune example based on alpaca qlora example 
							
						 | 
						
							2024-05-15 09:17:32 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Zhao Changmin
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								0a732bebe7
								
							
						 | 
						
							
							
								
								Add phi3 cached RotaryEmbedding (#11013)
							
							
							
							
							
							
							
							* phi3cachedrotaryembed
* pep8 
							
						 | 
						
							2024-05-15 08:16:43 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yina Chen
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								893197434d
								
							
						 | 
						
							
							
								
								Add fp6 support on gpu (#11008)
							
							
							
							
							
							
							
							* add fp6 support
* fix style 
							
						 | 
						
							2024-05-14 16:31:44 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Zhao Changmin
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								b03c859278
								
							
						 | 
						
							
							
								
								Add phi3RMS (#10988)
							
							
							
							
							
							
							
							* phi3RMS 
							
						 | 
						
							2024-05-14 15:16:27 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								170e3d65e0
								
							
						 | 
						
							
							
								
								use new sdp and fp32 sdp (#11007)
							
							
							
							
							
						 | 
						
							2024-05-14 14:29:18 +08:00 | 
						
						
							
							
							
								
							
							
						 |