Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								1dc680341b
								
							
						 | 
						
							
							
								
								fix phi-3-vision import (#11129)
							
							
							
							
							
						 | 
						
							2024-05-24 15:57:15 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Guancheng Fu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								7f772c5a4f
								
							
						 | 
						
							
							
								
								Add half precision for fastchat models (#11130)
							
							
							
							
							
						 | 
						
							2024-05-24 15:41:14 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Zhao Changmin
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								65f4212f89
								
							
						 | 
						
							
							
								
								Fix qwen 14b run into register attention fwd (#11128)
							
							
							
							
							
							
							
							* fix qwen 14b 
							
						 | 
						
							2024-05-24 14:45:07 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								1db9d9a63b
								
							
						 | 
						
							
							
								
								optimize internlm2 xcomposer again (#11124)
							
							
							
							
							
						 | 
						
							2024-05-24 13:44:52 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								9372ce87ce
								
							
						 | 
						
							
							
								
								fix internlm xcomposer2 fp16 (#11123)
							
							
							
							
							
						 | 
						
							2024-05-24 11:03:31 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Cengguang Zhang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								011b9faa5c
								
							
						 | 
						
							
							
								
								LLM: unify baichuan2-13b alibi mask dtype with model dtype. (#11107)
							
							
							
							
							
							
							
							* LLM: unify alibi mask dtype.
* fix comments. 
							
						 | 
						
							2024-05-24 10:27:53 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Jiao Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								0a06a6e1d4
								
							
						 | 
						
							
							
								
								Update tests for transformers 4.36 (#10858)
							
							
							
							
							
							
							
							* update unit test
* update
* update
* update
* update
* update
* fix gpu attention test
* update
* update
* update
* update
* update
* update
* update example test
* replace replit code
* update
* update
* update
* update
* set safe_serialization false
* perf test
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* delete
* update
* update
* update
* update
* update
* update
* revert
* update 
							
						 | 
						
							2024-05-24 10:26:38 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Xiangyu Tian
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								b3f6faa038
								
							
						 | 
						
							
							
								
								LLM: Add CPU vLLM entrypoint (#11083)
							
							
							
							
							
							
							
							Add CPU vLLM entrypoint and update CPU vLLM serving example. 
							
						 | 
						
							2024-05-24 09:16:59 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								797dbc48b8
								
							
						 | 
						
							
							
								
								fix phi-2 and phi-3 convert (#11116)
							
							
							
							
							
						 | 
						
							2024-05-23 17:37:37 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								37b98a531f
								
							
						 | 
						
							
							
								
								support running internlm xcomposer2 on gpu and add sdp optimization (#11115)
							
							
							
							
							
						 | 
						
							2024-05-23 17:26:24 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Zhao Changmin
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								c5e8b90c8d
								
							
						 | 
						
							
							
								
								Add Qwen register attention implementation (#11110)
							
							
							
							
							
							
							
							* qwen_register 
							
						 | 
						
							2024-05-23 17:17:45 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								0e53f20edb
								
							
						 | 
						
							
							
								
								support running internlm-xcomposer2 on cpu (#11111)
							
							
							
							
							
						 | 
						
							2024-05-23 16:36:09 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								cd4dff09ee
								
							
						 | 
						
							
							
								
								support phi-3 vision (#11101)
							
							
							
							
							
						 | 
						
							2024-05-22 17:43:50 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Xin Qiu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								71bcd18f44
								
							
						 | 
						
							
							
								
								fix qwen vl (#11090)
							
							
							
							
							
						 | 
						
							2024-05-21 18:40:29 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								f00625f9a4
								
							
						 | 
						
							
							
								
								refactor qwen2 (#11087)
							
							
							
							
							
						 | 
						
							2024-05-21 16:53:42 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								d830a63bb7
								
							
						 | 
						
							
							
								
								refactor qwen (#11074)
							
							
							
							
							
						 | 
						
							2024-05-20 18:08:37 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								74950a152a
								
							
						 | 
						
							
							
								
								Fix tgi_api_server error file name (#11075)
							
							
							
							
							
						 | 
						
							2024-05-20 16:48:40 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								4e97047d70
								
							
						 | 
						
							
							
								
								fix baichuan2 13b fp16 (#11071)
							
							
							
							
							
						 | 
						
							2024-05-20 11:21:20 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								a2e1578fd9
								
							
						 | 
						
							
							
								
								Merge tgi_api_server to main (#11036)
							
							
							
							
							
							
							
							* init
* fix style
* speculative can not use benchmark
* add tgi server readme 
							
						 | 
						
							2024-05-20 09:15:03 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								31ce3e0c13
								
							
						 | 
						
							
							
								
								refactor baichuan2-13b (#11064)
							
							
							
							
							
						 | 
						
							2024-05-17 16:25:30 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Ruonan Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								f1156e6b20
								
							
						 | 
						
							
							
								
								support gguf_q4k_m / gguf_q4k_s (#10887)
							
							
							
							
							
							
							
							* initial commit
* UPDATE
* fix style
* fix style
* add gguf_q4k_s
* update comment
* fix 
							
						 | 
						
							2024-05-17 14:30:09 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								981d668be6
								
							
						 | 
						
							
							
								
								refactor baichuan2-7b (#11062)
							
							
							
							
							
						 | 
						
							2024-05-17 13:01:34 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Ruonan Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								3a72e5df8c
								
							
						 | 
						
							
							
								
								disable mlp fusion of fp6 on mtl (#11059)
							
							
							
							
							
						 | 
						
							2024-05-17 10:10:16 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									SONG Ge
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								192ae35012
								
							
						 | 
						
							
							
								
								Add support for llama2 quantize_kv with transformers 4.38.0 (#11054)
							
							
							
							
							
							
							
							* add support for llama2 quantize_kv with transformers 4.38.0
* fix code style
* fix code style 
							
						 | 
						
							2024-05-16 22:23:39 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									SONG Ge
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								16b2a418be
								
							
						 | 
						
							
							
								
								hotfix native_sdp ut (#11046)
							
							
							
							
							
							
							
							* hotfix native_sdp
* update 
							
						 | 
						
							2024-05-16 17:15:37 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Xin Qiu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								6be70283b7
								
							
						 | 
						
							
							
								
								fix chatglm run error (#11045)
							
							
							
							
							
							
							
							* fix chatglm
* update
* fix style 
							
						 | 
						
							2024-05-16 15:39:18 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								8cae897643
								
							
						 | 
						
							
							
								
								use new rope in phi3 (#11047)
							
							
							
							
							
						 | 
						
							2024-05-16 15:12:35 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								59df750326
								
							
						 | 
						
							
							
								
								Use new sdp again (#11025)
							
							
							
							
							
						 | 
						
							2024-05-16 09:33:34 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									SONG Ge
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								9942a4ba69
								
							
						 | 
						
							
							
								
								[WIP] Support llama2 with transformers==4.38.0 (#11024)
							
							
							
							
							
							
							
							* support llama2 with transformers==4.38.0
* add support for quantize_qkv
* add original support for 4.38.0 now
* code style fix 
							
						 | 
						
							2024-05-15 18:07:00 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yina Chen
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								686f6038a8
								
							
						 | 
						
							
							
								
								Support fp6 save & load (#11034)
							
							
							
							
							
						 | 
						
							2024-05-15 17:52:02 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Ruonan Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								ac384e0f45
								
							
						 | 
						
							
							
								
								add fp6 mlp fusion (#11032)
							
							
							
							
							
							
							
							* add fp6 fusion
* add qkv fusion for fp6
* remove qkv first 
							
						 | 
						
							2024-05-15 17:42:50 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								2084ebe4ee
								
							
						 | 
						
							
							
								
								Enable fastchat benchmark latency (#11017)
							
							
							
							
							
							
							
							* enable fastchat benchmark
* add readme
* update readme
* update 
							
						 | 
						
							2024-05-15 14:52:09 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									hxsz1997
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								93d40ab127
								
							
						 | 
						
							
							
								
								Update lookahead strategy (#11021)
							
							
							
							
							
							
							
							* update lookahead strategy
* remove lines
* fix python style check 
							
						 | 
						
							2024-05-15 14:48:05 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								d9f71f1f53
								
							
						 | 
						
							
							
								
								Update benchmark util for example using (#11027)
							
							
							
							
							
							
							
							* mv benchmark_util.py to utils/
* remove
* update 
							
						 | 
						
							2024-05-15 14:16:35 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								fad1dbaf60
								
							
						 | 
						
							
							
								
								use sdp fp8 causal kernel (#11023)
							
							
							
							
							
						 | 
						
							2024-05-15 10:22:35 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								ee325e9cc9
								
							
						 | 
						
							
							
								
								fix phi3 (#11022)
							
							
							
							
							
						 | 
						
							2024-05-15 09:32:12 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Zhao Changmin
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								0a732bebe7
								
							
						 | 
						
							
							
								
								Add phi3 cached RotaryEmbedding (#11013)
							
							
							
							
							
							
							
							* phi3cachedrotaryembed
* pep8 
							
						 | 
						
							2024-05-15 08:16:43 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yina Chen
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								893197434d
								
							
						 | 
						
							
							
								
								Add fp6 support on gpu (#11008)
							
							
							
							
							
							
							
							* add fp6 support
* fix style 
							
						 | 
						
							2024-05-14 16:31:44 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Zhao Changmin
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								b03c859278
								
							
						 | 
						
							
							
								
								Add phi3RMS (#10988)
							
							
							
							
							
							
							
							* phi3RMS 
							
						 | 
						
							2024-05-14 15:16:27 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								170e3d65e0
								
							
						 | 
						
							
							
								
								use new sdp and fp32 sdp (#11007)
							
							
							
							
							
						 | 
						
							2024-05-14 14:29:18 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Guancheng Fu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								a465111cf4
								
							
						 | 
						
							
							
								
								Update README.md (#11003)
							
							
							
							
							
						 | 
						
							2024-05-13 16:44:48 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Guancheng Fu
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								74997a3ed1
								
							
						 | 
						
							
							
								
								Adding load_low_bit interface for ipex_llm_worker (#11000)
							
							
							
							
							
							
							
							* initial implementation, need tests
* fix
* fix baichuan issue
* fix typo 
							
						 | 
						
							2024-05-13 15:30:19 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								1b3c7a6928
								
							
						 | 
						
							
							
								
								remove phi3 empty cache (#10997)
							
							
							
							
							
						 | 
						
							2024-05-13 14:09:55 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								ad96f32ce0
								
							
						 | 
						
							
							
								
								optimize phi3 1st token performance (#10981)
							
							
							
							
							
						 | 
						
							2024-05-10 17:33:46 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Cengguang Zhang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								cfed76b2ed
								
							
						 | 
						
							
							
								
								LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937)
							
							
							
							
							
							
							
							* LLM: add split tensor support for baichuan2-7b and qwen1.5-7b.
* fix style.
* fix style.
* fix style.
* add support for mistral and fix condition threshold.
* fix  style.
* fix comments. 
							
						 | 
						
							2024-05-10 16:40:15 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Kai Huang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								a6342cc068
								
							
						 | 
						
							
							
								
								Empty cache after phi first attention to support 4k input (#10972)
							
							
							
							
							
							
							
							* empty cache
* fix style 
							
						 | 
						
							2024-05-09 19:50:04 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								e753125880
								
							
						 | 
						
							
							
								
								use fp16_sdp when head_dim=96 (#10976)
							
							
							
							
							
						 | 
						
							2024-05-09 17:02:59 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								697ca79eca
								
							
						 | 
						
							
							
								
								use quantize kv and sdp in phi3-mini (#10973)
							
							
							
							
							
						 | 
						
							2024-05-09 15:16:18 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Wang, Jian4
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								3209d6b057
								
							
						 | 
						
							
							
								
								Fix speculative llama3 no stop error (#10963)
							
							
							
							
							
							
							
							* fix normal
* add eos_tokens_id on sp and add list if
* update
* no none 
							
						 | 
						
							2024-05-08 17:09:47 +08:00 | 
						
						
							
							
							
								
							
							
						 | 
					
				
					
						
							
								
								
									 
									Yishuo Wang
								
							 
						 | 
						
							
							
								
								
							
							
							
								
							
							
								2ebec0395c
								
							
						 | 
						
							
							
								
								optimize phi-3-mini-128 (#10959)
							
							
							
							
							
						 | 
						
							2024-05-08 16:33:17 +08:00 | 
						
						
							
							
							
								
							
							
						 |