Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								730d9ec811 
								
							 
						 
						
							
							
								
								Add Qwen2-audio example ( #11835 )  
							
							 
							
							... 
							
							
							
							* add draft for qwen2-audio
* update example for `Qwen2-Audio`
* update
* update
* add warmup 
							
						 
						
							2024-08-27 13:35:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b11b28e9a9 
								
							 
						 
						
							
							
								
								update CORE_XE_VERSION to 2.6.0 ( #11929 )  
							
							 
							
							
							
						 
						
							2024-08-27 13:10:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e246f1e258 
								
							 
						 
						
							
							
								
								update llama3 npu example ( #11933 )  
							
							 
							
							
							
						 
						
							2024-08-27 13:03:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								14dddfc0d6 
								
							 
						 
						
							
							
								
								Update NPU example readme ( #11931 )  
							
							 
							
							
							
						 
						
							2024-08-27 12:44:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6c3eb1e1e8 
								
							 
						 
						
							
							
								
								refactor from_pretrained API for NPU ( #11927 )  
							
							 
							
							
							
						 
						
							2024-08-27 09:50:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7ca557aada 
								
							 
						 
						
							
							
								
								LLM: Fix vLLM CPU convert error ( #11926 )  
							
							 
							
							
							
						 
						
							2024-08-27 09:22:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5a8fc1baa2 
								
							 
						 
						
							
							
								
								update troubleshooting for llama.cpp and ollama ( #11890 )  
							
							 
							
							... 
							
							
							
							* update troubleshooting for llama.cpp and ollama
* update
* update 
							
						 
						
							2024-08-26 20:55:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c1d07bc626 
								
							 
						 
						
							
							
								
								Support streaming for lookup generation ( #11922 )  
							
							 
							
							... 
							
							
							
							* Support streaming for lookup generation
* Small update
* Style fixes
* Add origin generate full back for batch inference and beam search; support input length threshold judgement for directly input with input_ids
* Fix lookup stream generate with eos token
* Small fixes
* Small fix
* index fix
* Small fix 
							
						 
						
							2024-08-26 19:33:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a0bbd8e28d 
								
							 
						 
						
							
							
								
								All-in-one benchmark update regarding performance mode for input length threshold ( #11920 )  
							
							 
							
							... 
							
							
							
							* All-in-one benchmark update regarding performance mode input length threshold
* typo fix 
							
						 
						
							2024-08-26 18:52:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								019f725d4d 
								
							 
						 
						
							
							
								
								[NPU] Add support for running mp minicpm model on npu ( #11909 )  
							
							 
							
							... 
							
							
							
							* add initial support for npu minicpm mp
* fix minicpm-1b abnormal output error 
							
						 
						
							2024-08-26 17:52:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dd303776cf 
								
							 
						 
						
							
							
								
								Add troubleshooting about transpose value setting  
							
							 
							
							
							
						 
						
							2024-08-26 16:06:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e5dc4e9123 
								
							 
						 
						
							
							
								
								disable outdated scheduled workflow ( #11915 )  
							
							 
							
							
							
						 
						
							2024-08-24 07:17:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								24c279e0ae 
								
							 
						 
						
							
							
								
								Update IPEX_LLM_PERFORMANCE_MODE with input length threshold ( #11908 )  
							
							 
							
							... 
							
							
							
							* Update IPEX_LLM_PERFORMANCE_MODE with input length threshold
* Update based on comments. And and judgement for inputs_embeds
* Fix for benchmarking purposes
* Update based on comments
* Small fix 
							
						 
						
							2024-08-23 20:49:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								303a090a6b 
								
							 
						 
						
							
							
								
								Add lm_head optimization on NPU ( #11903 )  
							
							 
							
							
							
						 
						
							2024-08-23 15:51:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								23631cd357 
								
							 
						 
						
							
							
								
								disable lm_head opt for baichuan2-13b ( #11905 )  
							
							 
							
							
							
						 
						
							2024-08-23 15:39:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4cf640c548 
								
							 
						 
						
							
							
								
								update docker image tag to 2.2.0-SNAPSHOT ( #11904 )  
							
							 
							
							
							
						 
						
							2024-08-23 13:57:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								650e6e6ce4 
								
							 
						 
						
							
							
								
								Merge pull request  #11891  from hxsz1997/baichuan2-compresskv  
							
							 
							
							... 
							
							
							
							Add compress_kv for Baichuan2 
							
						 
						
							2024-08-23 06:09:58 +03:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4a61f7d20d 
								
							 
						 
						
							
							
								
								update mlp of llama ( #11897 )  
							
							 
							
							... 
							
							
							
							* update mlp of llama
* relax threshold of  mlp test
* revert code 
							
						 
						
							2024-08-22 20:34:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								420ce7d164 
								
							 
						 
						
							
							
								
								Fix non-stop at eos token problem for lookup generation ( #11896 )  
							
							 
							
							... 
							
							
							
							* Fix non-stop by eos_token_id problem for lookup
* Small fix
* Add judgement when generation_config.eos_token_id is None
* Fix based on comments 
							
						 
						
							2024-08-22 18:55:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								4cf03d6212 
								
							 
						 
						
							
							
								
								update baichuan-7b  
							
							 
							
							
							
						 
						
							2024-08-22 18:16:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								794abe2ce8 
								
							 
						 
						
							
							
								
								update npu-readme ( #11900 )  
							
							 
							
							
							
						 
						
							2024-08-22 17:49:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								278b191dc1 
								
							 
						 
						
							
							
								
								Fix optimize lm head error ( #11899 )  
							
							 
							
							
							
						 
						
							2024-08-22 17:45:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c5b51d41fb 
								
							 
						 
						
							
							
								
								Update pypi tag to 2.2.0.dev0 ( #11895 )  
							
							 
							
							
							
						 
						
							2024-08-22 16:48:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								18662dca1c 
								
							 
						 
						
							
							
								
								change 5 pytorch/huggingface models to fp16 ( #11894 )  
							
							 
							
							
							
						 
						
							2024-08-22 16:12:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5c4ed00593 
								
							 
						 
						
							
							
								
								Add lightweight-serving whisper asr example ( #11847 )  
							
							 
							
							... 
							
							
							
							* add asr init
* update for pp
* update style
* update readme
* update reamde 
							
						 
						
							2024-08-22 15:46:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								eb1e65f8a9 
								
							 
						 
						
							
							
								
								add comment  
							
							 
							
							
							
						 
						
							2024-08-22 15:14:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								a2be3d7501 
								
							 
						 
						
							
							
								
								add comment of compress kv in attention forward  
							
							 
							
							
							
						 
						
							2024-08-22 15:11:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a8e2573421 
								
							 
						 
						
							
							
								
								added tokenization file for codegeex2-6b in pytorch-models( #11875 )  
							
							 
							
							... 
							
							
							
							* added tokenization file
* tokenization file readme update
* optional 
							
						 
						
							2024-08-22 14:37:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								ce7de77085 
								
							 
						 
						
							
							
								
								add comment of change in model forward  
							
							 
							
							
							
						 
						
							2024-08-22 14:29:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								42398a0045 
								
							 
						 
						
							
							
								
								add comment  
							
							 
							
							
							
						 
						
							2024-08-22 13:17:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								48a827aa07 
								
							 
						 
						
							
							
								
								fix typos  
							
							 
							
							
							
						 
						
							2024-08-22 11:35:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								8a5df93de2 
								
							 
						 
						
							
							
								
								fix typos  
							
							 
							
							
							
						 
						
							2024-08-22 11:33:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								01ed397e7a 
								
							 
						 
						
							
							
								
								fix typos  
							
							 
							
							
							
						 
						
							2024-08-22 11:31:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								c6ed1c412d 
								
							 
						 
						
							
							
								
								fix typos  
							
							 
							
							
							
						 
						
							2024-08-22 11:26:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								2a0aa9271b 
								
							 
						 
						
							
							
								
								fix typos  
							
							 
							
							
							
						 
						
							2024-08-22 11:23:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								4adadddbbc 
								
							 
						 
						
							
							
								
								fix typos  
							
							 
							
							
							
						 
						
							2024-08-22 11:12:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bac98baab9 
								
							 
						 
						
							
							
								
								Make performance test install specific ipex-llm version from pypi ( #11892 )  
							
							 
							
							
							
						 
						
							2024-08-22 11:10:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								6a5ca17afc 
								
							 
						 
						
							
							
								
								fix typoes  
							
							 
							
							
							
						 
						
							2024-08-22 11:09:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								72a7bf624b 
								
							 
						 
						
							
							
								
								Support qwen2-1.5b with fused decoderlayer optimization on NPU ( #11888 )  
							
							 
							
							
							
						 
						
							2024-08-22 11:09:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								6bb9035788 
								
							 
						 
						
							
							
								
								fix typos  
							
							 
							
							
							
						 
						
							2024-08-22 11:08:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Huang, Xinshengzi 
								
							 
						 
						
							
							
							
							
								
							
							
								86248b0505 
								
							 
						 
						
							
							
								
								add compress_kv for baichuan2  
							
							 
							
							
							
						 
						
							2024-08-22 10:59:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bdbe995b01 
								
							 
						 
						
							
							
								
								Update README.md ( #11889 )  
							
							 
							
							... 
							
							
							
							Set datasets version to 2.16.1. Clear out the transformers version requirement. 
							
						 
						
							2024-08-22 09:40:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cc27321441 
								
							 
						 
						
							
							
								
								support chatglm4 in lookup ( #11855 )  
							
							 
							
							
							
						 
						
							2024-08-21 15:53:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0236de3ac2 
								
							 
						 
						
							
							
								
								set IPEX_LLM_LAST_LM_HEAD=1 as default ( #11885 )  
							
							 
							
							
							
						 
						
							2024-08-21 15:06:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8c5c7f32dd 
								
							 
						 
						
							
							
								
								Update doc for running npu generate example with ipex-llm[npu] ( #11876 )  
							
							 
							
							... 
							
							
							
							* update doc for running npu generate example with ipex-llm[npu]
* switch max_prompt_len to 512 to fix compile error on mtl 
							
						 
						
							2024-08-21 13:45:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								209d42ab79 
								
							 
						 
						
							
							
								
								Refactor npu mp to make it easier to integrate new models ( #11873 )  
							
							 
							
							... 
							
							
							
							* Refactor npu mp to make it easier to integrate new models
* fix style
* move layer functions to base 
							
						 
						
							2024-08-20 20:58:47 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								537c0d2767 
								
							 
						 
						
							
							
								
								fix vllm qwen2 models ( #11879 )  
							
							 
							
							
							
						 
						
							2024-08-21 11:05:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bd1e490d62 
								
							 
						 
						
							
							
								
								fix phi3 ( #11878 )  
							
							 
							
							
							
						 
						
							2024-08-21 10:31:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								eab6f6dde4 
								
							 
						 
						
							
							
								
								Spr perf small fix ( #11874 )  
							
							 
							
							
							
						 
						
							2024-08-21 09:35:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								37106a877c 
								
							 
						 
						
							
							
								
								igpu performance test smal fix ( #11872 )  
							
							 
							
							
							
						 
						
							2024-08-21 03:09:14 +08:00