SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								ba0b939579 
								
							 
						 
						
							
							
								
								[LLM] Support transformers-v4.36.0 on mistral model ( #9744 )  
							
							 
							
							... 
							
							
							
							* add support transformers-v4.36.0 on mistral model
* python/llm/src/bigdl/llm/transformers/models/mistral.py
* make the redundant implementation as utils
* fix code style
* fix
* fix style
* update with utils enough_kv_room 
							
						 
						
							2023-12-22 09:59:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								e36111e713 
								
							 
						 
						
							
							
								
								mixtral fused qkv and rope ( #9724 )  
							
							 
							
							... 
							
							
							
							* mixtral fused qkv and rope
* fix and clean
* fix style
* update
* update
* fix
* update
* fix 
							
						 
						
							2023-12-22 09:26:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								e4f6e43675 
								
							 
						 
						
							
							
								
								safetensor to false ( #9728 )  
							
							 
							
							
							
						 
						
							2023-12-21 14:41:51 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								bb52239e0a 
								
							 
						 
						
							
							
								
								bigdl-llm stable version release & test ( #9732 )  
							
							 
							
							... 
							
							
							
							* stable version test
* trigger spr test
* update
* trigger
* test
* test
* test
* test
* test
* refine
* release linux first 
							
						 
						
							2023-12-21 22:55:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								d4d2ccdd9d 
								
							 
						 
						
							
							
								
								LLM: remove starcoder-15.5b ( #9748 )  
							
							 
							
							
							
						 
						
							2023-12-21 18:52:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								474c099559 
								
							 
						 
						
							
							
								
								LLM: using separate threads to do inference ( #9727 )  
							
							 
							
							... 
							
							
							
							* using separate threads to do inference
* resolve some comments
* resolve some comments
* revert llm_performance_tests.yml file 
							
						 
						
							2023-12-21 17:56:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								426660b88e 
								
							 
						 
						
							
							
								
								simplify qwen attention ( #9747 )  
							
							 
							
							
							
						 
						
							2023-12-21 17:53:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								984697afe2 
								
							 
						 
						
							
							
								
								LLM: Add bloom gguf support ( #9734 )  
							
							 
							
							... 
							
							
							
							* init
* update bloom add merges
* update
* update readme
* update for llama error
* update 
							
						 
						
							2023-12-21 14:06:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								df775cf316 
								
							 
						 
						
							
							
								
								fix python style ( #9742 )  
							
							 
							
							... 
							
							
							
							* fix python style
* fix
* fix 
							
						 
						
							2023-12-21 11:25:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								b06a3146c8 
								
							 
						 
						
							
							
								
								Fix 70b oom ( #9738 )  
							
							 
							
							... 
							
							
							
							* add default value to bigdl llm
* fix model oom 
							
						 
						
							2023-12-21 10:40:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								6c3e698bf1 
								
							 
						 
						
							
							
								
								mistral decoding_fast_path and fused mlp ( #9714 )  
							
							 
							
							... 
							
							
							
							* mistral decoding_fast_path and fused mlp
* meet code review 
							
						 
						
							2023-12-21 10:11:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								d157f623b6 
								
							 
						 
						
							
							
								
								Load Mixtral gguf in a block-wise way ( #9725 )  
							
							 
							
							... 
							
							
							
							* Load Mixtral gguf in a block-wise way
* refine 
							
						 
						
							2023-12-21 10:03:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								34bb804189 
								
							 
						 
						
							
							
								
								LLM: check csv and its corresponding yaml file ( #9702 )  
							
							 
							
							... 
							
							
							
							* LLM: check csv and its corresponding yaml file
* run PR arc perf test
* modify the name of some variables
* execute the check results script in right place
* use cp to replace mv command
* resolve some comments
* resolve more comments
* revert the llm_performance_test.yaml file 
							
						 
						
							2023-12-21 09:54:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								4bda975a3e 
								
							 
						 
						
							
							
								
								LLM: Align lowbit model config ( #9735 )  
							
							 
							
							... 
							
							
							
							* align lowbit model config 
							
						 
						
							2023-12-21 09:48:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								e1e921f425 
								
							 
						 
						
							
							
								
								LLM: gguf other model using dtype ( #9729 )  
							
							 
							
							
							
						 
						
							2023-12-21 09:33:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								13ea6330bd 
								
							 
						 
						
							
							
								
								optimize qwen rope ( #9737 )  
							
							 
							
							
							
						 
						
							2023-12-20 17:34:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								4c032a433e 
								
							 
						 
						
							
							
								
								[LLM] Add glibc checker ( #9624 )  
							
							 
							
							... 
							
							
							
							* Add glibc checker
* Add env BIGDL_GLIBC_CHECK to control glibc checker. The default is false, i.e., don't check. 
							
						 
						
							2023-12-20 16:52:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								cd652a1710 
								
							 
						 
						
							
							
								
								Support fp8 e5m2 on arc ( #9711 )  
							
							 
							
							... 
							
							
							
							* init
* fix style
* update
* fix style
* update 
							
						 
						
							2023-12-20 16:26:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								e54c428d30 
								
							 
						 
						
							
							
								
								add bf16/fp16 fuse mlp support ( #9726 )  
							
							 
							
							
							
						 
						
							2023-12-20 10:40:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								612651cb5d 
								
							 
						 
						
							
							
								
								fix typo ( #9723 )  
							
							 
							
							
							
						 
						
							2023-12-20 09:41:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								3aa8b66bc3 
								
							 
						 
						
							
							
								
								LLM: remove starcoder-15.5b model temporarily ( #9720 )  
							
							 
							
							
							
						 
						
							2023-12-19 20:14:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								522cf5ed82 
								
							 
						 
						
							
							
								
								[LLM] Improve chatglm2/3 rest token performance with long context ( #9716 )  
							
							 
							
							
							
						 
						
							2023-12-19 17:29:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f2e6abb563 
								
							 
						 
						
							
							
								
								fix mlp batch size check ( #9718 )  
							
							 
							
							
							
						 
						
							2023-12-19 14:22:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								1fa7793fc0 
								
							 
						 
						
							
							
								
								Load Mixtral GGUF Model ( #9690 )  
							
							 
							
							... 
							
							
							
							* Load Mixtral GGUF Model
* refactor
* fix empty tensor when to cpu
* update gpu and cpu readmes
* add dtype when set tensor into module 
							
						 
						
							2023-12-19 13:54:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
							
							
								
							
							
								d0a3095b97 
								
							 
						 
						
							
							
								
								[LLM] IPEX auto importer ( #9706 )  
							
							 
							
							... 
							
							
							
							* IPEX auto importer and get_ipex_version.
* Add BIGDL_IMPORT_IPEX to control auto import, default is false. 
							
						 
						
							2023-12-19 13:39:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f4fb58d99c 
								
							 
						 
						
							
							
								
								fusing qkv project and rope ( #9612 )  
							
							 
							
							... 
							
							
							
							* Try fusing qkv project and rope
* add fused mlp
* fuse append cache
* fix style and clean up code
* clean up 
							
						 
						
							2023-12-18 16:45:00 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								4c112ee70c 
								
							 
						 
						
							
							
								
								Rename qwen in model name for arc perf test ( #9712 )  
							
							 
							
							
							
						 
						
							2023-12-18 20:34:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								4d22add4af 
								
							 
						 
						
							
							
								
								LLM: fix qwen efficiency issue in perf-test.  
							
							 
							
							
							
						 
						
							2023-12-18 18:32:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								8ed89557e5 
								
							 
						 
						
							
							
								
								LLM: add mlp optimization of mixtral ( #9709 )  
							
							 
							
							
							
						 
						
							2023-12-18 16:59:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								b3647507c0 
								
							 
						 
						
							
							
								
								Fix harness workflow ( #9704 )  
							
							 
							
							... 
							
							
							
							* error when larger than 0.001
* fix env setup
* fix typo
* fix typo 
							
						 
						
							2023-12-18 15:42:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								12df70953e 
								
							 
						 
						
							
							
								
								LLM: add resume_from_checkpoint related section ( #9705 )  
							
							 
							
							
							
						 
						
							2023-12-18 12:27:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								320110d158 
								
							 
						 
						
							
							
								
								handle empty fused norm result ( #9688 )  
							
							 
							
							... 
							
							
							
							* handle empty fused norm result
* remove fast_rms_norm
* fix style 
							
						 
						
							2023-12-18 09:56:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								67cc155771 
								
							 
						 
						
							
							
								
								[LLM] Correct chat format of llama and add llama_stream_chat in chat.py  
							
							 
							
							... 
							
							
							
							* correct chat format of llama
* add llama_stream_chat 
							
						 
						
							2023-12-15 16:36:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								0d41b7ba7b 
								
							 
						 
						
							
							
								
								[LLM] Correct chat format & add stop words for chatglm3 in chat.py  
							
							 
							
							... 
							
							
							
							* correct chat format of chatglm3
* correct stop words of chatglm3 
							
						 
						
							2023-12-15 16:35:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								d57efd8eb9 
								
							 
						 
						
							
							
								
								[LLM] Add stop_word for Qwen model and correct qwen chat format in chat.py ( #9642 )  
							
							 
							
							... 
							
							
							
							* add stop words list for qwen
* change qwen chat format 
							
						 
						
							2023-12-15 14:53:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								d5b81af7bd 
								
							 
						 
						
							
							
								
								Support mixtral attention optimization on transformers-v4.36.0 ( #9674 )  
							
							 
							
							... 
							
							
							
							* add example code to support mistral/mixtral attention on transformers v4.36.0
* update
* style fix
* add update for seen-tokens
* support mixtral
* rm mistral change
* small fix
* add more comments and remove use_cache part
---------
Co-authored-by: plusbang <binbin1.deng@intel.com> 
							
						 
						
							2023-12-15 14:30:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								adbef56001 
								
							 
						 
						
							
							
								
								LLM: update qwen attention forward. ( #9695 )  
							
							 
							
							... 
							
							
							
							* feat: update qwen attention forward.
* fix: style. 
							
						 
						
							2023-12-15 14:06:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								b8437a1c1e 
								
							 
						 
						
							
							
								
								LLM: Add gguf mistral model support ( #9691 )  
							
							 
							
							... 
							
							
							
							* add mistral support
* need to upgrade transformers version
* update 
							
						 
						
							2023-12-15 13:37:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								496bb2e845 
								
							 
						 
						
							
							
								
								LLM: Support load BaiChuan model family gguf model ( #9685 )  
							
							 
							
							... 
							
							
							
							* support baichuan model family gguf model
* update gguf generate.py
* add verify models
* add support model_family
* update
* update style
* update type
* update readme
* update
* remove support model_family 
							
						 
						
							2023-12-15 13:34:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Lilac09 
								
							 
						 
						
							
							
							
							
								
							
							
								3afed99216 
								
							 
						 
						
							
							
								
								fix path issue ( #9696 )  
							
							 
							
							
							
						 
						
							2023-12-15 11:21:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								37f509bb95 
								
							 
						 
						
							
							
								
								Update readme ( #9692 )  
							
							 
							
							
							
						 
						
							2023-12-14 19:50:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								1f0245039d 
								
							 
						 
						
							
							
								
								LLM: check the final csv results for arc perf test ( #9684 )  
							
							 
							
							... 
							
							
							
							* LLM: check the final csv results for arc perf test
* delete useless python script
* change threshold
* revert the llm_performance_tests.yml 
							
						 
						
							2023-12-14 19:46:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								9a330bfc2b 
								
							 
						 
						
							
							
								
								fix fuse mlp when using q5_0 or fp8 ( #9689 )  
							
							 
							
							
							
						 
						
							2023-12-14 16:16:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								82ac2dbf55 
								
							 
						 
						
							
							
								
								[LLM] Small fixes for win igpu test for ipex 2.1 ( #9686 )  
							
							 
							
							... 
							
							
							
							* Fixes to install for igpu performance tests
* Small update for core performance tests model lists 
							
						 
						
							2023-12-14 15:39:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								3e8d198b57 
								
							 
						 
						
							
							
								
								LLM: add eval func ( #9662 )  
							
							 
							
							... 
							
							
							
							* Add eval func
* add left eval 
							
						 
						
							2023-12-14 14:59:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								21c7503a42 
								
							 
						 
						
							
							
								
								[LLM] Correct prompt format of Qwen in generate.py ( #9678 )  
							
							 
							
							... 
							
							
							
							* Change qwen prompt format to chatml 
							
						 
						
							2023-12-14 14:01:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
							
							
								
							
							
								223c9622f7 
								
							 
						 
						
							
							
								
								[LLM] Mixtral CPU examples ( #9673 )  
							
							 
							
							... 
							
							
							
							* Mixtral CPU PyTorch and hugging face examples, based on #9661  and #9671  
							
						 
						
							2023-12-14 10:35:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								5e46e0e5af 
								
							 
						 
						
							
							
								
								fix baichuan2-7b 1st token performance regression on xpu ( #9683 )  
							
							 
							
							... 
							
							
							
							* fix baichuan2-7b 1st token performance regression
* add comments
* fix style 
							
						 
						
							2023-12-14 09:58:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								877229f3be 
								
							 
						 
						
							
							
								
								[LLM]Add Yi-34B-AWQ to verified AWQ model. ( #9676 )  
							
							 
							
							... 
							
							
							
							* verify Yi-34B-AWQ
* update 
							
						 
						
							2023-12-14 09:55:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								68a4be762f 
								
							 
						 
						
							
							
								
								remove disco mixtral, update oneapi version ( #9671 )  
							
							 
							
							
							
						 
						
							2023-12-13 23:24:59 +08:00