JIN Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								d74834ff4c 
								
							 
						 
						
							
							
								
								LLM: add gpu pytorch-models example llama2 and chatglm2 ( #9142 )  
							
							 
							
							
							
						 
						
							2023-10-12 13:41:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								4f34557224 
								
							 
						 
						
							
							
								
								LLM: support num_beams in all-in-one benchmark ( #9141 )  
							
							 
							
							... 
							
							
							
							* support num_beams
* fix 
							
						 
						
							2023-10-12 13:35:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								62ac7ae444 
								
							 
						 
						
							
							
								
								LLM: fix inaccurate input / output tokens of current all-in-one benchmark ( #9137 )  
							
							 
							
							... 
							
							
							
							* first fix
* fix all apis
* fix 
							
						 
						
							2023-10-11 17:13:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								eb3fb18eb4 
								
							 
						 
						
							
							
								
								LLM: improve PyTorch API doc ( #9128 )  
							
							 
							
							
							
						 
						
							2023-10-11 15:03:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								995b0f119f 
								
							 
						 
						
							
							
								
								LLM: update some gpu examples ( #9136 )  
							
							 
							
							
							
						 
						
							2023-10-11 14:23:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1c8d5da362 
								
							 
						 
						
							
							
								
								LLM: fix llama tokenizer for all-in-one benchmark ( #9129 )  
							
							 
							
							... 
							
							
							
							* fix tokenizer for gpu benchmark
* fix ipex fp16
* meet code review
* fix 
							
						 
						
							2023-10-11 13:39:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2ad67a18b1 
								
							 
						 
						
							
							
								
								LLM: add mistral examples ( #9121 )  
							
							 
							
							
							
						 
						
							2023-10-11 13:38:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1363e666fc 
								
							 
						 
						
							
							
								
								LLM: update benchmark_util.py for beam search ( #9126 )  
							
							 
							
							... 
							
							
							
							* update reorder_cache
* fix 
							
						 
						
							2023-10-11 09:41:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
							
							
								
							
							
								e8c5645067 
								
							 
						 
						
							
							
								
								add LLM example of aquila on GPU ( #9056 )  
							
							 
							
							... 
							
							
							
							* aquila, dolly-v1, dolly-v2, vicuna 
							
						 
						
							2023-10-10 17:01:35 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								388f688ef3 
								
							 
						 
						
							
							
								
								LLM: update setup.py to add bigdl-core-xe package ( #9122 )  
							
							 
							
							
							
						 
						
							2023-10-10 15:02:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								1709beba5b 
								
							 
						 
						
							
							
								
								LLM: Explicitly close pickle file pointer before removing temporary directory ( #9120 )  
							
							 
							
							... 
							
							
							
							* fp close 
							
						 
						
							2023-10-10 14:57:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								0e09dd926b 
								
							 
						 
						
							
							
								
								[LLM] Fix example test ( #9118 )  
							
							 
							
							... 
							
							
							
							* Update llm example test link due to example layout change
* Add better change detect 
							
						 
						
							2023-10-10 13:24:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ad7d9231f5 
								
							 
						 
						
							
							
								
								LLM: add benchmark script for Max gpu and ipex fp16 gpu ( #9112 )  
							
							 
							
							... 
							
							
							
							* add pvc bash
* meet code review
* rename to run-max-gpu.sh 
							
						 
						
							2023-10-10 10:18:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								e4d1457a70 
								
							 
						 
						
							
							
								
								LLM: improve transformers style API doc ( #9113 )  
							
							 
							
							
							
						 
						
							2023-10-10 09:31:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								65212451cc 
								
							 
						 
						
							
							
								
								[LLM] Small update to performance tests ( #9106 )  
							
							 
							
							... 
							
							
							
							* small updates to llm performance tests regarding model handling
* Small fix 
							
						 
						
							2023-10-09 16:55:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								edccfb2ed3 
								
							 
						 
						
							
							
								
								LLM: Check model device type ( #9092 )  
							
							 
							
							... 
							
							
							
							* check model device 
							
						 
						
							2023-10-09 15:49:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								5e9962b60e 
								
							 
						 
						
							
							
								
								LLM: update example layout ( #9046 )  
							
							 
							
							
							
						 
						
							2023-10-09 15:36:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								4c4f8d1663 
								
							 
						 
						
							
							
								
								[LLM]Fix Arc falcon abnormal output issue ( #9096 )  
							
							 
							
							... 
							
							
							
							* update
* update
* fix error & style
* fix style
* update train
* to input_seq_size 
							
						 
						
							2023-10-09 15:09:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								548e4dd5fe 
								
							 
						 
						
							
							
								
								LLM: Adapt transformers models for optimize model SL ( #9022 )  
							
							 
							
							... 
							
							
							
							* LLM: Adapt transformers model for SL 
							
						 
						
							2023-10-09 11:13:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f64257a093 
								
							 
						 
						
							
							
								
								LLM: basic api support for esimd fp16 ( #9067 )  
							
							 
							
							... 
							
							
							
							* basic api support for fp16
* fix style
* fix
* fix error and style
* fix style
* meet code review
* update based on comments 
							
						 
						
							2023-10-09 11:05:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									JIN Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								65373d2a8b 
								
							 
						 
						
							
							
								
								LLM: adjust portable zip content ( #9054 )  
							
							 
							
							... 
							
							
							
							* LLM: adjust portable zip content
* LLM: adjust portable zip README 
							
						 
						
							2023-10-09 10:51:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								b3e94a32d4 
								
							 
						 
						
							
							
								
								change log4error import ( #9098 )  
							
							 
							
							
							
						 
						
							2023-10-08 09:23:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								78ea7ddb1c 
								
							 
						 
						
							
							
								
								Combine apply_rotary_pos_emb for gpt-neox ( #9074 )  
							
							 
							
							
							
						 
						
							2023-10-07 16:27:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								36dd4afd61 
								
							 
						 
						
							
							
								
								Fix llama when rope scaling is not None ( #9086 )  
							
							 
							
							... 
							
							
							
							* Fix llama when rope scaling is not None
* fix style
* fix style 
							
						 
						
							2023-10-06 13:27:37 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								fcb1c618a0 
								
							 
						 
						
							
							
								
								using bigdl-llm fused rope for llama ( #9066 )  
							
							 
							
							... 
							
							
							
							* optimize llama xpu rope
* fix bug
* fix style
* refine append cache
* remove check
* do not cache cos sin
* remove unnecessary changes
* clean up
* fix style
* check for training 
							
						 
						
							2023-10-06 09:57:29 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								aefa5a5bfe 
								
							 
						 
						
							
							
								
								Qwen kv cache ( #9079 )  
							
							 
							
							... 
							
							
							
							* qwen and aquila
* update
* update
* style 
							
						 
						
							2023-10-05 11:59:17 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d5ca1f32b6 
								
							 
						 
						
							
							
								
								Aquila KV cache optimization ( #9080 )  
							
							 
							
							... 
							
							
							
							* update
* update
* style 
							
						 
						
							2023-10-05 11:10:57 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								88565c76f6 
								
							 
						 
						
							
							
								
								add export merged model example ( #9018 )  
							
							 
							
							... 
							
							
							
							* add export merged model example
* add sources
* add script
* fix style 
							
						 
						
							2023-10-04 21:18:52 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								0cd8f1c79c 
								
							 
						 
						
							
							
								
								Use ipex fused rms norm for llama ( #9081 )  
							
							 
							
							... 
							
							
							
							* also apply rmsnorm
* fix cpu 
							
						 
						
							2023-10-04 21:04:55 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								fb883100e7 
								
							 
						 
						
							
							
								
								LLM: support chatglm-18b convert attention forward in benchmark scripts. ( #9072 )  
							
							 
							
							... 
							
							
							
							* add chatglm-18b convert.
* fix if statement.
* fix 
							
						 
						
							2023-09-28 14:04:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6de2189e90 
								
							 
						 
						
							
							
								
								[LLM] fix chatglm main choice ( #9073 )  
							
							 
							
							
							
						 
						
							2023-09-28 11:23:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ad62c58b33 
								
							 
						 
						
							
							
								
								LLM: Enable jemalloc in benchmark scripts. ( #9058 )  
							
							 
							
							... 
							
							
							
							* enable jemalloc.
* fix readme. 
							
						 
						
							2023-09-26 15:37:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								b4a1266ef0 
								
							 
						 
						
							
							
								
								[WIP] LLM: add kv cache support for internlm. ( #9036 )  
							
							 
							
							... 
							
							
							
							* LLM: add kv cache support for internlm
* add internlm apply_rotary_pos_emb
* fix.
* fix style. 
							
						 
						
							2023-09-25 14:16:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								975da86e00 
								
							 
						 
						
							
							
								
								LLM: fix gptneox kv cache ( #9044 )  
							
							 
							
							
							
						 
						
							2023-09-25 13:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								26213a5829 
								
							 
						 
						
							
							
								
								LLM: Change benchmark bf16 load format. ( #9035 )  
							
							 
							
							... 
							
							
							
							* LLM: Change benchmark bf16 load format.
* comment on bf16 chatglm.
* fix. 
							
						 
						
							2023-09-22 17:38:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									JinBridge 
								
							 
						 
						
							
							
							
							
								
							
							
								023555fb1f 
								
							 
						 
						
							
							
								
								LLM: Add one-click installer for Windows ( #8999 )  
							
							 
							
							... 
							
							
							
							* LLM: init one-click installer for windows
* LLM: fix typo in one-click installer readme
* LLM: one-click installer try except logic
* LLM: one-click installer add dependency
* LLM: one-click installer adjust README.md
* LLM: one-click installer split README and add zip compress in setup.bat
* LLM: one-click installer verified internlm and llama2 and replace gif
* LLM: remove one-click installer images
* LLM: finetune the one-click installer README.md
* LLM: fix typo in one-click installer README.md
* LLM: rename one-click installer to portable executable
* LLM: rename other places to portable executable
* LLM: rename the zip filename to executable
* LLM: update .gitignore
* LLM: add colorama to setup.bat 
							
						 
						
							2023-09-22 14:46:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								028a6d9383 
								
							 
						 
						
							
							
								
								MPT model optimize for long sequence ( #9020 )  
							
							 
							
							... 
							
							
							
							* mpt_long_seq
* update
* update
* update
* style
* style2
* update 
							
						 
						
							2023-09-21 21:27:23 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b943d73844 
								
							 
						 
						
							
							
								
								LLM: refactor kv cache ( #9030 )  
							
							 
							
							... 
							
							
							
							* refactor utils
* meet code review; update all models
* small fix 
							
						 
						
							2023-09-21 21:28:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								868511cf02 
								
							 
						 
						
							
							
								
								LLM: fix kv cache issue of bloom and falcon. ( #9029 )  
							
							 
							
							
							
						 
						
							2023-09-21 18:12:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bf51ec40b2 
								
							 
						 
						
							
							
								
								LLM: Fix empty cache ( #9024 )  
							
							 
							
							... 
							
							
							
							* fix
* fix
* update example 
							
						 
						
							2023-09-21 17:16:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								714884414e 
								
							 
						 
						
							
							
								
								fix error ( #9025 )  
							
							 
							
							
							
						 
						
							2023-09-21 16:42:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								edb225530b 
								
							 
						 
						
							
							
								
								add bark ( #9016 )  
							
							 
							
							
							
						 
						
							2023-09-21 12:24:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								fa47967583 
								
							 
						 
						
							
							
								
								[LLM] Optimize kv_cache for gptj model family ( #9010 )  
							
							 
							
							... 
							
							
							
							* optimize gptj model family attention
* add license and comment for dolly-model
* remove xpu mentioned
* remove useless info
* code style
* style fix
* code style in gptj fix
* remove gptj arch
* move apply_rotary_pos_emb into utils
* kv_seq_length update
* use hidden_states instead of query layer to reach batch size 
							
						 
						
							2023-09-21 10:42:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								b3cad7de57 
								
							 
						 
						
							
							
								
								LLM: add bloom kv cache support ( #9012 )  
							
							 
							
							... 
							
							
							
							* LLM: add bloom kv cache support
* fix style. 
							
						 
						
							2023-09-20 21:10:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								156af15d1e 
								
							 
						 
						
							
							
								
								Add NF3 ( #9008 )  
							
							 
							
							... 
							
							
							
							* add nf3
* grammar 
							
						 
						
							2023-09-20 20:03:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								6981745fe4 
								
							 
						 
						
							
							
								
								Optimize kv_cache for gpt-neox model family ( #9015 )  
							
							 
							
							... 
							
							
							
							* override gptneox
* style
* move to utils
* revert 
							
						 
						
							2023-09-20 19:59:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									JinBridge 
								
							 
						 
						
							
							
							
							
								
							
							
								48b503c630 
								
							 
						 
						
							
							
								
								LLM: add example of aquila ( #9006 )  
							
							 
							
							... 
							
							
							
							* LLM: add example of aquila
* LLM: replace AquilaChat with Aquila
* LLM: shorten prompt of aquila example 
							
						 
						
							2023-09-20 15:52:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								735a17f7b4 
								
							 
						 
						
							
							
								
								LLM: add kv cache to falcon family. ( #8995 )  
							
							 
							
							... 
							
							
							
							* add kv cache to falcon family.
* fix: import error.
* refactor
* update comments.
* add two version falcon attention forward.
* fix
* fix.
* fix.
* fix.
* fix style.
* fix style. 
							
						 
						
							2023-09-20 15:36:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								94a7f8917b 
								
							 
						 
						
							
							
								
								LLM: fix optimized kv cache for baichuan-13b ( #9009 )  
							
							 
							
							... 
							
							
							
							* fix baichuan 13b
* fix style
* fix
* fix style 
							
						 
						
							2023-09-20 15:30:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c88f6ec457 
								
							 
						 
						
							
							
								
								Experiment XPU QLora Finetuning ( #8937 )  
							
							 
							
							... 
							
							
							
							* Support xpu finetuning
* support xpu finetuning
* fix style
* fix style
* fix style
* refine example
* add readme
* refine readme
* refine api
* fix fp16
* fix example
* refactor
* fix style
* fix compute type
* add qlora
* refine training args
* fix example
* fix style
* fast path for inference
* address comments
* refine readme
* revert lint 
							
						 
						
							2023-09-19 10:15:44 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								51518e029d 
								
							 
						 
						
							
							
								
								Update llm readme ( #9005 )  
							
							 
							
							
							
						 
						
							2023-09-19 20:01:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								249386261c 
								
							 
						 
						
							
							
								
								LLM: add Baichuan2 cpu example ( #9002 )  
							
							 
							
							... 
							
							
							
							* add baichuan2 cpu examples
* add link
* update prompt 
							
						 
						
							2023-09-19 18:08:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								004c45c2be 
								
							 
						 
						
							
							
								
								LLM: Support optimized kv_cache for baichuan family ( #8997 )  
							
							 
							
							... 
							
							
							
							* add initial support for baichuan attention
* support baichuan1
* update based on comment
* update based on comment
* support baichuan2
* update link, change how to judge baichuan2
* fix style
* add model parameter for pos emb
* update based on comment 
							
						 
						
							2023-09-19 15:38:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								37bb0cbf8f 
								
							 
						 
						
							
							
								
								Speed up gpt-j in gpubenchmark ( #9000 )  
							
							 
							
							... 
							
							
							
							* Speedup gpt-j in gpubenchmark
* meet code review 
							
						 
						
							2023-09-19 14:22:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								2a05581da7 
								
							 
						 
						
							
							
								
								LLM: Apply low_cpu_mem_usage algorithm on optimize_model API ( #8987 )  
							
							 
							
							... 
							
							
							
							* low_cpu_mem_usage 
							
						 
						
							2023-09-18 21:41:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								8299b68fea 
								
							 
						 
						
							
							
								
								update readme. ( #8996 )  
							
							 
							
							
							
						 
						
							2023-09-18 17:06:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								c1d25a51a8 
								
							 
						 
						
							
							
								
								LLM: add optimize_model example for bert ( #8975 )  
							
							 
							
							
							
						 
						
							2023-09-18 16:18:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								74338fd291 
								
							 
						 
						
							
							
								
								LLM: add auto torch dtype in benchmark. ( #8981 )  
							
							 
							
							
							
						 
						
							2023-09-18 15:48:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cabe7c0358 
								
							 
						 
						
							
							
								
								LLM: add baichuan2 example for arc ( #8994 )  
							
							 
							
							... 
							
							
							
							* add baichuan2 examples
* add link
* small fix 
							
						 
						
							2023-09-18 14:32:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								0a552d5bdc 
								
							 
						 
						
							
							
								
								LLM: fix installation on windows ( #8989 )  
							
							 
							
							
							
						 
						
							2023-09-18 11:14:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								32716106e0 
								
							 
						 
						
							
							
								
								update use_cache=True ( #8986 )  
							
							 
							
							
							
						 
						
							2023-09-18 07:59:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								64ee1d7689 
								
							 
						 
						
							
							
								
								update run_transformer_int4_gpu ( #8983 )  
							
							 
							
							... 
							
							
							
							* xpuperf
* update run.py
* clean up
* update
* update
* meet code review 
							
						 
						
							2023-09-15 15:10:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								16b9412e80 
								
							 
						 
						
							
							
								
								tie_word_embeddings ( #8977 )  
							
							 
							
							... 
							
							
							
							tie_word_embeddings 
							
						 
						
							2023-09-15 10:17:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									JinBridge 
								
							 
						 
						
							
							
							
							
								
							
							
								c12b8f24b6 
								
							 
						 
						
							
							
								
								LLM: add use_cache=True for all gpu examples ( #8971 )  
							
							 
							
							
							
						 
						
							2023-09-15 09:54:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								d1b62ef2f2 
								
							 
						 
						
							
							
								
								[bigdl-llm] Remove serving-dep from all_requires ( #8980 )  
							
							 
							
							... 
							
							
							
							* Remove serving-dep from all_requires
* pin fastchat version 
							
						 
						
							2023-09-14 16:59:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bcf456070c 
								
							 
						 
						
							
							
								
								fix bloom-176b int overflow ( #8973 )  
							
							 
							
							
							
						 
						
							2023-09-14 14:37:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								dd57623650 
								
							 
						 
						
							
							
								
								LLM: reduce GPU memory for optimize_model=True ( #8965 )  
							
							 
							
							... 
							
							
							
							* reduce gpu memory for llama & chatglm
* change to device type 
							
						 
						
							2023-09-13 17:27:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								be29c75c18 
								
							 
						 
						
							
							
								
								LLM: refactor gpu examples ( #8963 )  
							
							 
							
							... 
							
							
							
							* restructure
* change to hf-transformers-models/ 
							
						 
						
							2023-09-13 14:47:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								cca84b0a64 
								
							 
						 
						
							
							
								
								LLM: update llm benchmark scripts. ( #8943 )  
							
							 
							
							... 
							
							
							
							* update llm benchmark scripts.
* change tranformer_bf16 to pytorch_autocast_bf16.
* add autocast in transformer int4.
* revert autocast.
* add "pytorch_autocast_bf16" to doc
* fix comments. 
							
						 
						
							2023-09-13 12:23:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								7132ef6081 
								
							 
						 
						
							
							
								
								[LLM Doc] Add optimize_model doc in transformers api ( #8957 )  
							
							 
							
							... 
							
							
							
							* add optimize in from_pretrained
* add api doc for load_low_bit
* update api docs following comments
* update api docs
* update
* reord comments 
							
						 
						
							2023-09-13 10:42:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								c32c260ce2 
								
							 
						 
						
							
							
								
								LLM: Add save/load API in optimize_model to support general pytorch model ( #8956 )  
							
							 
							
							... 
							
							
							
							* support hf format SL 
							
						 
						
							2023-09-13 10:22:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								4de73f592e 
								
							 
						 
						
							
							
								
								LLM: add gpu example of chinese-llama-2-7b ( #8960 )  
							
							 
							
							... 
							
							
							
							* add gpu example of chinese-llama2
* update model name and link
* update name 
							
						 
						
							2023-09-13 10:16:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								0bf5857908 
								
							 
						 
						
							
							
								
								[LLM] Integrate FastChat as a serving framework for BigDL-LLM ( #8821 )  
							
							 
							
							... 
							
							
							
							* Finish changing
* format
* add licence
* Add licence
* fix
* fix
* Add xpu support for fschat
* Fix patch
* Also install webui dependencies
* change setup.py dependency installs
* fix
* format
* final test 
							
						 
						
							2023-09-13 09:28:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								cb534ed5c4 
								
							 
						 
						
							
							
								
								[LLM] Add Arc demo gif to readme and readthedocs ( #8958 )  
							
							 
							
							... 
							
							
							
							* Add arc demo in main readme
* Small style fix
* Realize using table
* Update based on comments
* Small update
* Try to solve with height problem
* Small fix
* Update demo for inner llm readme
* Update demo video for readthedocs
* Small fix
* Update based on comments 
							
						 
						
							2023-09-13 09:23:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								dcaa4dc130 
								
							 
						 
						
							
							
								
								LLM: Support GQA on llama kvcache ( #8938 )  
							
							 
							
							... 
							
							
							
							* support GQA 
							
						 
						
							2023-09-12 12:18:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2d81521019 
								
							 
						 
						
							
							
								
								LLM: add optimize_model examples for llama2 and chatglm  ( #8894 )  
							
							 
							
							... 
							
							
							
							* add llama2 and chatglm optimize_model examples
* update default usage
* update command and some descriptions
* move folder and remove general_int4 descriptions
* change folder name 
							
						 
						
							2023-09-12 10:36:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								f00c442d40 
								
							 
						 
						
							
							
								
								fix accelerate ( #8946 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-09-12 09:27:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								16761c58be 
								
							 
						 
						
							
							
								
								Make llama attention stateless ( #8928 )  
							
							 
							
							... 
							
							
							
							* Make llama attention stateless
* fix style
* fix chatglm
* fix chatglm xpu 
							
						 
						
							2023-09-11 18:21:50 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								e62eda74b8 
								
							 
						 
						
							
							
								
								refine ( #8912 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-09-11 16:40:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								df165ad165 
								
							 
						 
						
							
							
								
								init ( #8933 )  
							
							 
							
							
							
						 
						
							2023-09-11 14:30:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b3f5dd5b5d 
								
							 
						 
						
							
							
								
								LLM: update q8 convert xpu&cpu ( #8930 )  
							
							 
							
							
							
						 
						
							2023-09-08 16:01:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								33d75adadf 
								
							 
						 
						
							
							
								
								[LLM]Support q5_0 on arc ( #8926 )  
							
							 
							
							... 
							
							
							
							* support q5_0
* delete
* fix style 
							
						 
						
							2023-09-08 15:52:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								ca35c93825 
								
							 
						 
						
							
							
								
								[LLM] Fix langchain UT ( #8929 )  
							
							 
							
							... 
							
							
							
							* Change dependency version for langchain uts
* Downgrade pandas version instead; and update example readme accordingly 
							
						 
						
							2023-09-08 13:51:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								ea0853c0b5 
								
							 
						 
						
							
							
								
								update benchmark_utils readme ( #8925 )  
							
							 
							
							... 
							
							
							
							* update readme
* meet code review 
							
						 
						
							2023-09-08 10:30:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ee98cdd85c 
								
							 
						 
						
							
							
								
								Support latest transformer version ( #8923 )  
							
							 
							
							... 
							
							
							
							* Support latest transformer version
* fix style 
							
						 
						
							2023-09-07 19:01:32 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								25428b22b4 
								
							 
						 
						
							
							
								
								Fix chatglm2 attention and kv cache ( #8924 )  
							
							 
							
							... 
							
							
							
							* fix chatglm2 attention
* fix bf16 bug
* make model stateless
* add utils
* cleanup
* fix style 
							
						 
						
							2023-09-07 18:54:29 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								b209b8f7b6 
								
							 
						 
						
							
							
								
								[LLM] Fix arc qtype != q4_0 generate issue ( #8920 )  
							
							 
							
							... 
							
							
							
							* Fix arc precision!=q4_0 generate issue
* meet comments 
							
						 
						
							2023-09-07 08:56:36 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								3d2efe9608 
								
							 
						 
						
							
							
								
								LLM: update llm latency benchmark. ( #8922 )  
							
							 
							
							
							
						 
						
							2023-09-07 19:00:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								7897eb4b51 
								
							 
						 
						
							
							
								
								LLM: add benchmark scripts on GPU ( #8916 )  
							
							 
							
							
							
						 
						
							2023-09-07 18:08:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								d8a01d7c4f 
								
							 
						 
						
							
							
								
								fix chatglm in run.py ( #8919 )  
							
							 
							
							
							
						 
						
							2023-09-07 16:44:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								e9de9d9950 
								
							 
						 
						
							
							
								
								benchmark for native int4  ( #8918 )  
							
							 
							
							... 
							
							
							
							* native4
* update
* update
* update 
							
						 
						
							2023-09-07 15:56:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c0797ea232 
								
							 
						 
						
							
							
								
								LLM: update setup to specify bigdl-core-xe version ( #8913 )  
							
							 
							
							
							
						 
						
							2023-09-07 15:11:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								057e77e229 
								
							 
						 
						
							
							
								
								LLM: update benchmark_utils.py to handle do_sample=True ( #8903 )  
							
							 
							
							
							
						 
						
							2023-09-07 14:20:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c34400e6b0 
								
							 
						 
						
							
							
								
								Use new layout for xpu qlinear ( #8896 )  
							
							 
							
							... 
							
							
							
							* use new layout for xpu qlinear
* fix style 
							
						 
						
							2023-09-06 21:55:33 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								8bc1d8a17c 
								
							 
						 
						
							
							
								
								LLM: Fix discards in optimize_model with non-hf models  and add openai whisper example ( #8877 )  
							
							 
							
							... 
							
							
							
							* openai-whisper 
							
						 
						
							2023-09-07 10:35:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								5d9942a3ca 
								
							 
						 
						
							
							
								
								transformer int4 and native int4's benchmark script for 32 256 1k 2k input ( #8871 )  
							
							 
							
							... 
							
							
							
							* transformer
* move
* update
* add header
* update all-in-one
* clean up 
							
						 
						
							2023-09-07 09:49:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								bfc71fbc15 
								
							 
						 
						
							
							
								
								Add known issue in arc voice assistant example ( #8902 )  
							
							 
							
							... 
							
							
							
							* add known issue in voice assistant example
* update cpu 
							
						 
						
							2023-09-07 09:28:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								db26c7b84d 
								
							 
						 
						
							
							
								
								[LLM] Update readme gif & image url to the ones hosted on readthedocs ( #8900 )  
							
							 
							
							
							
						 
						
							2023-09-06 20:04:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								7a71ced78f 
								
							 
						 
						
							
							
								
								[LLM Docs] Remain API Docs Issues Solution ( #8780 )  
							
							 
							
							... 
							
							
							
							* langchain readthedocs update
* solve langchain.llms.transformersllm issues
* langchain.embeddings.transformersembeddings/transformersllms issues
* update docs for get_num_tokens
* add low_bit api doc
* add optimizer model api doc
* update rst index
* fix comments style
* update docs following the comments
* update api doc 
							
						 
						
							2023-09-06 16:29:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								49a39452c6 
								
							 
						 
						
							
							
								
								update benchmark ( #8899 )  
							
							 
							
							
							
						 
						
							2023-09-06 15:11:43 +08:00