Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								51a133de56 
								
							 
						 
						
							
							
								
								LLM: add fuse rope and norm optimization for Baichuan. ( #9166 )  
							
							 
							
							... 
							
							
							
							* add fuse rope optimization.
* add rms norm optimization. 
							
						 
						
							2023-10-13 17:36:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								433f408081 
								
							 
						 
						
							
							
								
								LLM: Add fuse rope and norm optimization for Aquila. ( #9161 )  
							
							 
							
							... 
							
							
							
							* add fuse norm optimization.
* add fuse rope optimization 
							
						 
						
							2023-10-13 14:18:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								e7aa67e141 
								
							 
						 
						
							
							
								
								[LLM] Add rope  optimization for internlm ( #9159 )  
							
							 
							
							... 
							
							
							
							* add rope and norm optimization for internlm and gptneox
* revert gptneox back and split with pr#9155 #
* add norm_forward
* style fix
* update
* update 
							
						 
						
							2023-10-13 14:18:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b8aee7bb1b 
								
							 
						 
						
							
							
								
								LLM: Fix Qwen kv_cache optimization ( #9148 )  
							
							 
							
							... 
							
							
							
							* first commit
* ut pass
* accelerate rotate half by using common util function
* fix style 
							
						 
						
							2023-10-12 15:49:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								69942d3826 
								
							 
						 
						
							
							
								
								LLM: fix model check before attention optimization ( #9149 )  
							
							 
							
							
							
						 
						
							2023-10-12 15:21:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								eb3fb18eb4 
								
							 
						 
						
							
							
								
								LLM: improve PyTorch API doc ( #9128 )  
							
							 
							
							
							
						 
						
							2023-10-11 15:03:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								1709beba5b 
								
							 
						 
						
							
							
								
								LLM: Explicitly close pickle file pointer before removing temporary directory ( #9120 )  
							
							 
							
							... 
							
							
							
							* fp close 
							
						 
						
							2023-10-10 14:57:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								e4d1457a70 
								
							 
						 
						
							
							
								
								LLM: improve transformers style API doc ( #9113 )  
							
							 
							
							
							
						 
						
							2023-10-10 09:31:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								edccfb2ed3 
								
							 
						 
						
							
							
								
								LLM: Check model device type ( #9092 )  
							
							 
							
							... 
							
							
							
							* check model device 
							
						 
						
							2023-10-09 15:49:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								4c4f8d1663 
								
							 
						 
						
							
							
								
								[LLM]Fix Arc falcon abnormal output issue ( #9096 )  
							
							 
							
							... 
							
							
							
							* update
* update
* fix error & style
* fix style
* update train
* to input_seq_size 
							
						 
						
							2023-10-09 15:09:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								548e4dd5fe 
								
							 
						 
						
							
							
								
								LLM: Adapt transformers models for optimize model SL ( #9022 )  
							
							 
							
							... 
							
							
							
							* LLM: Adapt transformers model for SL 
							
						 
						
							2023-10-09 11:13:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f64257a093 
								
							 
						 
						
							
							
								
								LLM: basic api support for esimd fp16 ( #9067 )  
							
							 
							
							... 
							
							
							
							* basic api support for fp16
* fix style
* fix
* fix error and style
* fix style
* meet code review
* update based on comments 
							
						 
						
							2023-10-09 11:05:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								b3e94a32d4 
								
							 
						 
						
							
							
								
								change log4error import ( #9098 )  
							
							 
							
							
							
						 
						
							2023-10-08 09:23:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								78ea7ddb1c 
								
							 
						 
						
							
							
								
								Combine apply_rotary_pos_emb for gpt-neox ( #9074 )  
							
							 
							
							
							
						 
						
							2023-10-07 16:27:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								36dd4afd61 
								
							 
						 
						
							
							
								
								Fix llama when rope scaling is not None ( #9086 )  
							
							 
							
							... 
							
							
							
							* Fix llama when rope scaling is not None
* fix style
* fix style 
							
						 
						
							2023-10-06 13:27:37 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								fcb1c618a0 
								
							 
						 
						
							
							
								
								using bigdl-llm fused rope for llama ( #9066 )  
							
							 
							
							... 
							
							
							
							* optimize llama xpu rope
* fix bug
* fix style
* refine append cache
* remove check
* do not cache cos sin
* remove unnecessary changes
* clean up
* fix style
* check for training 
							
						 
						
							2023-10-06 09:57:29 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								aefa5a5bfe 
								
							 
						 
						
							
							
								
								Qwen kv cache ( #9079 )  
							
							 
							
							... 
							
							
							
							* qwen and aquila
* update
* update
* style 
							
						 
						
							2023-10-05 11:59:17 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d5ca1f32b6 
								
							 
						 
						
							
							
								
								Aquila KV cache optimization ( #9080 )  
							
							 
							
							... 
							
							
							
							* update
* update
* style 
							
						 
						
							2023-10-05 11:10:57 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								88565c76f6 
								
							 
						 
						
							
							
								
								add export merged model example ( #9018 )  
							
							 
							
							... 
							
							
							
							* add export merged model example
* add sources
* add script
* fix style 
							
						 
						
							2023-10-04 21:18:52 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								0cd8f1c79c 
								
							 
						 
						
							
							
								
								Use ipex fused rms norm for llama ( #9081 )  
							
							 
							
							... 
							
							
							
							* also apply rmsnorm
* fix cpu 
							
						 
						
							2023-10-04 21:04:55 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								fb883100e7 
								
							 
						 
						
							
							
								
								LLM: support chatglm-18b convert attention forward in benchmark scripts. ( #9072 )  
							
							 
							
							... 
							
							
							
							* add chatglm-18b convert.
* fix if statement.
* fix 
							
						 
						
							2023-09-28 14:04:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6de2189e90 
								
							 
						 
						
							
							
								
								[LLM] fix chatglm main choice ( #9073 )  
							
							 
							
							
							
						 
						
							2023-09-28 11:23:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								b4a1266ef0 
								
							 
						 
						
							
							
								
								[WIP] LLM: add kv cache support for internlm. ( #9036 )  
							
							 
							
							... 
							
							
							
							* LLM: add kv cache support for internlm
* add internlm apply_rotary_pos_emb
* fix.
* fix style. 
							
						 
						
							2023-09-25 14:16:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								975da86e00 
								
							 
						 
						
							
							
								
								LLM: fix gptneox kv cache ( #9044 )  
							
							 
							
							
							
						 
						
							2023-09-25 13:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								028a6d9383 
								
							 
						 
						
							
							
								
								MPT model optimize for long sequence ( #9020 )  
							
							 
							
							... 
							
							
							
							* mpt_long_seq
* update
* update
* update
* style
* style2
* update 
							
						 
						
							2023-09-21 21:27:23 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b943d73844 
								
							 
						 
						
							
							
								
								LLM: refactor kv cache ( #9030 )  
							
							 
							
							... 
							
							
							
							* refactor utils
* meet code review; update all models
* small fix 
							
						 
						
							2023-09-21 21:28:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								868511cf02 
								
							 
						 
						
							
							
								
								LLM: fix kv cache issue of bloom and falcon. ( #9029 )  
							
							 
							
							
							
						 
						
							2023-09-21 18:12:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bf51ec40b2 
								
							 
						 
						
							
							
								
								LLM: Fix empty cache ( #9024 )  
							
							 
							
							... 
							
							
							
							* fix
* fix
* update example 
							
						 
						
							2023-09-21 17:16:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								714884414e 
								
							 
						 
						
							
							
								
								fix error ( #9025 )  
							
							 
							
							
							
						 
						
							2023-09-21 16:42:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								fa47967583 
								
							 
						 
						
							
							
								
								[LLM] Optimize kv_cache for gptj model family ( #9010 )  
							
							 
							
							... 
							
							
							
							* optimize gptj model family attention
* add license and comment for dolly-model
* remove xpu mentioned
* remove useless info
* code sytle
* style fix
* code style in gptj fix
* remove gptj arch
* move apply_rotary_pos_emb into utils
* kv_seq_length update
* use hidden_states instead of query layer to reach batch size 
							
						 
						
							2023-09-21 10:42:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								b3cad7de57 
								
							 
						 
						
							
							
								
								LLM: add bloom kv cache support ( #9012 )  
							
							 
							
							... 
							
							
							
							* LLM: add bloom kv cache support
* fix style. 
							
						 
						
							2023-09-20 21:10:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								156af15d1e 
								
							 
						 
						
							
							
								
								Add NF3 ( #9008 )  
							
							 
							
							... 
							
							
							
							* add nf3
* grammar 
							
						 
						
							2023-09-20 20:03:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								6981745fe4 
								
							 
						 
						
							
							
								
								Optimize kv_cache for gpt-neox model family ( #9015 )  
							
							 
							
							... 
							
							
							
							* override gptneox
* style
* move to utils
* revert 
							
						 
						
							2023-09-20 19:59:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								735a17f7b4 
								
							 
						 
						
							
							
								
								LLM: add kv cache to falcon family. ( #8995 )  
							
							 
							
							... 
							
							
							
							* add kv cache to falcon family.
* fix: import error.
* refactor
* update comments.
* add two version falcon attention forward.
* fix
* fix.
* fix.
* fix.
* fix style.
* fix style. 
							
						 
						
							2023-09-20 15:36:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								94a7f8917b 
								
							 
						 
						
							
							
								
								LLM: fix optimized kv cache for baichuan-13b ( #9009 )  
							
							 
							
							... 
							
							
							
							* fix baichuan 13b
* fix style
* fix
* fix style 
							
						 
						
							2023-09-20 15:30:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c88f6ec457 
								
							 
						 
						
							
							
								
								Experiment XPU QLora Finetuning ( #8937 )  
							
							 
							
							... 
							
							
							
							* Support xpu finetuning
* support xpu finetuning
* fix style
* fix style
* fix style
* refine example
* add readme
* refine readme
* refine api
* fix fp16
* fix example
* refactor
* fix style
* fix compute type
* add qlora
* refine training args
* fix example
* fix style
* fast path forinference
* address comments
* refine readme
* revert lint 
							
						 
						
							2023-09-19 10:15:44 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								004c45c2be 
								
							 
						 
						
							
							
								
								LLM: Support optimized kv_cache for baichuan family ( #8997 )  
							
							 
							
							... 
							
							
							
							* add initial support for baichuan attantion
* support baichuan1
* update based on comment
* update based on comment
* support baichuan2
* update link, change how to jusge baichuan2
* fix style
* add model parameter for pob emb
* update based on comment 
							
						 
						
							2023-09-19 15:38:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								2a05581da7 
								
							 
						 
						
							
							
								
								LLM: Apply low_cpu_mem_usage algorithm on optimize_model API ( #8987 )  
							
							 
							
							... 
							
							
							
							* low_cpu_mem_usage 
							
						 
						
							2023-09-18 21:41:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								16b9412e80 
								
							 
						 
						
							
							
								
								tie_word_embeddings ( #8977 )  
							
							 
							
							... 
							
							
							
							tie_word_embeddings 
							
						 
						
							2023-09-15 10:17:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bcf456070c 
								
							 
						 
						
							
							
								
								fix bloom-176b int overflow ( #8973 )  
							
							 
							
							
							
						 
						
							2023-09-14 14:37:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								dd57623650 
								
							 
						 
						
							
							
								
								LLM: reduce GPU memory for optimize_model=True ( #8965 )  
							
							 
							
							... 
							
							
							
							* reduce gpu memory for llama & chatglm
* change to device type 
							
						 
						
							2023-09-13 17:27:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								7132ef6081 
								
							 
						 
						
							
							
								
								[LLM Doc] Add optimize_model doc in transformers api ( #8957 )  
							
							 
							
							... 
							
							
							
							* add optimize in from_pretrained
* add api doc for load_low_bit
* update api docs following comments
* update api docs
* update
* reord comments 
							
						 
						
							2023-09-13 10:42:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								c32c260ce2 
								
							 
						 
						
							
							
								
								LLM: Add save/load API in optimize_model to support general pytorch model ( #8956 )  
							
							 
							
							... 
							
							
							
							* support hf format SL 
							
						 
						
							2023-09-13 10:22:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								0bf5857908 
								
							 
						 
						
							
							
								
								[LLM] Integrate FastChat as a serving framework for BigDL-LLM ( #8821 )  
							
							 
							
							... 
							
							
							
							* Finish changing
* format
* add licence
* Add licence
* fix
* fix
* Add xpu support for fschat
* Fix patch
* Also install webui dependencies
* change setup.py dependency installs
* fiox
* format
* final test 
							
						 
						
							2023-09-13 09:28:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								dcaa4dc130 
								
							 
						 
						
							
							
								
								LLM: Support GQA on llama kvcache ( #8938 )  
							
							 
							
							... 
							
							
							
							* support GQA 
							
						 
						
							2023-09-12 12:18:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								16761c58be 
								
							 
						 
						
							
							
								
								Make llama attention stateless ( #8928 )  
							
							 
							
							... 
							
							
							
							* Make llama attention stateless
* fix style
* fix chatglm
* fix chatglm xpu 
							
						 
						
							2023-09-11 18:21:50 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								e62eda74b8 
								
							 
						 
						
							
							
								
								refine ( #8912 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-09-11 16:40:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								df165ad165 
								
							 
						 
						
							
							
								
								init ( #8933 )  
							
							 
							
							
							
						 
						
							2023-09-11 14:30:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b3f5dd5b5d 
								
							 
						 
						
							
							
								
								LLM: update q8 convert xpu&cpu ( #8930 )  
							
							 
							
							
							
						 
						
							2023-09-08 16:01:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								33d75adadf 
								
							 
						 
						
							
							
								
								[LLM]Support q5_0 on arc ( #8926 )  
							
							 
							
							... 
							
							
							
							* support q5_0
* delete
* fix style 
							
						 
						
							2023-09-08 15:52:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ee98cdd85c 
								
							 
						 
						
							
							
								
								Support latest transformer version ( #8923 )  
							
							 
							
							... 
							
							
							
							* Support latest transformer version
* fix style 
							
						 
						
							2023-09-07 19:01:32 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								25428b22b4 
								
							 
						 
						
							
							
								
								Fix chatglm2 attention and kv cache ( #8924 )  
							
							 
							
							... 
							
							
							
							* fix chatglm2 attention
* fix bf16 bug
* make model stateless
* add utils
* cleanup
* fix style 
							
						 
						
							2023-09-07 18:54:29 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								b209b8f7b6 
								
							 
						 
						
							
							
								
								[LLM] Fix arc qtype != q4_0 generate issue ( #8920 )  
							
							 
							
							... 
							
							
							
							* Fix arc precision!=q4_0 generate issue
* meet comments 
							
						 
						
							2023-09-07 08:56:36 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c34400e6b0 
								
							 
						 
						
							
							
								
								Use new layout for xpu qlinear ( #8896 )  
							
							 
							
							... 
							
							
							
							* use new layout for xpu qlinear
* fix style 
							
						 
						
							2023-09-06 21:55:33 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								8bc1d8a17c 
								
							 
						 
						
							
							
								
								LLM: Fix discards in optimize_model with non-hf models  and add openai whisper example ( #8877 )  
							
							 
							
							... 
							
							
							
							* openai-whisper 
							
						 
						
							2023-09-07 10:35:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								7a71ced78f 
								
							 
						 
						
							
							
								
								[LLM Docs] Remain API Docs Issues Solution ( #8780 )  
							
							 
							
							... 
							
							
							
							* langchain readthedocs update
* solve langchain.llms.transformersllm issues
* langchain.embeddings.transformersembeddings/transfortmersllms issues
* update docs for get_num_tokens
* add low_bit api doc
* add optimizer model api doc
* update rst index
* fix coomments style
* update docs following the comments
* update api doc 
							
						 
						
							2023-09-06 16:29:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								4a9ff050a1 
								
							 
						 
						
							
							
								
								Add qlora nf4 ( #8782 )  
							
							 
							
							... 
							
							
							
							* add nf4
* dequant nf4
* style 
							
						 
						
							2023-09-06 09:39:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								95271f10e0 
								
							 
						 
						
							
							
								
								LLM: Rename low bit layer ( #8875 )  
							
							 
							
							... 
							
							
							
							* rename lowbit
---------
Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-09-05 13:21:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								242c9d6036 
								
							 
						 
						
							
							
								
								Fix chatglm2 multi-turn streamchat ( #8867 )  
							
							 
							
							
							
						 
						
							2023-08-31 22:13:49 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								de6c6bb17f 
								
							 
						 
						
							
							
								
								[LLM] Downgrade amx build gcc version and remove avx flag display ( #8856 )  
							
							 
							
							... 
							
							
							
							* downgrade to gcc 11
* remove avx display 
							
						 
						
							2023-08-31 14:08:13 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3b4f4e1c3d 
								
							 
						 
						
							
							
								
								Fix llama attention optimization for XPU ( #8855 )  
							
							 
							
							... 
							
							
							
							* Fix llama attention optimization fo XPU
* fix chatglm2
* fix typo 
							
						 
						
							2023-08-30 21:30:49 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								7b566bf686 
								
							 
						 
						
							
							
								
								[LLM] add new API for optimize any pytorch models ( #8827 )  
							
							 
							
							... 
							
							
							
							* add new API for optimize any pytorch models
* change test util name
* revise API and update UT
* fix python style
* update ut config, change default value
* change defaults, disable ut transcribe 
							
						 
						
							2023-08-30 19:41:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								8eca982301 
								
							 
						 
						
							
							
								
								windows add env ( #8852 )  
							
							 
							
							
							
						 
						
							2023-08-30 15:54:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								731916c639 
								
							 
						 
						
							
							
								
								LLM: Enable attempting loading method automatically ( #8841 )  
							
							 
							
							... 
							
							
							
							* enable auto load method
* warning error
* logger info
---------
Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-08-30 15:41:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bba73ec9d2 
								
							 
						 
						
							
							
								
								[LLM] change chatglm native int4 checkpoint name ( #8851 )  
							
							 
							
							
							
						 
						
							2023-08-30 15:05:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								55e705a84c 
								
							 
						 
						
							
							
								
								[LLM] Support the rest of AutoXXX classes in Transformers API ( #8815 )  
							
							 
							
							... 
							
							
							
							* add transformers auto models
* fix 
							
						 
						
							2023-08-30 11:16:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								7429ea0606 
								
							 
						 
						
							
							
								
								[LLM] support transformer int4 + amx int4 ( #8838 )  
							
							 
							
							
							
						 
						
							2023-08-29 17:27:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								bb31d4fe80 
								
							 
						 
						
							
							
								
								LLM: Implement hf low_cpu_mem_usage with 1xbinary file peak memory on transformer int4 ( #8731 )  
							
							 
							
							... 
							
							
							
							* 1x peak memory 
							
						 
						
							2023-08-29 09:33:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								d2926c7672 
								
							 
						 
						
							
							
								
								[LLM] Unify Langchain Native and Transformers LLM API ( #8752 )  
							
							 
							
							... 
							
							
							
							* deprecate BigDLNativeTransformers and add specific LMEmbedding method
* deprecate and add LM methods for langchain llms
* add native params to native langchain
* new imple for embedding
* move ut from bigdlnative to casual llm
* rename embeddings api and examples update align with usage updating
* docqa example hot-fix
* add more api docs
* add langchain ut for starcoder
* support model_kwargs for transformer methods when calling causalLM and add ut
* ut fix for transformers embedding
* update for langchain causal supporting transformers
* remove model_family in readme doc
* add model_families params to support more models
* update api docs and remove chatglm embeddings for now
* remove chatglm embeddings in examples
* new refactor for ut to add bloom and transformers llama ut
* disable llama transformers embedding ut 
							
						 
						
							2023-08-25 11:14:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bf3591e2ff 
								
							 
						 
						
							
							
								
								Optimize chatglm2 for bf16 ( #8725 )  
							
							 
							
							... 
							
							
							
							* make chatglm works with bf16
* fix style
* support chatglm v1
* fix style
* fix style
* add chatglm2 file 
							
						 
						
							2023-08-24 10:04:25 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								611c1fb628 
								
							 
						 
						
							
							
								
								[LLM] change default n_threads of native int4 langchain API ( #8779 )  
							
							 
							
							
							
						 
						
							2023-08-21 13:30:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3d1f2b44f8 
								
							 
						 
						
							
							
								
								LLM: change default n_threads of native int4 models ( #8776 )  
							
							 
							
							
							
						 
						
							2023-08-18 15:46:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								2ba2133613 
								
							 
						 
						
							
							
								
								fix starcoder chinese output ( #8773 )  
							
							 
							
							
							
						 
						
							2023-08-18 13:37:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								548f7a6cf7 
								
							 
						 
						
							
							
								
								LLM: update convert of llama family to support llama2-70B ( #8747 )  
							
							 
							
							
							
						 
						
							2023-08-18 09:30:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								4afea496ab 
								
							 
						 
						
							
							
								
								support q8_0 ( #8765 )  
							
							 
							
							
							
						 
						
							2023-08-17 15:06:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								e9aa2bd890 
								
							 
						 
						
							
							
								
								LLM: reduce GPU 1st token latency and update example ( #8763 )  
							
							 
							
							... 
							
							
							
							* reduce 1st token latency
* update example
* fix
* fix style
* update readme of gpu benchmark 
							
						 
						
							2023-08-16 18:01:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								f4164e4492 
								
							 
						 
						
							
							
								
								[BigDL LLM] Update readme for unifying transformers API ( #8737 )  
							
							 
							
							... 
							
							
							
							* update readme doc
* fix readthedocs error
* update comment
* update exception error info
* invalidInputError instead
* fix readme typo error and remove import error
* fix more typo 
							
						 
						
							2023-08-16 14:22:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								77844125f2 
								
							 
						 
						
							
							
								
								[LLM] Support chatglm cache ( #8745 )  
							
							 
							
							
							
						 
						
							2023-08-14 15:10:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								aceea4dc29 
								
							 
						 
						
							
							
								
								[LLM] Unify Transformers and Native API ( #8713 )  
							
							 
							
							... 
							
							
							
							* re-open pr to run on latest runner
* re-add examples and ut
* rename ut and move deprecate to warning instead of raising an error info
* ut fix 
							
						 
						
							2023-08-11 19:45:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f91035c298 
								
							 
						 
						
							
							
								
								[LLM] fix chatglm native int4 emoji output ( #8739 )  
							
							 
							
							
							
						 
						
							2023-08-11 15:38:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								77efcf7b1d 
								
							 
						 
						
							
							
								
								LLM: fix ChatGLM2 native int4 stream output ( #8733 )  
							
							 
							
							
							
						 
						
							2023-08-11 14:51:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ca3e59a1dc 
								
							 
						 
						
							
							
								
								LLM: support stop for starcoder native int4 stream ( #8734 )  
							
							 
							
							
							
						 
						
							2023-08-11 14:51:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3d5a7484a2 
								
							 
						 
						
							
							
								
								[LLM] fix bloom and starcoder memory release ( #8728 )  
							
							 
							
							
							
						 
						
							2023-08-11 11:18:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1a7b698a83 
								
							 
						 
						
							
							
								
								[LLM] support ipex arc int4 & add basic llama2 example ( #8700 )  
							
							 
							
							... 
							
							
							
							* first support of xpu
* make it works on gpu
update setup
update
add GPU llama2 examples
add use_optimize flag to disbale optimize for gpu
fix style
update gpu exmaple readme
fix
* update example, and update env
* fix setup to add cpp files
* replace jit with aot to avoid data leak
* rename to bigdl-core-xe
* update installation in example readme 
							
						 
						
							2023-08-09 22:20:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								1b65288bdb 
								
							 
						 
						
							
							
								
								Add api doc for LLM ( #8605 )  
							
							 
							
							... 
							
							
							
							* api doc initial
* update desc 
							
						 
						
							2023-08-08 18:17:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								ea5d7aff5b 
								
							 
						 
						
							
							
								
								LLM: add chatglm native int4 transformers API ( #8695 )  
							
							 
							
							
							
						 
						
							2023-08-07 17:52:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ef08250c21 
								
							 
						 
						
							
							
								
								[LLM] chatglm pybinding support ( #8672 )  
							
							 
							
							
							
						 
						
							2023-08-04 14:27:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b6468bac43 
								
							 
						 
						
							
							
								
								optimize chatglm2 long sequence ( #8662 )  
							
							 
							
							... 
							
							
							
							* add chatglm2
* optimize a little
* optimize chatglm long sequence
* fix style
* address comments and fix style
* fix bug 
							
						 
						
							2023-08-03 17:56:24 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3407f87075 
								
							 
						 
						
							
							
								
								Fix llama kv cache bug ( #8674 )  
							
							 
							
							
							
						 
						
							2023-08-03 17:54:55 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								a15a2516e6 
								
							 
						 
						
							
							
								
								add ( #8659 )  
							
							 
							
							
							
						 
						
							2023-08-03 10:12:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								119bf6d710 
								
							 
						 
						
							
							
								
								[LLM] Support linux cpp dynamic load .so ( #8655 )  
							
							 
							
							... 
							
							
							
							* support linux cpp dynamic load .so
* update cli 
							
						 
						
							2023-08-02 20:15:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								ca998cc6f2 
								
							 
						 
						
							
							
								
								LLM: Mute shape mismatch output ( #8601 )  
							
							 
							
							... 
							
							
							
							* LLM: Mute shape mismatch output 
							
						 
						
							2023-08-02 16:46:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								04c713ef06 
								
							 
						 
						
							
							
								
								LLM: Disable transformer api pretraining_tp ( #8645 )  
							
							 
							
							... 
							
							
							
							* disable pretraining_tp 
							
						 
						
							2023-08-02 11:26:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cbeae97a26 
								
							 
						 
						
							
							
								
								Optimize Llama Attention to to reduce KV cache memory copy ( #8580 )  
							
							 
							
							... 
							
							
							
							* Optimize llama attention to reduce KV cache memory copy
* fix bug
* fix style
* remove git
* fix style
* fix style
* fix style
* fix tests
* move llama attention to another file
* revert
* fix style
* remove jit
* fix 
							
						 
						
							2023-08-01 16:37:58 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								cdfbe652ca 
								
							 
						 
						
							
							
								
								[LLM] Add chatglm support for llm-cli ( #8641 )  
							
							 
							
							... 
							
							
							
							* add chatglm build
* add llm-cli support
* update git
* install cmake
* add ut for chatglm
* add files to setup
* fix bug cause permission error when sf lack file 
							
						 
						
							2023-08-01 14:30:17 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								3e10260c6d 
								
							 
						 
						
							
							
								
								LLM: llm-convert support chatglm family ( #8643 )  
							
							 
							
							... 
							
							
							
							* convert chatglm 
							
						 
						
							2023-08-01 11:16:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								a607972c0b 
								
							 
						 
						
							
							
								
								[LLM]LLM windows load -api.dll ( #8631 )  
							
							 
							
							... 
							
							
							
							* temp
* update
* revert setup.py 
							
						 
						
							2023-07-31 13:47:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								3361b66449 
								
							 
						 
						
							
							
								
								[LLM] Revert llm-cli to disable selecting executables on Windows ( #8630 )  
							
							 
							
							... 
							
							
							
							* revert vnni file select
* revert setup.py
* add model-api.dll 
							
						 
						
							2023-07-31 11:15:44 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								fb32fefcbe 
								
							 
						 
						
							
							
								
								LLM: support tensor input of native int4 generate ( #8620 )  
							
							 
							
							
							
						 
						
							2023-07-27 17:59:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								5b484ab48d 
								
							 
						 
						
							
							
								
								LLM: Support load_low_bit loading models in shards format ( #8612 )  
							
							 
							
							... 
							
							
							
							* shards_model
---------
Co-authored-by: leonardozcm <leonaordo1997zcm@gmail.com> 
							
						 
						
							2023-07-26 13:30:01 +08:00