Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								975da86e00 
								
							 
						 
						
							
							
								
								LLM: fix gptneox kv cache ( #9044 )  
							
							 
							
							
							
						 
						
							2023-09-25 13:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								028a6d9383 
								
							 
						 
						
							
							
								
								MPT model optimize for long sequence ( #9020 )  
							
							 
							
							... 
							
							
							
							* mpt_long_seq
* update
* update
* update
* style
* style2
* update 
							
						 
						
							2023-09-21 21:27:23 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b943d73844 
								
							 
						 
						
							
							
								
								LLM: refactor kv cache ( #9030 )  
							
							 
							
							... 
							
							
							
							* refactor utils
* meet code review; update all models
* small fix 
							
						 
						
							2023-09-21 21:28:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								868511cf02 
								
							 
						 
						
							
							
								
								LLM: fix kv cache issue of bloom and falcon. ( #9029 )  
							
							 
							
							
							
						 
						
							2023-09-21 18:12:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bf51ec40b2 
								
							 
						 
						
							
							
								
								LLM: Fix empty cache ( #9024 )  
							
							 
							
							... 
							
							
							
							* fix
* fix
* update example 
							
						 
						
							2023-09-21 17:16:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								714884414e 
								
							 
						 
						
							
							
								
								fix error ( #9025 )  
							
							 
							
							
							
						 
						
							2023-09-21 16:42:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								fa47967583 
								
							 
						 
						
							
							
								
								[LLM] Optimize kv_cache for gptj model family ( #9010 )  
							
							 
							
							... 
							
							
							
							* optimize gptj model family attention
* add license and comment for dolly-model
* remove xpu mentioned
* remove useless info
* code style
* style fix
* code style in gptj fix
* remove gptj arch
* move apply_rotary_pos_emb into utils
* kv_seq_length update
* use hidden_states instead of query layer to reach batch size 
							
						 
						
							2023-09-21 10:42:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								b3cad7de57 
								
							 
						 
						
							
							
								
								LLM: add bloom kv cache support ( #9012 )  
							
							 
							
							... 
							
							
							
							* LLM: add bloom kv cache support
* fix style. 
							
						 
						
							2023-09-20 21:10:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								156af15d1e 
								
							 
						 
						
							
							
								
								Add NF3 ( #9008 )  
							
							 
							
							... 
							
							
							
							* add nf3
* grammar 
							
						 
						
							2023-09-20 20:03:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								6981745fe4 
								
							 
						 
						
							
							
								
								Optimize kv_cache for gpt-neox model family ( #9015 )  
							
							 
							
							... 
							
							
							
							* override gptneox
* style
* move to utils
* revert 
							
						 
						
							2023-09-20 19:59:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								735a17f7b4 
								
							 
						 
						
							
							
								
								LLM: add kv cache to falcon family. ( #8995 )  
							
							 
							
							... 
							
							
							
							* add kv cache to falcon family.
* fix: import error.
* refactor
* update comments.
* add two version falcon attention forward.
* fix
* fix.
* fix.
* fix.
* fix style.
* fix style. 
							
						 
						
							2023-09-20 15:36:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								94a7f8917b 
								
							 
						 
						
							
							
								
								LLM: fix optimized kv cache for baichuan-13b ( #9009 )  
							
							 
							
							... 
							
							
							
							* fix baichuan 13b
* fix style
* fix
* fix style 
							
						 
						
							2023-09-20 15:30:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c88f6ec457 
								
							 
						 
						
							
							
								
								Experiment XPU QLora Finetuning ( #8937 )  
							
							 
							
							... 
							
							
							
							* Support xpu finetuning
* support xpu finetuning
* fix style
* fix style
* fix style
* refine example
* add readme
* refine readme
* refine api
* fix fp16
* fix example
* refactor
* fix style
* fix compute type
* add qlora
* refine training args
* fix example
* fix style
* fast path for inference
* address comments
* refine readme
* revert lint 
							
						 
						
							2023-09-19 10:15:44 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								004c45c2be 
								
							 
						 
						
							
							
								
								LLM: Support optimized kv_cache for baichuan family ( #8997 )  
							
							 
							
							... 
							
							
							
							* add initial support for baichuan attention
* support baichuan1
* update based on comment
* update based on comment
* support baichuan2
* update link, change how to judge baichuan2
* fix style
* add model parameter for pob emb
* update based on comment 
							
						 
						
							2023-09-19 15:38:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								2a05581da7 
								
							 
						 
						
							
							
								
								LLM: Apply low_cpu_mem_usage algorithm on optimize_model API ( #8987 )  
							
							 
							
							... 
							
							
							
							* low_cpu_mem_usage 
							
						 
						
							2023-09-18 21:41:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								16b9412e80 
								
							 
						 
						
							
							
								
								tie_word_embeddings ( #8977 )  
							
							 
							
							... 
							
							
							
							tie_word_embeddings 
							
						 
						
							2023-09-15 10:17:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bcf456070c 
								
							 
						 
						
							
							
								
								fix bloom-176b int overflow ( #8973 )  
							
							 
							
							
							
						 
						
							2023-09-14 14:37:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								dd57623650 
								
							 
						 
						
							
							
								
								LLM: reduce GPU memory for optimize_model=True ( #8965 )  
							
							 
							
							... 
							
							
							
							* reduce gpu memory for llama & chatglm
* change to device type 
							
						 
						
							2023-09-13 17:27:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								7132ef6081 
								
							 
						 
						
							
							
								
								[LLM Doc] Add optimize_model doc in transformers api ( #8957 )  
							
							 
							
							... 
							
							
							
							* add optimize in from_pretrained
* add api doc for load_low_bit
* update api docs following comments
* update api docs
* update
* reord comments 
							
						 
						
							2023-09-13 10:42:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								c32c260ce2 
								
							 
						 
						
							
							
								
								LLM: Add save/load API in optimize_model to support general pytorch model ( #8956 )  
							
							 
							
							... 
							
							
							
							* support hf format SL 
							
						 
						
							2023-09-13 10:22:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								0bf5857908 
								
							 
						 
						
							
							
								
								[LLM] Integrate FastChat as a serving framework for BigDL-LLM ( #8821 )  
							
							 
							
							... 
							
							
							
							* Finish changing
* format
* add licence
* Add licence
* fix
* fix
* Add xpu support for fschat
* Fix patch
* Also install webui dependencies
* change setup.py dependency installs
* fix
* format
* final test 
							
						 
						
							2023-09-13 09:28:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								dcaa4dc130 
								
							 
						 
						
							
							
								
								LLM: Support GQA on llama kvcache ( #8938 )  
							
							 
							
							... 
							
							
							
							* support GQA 
							
						 
						
							2023-09-12 12:18:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								16761c58be 
								
							 
						 
						
							
							
								
								Make llama attention stateless ( #8928 )  
							
							 
							
							... 
							
							
							
							* Make llama attention stateless
* fix style
* fix chatglm
* fix chatglm xpu 
							
						 
						
							2023-09-11 18:21:50 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								e62eda74b8 
								
							 
						 
						
							
							
								
								refine ( #8912 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-09-11 16:40:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								df165ad165 
								
							 
						 
						
							
							
								
								init ( #8933 )  
							
							 
							
							
							
						 
						
							2023-09-11 14:30:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b3f5dd5b5d 
								
							 
						 
						
							
							
								
								LLM: update q8 convert xpu&cpu ( #8930 )  
							
							 
							
							
							
						 
						
							2023-09-08 16:01:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								33d75adadf 
								
							 
						 
						
							
							
								
								[LLM]Support q5_0 on arc ( #8926 )  
							
							 
							
							... 
							
							
							
							* support q5_0
* delete
* fix style 
							
						 
						
							2023-09-08 15:52:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ee98cdd85c 
								
							 
						 
						
							
							
								
								Support latest transformer version ( #8923 )  
							
							 
							
							... 
							
							
							
							* Support latest transformer version
* fix style 
							
						 
						
							2023-09-07 19:01:32 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								25428b22b4 
								
							 
						 
						
							
							
								
								Fix chatglm2 attention and kv cache ( #8924 )  
							
							 
							
							... 
							
							
							
							* fix chatglm2 attention
* fix bf16 bug
* make model stateless
* add utils
* cleanup
* fix style 
							
						 
						
							2023-09-07 18:54:29 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								b209b8f7b6 
								
							 
						 
						
							
							
								
								[LLM] Fix arc qtype != q4_0 generate issue ( #8920 )  
							
							 
							
							... 
							
							
							
							* Fix arc precision!=q4_0 generate issue
* meet comments 
							
						 
						
							2023-09-07 08:56:36 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c34400e6b0 
								
							 
						 
						
							
							
								
								Use new layout for xpu qlinear ( #8896 )  
							
							 
							
							... 
							
							
							
							* use new layout for xpu qlinear
* fix style 
							
						 
						
							2023-09-06 21:55:33 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								8bc1d8a17c 
								
							 
						 
						
							
							
								
								LLM: Fix discards in optimize_model with non-hf models  and add openai whisper example ( #8877 )  
							
							 
							
							... 
							
							
							
							* openai-whisper 
							
						 
						
							2023-09-07 10:35:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								7a71ced78f 
								
							 
						 
						
							
							
								
								[LLM Docs] Remain API Docs Issues Solution ( #8780 )  
							
							 
							
							... 
							
							
							
							* langchain readthedocs update
* solve langchain.llms.transformersllm issues
* langchain.embeddings.transformersembeddings/transformersllms issues
* update docs for get_num_tokens
* add low_bit api doc
* add optimizer model api doc
* update rst index
* fix comments style
* update docs following the comments
* update api doc 
							
						 
						
							2023-09-06 16:29:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								4a9ff050a1 
								
							 
						 
						
							
							
								
								Add qlora nf4 ( #8782 )  
							
							 
							
							... 
							
							
							
							* add nf4
* dequant nf4
* style 
							
						 
						
							2023-09-06 09:39:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								95271f10e0 
								
							 
						 
						
							
							
								
								LLM: Rename low bit layer ( #8875 )  
							
							 
							
							... 
							
							
							
							* rename lowbit
---------
Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-09-05 13:21:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								242c9d6036 
								
							 
						 
						
							
							
								
								Fix chatglm2 multi-turn streamchat ( #8867 )  
							
							 
							
							
							
						 
						
							2023-08-31 22:13:49 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								de6c6bb17f 
								
							 
						 
						
							
							
								
								[LLM] Downgrade amx build gcc version and remove avx flag display ( #8856 )  
							
							 
							
							... 
							
							
							
							* downgrade to gcc 11
* remove avx display 
							
						 
						
							2023-08-31 14:08:13 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3b4f4e1c3d 
								
							 
						 
						
							
							
								
								Fix llama attention optimization for XPU ( #8855 )  
							
							 
							
							... 
							
							
							
							* Fix llama attention optimization for XPU
* fix chatglm2
* fix typo 
							
						 
						
							2023-08-30 21:30:49 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								7b566bf686 
								
							 
						 
						
							
							
								
								[LLM] add new API for optimize any pytorch models ( #8827 )  
							
							 
							
							... 
							
							
							
							* add new API for optimize any pytorch models
* change test util name
* revise API and update UT
* fix python style
* update ut config, change default value
* change defaults, disable ut transcribe 
							
						 
						
							2023-08-30 19:41:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								8eca982301 
								
							 
						 
						
							
							
								
								windows add env ( #8852 )  
							
							 
							
							
							
						 
						
							2023-08-30 15:54:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								731916c639 
								
							 
						 
						
							
							
								
								LLM: Enable attempting loading method automatically ( #8841 )  
							
							 
							
							... 
							
							
							
							* enable auto load method
* warning error
* logger info
---------
Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-08-30 15:41:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bba73ec9d2 
								
							 
						 
						
							
							
								
								[LLM] change chatglm native int4 checkpoint name ( #8851 )  
							
							 
							
							
							
						 
						
							2023-08-30 15:05:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								55e705a84c 
								
							 
						 
						
							
							
								
								[LLM] Support the rest of AutoXXX classes in Transformers API ( #8815 )  
							
							 
							
							... 
							
							
							
							* add transformers auto models
* fix 
							
						 
						
							2023-08-30 11:16:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								7429ea0606 
								
							 
						 
						
							
							
								
								[LLM] support transformer int4 + amx int4 ( #8838 )  
							
							 
							
							
							
						 
						
							2023-08-29 17:27:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								bb31d4fe80 
								
							 
						 
						
							
							
								
								LLM: Implement hf low_cpu_mem_usage with 1xbinary file peak memory on transformer int4 ( #8731 )  
							
							 
							
							... 
							
							
							
							* 1x peak memory 
							
						 
						
							2023-08-29 09:33:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								d2926c7672 
								
							 
						 
						
							
							
								
								[LLM] Unify Langchain Native and Transformers LLM API ( #8752 )  
							
							 
							
							... 
							
							
							
							* deprecate BigDLNativeTransformers and add specific LMEmbedding method
* deprecate and add LM methods for langchain llms
* add native params to native langchain
* new imple for embedding
* move ut from bigdlnative to causal llm
* rename embeddings api and examples update align with usage updating
* docqa example hot-fix
* add more api docs
* add langchain ut for starcoder
* support model_kwargs for transformer methods when calling causalLM and add ut
* ut fix for transformers embedding
* update for langchain causal supporting transformers
* remove model_family in readme doc
* add model_families params to support more models
* update api docs and remove chatglm embeddings for now
* remove chatglm embeddings in examples
* new refactor for ut to add bloom and transformers llama ut
* disable llama transformers embedding ut 
							
						 
						
							2023-08-25 11:14:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bf3591e2ff 
								
							 
						 
						
							
							
								
								Optimize chatglm2 for bf16 ( #8725 )  
							
							 
							
							... 
							
							
							
							* make chatglm works with bf16
* fix style
* support chatglm v1
* fix style
* fix style
* add chatglm2 file 
							
						 
						
							2023-08-24 10:04:25 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								611c1fb628 
								
							 
						 
						
							
							
								
								[LLM] change default n_threads of native int4 langchain API ( #8779 )  
							
							 
							
							
							
						 
						
							2023-08-21 13:30:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3d1f2b44f8 
								
							 
						 
						
							
							
								
								LLM: change default n_threads of native int4 models ( #8776 )  
							
							 
							
							
							
						 
						
							2023-08-18 15:46:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								2ba2133613 
								
							 
						 
						
							
							
								
								fix starcoder chinese output ( #8773 )  
							
							 
							
							
							
						 
						
							2023-08-18 13:37:02 +08:00