xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								610084e3c0 
								
							 
						 
						
							
							
								
								[LLM] Complete windows unittest ( #8611 )  
							
							 
							
							... 
							
							
							
							* add windows nightly test workflow
* use github runner to run pr test
* model load should use lowbit
* remove tmp dir after testing 
							
						 
						
							2023-08-03 14:48:42 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								a15a2516e6 
								
							 
						 
						
							
							
								
								add ( #8659 )  
							
							 
							
							
							
						 
						
							2023-08-03 10:12:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								0714888705 
								
							 
						 
						
							
							
								
								build windows avx dll ( #8657 )  
							
							 
							
							... 
							
							
							
							* windows avx
* add to actions 
							
						 
						
							2023-08-03 02:06:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								119bf6d710 
								
							 
						 
						
							
							
								
								[LLM] Support linux cpp dynamic load .so ( #8655 )  
							
							 
							
							... 
							
							
							
							* support linux cpp dynamic load .so
* update cli 
							
						 
						
							2023-08-02 20:15:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								ca998cc6f2 
								
							 
						 
						
							
							
								
								LLM: Mute shape mismatch output ( #8601 )  
							
							 
							
							... 
							
							
							
							* LLM: Mute shape mismatch output 
							
						 
						
							2023-08-02 16:46:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								15b3adc7ec 
								
							 
						 
						
							
							
								
								[LLM] llm linux binary make -> cmake ( #8656 )  
							
							 
							
							... 
							
							
							
							* llm linux make -> cmake
* update
* update 
							
						 
						
							2023-08-02 16:41:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								04c713ef06 
								
							 
						 
						
							
							
								
								LLM: Disable transformer api pretraining_tp ( #8645 )  
							
							 
							
							... 
							
							
							
							* disable pretraining_tp 
							
						 
						
							2023-08-02 11:26:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								6fc31bb4cf 
								
							 
						 
						
							
							
								
								LLM: first update descriptions for ChatGLM transformers int4 example ( #8646 )  
							
							 
							
							
							
						 
						
							2023-08-02 11:00:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								769209b7f0 
								
							 
						 
						
							
							
								
								Chatglm unittest disable due to missing instruction ( #8650 )  
							
							 
							
							
							
						 
						
							2023-08-02 10:28:42 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cbeae97a26 
								
							 
						 
						
							
							
								
								Optimize Llama Attention to reduce KV cache memory copy ( #8580 )  
							
							 
							
							... 
							
							
							
							* Optimize llama attention to reduce KV cache memory copy
* fix bug
* fix style
* remove git
* fix style
* fix style
* fix style
* fix tests
* move llama attention to another file
* revert
* fix style
* remove jit
* fix 
							
						 
						
							2023-08-01 16:37:58 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								39994738d1 
								
							 
						 
						
							
							
								
								LLM: add chat & stream chat example for ChatGLM2 transformers int4 ( #8636 )  
							
							 
							
							
							
						 
						
							2023-08-01 14:57:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								cdfbe652ca 
								
							 
						 
						
							
							
								
								[LLM] Add chatglm support for llm-cli ( #8641 )  
							
							 
							
							... 
							
							
							
							* add chatglm build
* add llm-cli support
* update git
* install cmake
* add ut for chatglm
* add files to setup
* fix bug causing permission error when sf lacks file 
							
						 
						
							2023-08-01 14:30:17 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								d6cbfc6d2c 
								
							 
						 
						
							
							
								
								LLM: Add requirements in whisper example ( #8644 )  
							
							 
							
							... 
							
							
							
							* LLM: Add requirements in whisper example 
							
						 
						
							2023-08-01 12:07:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								3e10260c6d 
								
							 
						 
						
							
							
								
								LLM: llm-convert support chatglm family ( #8643 )  
							
							 
							
							... 
							
							
							
							* convert chatglm 
							
						 
						
							2023-08-01 11:16:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								a607972c0b 
								
							 
						 
						
							
							
								
								[LLM]LLM windows load -api.dll ( #8631 )  
							
							 
							
							... 
							
							
							
							* temp
* update
* revert setup.py 
							
						 
						
							2023-07-31 13:47:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								3361b66449 
								
							 
						 
						
							
							
								
								[LLM] Revert llm-cli to disable selecting executables on Windows ( #8630 )  
							
							 
							
							... 
							
							
							
							* revert vnni file select
* revert setup.py
* add model-api.dll 
							
						 
						
							2023-07-31 11:15:44 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								3dbab9087b 
								
							 
						 
						
							
							
								
								LLM: add llama2-7b native int4 example ( #8629 )  
							
							 
							
							
							
						 
						
							2023-07-28 10:56:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								fb32fefcbe 
								
							 
						 
						
							
							
								
								LLM: support tensor input of native int4 generate ( #8620 )  
							
							 
							
							
							
						 
						
							2023-07-27 17:59:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								5b484ab48d 
								
							 
						 
						
							
							
								
								LLM: Support load_low_bit loading models in shards format ( #8612 )  
							
							 
							
							... 
							
							
							
							* shards_model
---------
Co-authored-by: leonardozcm <leonaordo1997zcm@gmail.com> 
							
						 
						
							2023-07-26 13:30:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								919791e406 
								
							 
						 
						
							
							
								
								Add needs to make sure run in order ( #8621 )  
							
							 
							
							
							
						 
						
							2023-07-26 14:16:57 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								e3418d7e61 
								
							 
						 
						
							
							
								
								[LLM] Remove concurrency group for binary build workflow ( #8619 )  
							
							 
							
							... 
							
							
							
							* remove concurrency group for nightly test 
							
						 
						
							2023-07-26 12:15:53 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								fcf8c085e3 
								
							 
						 
						
							
							
								
								LLM: add llama2-13b native int4 example ( #8613 )  
							
							 
							
							
							
						 
						
							2023-07-26 10:12:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								a98b3fe961 
								
							 
						 
						
							
							
								
								Fix cancel flag causing nightly builds to fail ( #8618 )  
							
							 
							
							
							
						 
						
							2023-07-26 11:11:08 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								7d45233825 
								
							 
						 
						
							
							
								
								fix trigger enable flag ( #8616 )  
							
							 
							
							
							
						 
						
							2023-07-26 10:53:03 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								07d1aee825 
								
							 
						 
						
							
							
								
								[PPML] add fastchat image for tdx ( #8610 )  
							
							 
							
							
							
						 
						
							2023-07-25 15:23:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Song Jiaming 
								
							 
						 
						
							
							
							
							
								
							
							
								650b82fa6e 
								
							 
						 
						
							
							
								
								[LLM] add CausalLM and Speech UT ( #8597 )  
							
							 
							
							
							
						 
						
							2023-07-25 11:22:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								9c897ac7db 
								
							 
						 
						
							
							
								
								[LLM] Merge redundant code in workflow ( #8596 )  
							
							 
							
							... 
							
							
							
							* modify workflow concurrency group
* Add build check to avoid repeated compilation
* remove redundant code 
							
						 
						
							2023-07-25 12:12:00 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								af201052db 
								
							 
						 
						
							
							
								
								avoid malloc all missing keys in fp32 ( #8600 )  
							
							 
							
							
							
						 
						
							2023-07-25 09:48:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								3f24202e4c 
								
							 
						 
						
							
							
								
								[LLM] Add more transformers int4 example (Llama 2)  ( #8602 )  
							
							 
							
							
							
						 
						
							2023-07-25 09:21:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								0f8201c730 
								
							 
						 
						
							
							
								
								llm readme update ( #8595 )  
							
							 
							
							
							
						 
						
							2023-07-24 09:47:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								ba42a6da63 
								
							 
						 
						
							
							
								
								[LLM] Set torch_dtype default value to 'auto' for transformers low bit from_pretrained API  
							
							 
							
							
							
						 
						
							2023-07-21 17:55:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								bbde423349 
								
							 
						 
						
							
							
								
								[LLM] Add current Linux UT inference tests to nightly tests ( #8578 )  
							
							 
							
							... 
							
							
							
							* Add current inference uts to nightly tests
* Change test model from chatglm-6b to chatglm2-6b
* Add thread num env variable for nightly test
* Fix urls
* Small fix 
							
						 
						
							2023-07-21 13:26:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								feb3af0567 
								
							 
						 
						
							
							
								
								Optimize transformer int4 memory footprint ( #8579 )  
							
							 
							
							
							
						 
						
							2023-07-20 20:22:13 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								57e880f63a 
								
							 
						 
						
							
							
								
								[LLM] use pytorch linear for large input matrix ( #8492 )  
							
							 
							
							... 
							
							
							
							* use pytorch linear for large input matrix
* only works on server
* fix style
* optimize memory
* first check server
* revert
* address comments
* fix style 
							
						 
						
							2023-07-20 09:54:25 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								6504e31a97 
								
							 
						 
						
							
							
								
								Small fix ( #8577 )  
							
							 
							
							
							
						 
						
							2023-07-20 16:37:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								2266ca7d2b 
								
							 
						 
						
							
							
								
								[LLM] Small updates to transformers int4 ut ( #8574 )  
							
							 
							
							... 
							
							
							
							* Small fix to transformers int4 ut
* Small fix 
							
						 
						
							2023-07-20 13:20:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								7b8d9c1b0d 
								
							 
						 
						
							
							
								
								[LLM] Add dependency file check in setup.py ( #8565 )  
							
							 
							
							... 
							
							
							
							* add package file check 
							
						 
						
							2023-07-20 14:20:08 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								2eeb653c75 
								
							 
						 
						
							
							
								
								fix llm build workflow misspell ( #8575 )  
							
							 
							
							
							
						 
						
							2023-07-20 12:08:54 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Song Jiaming 
								
							 
						 
						
							
							
							
							
								
							
							
								411d896636 
								
							 
						 
						
							
							
								
								LLM first transformers UT ( #8514 )  
							
							 
							
							... 
							
							
							
							* ut
* transformers api first ut
* name
* dir issue
* use chatglm instead of chatglm2
* omp
* set omp in sh
* source
* taskset
* test
* test omp
* add test 
							
						 
						
							2023-07-20 10:16:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								cad78740a7 
								
							 
						 
						
							
							
								
								[LLM] Small fixes to the Whisper transformers INT4 example ( #8573 )  
							
							 
							
							... 
							
							
							
							* Small fixes to the whisper example
* Small fix
* Small fix 
							
						 
						
							2023-07-20 10:11:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								7a9fdf74df 
								
							 
						 
						
							
							
								
								[LLM] Add more transformers int4 example (Dolly v2)  ( #8571 )  
							
							 
							
							... 
							
							
							
							* add
* add trust_remote_mode 
							
						 
						
							2023-07-19 18:20:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								e680af45ea 
								
							 
						 
						
							
							
								
								LLM: Optimize Langchain Pipeline ( #8561 )  
							
							 
							
							... 
							
							
							
							* LLM: Optimize Langchain Pipeline
* load in low bit 
							
						 
						
							2023-07-19 17:43:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								616b7cb0a2 
								
							 
						 
						
							
							
								
								add more langchain examples ( #8542 )  
							
							 
							
							... 
							
							
							
							* update langchain descriptions
* add mathchain example
* update readme
* update readme 
							
						 
						
							2023-07-19 17:42:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3bd1420b71 
								
							 
						 
						
							
							
								
								LLM: use MSVC to build avx-vnni binary files ( #8570 )  
							
							 
							
							
							
						 
						
							2023-07-19 17:38:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								457571b44e 
								
							 
						 
						
							
							
								
								[LLM] Add more transformers int4 example (InternLM)  ( #8557 )  
							
							 
							
							
							
						 
						
							2023-07-19 15:15:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								b6510fa054 
								
							 
						 
						
							
							
								
								fix move/download dll step ( #8564 )  
							
							 
							
							
							
						 
						
							2023-07-19 12:17:07 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								c52ed37745 
								
							 
						 
						
							
							
								
								fix starcoder dll name ( #8563 )  
							
							 
							
							
							
						 
						
							2023-07-19 11:55:06 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								3dbe3bf18e 
								
							 
						 
						
							
							
								
								transformer_int4 ( #8553 )  
							
							 
							
							
							
						 
						
							2023-07-19 08:33:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								49d636e295 
								
							 
						 
						
							
							
								
								[LLM] whisper model transformer int4 verification and example ( #8511 )  
							
							 
							
							... 
							
							
							
							* LLM: transformer api support
* va
* example
* revert
* pep8
* pep8 
							
						 
						
							2023-07-19 08:33:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								9a7bc17ca1 
								
							 
						 
						
							
							
								
								[LLM] llm supports vnni link on windows ( #8543 )  
							
							 
							
							... 
							
							
							
							* support win vnni link
* fix style
* fix style
* use isa_checker
* fix
* typo
* fix
* update 
							
						 
						
							2023-07-18 16:43:45 +08:00