Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								10e480ee96 
								
							 
						 
						
							
							
								
								refactor internlm and internlm2 ( #11274 )  
							
							 
							
							
							
						 
						
							2024-06-11 14:19:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fac49f15e3 
								
							 
						 
						
							
							
								
								Remove manual importing ipex in all-in-one benchmark ( #11272 )  
							
							 
							
							
							
						 
						
							2024-06-11 09:32:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								70b17c87be 
								
							 
						 
						
							
							
								
								Merge multiple batches ( #11264 )  
							
							 
							
							... 
							
							
							
							* add merge steps
* move to pr mode
* remove build + add merge.py
* add tohtml and change cp
* change test_batch folder path
* change merge_temp path
* change to html folder
* revert
* change place
* revert 437
* revert space
---------
Co-authored-by: Yishuo Wang <yishuo.wang@intel.com> 
							
						 
						
							2024-06-07 18:38:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4b07712fd8 
								
							 
						 
						
							
							
								
								LLM: Fix vLLM CPU model convert mismatch ( #11254 )  
							
							 
							
							... 
							
							
							
							Fix vLLM CPU model convert mismatch. 
							
						 
						
							2024-06-07 15:54:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								42fab480ea 
								
							 
						 
						
							
							
								
								support stablelm2 12b ( #11265 )  
							
							 
							
							
							
						 
						
							2024-06-07 15:46:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dbc3c2d72d 
								
							 
						 
						
							
							
								
								glm4 sdp ( #11253 )  
							
							 
							
							... 
							
							
							
							* glm4 sdp
* fix style
* update comment 
							
						 
						
							2024-06-07 15:42:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								151fcf37bb 
								
							 
						 
						
							
							
								
								check device name in use_flash_attention ( #11263 )  
							
							 
							
							
							
						 
						
							2024-06-07 15:07:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2623944604 
								
							 
						 
						
							
							
								
								qwen2 sdpa small fix ( #11261 )  
							
							 
							
							
							
						 
						
							2024-06-07 14:42:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ea0d03fd28 
								
							 
						 
						
							
							
								
								Refactor baichuan1 7B and 13B ( #11258 )  
							
							 
							
							
							
						 
						
							2024-06-07 14:29:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1aa9c9597a 
								
							 
						 
						
							
							
								
								Avoid duplicate import in IPEX auto importer ( #11227 )  
							
							 
							
							... 
							
							
							
							* Add custom import to avoid ipex duplicate importing
* Add scope limitation 
							
						 
						
							2024-06-07 14:08:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6f2684e5c9 
								
							 
						 
						
							
							
								
								Update pp llama.py to save memory ( #11233 )  
							
							 
							
							
							
						 
						
							2024-06-07 13:18:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ef8e9b2ecd 
								
							 
						 
						
							
							
								
								Refactor qwen2 moe ( #11244 )  
							
							 
							
							
							
						 
						
							2024-06-07 13:14:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7b753dc8ca 
								
							 
						 
						
							
							
								
								Update sample output for HF Qwen2 GPU and CPU ( #11257 )  
							
							 
							
							
							
						 
						
							2024-06-07 11:36:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b7948671de 
								
							 
						 
						
							
							
								
								[WIP] Add look up table in 1st token stage ( #11193 )  
							
							 
							
							... 
							
							
							
							* lookuptb 
							
						 
						
							2024-06-07 10:51:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8c36b5bdde 
								
							 
						 
						
							
							
								
								Add qwen2 example ( #11252 )  
							
							 
							
							... 
							
							
							
							* Add GPU example for Qwen2
* Update comments in README
* Update README for Qwen2 GPU example
* Add CPU example for Qwen2
Sample Output under README pending
* Update generate.py and README for CPU Qwen2
* Update GPU example for Qwen2
* Small update
* Small fix
* Add Qwen2 table
* Update README for Qwen2 CPU and GPU
Update sample output under README
---------
Co-authored-by: Zijie Li <michael20001122@gmail.com> 
							
						 
						
							2024-06-07 10:29:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								85df5e7699 
								
							 
						 
						
							
							
								
								fix nightly perf test ( #11251 )  
							
							 
							
							
							
						 
						
							2024-06-07 09:33:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2f809116e2 
								
							 
						 
						
							
							
								
								optimize Chatglm4 ( #11239 )  
							
							 
							
							... 
							
							
							
							* chatglm4
* update
* update
* add rms norm
* chatglm4 
							
						 
						
							2024-06-06 18:25:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b6234eb4e2 
								
							 
						 
						
							
							
								
								Add task in allinone ( #11226 )  
							
							 
							
							... 
							
							
							
							* add task
* update prompt
* modify typos
* add more cases in summarize
* Make the summarize & QA prompt preprocessing as a util function 
							
						 
						
							2024-06-06 17:22:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2e4ccd541c 
								
							 
						 
						
							
							
								
								fix qwen2 cpu ( #11240 )  
							
							 
							
							
							
						 
						
							2024-06-06 16:24:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e738ec38f4 
								
							 
						 
						
							
							
								
								disable quantize kv in specific qwen model ( #11238 )  
							
							 
							
							
							
						 
						
							2024-06-06 14:08:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c4e5806e01 
								
							 
						 
						
							
							
								
								add latest optimization in starcoder2 ( #11236 )  
							
							 
							
							
							
						 
						
							2024-06-06 14:02:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ba27e750b1 
								
							 
						 
						
							
							
								
								refactor yuan2 ( #11235 )  
							
							 
							
							
							
						 
						
							2024-06-06 13:17:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6be24fdd28 
								
							 
						 
						
							
							
								
								OSPDT: add tpp licenses ( #11165 )  
							
							 
							
							... 
							
							
							
							* add tpp licenses
* add licenses
* add licenses
* delete mitchellh-mapstructure license
* delete stb-image public domain license
* add README.md
* remove core-xe related licenses 
							
						 
						
							2024-06-06 10:59:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								09c6780d0c 
								
							 
						 
						
							
							
								
								phi-2 transformers 4.37 ( #11161 )  
							
							 
							
							... 
							
							
							
							* phi-2 transformers 4.37 
							
						 
						
							2024-06-05 13:36:41 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f6d5c6af78 
								
							 
						 
						
							
							
								
								fix issue 1407 ( #11171 )  
							
							 
							
							
							
						 
						
							2024-06-05 13:35:57 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bfa1367149 
								
							 
						 
						
							
							
								
								Add CPU and GPU example for MiniCPM ( #11202 )  
							
							 
							
							... 
							
							
							
							* Change installation address
Change former address: "https://docs.conda.io/en/latest/miniconda.html#" to new address: "https://conda-forge.org/download/" for 63 occurrences under python\llm\example
* Change Prompt
Change "Anaconda Prompt" to "Miniforge Prompt" for 1 occurrence
* Create and update model minicpm
* Update model minicpm
Update model minicpm under GPU/PyTorch-Models
* Update readme and generate.py
change "prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)" and delete "pip install transformers==4.37.0"
* Update comments for minicpm GPU
Update comments for generate.py at minicpm GPU
* Add CPU example for MiniCPM
* Update minicpm README for CPU
* Update README for MiniCPM and Llama3
* Update Readme for Llama3 CPU Pytorch
* Update and fix comments for MiniCPM 
							
						 
						
							2024-06-05 18:09:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								af96579c76 
								
							 
						 
						
							
							
								
								Update installation guide for pipeline parallel inference ( #11224 )  
							
							 
							
							... 
							
							
							
							* Update installation guide for pipeline parallel inference
* Small fix
* further fix
* Small fix
* Small fix
* Update based on comments
* Small fix
* Small fix
* Small fix 
							
						 
						
							2024-06-05 17:54:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ed67435491 
								
							 
						 
						
							
							
								
								Support Fp6 k in ipex-llm ( #11222 )  
							
							 
							
							... 
							
							
							
							* support fp6_k
* support fp6_k
* remove
* fix style 
							
						 
						
							2024-06-05 17:34:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a6674f5bce 
								
							 
						 
						
							
							
								
								Fix should_use_fuse_rope error of Qwen1.5-MoE-A2.7B-Chat  ( #11216 )  
							
							 
							
							
							
						 
						
							2024-06-05 15:56:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								231b968aba 
								
							 
						 
						
							
							
								
								Modify the check_results.py to support batch 2&4 ( #11133 )  
							
							 
							
							... 
							
							
							
							* add batch 2&4 and exclude to perf_test
* modify the perf-test&437 yaml
* modify llm_performance_test.yml
* remove batch 4
* modify check_results.py to support batch 2&4
* change the batch_size format
* remove genxir
* add str(batch_size)
* change actual_test_casese in check_results file to support batch_size
* change html highlight
* less models to test html and html_path
* delete the moe model
* split batch html
* split
* use installing from pypi
* use installing from pypi - batch2
* revert cpp
* revert cpp
* merge two jobs into one, test batch_size in one job
* merge two jobs into one, test batch_size in one job
* change file directory in workflow
* try catch deal with odd file without batch_size
* modify pandas version
* change the dir
* organize the code
* organize the code
* remove Qwen-MOE
* modify based on feedback
* modify based on feedback
* modify based on second round of feedback
* modify based on second round of feedback + change run-arc.sh mode
* modify based on second round of feedback + revert config
* modify based on second round of feedback + revert config
* modify based on second round of feedback + remove comments
* modify based on second round of feedback + remove comments
* modify based on second round of feedback + revert arc-perf-test
* modify based on third round of feedback
* change error type
* change error type
* modify check_results.html
* split batch into two folders
* add all models
* move csv_name
* revert pr test
* revert pr test
---------
Co-authored-by: Yishuo Wang <yishuo.wang@intel.com> 
							
						 
						
							2024-06-05 15:04:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								566691c5a3 
								
							 
						 
						
							
							
								
								quantized attention forward for minicpm ( #11200 )  
							
							 
							
							... 
							
							
							
							* quantized minicpm
* fix style check 
							
						 
						
							2024-06-05 09:15:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bb83bc23fd 
								
							 
						 
						
							
							
								
								Fix Starcoder issue on CPU on transformers 4.36+ ( #11190 )  
							
							 
							
							... 
							
							
							
							* fix starcoder for sdpa
* update
* style 
							
						 
						
							2024-06-04 10:05:40 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f93664147c 
								
							 
						 
						
							
							
								
								Update config.yaml ( #11208 )  
							
							 
							
							... 
							
							
							
							* update config.yaml
* fix
* minor
* style 
							
						 
						
							2024-06-04 19:58:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ac3d53ff5d 
								
							 
						 
						
							
							
								
								LLM: Fix vLLM CPU version error ( #11206 )  
							
							 
							
							... 
							
							
							
							Fix vLLM CPU version error 
							
						 
						
							2024-06-04 19:10:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1dde204775 
								
							 
						 
						
							
							
								
								update q6k ( #11205 )  
							
							 
							
							
							
						 
						
							2024-06-04 17:14:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ce3f08b25a 
								
							 
						 
						
							
							
								
								Fix IPEX auto importer ( #11192 )  
							
							 
							
							... 
							
							
							
							* Fix ipex auto importer with Python builtins.
* Raise errors if the user imports ipex manually before importing ipex_llm. Do nothing if they import ipex after importing ipex_llm.
* Remove import ipex in examples. 
							
						 
						
							2024-06-04 16:57:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								711fa0199e 
								
							 
						 
						
							
							
								
								Fix fp6k phi3 ppl core dump ( #11204 )  
							
							 
							
							
							
						 
						
							2024-06-04 16:44:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f02f097002 
								
							 
						 
						
							
							
								
								Fix vLLM version in CPU/vLLM-Serving example README ( #11201 )  
							
							 
							
							
							
						 
						
							2024-06-04 15:56:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6454655dcc 
								
							 
						 
						
							
							
								
								use sdp in baichuan2 13b ( #11198 )  
							
							 
							
							
							
						 
						
							2024-06-04 15:39:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d90cd977d0 
								
							 
						 
						
							
							
								
								refactor stablelm ( #11195 )  
							
							 
							
							
							
						 
						
							2024-06-04 13:14:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a644e9409b 
								
							 
						 
						
							
							
								
								Miniconda/Anaconda -> Miniforge update in examples ( #11194 )  
							
							 
							
							... 
							
							
							
							* Change installation address
Change former address: "https://docs.conda.io/en/latest/miniconda.html#" to new address: "https://conda-forge.org/download/" for 63 occurrences under python\llm\example
* Change Prompt
Change "Anaconda Prompt" to "Miniforge Prompt" for 1 occurrence 
							
						 
						
							2024-06-04 10:14:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5f13700c9f 
								
							 
						 
						
							
							
								
								optimize Minicpm ( #11189 )  
							
							 
							
							... 
							
							
							
							* minicpm optimize
* update 
							
						 
						
							2024-06-03 18:28:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								15a6205790 
								
							 
						 
						
							
							
								
								Fix LoRA tokenizer for Llama and chatglm ( #11186 )  
							
							 
							
							... 
							
							
							
							* Set pad_token to eos_token if it's None. Otherwise, use model config. 
							
						 
						
							2024-06-03 15:35:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3eb13ccd8c 
								
							 
						 
						
							
							
								
								LLM: fix input length condition in deepspeed all-in-one benchmark. ( #11185 )  
							
							 
							
							
							
						 
						
							2024-06-03 10:05:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								401013a630 
								
							 
						 
						
							
							
								
								Remove chatglm_C Module to Eliminate LGPL Dependency ( #11178 )  
							
							 
							
							... 
							
							
							
							* remove chatglm_C.**.pyd to solve ngsolve weak copyright vuln
* fix style check error
* remove chatglm native int4 from langchain 
							
						 
						
							2024-05-31 17:03:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								50b5f4476f 
								
							 
						 
						
							
							
								
								update q4k convert ( #11179 )  
							
							 
							
							
							
						 
						
							2024-05-31 11:36:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c0f1be6aea 
								
							 
						 
						
							
							
								
								Fix pp logic ( #11175 )  
							
							 
							
							... 
							
							
							
							* only send non-None batch and rank1-n sending first
* always send first 
							
						 
						
							2024-05-30 16:40:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4127b99ed6 
								
							 
						 
						
							
							
								
								Fix null pointer dereferences error. ( #11125 )  
							
							 
							
							... 
							
							
							
							* delete unused function on tgi_server
* update
* update
* fix style 
							
						 
						
							2024-05-30 16:16:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								50ee004ac7 
								
							 
						 
						
							
							
								
								Fix vllm condition ( #11169 )  
							
							 
							
							... 
							
							
							
							* add use-vllm
* done
* fix style
* fix done 
							
						 
						
							2024-05-30 15:23:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dcbf4d3d0a 
								
							 
						 
						
							
							
								
								Add phi-3-vision example ( #11156 )  
							
							 
							
							... 
							
							
							
							* Add phi-3-vision example (HF-Automodels)
* fix
* fix
* fix
* Add phi-3-vision CPU example (HF-Automodels)
* add in readme
* fix
* fix
* fix
* fix
* use fp8 for gpu example
* remove eval 
							
						 
						
							2024-05-30 10:02:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								93146b9433 
								
							 
						 
						
							
							
								
								Reconstruct Speculative Decoding example directory ( #11136 )  
							
							 
							
							... 
							
							
							
							* update
* update
* update 
							
						 
						
							2024-05-29 13:15:27 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2299698b45 
								
							 
						 
						
							
							
								
								Refine Pipeline Parallel FastAPI example ( #11168 )  
							
							 
							
							
							
						 
						
							2024-05-29 17:16:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9bfbf78bf4 
								
							 
						 
						
							
							
								
								update api usage of xe_batch & fp16 ( #11164 )  
							
							 
							
							... 
							
							
							
							* update api usage
* update setup.py 
							
						 
						
							2024-05-29 15:15:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e29e2f1c78 
								
							 
						 
						
							
							
								
								Support new fp8 e4m3 ( #11158 )  
							
							 
							
							
							
						 
						
							2024-05-29 14:27:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8e25de1126 
								
							 
						 
						
							
							
								
								LLM: Add codegeex2 example ( #11143 )  
							
							 
							
							... 
							
							
							
							* add codegeex example
* update
* update cpu
* add GPU
* add gpu
* update readme 
							
						 
						
							2024-05-29 10:00:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								751e1a4e29 
								
							 
						 
						
							
							
								
								Fix concurrent issue in autoTP streaming. ( #11150 )  
							
							 
							
							... 
							
							
							
							* add benchmark test
* update 
							
						 
						
							2024-05-29 08:22:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bc5008f0d5 
								
							 
						 
						
							
							
								
								disable sdp_causal in phi-3 to fix overflow ( #11157 )  
							
							 
							
							
							
						 
						
							2024-05-28 17:25:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								33852bd23e 
								
							 
						 
						
							
							
								
								Refactor pipeline parallel device config ( #11149 )  
							
							 
							
							... 
							
							
							
							* refactor pipeline parallel device config
* meet comments
* update example
* add warnings and update code doc 
							
						 
						
							2024-05-28 16:52:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								62b2d8af6b 
								
							 
						 
						
							
							
								
								Add lookahead in all-in-one ( #11142 )  
							
							 
							
							... 
							
							
							
							* add lookahead in allinone
* delete save to csv in run_transformer_int4_gpu
* change lookup to lookahead
* fix the error of add model.peak_memory
* Set transformer_int4_gpu as the default option
* add comment of transformer_int4_fp16_lookahead_gpu 
							
						 
						
							2024-05-28 15:39:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b44cf405e2 
								
							 
						 
						
							
							
								
								Refine Pipeline-Parallel-Fastapi example README ( #11155 )  
							
							 
							
							
							
						 
						
							2024-05-28 15:18:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d307622797 
								
							 
						 
						
							
							
								
								fix first token sdp with batch ( #11153 )  
							
							 
							
							
							
						 
						
							2024-05-28 15:03:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3464440839 
								
							 
						 
						
							
							
								
								fix qwen import error ( #11154 )  
							
							 
							
							
							
						 
						
							2024-05-28 14:50:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								25b6402315 
								
							 
						 
						
							
							
								
								Add Windows GPU unit test ( #11050 )  
							
							 
							
							... 
							
							
							
							* Add Windows GPU UT
* temporarily remove ut on arc
* retry
* retry
* retry
* fix
* retry
* retry
* fix
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* fix
* retry
* retry
* retry
* retry
* retry
* retry
* merge into single workflow
* retry inference test
* retry
* retrigger
* try to fix inference test
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* retry
* check lower_bound
* retry
* retry
* try example test
* try fix example test
* retry
* fix
* separate function into shell script
* remove cygpath
* try remove all cygpath
* retry
* retry
* Revert "try remove all cygpath"
This reverts commit 7ceeff3e48f08429062ecef548c1a3ad3488756f.
* Revert "retry"
This reverts commit 40ea2457843bff6991b8db24316cde5de1d35418.
* Revert "retry"
This reverts commit 817d0db3e5aec3bd449d3deaf4fb01d3ecfdc8a3.
* enable ut
* fix
* retrigger
* retrigger
* update download url
* fix
* fix
* retry
* add comment
* fix 
							
						 
						
							2024-05-28 13:29:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b6b70d1ba0 
								
							 
						 
						
							
							
								
								Divide core-xe packages ( #11131 )  
							
							 
							
							... 
							
							
							
							* temp
* add batch
* fix style
* update package name
* fix style
* add workflow
* use temp version to run uts
* trigger performance test
* trigger win igpu perf
* revert workflow & setup 
							
						 
						
							2024-05-28 12:00:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c9168b85b7 
								
							 
						 
						
							
							
								
								Fix error during merging adapter ( #11145 )  
							
							 
							
							
							
						 
						
							2024-05-27 19:41:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								daf7b1cd56 
								
							 
						 
						
							
							
								
								[Docker] Fix image using two cards error ( #11144 )  
							
							 
							
							... 
							
							
							
							* fix all
* done 
							
						 
						
							2024-05-27 16:20:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5c8ccf0ba9 
								
							 
						 
						
							
							
								
								LLM: Add Pipeline-Parallel-FastAPI example ( #10917 )  
							
							 
							
							... 
							
							
							
							Add multi-stage Pipeline-Parallel-FastAPI example
---------
Co-authored-by: hzjane <a1015616934@qq.com> 
							
						 
						
							2024-05-27 14:46:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d550af957a 
								
							 
						 
						
							
							
								
								fix security issue of eagle ( #11140 )  
							
							 
							
							... 
							
							
							
							* fix security issue of eagle
* small fix 
							
						 
						
							2024-05-27 10:15:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								367de141f2 
								
							 
						 
						
							
							
								
								Fix mixtral-8x7b with transformers=4.37.0 ( #11132 )  
							
							 
							
							
							
						 
						
							2024-05-27 09:50:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jean Yu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ab476c7fe2 
								
							 
						 
						
							
							
								
								Eagle Speculative Sampling examples ( #11104 )  
							
							 
							
							... 
							
							
							
							* Eagle Speculative Sampling examples
* rm multi-gpu and ray content
* updated README to include Arc A770 
							
						 
						
							2024-05-24 11:13:43 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fabc395d0d 
								
							 
						 
						
							
							
								
								add langchain vllm interface ( #11121 )  
							
							 
							
							... 
							
							
							
							* done
* fix
* fix
* add vllm
* add langchain vllm examples
* add docs
* temp 
							
						 
						
							2024-05-24 17:19:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								63e95698eb 
								
							 
						 
						
							
							
								
								[LLM]Reopen autotp generate_stream ( #11120 )  
							
							 
							
							... 
							
							
							
							* reopen autotp generate_stream
* fix style error
* update 
							
						 
						
							2024-05-24 17:16:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1dc680341b 
								
							 
						 
						
							
							
								
								fix phi-3-vision import ( #11129 )  
							
							 
							
							
							
						 
						
							2024-05-24 15:57:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7f772c5a4f 
								
							 
						 
						
							
							
								
								Add half precision for fastchat models ( #11130 )  
							
							 
							
							
							
						 
						
							2024-05-24 15:41:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								65f4212f89 
								
							 
						 
						
							
							
								
								Fix qwen 14b run into register attention fwd ( #11128 )  
							
							 
							
							... 
							
							
							
							* fix qwen 14b 
							
						 
						
							2024-05-24 14:45:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								373f9e6c79 
								
							 
						 
						
							
							
								
								add ipex-llm-init.bat for Windows ( #11082 )  
							
							 
							
							... 
							
							
							
							* add ipex-llm-init.bat for Windows
* update setup.py 
							
						 
						
							2024-05-24 14:26:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								120a0035ac 
								
							 
						 
						
							
							
								
								Fix type mismatch in eval for Baichuan2 QLora example ( #11117 )  
							
							 
							
							... 
							
							
							
							* During the evaluation stage, Baichuan2 will raise type mismatch when training with bfloat16. Fix this issue by modifying modeling_baichuan.py. Add doc about how to modify this file. 
							
						 
						
							2024-05-24 14:14:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1db9d9a63b 
								
							 
						 
						
							
							
								
								optimize internlm2 xcomposer again ( #11124 )  
							
							 
							
							
							
						 
						
							2024-05-24 13:44:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9372ce87ce 
								
							 
						 
						
							
							
								
								fix internlm xcomposer2 fp16 ( #11123 )  
							
							 
							
							
							
						 
						
							2024-05-24 11:03:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								011b9faa5c 
								
							 
						 
						
							
							
								
								LLM: unify baichuan2-13b alibi mask dtype with model dtype. ( #11107 )  
							
							 
							
							... 
							
							
							
							* LLM: unify alibi mask dtype.
* fix comments. 
							
						 
						
							2024-05-24 10:27:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0a06a6e1d4 
								
							 
						 
						
							
							
								
								Update tests for transformers 4.36 ( #10858 )  
							
							 
							
							... 
							
							
							
							* update unit test
* update
* update
* update
* update
* update
* fix gpu attention test
* update
* update
* update
* update
* update
* update
* update example test
* replace replit code
* update
* update
* update
* update
* set safe_serialization false
* perf test
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* delete
* update
* update
* update
* update
* update
* update
* revert
* update 
							
						 
						
							2024-05-24 10:26:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b3f6faa038 
								
							 
						 
						
							
							
								
								LLM: Add CPU vLLM entrypoint ( #11083 )  
							
							 
							
							... 
							
							
							
							Add CPU vLLM entrypoint and update CPU vLLM serving example. 
							
						 
						
							2024-05-24 09:16:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								797dbc48b8 
								
							 
						 
						
							
							
								
								fix phi-2 and phi-3 convert ( #11116 )  
							
							 
							
							
							
						 
						
							2024-05-23 17:37:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								37b98a531f 
								
							 
						 
						
							
							
								
								support running internlm xcomposer2 on gpu and add sdp optimization ( #11115 )  
							
							 
							
							
							
						 
						
							2024-05-23 17:26:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c5e8b90c8d 
								
							 
						 
						
							
							
								
								Add Qwen register attention implementation ( #11110 )  
							
							 
							
							... 
							
							
							
							* qwen_register 
							
						 
						
							2024-05-23 17:17:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0e53f20edb 
								
							 
						 
						
							
							
								
								support running internlm-xcomposer2 on cpu ( #11111 )  
							
							 
							
							
							
						 
						
							2024-05-23 16:36:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d36b41d59e 
								
							 
						 
						
							
							
								
								Add setuptools limitation for ipex-llm[xpu] ( #11102 )  
							
							 
							
							... 
							
							
							
							* Add setuptools limitation for ipex-llm[xpu]
* llamaindex option update 
							
						 
						
							2024-05-22 18:20:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cd4dff09ee 
								
							 
						 
						
							
							
								
								support phi-3 vision ( #11101 )  
							
							 
							
							
							
						 
						
							2024-05-22 17:43:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								15d906a97b 
								
							 
						 
						
							
							
								
								Update linux igpu run script ( #11098 )  
							
							 
							
							... 
							
							
							
							* update run script 
							
						 
						
							2024-05-22 17:18:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f63172ef63 
								
							 
						 
						
							
							
								
								Align ppl with llama.cpp ( #11055 )  
							
							 
							
							... 
							
							
							
							* update script
* remove
* add header
* update readme 
							
						 
						
							2024-05-22 16:43:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f6c9ffe4dc 
								
							 
						 
						
							
							
								
								Add WANDB_MODE and HF_HUB_OFFLINE to XPU finetune README ( #11097 )  
							
							 
							
							... 
							
							
							
							* Add WANDB_MODE=offline to avoid multi-GPUs finetune errors.
* Add HF_HUB_OFFLINE=1 to avoid Hugging Face related errors. 
							
						 
						
							2024-05-22 15:20:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								584439e498 
								
							 
						 
						
							
							
								
								update homepage url for ipex-llm ( #11094 )  
							
							 
							
							... 
							
							
							
							* update homepage url
* Update python version to 3.11
* Update long description 
							
						 
						
							2024-05-22 11:10:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								71bcd18f44 
								
							 
						 
						
							
							
								
								fix qwen vl ( #11090 )  
							
							 
							
							
							
						 
						
							2024-05-21 18:40:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f00625f9a4 
								
							 
						 
						
							
							
								
								refactor qwen2 ( #11087 )  
							
							 
							
							
							
						 
						
							2024-05-21 16:53:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								492ed3fd41 
								
							 
						 
						
							
							
								
								Add verified models to GPU finetune README ( #11088 )  
							
							 
							
							... 
							
							
							
							* Add verified models to GPU finetune README 
							
						 
						
							2024-05-21 15:49:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1210491748 
								
							 
						 
						
							
							
								
								ChatGLM3, Baichuan2 and Qwen1.5 QLoRA example ( #11078 )  
							
							 
							
							... 
							
							
							
							* Add chatglm3, qwen15-7b and baichuan-7b QLoRA alpaca example
* Remove unnecessary tokenization setting. 
							
						 
						
							2024-05-21 15:29:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								842d6dfc2d 
								
							 
						 
						
							
							
								
								Further Modify CPU example ( #11081 )  
							
							 
							
							... 
							
							
							
							* modify CPU example
* update 
							
						 
						
							2024-05-21 13:55:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d830a63bb7 
								
							 
						 
						
							
							
								
								refactor qwen ( #11074 )  
							
							 
							
							
							
						 
						
							2024-05-20 18:08:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								74950a152a 
								
							 
						 
						
							
							
								
								Fix tgi_api_server error file name ( #11075 )  
							
							 
							
							
							
						 
						
							2024-05-20 16:48:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4e97047d70 
								
							 
						 
						
							
							
								
								fix baichuan2 13b fp16 ( #11071 )  
							
							 
							
							
							
						 
						
							2024-05-20 11:21:20 +08:00