ipex-llm

Yishuo Wang ad96f32ce0 optimize phi3 1st token performance (#10981 )	2024-05-10 17:33:46 +08:00
..
__init__.py	Refactor bigdl.llm to ipex_llm (#24 )	2024-03-22 15:41:21 +08:00
aquila.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
baichuan.py	remove new_layout parameter (#10906 )	2024-04-29 10:31:50 +08:00
baichuan2.py	LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937 )	2024-05-10 16:40:15 +08:00
bert.py	Refactor bigdl.llm to ipex_llm (#24 )	2024-03-22 15:41:21 +08:00
bloom.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
chatglm.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
chatglm2.py	LLM: update split tensor conditions. (#10872 )	2024-04-30 17:07:21 +08:00
chatglm2_32k.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
cohere.py	LLM: Optimize cohere model (#10878 )	2024-05-07 10:19:50 +08:00
decilm.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
falcon.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
gemma.py	remove rms norm copy (#10793 )	2024-04-19 13:57:48 +08:00
gptbigcode.py	Refactor bigdl.llm to ipex_llm (#24 )	2024-03-22 15:41:21 +08:00
gptj.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
gptneox.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
internlm.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
llama.py	LLM: update split tensor conditions. (#10872 )	2024-04-30 17:07:21 +08:00
mistral.py	LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937 )	2024-05-10 16:40:15 +08:00
mixtral.py	LLM: Add mixtral speculative CPU example (#10830 )	2024-04-23 10:05:51 +08:00
mpt.py	LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )	2024-04-16 09:32:30 +08:00
phi.py	remove new_layout parameter (#10906 )	2024-04-29 10:31:50 +08:00
phi3.py	optimize phi3 1st token performance (#10981 )	2024-05-10 17:33:46 +08:00
phixtral.py	Disable fast fused rope on UHD (#10780 )	2024-04-18 10:03:53 +08:00
qwen.py	remove new_layout parameter (#10906 )	2024-04-29 10:31:50 +08:00
qwen2.py	LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937 )	2024-05-10 16:40:15 +08:00
qwen2_moe.py	use new quantize kv (#10888 )	2024-04-26 14:42:17 +08:00
qwen_vl.py	Disable fast fused rope on UHD (#10780 )	2024-04-18 10:03:53 +08:00
rwkv4.py	fix rwkv with pip installer (#10591 )	2024-03-29 17:56:45 +08:00
rwkv5.py	disable rwkv5 fp16 (#10699 )	2024-04-09 16:42:11 +08:00
stablelm.py	use new quantize kv (#10888 )	2024-04-26 14:42:17 +08:00
starcoder2.py	remove new_layout parameter (#10906 )	2024-04-29 10:31:50 +08:00
utils.py	optimize phi3 1st token performance (#10981 )	2024-05-10 17:33:46 +08:00
yuan.py	use new quantize kv (#10888 )	2024-04-26 14:42:17 +08:00

__init__.py

Refactor bigdl.llm to ipex_llm (#24 )

2024-03-22 15:41:21 +08:00

aquila.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

baichuan.py

remove new_layout parameter (#10906 )

2024-04-29 10:31:50 +08:00

baichuan2.py

LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937 )

2024-05-10 16:40:15 +08:00

bert.py

Refactor bigdl.llm to ipex_llm (#24 )

2024-03-22 15:41:21 +08:00

bloom.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

chatglm.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

chatglm2.py

LLM: update split tensor conditions. (#10872 )

2024-04-30 17:07:21 +08:00

chatglm2_32k.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

cohere.py

LLM: Optimize cohere model (#10878 )

2024-05-07 10:19:50 +08:00

decilm.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

falcon.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

gemma.py

remove rms norm copy (#10793 )

2024-04-19 13:57:48 +08:00

gptbigcode.py

Refactor bigdl.llm to ipex_llm (#24 )

2024-03-22 15:41:21 +08:00

gptj.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

gptneox.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

internlm.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

llama.py

LLM: update split tensor conditions. (#10872 )

2024-04-30 17:07:21 +08:00

mistral.py

LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937 )

2024-05-10 16:40:15 +08:00

mixtral.py

LLM: Add mixtral speculative CPU example (#10830 )

2024-04-23 10:05:51 +08:00

mpt.py

LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771 )

2024-04-16 09:32:30 +08:00

phi.py

remove new_layout parameter (#10906 )

2024-04-29 10:31:50 +08:00

phi3.py

optimize phi3 1st token performance (#10981 )

2024-05-10 17:33:46 +08:00

phixtral.py

Disable fast fused rope on UHD (#10780 )

2024-04-18 10:03:53 +08:00

qwen.py

remove new_layout parameter (#10906 )

2024-04-29 10:31:50 +08:00

qwen2.py

LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937 )

2024-05-10 16:40:15 +08:00

qwen2_moe.py

use new quantize kv (#10888 )

2024-04-26 14:42:17 +08:00

qwen_vl.py

Disable fast fused rope on UHD (#10780 )

2024-04-18 10:03:53 +08:00

rwkv4.py

fix rwkv with pip installer (#10591 )

2024-03-29 17:56:45 +08:00

rwkv5.py

disable rwkv5 fp16 (#10699 )

2024-04-09 16:42:11 +08:00

stablelm.py

use new quantize kv (#10888 )

2024-04-26 14:42:17 +08:00

starcoder2.py

remove new_layout parameter (#10906 )

2024-04-29 10:31:50 +08:00

utils.py

optimize phi3 1st token performance (#10981 )

2024-05-10 17:33:46 +08:00

yuan.py

use new quantize kv (#10888 )

2024-04-26 14:42:17 +08:00