| .. |
|
__init__.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
aquila.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
baichuan.py
|
remove new_layout parameter (#10906)
|
2024-04-29 10:31:50 +08:00 |
|
baichuan2.py
|
LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937)
|
2024-05-10 16:40:15 +08:00 |
|
bert.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
bloom.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
chatglm.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
chatglm2.py
|
LLM: update split tensor conditions. (#10872)
|
2024-04-30 17:07:21 +08:00 |
|
chatglm2_32k.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
cohere.py
|
LLM: Optimize cohere model (#10878)
|
2024-05-07 10:19:50 +08:00 |
|
decilm.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
falcon.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
gemma.py
|
remove rms norm copy (#10793)
|
2024-04-19 13:57:48 +08:00 |
|
gptbigcode.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
gptj.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
gptneox.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
internlm.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
llama.py
|
LLM: update split tensor conditions. (#10872)
|
2024-04-30 17:07:21 +08:00 |
|
mistral.py
|
LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937)
|
2024-05-10 16:40:15 +08:00 |
|
mixtral.py
|
LLM: Add mixtral speculative CPU example (#10830)
|
2024-04-23 10:05:51 +08:00 |
|
mpt.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
phi.py
|
remove new_layout parameter (#10906)
|
2024-04-29 10:31:50 +08:00 |
|
phi3.py
|
optimize phi3 1st token performance (#10981)
|
2024-05-10 17:33:46 +08:00 |
|
phixtral.py
|
Disable fast fused rope on UHD (#10780)
|
2024-04-18 10:03:53 +08:00 |
|
qwen.py
|
remove new_layout parameter (#10906)
|
2024-04-29 10:31:50 +08:00 |
|
qwen2.py
|
LLM: add long-context support for Qwen1.5-7B/Baichuan2-7B/Mistral-7B. (#10937)
|
2024-05-10 16:40:15 +08:00 |
|
qwen2_moe.py
|
use new quantize kv (#10888)
|
2024-04-26 14:42:17 +08:00 |
|
qwen_vl.py
|
Disable fast fused rope on UHD (#10780)
|
2024-04-18 10:03:53 +08:00 |
|
rwkv4.py
|
fix rwkv with pip installer (#10591)
|
2024-03-29 17:56:45 +08:00 |
|
rwkv5.py
|
disable rwkv5 fp16 (#10699)
|
2024-04-09 16:42:11 +08:00 |
|
stablelm.py
|
use new quantize kv (#10888)
|
2024-04-26 14:42:17 +08:00 |
|
starcoder2.py
|
remove new_layout parameter (#10906)
|
2024-04-29 10:31:50 +08:00 |
|
utils.py
|
optimize phi3 1st token performance (#10981)
|
2024-05-10 17:33:46 +08:00 |
|
yuan.py
|
use new quantize kv (#10888)
|
2024-04-26 14:42:17 +08:00 |