| .. |
|
__init__.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
aquila.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
baichuan.py
|
use new sdp and fp32 sdp (#11007)
|
2024-05-14 14:29:18 +08:00 |
|
baichuan2.py
|
LLM: unify baichuan2-13b alibi mask dtype with model dtype. (#11107)
|
2024-05-24 10:27:53 +08:00 |
|
bert.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
bloom.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
chatglm.py
|
fix chatglm run error (#11045)
|
2024-05-16 15:39:18 +08:00 |
|
chatglm2.py
|
Use new sdp again (#11025)
|
2024-05-16 09:33:34 +08:00 |
|
chatglm2_32k.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
cohere.py
|
use new sdp and fp32 sdp (#11007)
|
2024-05-14 14:29:18 +08:00 |
|
decilm.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
falcon.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
gemma.py
|
remove rms norm copy (#10793)
|
2024-04-19 13:57:48 +08:00 |
|
gptbigcode.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
gptj.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
gptneox.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
internlm.py
|
support running internlm xcomposer2 on gpu and add sdp optimization (#11115)
|
2024-05-23 17:26:24 +08:00 |
|
llama.py
|
Add support for llama2 quantize_kv with transformers 4.38.0 (#11054)
|
2024-05-16 22:23:39 +08:00 |
|
mistral.py
|
use new sdp and fp32 sdp (#11007)
|
2024-05-14 14:29:18 +08:00 |
|
mixtral.py
|
use new sdp and fp32 sdp (#11007)
|
2024-05-14 14:29:18 +08:00 |
|
mpt.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
phi.py
|
remove new_layout parameter (#10906)
|
2024-04-29 10:31:50 +08:00 |
|
phi3.py
|
support phi-3 vision (#11101)
|
2024-05-22 17:43:50 +08:00 |
|
phixtral.py
|
Disable fast fused rope on UHD (#10780)
|
2024-04-18 10:03:53 +08:00 |
|
qwen.py
|
Add Qwen register attention implemention (#11110)
|
2024-05-23 17:17:45 +08:00 |
|
qwen2.py
|
refactor qwen2 (#11087)
|
2024-05-21 16:53:42 +08:00 |
|
qwen2_moe.py
|
Use new sdp again (#11025)
|
2024-05-16 09:33:34 +08:00 |
|
qwen_vl.py
|
fix qwen vl (#11090)
|
2024-05-21 18:40:29 +08:00 |
|
rwkv4.py
|
fix rwkv with pip installer (#10591)
|
2024-03-29 17:56:45 +08:00 |
|
rwkv5.py
|
disable rwkv5 fp16 (#10699)
|
2024-04-09 16:42:11 +08:00 |
|
stablelm.py
|
use new sdp and fp32 sdp (#11007)
|
2024-05-14 14:29:18 +08:00 |
|
starcoder2.py
|
remove new_layout parameter (#10906)
|
2024-04-29 10:31:50 +08:00 |
|
utils.py
|
refactor baichuan2-13b (#11064)
|
2024-05-17 16:25:30 +08:00 |
|
yuan.py
|
use new quantize kv (#10888)
|
2024-04-26 14:42:17 +08:00 |