| File | Last commit message | Last commit date |
| --- | --- | --- |
| `__init__.py` | Refactor bigdl.llm to ipex_llm (#24) | 2024-03-22 15:41:21 +08:00 |
| `aquila.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `baichuan.py` | add comment | 2024-08-22 15:14:47 +08:00 |
| `bert.py` | Refactor bigdl.llm to ipex_llm (#24) | 2024-03-22 15:41:21 +08:00 |
| `bloom.py` | Divide core-xe packages (#11131) | 2024-05-28 12:00:18 +08:00 |
| `chatglm.py` | add glm_sdpa back to fix chatglm-6b (#11313) | 2024-06-14 10:31:43 +08:00 |
| `chatglm2.py` | Add chatglm2&3 fuse mlp (#12328) | 2024-11-04 18:04:41 +08:00 |
| `chatglm4.py` | Support performance mode of GLM4 model (#12401) | 2024-11-18 18:46:52 +08:00 |
| `chatglm4v.py` | optimize glm4v's vision part (#12346) | 2024-11-06 15:43:40 +08:00 |
| `cohere.py` | Fix cohere model on transformers>=4.41 (#11575) | 2024-07-17 17:18:59 -07:00 |
| `common.py` | optimize glm4v vision attention (#12369) | 2024-11-08 17:01:57 +08:00 |
| `decilm.py` | refactor ot remove old rope usage (#12224) | 2024-10-17 17:06:09 +08:00 |
| `falcon.py` | LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771) | 2024-04-16 09:32:30 +08:00 |
| `gemma.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `gemma2.py` | optimize minicpm3 again (#12047) | 2024-09-10 14:19:57 +08:00 |
| `gpt2.py` | Optimize gpt2 (#12259) | 2024-10-24 13:44:24 +08:00 |
| `gptbigcode.py` | Fix Starcoder issue on CPU on transformers 4.36+ (#11190) | 2024-06-04 10:05:40 -07:00 |
| `gptj.py` | LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771) | 2024-04-16 09:32:30 +08:00 |
| `gptneox.py` | refactor ot remove old rope usage (#12224) | 2024-10-17 17:06:09 +08:00 |
| `internlm.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `internvl.py` | optimzie qwen2-vl vision (#12203) | 2024-10-15 15:54:25 +08:00 |
| `llama.py` | refactor to reduce old rope usage (#12219) | 2024-10-17 14:45:09 +08:00 |
| `llama32.py` | llama 3.1/3.2 support compresskv (#12347) | 2024-11-06 17:33:43 +08:00 |
| `minicpm.py` | Update compresskv model forward type logic (#11868) | 2024-08-20 18:11:37 +08:00 |
| `minicpm3.py` | optimize minicpm3 kv cache (#12052) | 2024-09-10 16:51:21 +08:00 |
| `minicpmv.py` | optimize glm4v vision attention (#12369) | 2024-11-08 17:01:57 +08:00 |
| `mistral.py` | remove some useless code (#12035) | 2024-09-06 17:51:08 +08:00 |
| `mixtral.py` | Divide core-xe packages (#11131) | 2024-05-28 12:00:18 +08:00 |
| `mllama.py` | fix llama3.1/3.2 quantize kv check (#12302) | 2024-10-31 11:55:07 +08:00 |
| `mpt.py` | LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771) | 2024-04-16 09:32:30 +08:00 |
| `phi.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `phi3.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `phixtral.py` | refactor to reduce old rope usage (#12219) | 2024-10-17 14:45:09 +08:00 |
| `qwen.py` | fix first token sdp with batch (#11153) | 2024-05-28 15:03:06 +08:00 |
| `qwen2.py` | small improvement (#12359) | 2024-11-07 15:57:41 +08:00 |
| `qwen2_moe.py` | refactor merge_qkv and attention_softmax (#12213) | 2024-10-16 15:58:14 +08:00 |
| `qwen2_vl.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `qwen_vl.py` | Support pipeline parallel for qwen-vl (#11503) | 2024-07-04 18:03:57 +08:00 |
| `rwkv4.py` | Divide core-xe packages (#11131) | 2024-05-28 12:00:18 +08:00 |
| `rwkv5.py` | Divide core-xe packages (#11131) | 2024-05-28 12:00:18 +08:00 |
| `sd15.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `stablelm.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `starcoder2.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |
| `utils.py` | Optimize with new batch kernel when batch_size=1 on LNL (#12419) | 2024-11-21 16:21:35 +08:00 |
| `yuan.py` | refactor attention_softmax (#12295) | 2024-10-30 13:20:50 +08:00 |