| .. |
|
__init__.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
aquila.py
|
refactor attention_softmax (#12295)
|
2024-10-30 13:20:50 +08:00 |
|
baichuan.py
|
Fix several models based on sdp api change (#13075)
|
2025-04-15 11:13:12 +08:00 |
|
baichuan_m1.py
|
add basic support for Baichuan-M1-14B-Instruct (#12808)
|
2025-02-11 17:27:42 +08:00 |
|
bert.py
|
Update bert optimization to fit higher transformers/torch version (#13006)
|
2025-03-25 16:12:03 +08:00 |
|
bloom.py
|
refactor qwen2 and llama3 (#12587)
|
2024-12-20 13:25:25 +08:00 |
|
chatglm.py
|
Fix several models based on sdp api change (#13075)
|
2025-04-15 11:13:12 +08:00 |
|
chatglm2.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
chatglm4.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
chatglm4v.py
|
glm 4v 1st sdp for vision (#12904)
|
2025-02-28 13:23:27 +08:00 |
|
common.py
|
Refactor some functions to ipex_llm.transformers.models.common (#13091)
|
2025-04-18 11:15:43 +08:00 |
|
decilm.py
|
fix llama related import (#12611)
|
2024-12-25 16:23:52 +08:00 |
|
deepseek.py
|
Refactor some functions to ipex_llm.transformers.models.common (#13091)
|
2025-04-18 11:15:43 +08:00 |
|
deepseek_v3.py
|
LLM: Support hybrid convert for DeepSeek V3/R1 (#12834)
|
2025-02-19 11:31:19 +08:00 |
|
gemma.py
|
refactor attention_softmax (#12295)
|
2024-10-30 13:20:50 +08:00 |
|
gemma2.py
|
optimize minicpm3 again (#12047)
|
2024-09-10 14:19:57 +08:00 |
|
glm.py
|
Refactor some functions to ipex_llm.transformers.models.common (#13091)
|
2025-04-18 11:15:43 +08:00 |
|
gpt2.py
|
refactor mllama, gpt2 and internvl (#12602)
|
2024-12-24 14:18:31 +08:00 |
|
gptbigcode.py
|
Fix Starcoder issue on CPU on transformers 4.36+ (#11190)
|
2024-06-04 10:05:40 -07:00 |
|
gptneox.py
|
refactor ot remove old rope usage (#12224)
|
2024-10-17 17:06:09 +08:00 |
|
internlm.py
|
refactor to simplify following upgrade 2 (#12685)
|
2025-01-10 09:29:03 +08:00 |
|
internvl.py
|
refactor mllama, gpt2 and internvl (#12602)
|
2024-12-24 14:18:31 +08:00 |
|
janus.py
|
support and optimize janus pro (#12813)
|
2025-02-12 15:07:24 +08:00 |
|
llama.py
|
Fix deepseek coder with linear rope type support on GPU (#12709)
|
2025-01-15 21:12:34 +08:00 |
|
minicpm.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
minicpm3.py
|
optimize attention part of moonlight-14B-A3B (#12886)
|
2025-02-25 09:38:13 +08:00 |
|
minicpmv.py
|
also convert SdpaAttention in optimize_model (#12673)
|
2025-01-08 16:48:03 +08:00 |
|
mistral.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
mllama.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
mpt.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
phi.py
|
refactor attention_softmax (#12295)
|
2024-10-30 13:20:50 +08:00 |
|
phi3.py
|
refactor to simplify following upgrade 2 (#12685)
|
2025-01-10 09:29:03 +08:00 |
|
phixtral.py
|
refactor to reduce old rope usage (#12219)
|
2024-10-17 14:45:09 +08:00 |
|
qwen.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
qwen2.py
|
refactor to simplify following upgrade 2 (#12685)
|
2025-01-10 09:29:03 +08:00 |
|
qwen2_5_omni.py
|
add audio optimization for qwen2.5-omni (#13037)
|
2025-04-07 17:20:26 +08:00 |
|
qwen2_moe.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
qwen2_vl.py
|
fix qwen2 vl (#12798)
|
2025-02-10 13:25:53 +08:00 |
|
qwen_vl.py
|
Fix several models based on sdp api change (#13075)
|
2025-04-15 11:13:12 +08:00 |
|
rwkv4.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
rwkv5.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
sd.py
|
refactor device check and remove cohere/mixtral support (#12659)
|
2025-01-07 11:15:51 +08:00 |
|
stablelm.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
starcoder2.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |
|
utils.py
|
Deepseek kv / sdp support (#13068)
|
2025-04-11 11:26:15 +08:00 |
|
whisper.py
|
add support and optimization for minicpmo audio part (#12716)
|
2025-01-16 16:39:00 +08:00 |
|
yuan.py
|
update quantize kv cache condition (#12681)
|
2025-01-09 15:23:04 +08:00 |