| .. |
|
__init__.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
aquila.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
baichuan.py
|
Refactor baichuan1 7B and 13B (#11258)
|
2024-06-07 14:29:20 +08:00 |
|
bert.py
|
Refactor bigdl.llm to ipex_llm (#24)
|
2024-03-22 15:41:21 +08:00 |
|
bloom.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
chatglm.py
|
add glm_sdpa back to fix chatglm-6b (#11313)
|
2024-06-14 10:31:43 +08:00 |
|
chatglm2.py
|
fix chatglm lookahead on ARC (#11320)
|
2024-06-14 16:26:11 +08:00 |
|
chatglm4.py
|
fix chatglm lookahead on ARC (#11320)
|
2024-06-14 16:26:11 +08:00 |
|
chatglm4v.py
|
glm-4v-9b support (#11327)
|
2024-06-17 13:52:37 +08:00 |
|
cohere.py
|
Fix should_use_fuse_rope error of Qwen1.5-MoE-A2.7B-Chat (#11216)
|
2024-06-05 15:56:10 +08:00 |
|
decilm.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
falcon.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
gemma.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
gptbigcode.py
|
Fix Starcoder issue on CPU on transformers 4.36+ (#11190)
|
2024-06-04 10:05:40 -07:00 |
|
gptj.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
gptneox.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
internlm.py
|
refactor internlm and internlm2 (#11274)
|
2024-06-11 14:19:19 +08:00 |
|
llama.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
minicpm.py
|
quantized attention forward for minicpm (#11200)
|
2024-06-05 09:15:25 +08:00 |
|
mistral.py
|
fix mistral for transformers>=4.39 (#11191)
|
2024-06-18 13:39:35 -07:00 |
|
mixtral.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
mpt.py
|
LLM: fix get env KV_CACHE_ALLOC_BLOCK_LENGTH type. (#10771)
|
2024-04-16 09:32:30 +08:00 |
|
phi.py
|
remove new_layout parameter (#10906)
|
2024-04-29 10:31:50 +08:00 |
|
phi3.py
|
Add phi-3 model support for pipeline parallel inference (#11334)
|
2024-06-17 17:44:24 +08:00 |
|
phixtral.py
|
Disable fast fused rope on UHD (#10780)
|
2024-04-18 10:03:53 +08:00 |
|
qwen.py
|
fix first token sdp with batch (#11153)
|
2024-05-28 15:03:06 +08:00 |
|
qwen2.py
|
Support PP for qwen1.5 (#11300)
|
2024-06-13 17:35:24 +08:00 |
|
qwen2_moe.py
|
Refactor qwen2 moe (#11244)
|
2024-06-07 13:14:54 +08:00 |
|
qwen_vl.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
rwkv4.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
rwkv5.py
|
Divide core-xe packages (#11131)
|
2024-05-28 12:00:18 +08:00 |
|
stablelm.py
|
support stablm2 12b (#11265)
|
2024-06-07 15:46:00 +08:00 |
|
starcoder2.py
|
add latest optimization in starcoder2 (#11236)
|
2024-06-06 14:02:17 +08:00 |
|
utils.py
|
refactor chatglm2/3 (#11290)
|
2024-06-13 12:22:58 +08:00 |
|
yuan.py
|
refactor yuan2 (#11235)
|
2024-06-06 13:17:54 +08:00 |