| File | Latest commit | Date |
| --- | --- | --- |
| __init__.py | optimize llama npu perf (#11426) | 2024-06-25 17:43:20 +08:00 |
| baichuan.py | fix baichuan (#11606) | 2024-07-18 09:43:36 +08:00 |
| chatglm.py | fix chatglm3 npu output (#11590) | 2024-07-16 18:16:30 +08:00 |
| chatglm4.py | support npu glm4 (#11539) | 2024-07-09 15:46:49 +08:00 |
| common.py | Add lm_head optimization on NPU (#11903) | 2024-08-23 15:51:07 +08:00 |
| convert.py | Add experimental support of fused decoder layer for llama2 (#11768) | 2024-08-13 14:41:36 +08:00 |
| convert_mp.py | update minicpm to meet latest refactor (#11937) | 2024-08-27 15:08:01 +08:00 |
| kv.py | separate prefill into a process (#11787) | 2024-08-19 17:53:36 +08:00 |
| linear.py | fix llama3-8b npu long input stuck (#11613) | 2024-07-18 11:08:17 +08:00 |
| llama.py | Add experimental support of fused decoder layer for llama2 (#11768) | 2024-08-13 14:41:36 +08:00 |
| llama_mp.py | Add lm_head optimization on NPU (#11903) | 2024-08-23 15:51:07 +08:00 |
| minicpm.py | add minicpm 1B/2B npu support (#11507) | 2024-07-04 16:31:04 +08:00 |
| minicpm_mp.py | [NPU] Add support for running mp minicpm model on npu (#11909) | 2024-08-26 17:52:55 +08:00 |
| mistral.py | add mistral npu support (#11523) | 2024-07-08 13:17:15 +08:00 |
| mp_models_base.py | Support qwen2-1.5b with fused decoderlayer optimization on NPU (#11888) | 2024-08-22 11:09:12 +08:00 |
| phi3.py | add npu sdp (#11562) | 2024-07-11 16:57:35 +08:00 |
| phi3_v.py | optimize phi3-v encoder npu performance and add multimodal example (#11553) | 2024-07-11 13:59:14 +08:00 |
| pipeline_parallel.py | Add experimental support of fused decoder layer for llama2 (#11768) | 2024-08-13 14:41:36 +08:00 |
| qwen2.py | add qwen2 npu support (#11504) | 2024-07-04 11:01:25 +08:00 |
| qwen2_mp.py | Add lm_head optimization on NPU (#11903) | 2024-08-23 15:51:07 +08:00 |
| stablelm.py | Optimize stablelm on NPU (#11512) | 2024-07-05 14:21:57 +08:00 |