diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
index 6d59984d..0bc60479 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
@@ -6,7 +6,7 @@ repo_id:
   - 'internlm/internlm-chat-7b-8k'
   - 'Qwen/Qwen-7B-Chat'
   - 'BAAI/AquilaChat2-7B'
-  - '01-ai/Yi-6B'
+  # - '01-ai/Yi-6B'
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
   - 'WisdomShell/CodeShell-7B-Chat'
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml
index 2b829845..679a582b 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml
@@ -1,5 +1,7 @@
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - '01-ai/Yi-6B-Chat'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
index d4142ce8..0ac89516 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
@@ -6,7 +6,7 @@ repo_id:
   - 'internlm/internlm-chat-7b-8k'
   - 'Qwen/Qwen-7B-Chat'
   - 'BAAI/AquilaChat2-7B'
-  - '01-ai/Yi-6B'
+  # - '01-ai/Yi-6B'
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
   - 'WisdomShell/CodeShell-7B-Chat'
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml
index 2e2ec3c5..5051de78 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml
@@ -1,5 +1,7 @@
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - '01-ai/Yi-6B-Chat'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml
index ea869865..a8dd75ea 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml
@@ -6,7 +6,7 @@ repo_id:
   - 'internlm/internlm-chat-7b-8k'
   - 'Qwen/Qwen-7B-Chat'
   - 'BAAI/AquilaChat2-7B'
-  - '01-ai/Yi-6B'
+  # - '01-ai/Yi-6B'
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
   - 'WisdomShell/CodeShell-7B-Chat'
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml
index 1252d1bb..345b4990 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml
@@ -1,5 +1,7 @@
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - '01-ai/Yi-6B-Chat'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256.yaml b/python/llm/test/benchmark/igpu-perf/2048-256.yaml
index e279ed7c..5aa79015 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256.yaml
@@ -6,7 +6,7 @@ repo_id:
   - 'internlm/internlm-chat-7b-8k'
   - 'Qwen/Qwen-7B-Chat'
   - 'BAAI/AquilaChat2-7B'
-  - '01-ai/Yi-6B'
+  # - '01-ai/Yi-6B'
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
   - 'WisdomShell/CodeShell-7B-Chat'
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_437.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_437.yaml
index b5569a2a..012af8aa 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256_437.yaml
@@ -1,5 +1,7 @@
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - '01-ai/Yi-6B-Chat'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/32-32.yaml b/python/llm/test/benchmark/igpu-perf/32-32.yaml
index 35f67366..6f4fd2f0 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32.yaml
@@ -6,7 +6,7 @@ repo_id:
   - 'internlm/internlm-chat-7b-8k'
   - 'Qwen/Qwen-7B-Chat'
   - 'BAAI/AquilaChat2-7B'
-  - '01-ai/Yi-6B'
+  # - '01-ai/Yi-6B'
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
   - 'WisdomShell/CodeShell-7B-Chat'
diff --git a/python/llm/test/benchmark/igpu-perf/32-32_437.yaml b/python/llm/test/benchmark/igpu-perf/32-32_437.yaml
index 4ec7bae9..a4ea2adb 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32_437.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32_437.yaml
@@ -1,5 +1,7 @@
 repo_id:
   - 'Qwen/Qwen1.5-7B-Chat'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - '01-ai/Yi-6B-Chat'
 local_model_hub: 'path to your local model hub'
 warm_up: 3
 num_trials: 5
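For reference, a minimal sketch of the head of 1024-128_437.yaml as it reads after this patch, reconstructed only from the hunks above. The other *_437.yaml files receive the same repo_id additions; keys further down each file are not visible in these hunks and are untouched by this change.

# Head of python/llm/test/benchmark/igpu-perf/1024-128_437.yaml after the patch.
# Note: 32-32_437.yaml differs only in warm_up: 3 and num_trials: 5.
repo_id:
  - 'Qwen/Qwen1.5-7B-Chat'
  - 'meta-llama/Meta-Llama-3-8B-Instruct'
  - '01-ai/Yi-6B-Chat'
local_model_hub: 'path to your local model hub'
warm_up: 1
num_trials: 3

Net effect of the patch: '01-ai/Yi-6B' is commented out of every base config, while '01-ai/Yi-6B-Chat' and 'meta-llama/Meta-Llama-3-8B-Instruct' are benchmarked through the _437 variants instead (presumably the configs run under transformers 4.37).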