diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml index 1d6971d4..b089975f 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml @@ -6,6 +6,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - 'meta-llama/Meta-Llama-3-8B-Instruct' + - 'mistralai/Mistral-7B-Instruct-v0.2' + - 'openbmb/MiniCPM-1B-sft-bf16' + - 'openbmb/MiniCPM-2B-sft-bf16' + - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml index 62d754f2..6019026c 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_437.yaml @@ -2,6 +2,7 @@ repo_id: - 'Qwen/Qwen1.5-7B-Chat' - 'Qwen/Qwen2-7B-Instruct' - 'microsoft/Phi-3-mini-4k-instruct' + - 'microsoft/Phi-3-mini-128k-instruct' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml index 27a22ba4..7dbca21d 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml @@ -6,6 +6,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - 'meta-llama/Meta-Llama-3-8B-Instruct' + - 'mistralai/Mistral-7B-Instruct-v0.2' + - 'openbmb/MiniCPM-1B-sft-bf16' + - 'openbmb/MiniCPM-2B-sft-bf16' + - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml index f483989d..12ccaa5d 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml @@ -2,6 +2,7 @@ repo_id: - 'Qwen/Qwen1.5-7B-Chat' - 'Qwen/Qwen2-7B-Instruct' - 'microsoft/Phi-3-mini-4k-instruct' + - 'microsoft/Phi-3-mini-128k-instruct' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml index 5963e78c..ca7b0726 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml @@ -6,6 +6,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - 'meta-llama/Meta-Llama-3-8B-Instruct' + - 'mistralai/Mistral-7B-Instruct-v0.2' + - 'openbmb/MiniCPM-1B-sft-bf16' + - 'openbmb/MiniCPM-2B-sft-bf16' + - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_437.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_437.yaml index 8615b984..4401207c 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_437.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_437.yaml @@ -2,6 +2,7 @@ repo_id: - 'Qwen/Qwen1.5-7B-Chat' - 'Qwen/Qwen2-7B-Instruct' - 'microsoft/Phi-3-mini-4k-instruct' + - 'microsoft/Phi-3-mini-128k-instruct' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml index 0669bf2f..3ce342f0 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml @@ -6,6 +6,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - 'meta-llama/Meta-Llama-3-8B-Instruct' + - 'mistralai/Mistral-7B-Instruct-v0.2' + - 'openbmb/MiniCPM-1B-sft-bf16' + - 'openbmb/MiniCPM-2B-sft-bf16' + - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_437.yaml index 6e2caeb6..f9ae8540 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_437.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_437.yaml @@ -2,6 +2,7 @@ repo_id: - 'Qwen/Qwen1.5-7B-Chat' - 'Qwen/Qwen2-7B-Instruct' - 'microsoft/Phi-3-mini-4k-instruct' + - 'microsoft/Phi-3-mini-128k-instruct' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml index 995bcff9..ff7c38a3 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml @@ -6,6 +6,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - 'meta-llama/Meta-Llama-3-8B-Instruct' + - 'mistralai/Mistral-7B-Instruct-v0.2' + - 'openbmb/MiniCPM-1B-sft-bf16' + - 'openbmb/MiniCPM-2B-sft-bf16' + - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5' local_model_hub: 'path to your local model hub' warm_up: 3 num_trials: 5 diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_437.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_437.yaml index 4d609662..de32d305 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_437.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_437.yaml @@ -2,6 +2,7 @@ repo_id: - 'Qwen/Qwen1.5-7B-Chat' - 'Qwen/Qwen2-7B-Instruct' - 'microsoft/Phi-3-mini-4k-instruct' + - 'microsoft/Phi-3-mini-128k-instruct' local_model_hub: 'path to your local model hub' warm_up: 3 num_trials: 5