From 8e35800abed1bbc1ca2e1d896410d60bf4b6a311 Mon Sep 17 00:00:00 2001
From: "Jin, Qiao" <89779290+JinBridger@users.noreply.github.com>
Date: Mon, 14 Oct 2024 15:14:34 +0800
Subject: [PATCH] Add llama 3.1 in igpu perf (#12194)

---
 python/llm/test/benchmark/igpu-perf/1024-128_443.yaml           | 1 +
 python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml | 1 +
 .../benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml  | 1 +
 python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml | 1 +
 python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml | 1 +
 python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml    | 1 +
 python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml | 1 +
 7 files changed, 7 insertions(+)

diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
index 4667ff34..74d0cdc5 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
@@ -1,6 +1,7 @@
 repo_id:
   - 'google/gemma-2-2b-it'
   - 'google/gemma-2-9b-it'
+  - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
index 2f4bbd22..ec529fc8 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
@@ -1,6 +1,7 @@
 repo_id:
   - 'google/gemma-2-2b-it'
   - 'google/gemma-2-9b-it'
+  - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
index 8d8e16c5..03108573 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
@@ -1,6 +1,7 @@
 repo_id:
   - 'google/gemma-2-2b-it'
   - 'google/gemma-2-9b-it'
+  - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
index 3f8e554d..975d0a02 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
@@ -1,6 +1,7 @@
 repo_id:
   - 'google/gemma-2-2b-it'
   - 'google/gemma-2-9b-it'
+  - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
index 9e72a9e0..9127111b 100644
--- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
@@ -1,6 +1,7 @@
 repo_id:
   - 'google/gemma-2-2b-it'
   # - 'google/gemma-2-9b-it'
+  - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
index a02b19b1..45a7809f 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
@@ -1,6 +1,7 @@
 repo_id:
   - 'google/gemma-2-2b-it'
   - 'google/gemma-2-9b-it'
+  - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 3
 num_trials: 5
diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
index 94f3ec43..8579e3c9 100644
--- a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
@@ -1,6 +1,7 @@
 repo_id:
   - 'google/gemma-2-2b-it'
   # - 'google/gemma-2-9b-it'
+  - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3
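Note (not part of the patch): each of these YAML files lists the models to benchmark under repo_id, and after this change every configuration also exercises meta-llama/Llama-3.1-8B-Instruct. A minimal sketch of how such a config could be consumed, assuming PyYAML and only the field names visible in the diff hunks above; the loop body is illustrative and is not the actual ipex-llm benchmark runner:

    # sketch.py -- requires PyYAML (pip install pyyaml)
    import yaml

    # Path taken from the first diff above; run from the repo root.
    with open("python/llm/test/benchmark/igpu-perf/1024-128_443.yaml") as f:
        cfg = yaml.safe_load(f)

    # cfg["repo_id"] now includes 'meta-llama/Llama-3.1-8B-Instruct'.
    for repo_id in cfg["repo_id"]:
        # Assumed semantics: warm_up runs are discarded, num_trials runs
        # are measured (1 and 3 here; 3 and 5 in the 32-32 config).
        for trial in range(cfg["warm_up"] + cfg["num_trials"]):
            print(f"run {trial} for {repo_id}")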