From 1eaaace2dc072d0ece3e798119e8a6596a59479a Mon Sep 17 00:00:00 2001 From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com> Date: Fri, 26 Jan 2024 16:46:36 +0800 Subject: [PATCH] Update perf test all-in-one config for batch_size arg (#10012) --- python/llm/test/benchmark/arc-perf-test.yaml | 1 + python/llm/test/benchmark/arc-perf-transformers-434.yaml | 1 + python/llm/test/benchmark/core-perf-test.yaml | 1 + python/llm/test/benchmark/cpu-perf-test.yaml | 1 + python/llm/test/benchmark/igpu-perf/1024-128.yaml | 1 + python/llm/test/benchmark/igpu-perf/1024-128_434.yaml | 1 + python/llm/test/benchmark/igpu-perf/2048-256.yaml | 1 + python/llm/test/benchmark/igpu-perf/2048-256_434.yaml | 1 + python/llm/test/benchmark/igpu-perf/32-32.yaml | 1 + python/llm/test/benchmark/igpu-perf/32-32_434.yaml | 1 + python/llm/test/benchmark/igpu-perf/32-512.yaml | 1 + python/llm/test/benchmark/igpu-perf/32-512_434.yaml | 1 + python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml | 1 + .../test/benchmark/stable-version-arc-perf-test-sym_int4.yaml | 1 + .../llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml | 1 + .../test/benchmark/stable-version-arc-stress-test-sym_int4.yaml | 1 + python/llm/test/benchmark/stable-version-cpu-perf-test.yaml | 1 + python/llm/test/benchmark/stable-version-cpu-stress-test.yaml | 1 + 18 files changed, 18 insertions(+) diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index 61b8d454..25e26281 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -22,6 +22,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' - '1024-128' diff --git a/python/llm/test/benchmark/arc-perf-transformers-434.yaml b/python/llm/test/benchmark/arc-perf-transformers-434.yaml index 1b97a044..1389e44a 100644 --- a/python/llm/test/benchmark/arc-perf-transformers-434.yaml +++ b/python/llm/test/benchmark/arc-perf-transformers-434.yaml @@ -6,6 +6,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' - '1024-128' diff --git a/python/llm/test/benchmark/core-perf-test.yaml b/python/llm/test/benchmark/core-perf-test.yaml index 9511da40..e922d218 100644 --- a/python/llm/test/benchmark/core-perf-test.yaml +++ b/python/llm/test/benchmark/core-perf-test.yaml @@ -13,6 +13,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' - '1024-128' diff --git a/python/llm/test/benchmark/cpu-perf-test.yaml b/python/llm/test/benchmark/cpu-perf-test.yaml index 25fa058d..502c693d 100644 --- a/python/llm/test/benchmark/cpu-perf-test.yaml +++ b/python/llm/test/benchmark/cpu-perf-test.yaml @@ -5,6 +5,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' - '1024-128' diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml index 55b3bb8e..a41505fa 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml @@ -18,6 +18,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '1024-128' test_api: diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_434.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_434.yaml index 5c8422ef..b4b1e9b7 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_434.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_434.yaml @@ -5,6 +5,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '1024-128' test_api: diff --git a/python/llm/test/benchmark/igpu-perf/2048-256.yaml b/python/llm/test/benchmark/igpu-perf/2048-256.yaml index 790530d3..b1080c87 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256.yaml @@ -18,6 +18,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '2048-256' test_api: diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_434.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_434.yaml index 693c48b0..b16e5493 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256_434.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256_434.yaml @@ -5,6 +5,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '2048-256' test_api: diff --git a/python/llm/test/benchmark/igpu-perf/32-32.yaml b/python/llm/test/benchmark/igpu-perf/32-32.yaml index 4a91fd43..b925ebd0 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32.yaml @@ -18,6 +18,7 @@ warm_up: 3 num_trials: 5 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' test_api: diff --git a/python/llm/test/benchmark/igpu-perf/32-32_434.yaml b/python/llm/test/benchmark/igpu-perf/32-32_434.yaml index 8e23f29a..6b5c4229 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32_434.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32_434.yaml @@ -5,6 +5,7 @@ warm_up: 3 num_trials: 5 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' test_api: diff --git a/python/llm/test/benchmark/igpu-perf/32-512.yaml b/python/llm/test/benchmark/igpu-perf/32-512.yaml index b04c659c..29a2eff0 100644 --- a/python/llm/test/benchmark/igpu-perf/32-512.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-512.yaml @@ -18,6 +18,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-512' test_api: diff --git a/python/llm/test/benchmark/igpu-perf/32-512_434.yaml b/python/llm/test/benchmark/igpu-perf/32-512_434.yaml index 4a5d316d..a9d6d62a 100644 --- a/python/llm/test/benchmark/igpu-perf/32-512_434.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-512_434.yaml @@ -5,6 +5,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-512' test_api: diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml index 8dec7cbb..db44e31a 100644 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml @@ -9,6 +9,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' - '1024-128' diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml index 9eeab8c8..4c8b0904 100644 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml @@ -9,6 +9,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' - '1024-128' diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml index 7e7467ab..bc64ad92 100644 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml @@ -9,6 +9,7 @@ warm_up: 10 num_trials: 100 num_beams: 1 # default to greedy search low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '1024-512' - '2048-512' diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml index c85b7bca..119a67e3 100644 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml @@ -9,6 +9,7 @@ warm_up: 10 num_trials: 100 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '1024-512' - '2048-512' diff --git a/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml b/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml index ff11c034..aa9158bd 100644 --- a/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml +++ b/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml @@ -11,6 +11,7 @@ warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '32-32' - '1024-128' diff --git a/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml b/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml index 3ca71f05..38aeb375 100644 --- a/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml +++ b/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml @@ -11,6 +11,7 @@ warm_up: 3 num_trials: 50 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 in_out_pairs: - '1024-512' - '2048-512'