From 1012507a404303be64f0052c41bc82ec7f73de92 Mon Sep 17 00:00:00 2001 From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com> Date: Tue, 5 Dec 2023 10:59:28 +0800 Subject: [PATCH] [LLM] Fix performance tests (#9596) * Fix missing key for cpu_embedding * Remove 512 as it stuck for now * Small fix --- python/llm/test/benchmark/arc-perf-test.yaml | 2 +- python/llm/test/benchmark/arc-perf-transformers-434.yaml | 3 ++- python/llm/test/benchmark/core-perf-test.yaml | 3 ++- python/llm/test/benchmark/cpu-perf-test.yaml | 3 +++ python/llm/test/benchmark/igpu-perf-test-434.yaml | 2 +- python/llm/test/benchmark/igpu-perf-test.yaml | 2 +- 6 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index adbd4c5a..82f64d04 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -26,4 +26,4 @@ in_out_pairs: - '2048-256' test_api: - "transformer_int4_gpu" # on Intel GPU - +cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api) diff --git a/python/llm/test/benchmark/arc-perf-transformers-434.yaml b/python/llm/test/benchmark/arc-perf-transformers-434.yaml index 8f4c24d9..1b97a044 100644 --- a/python/llm/test/benchmark/arc-perf-transformers-434.yaml +++ b/python/llm/test/benchmark/arc-perf-transformers-434.yaml @@ -11,4 +11,5 @@ in_out_pairs: - '1024-128' - '2048-256' test_api: - - "transformer_int4_gpu" # on Intel GPU \ No newline at end of file + - "transformer_int4_gpu" # on Intel GPU +cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api) diff --git a/python/llm/test/benchmark/core-perf-test.yaml b/python/llm/test/benchmark/core-perf-test.yaml index deb2ca53..c71f8c4b 100644 --- a/python/llm/test/benchmark/core-perf-test.yaml +++ b/python/llm/test/benchmark/core-perf-test.yaml @@ -25,4 +25,5 @@ test_api: # - 
"transformer_int4_gpu" # on Intel GPU # - "optimize_model_gpu" # on Intel GPU # - "deepspeed_transformer_int4_cpu" # on Intel SPR Server - + # - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) +cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api) diff --git a/python/llm/test/benchmark/cpu-perf-test.yaml b/python/llm/test/benchmark/cpu-perf-test.yaml index 2f543dc9..25fa058d 100644 --- a/python/llm/test/benchmark/cpu-perf-test.yaml +++ b/python/llm/test/benchmark/cpu-perf-test.yaml @@ -16,3 +16,6 @@ test_api: # - "ipex_fp16_gpu" # on Intel GPU # - "transformer_int4_gpu" # on Intel GPU # - "optimize_model_gpu" # on Intel GPU + # - "deepspeed_transformer_int4_cpu" # on Intel SPR Server + # - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) +cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api) diff --git a/python/llm/test/benchmark/igpu-perf-test-434.yaml b/python/llm/test/benchmark/igpu-perf-test-434.yaml index 101462c7..05ce879a 100644 --- a/python/llm/test/benchmark/igpu-perf-test-434.yaml +++ b/python/llm/test/benchmark/igpu-perf-test-434.yaml @@ -7,7 +7,7 @@ num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) in_out_pairs: - '32-32' - - '512-64' + # - '512-64' # - '1024-128' test_api: # - "transformer_int4" diff --git a/python/llm/test/benchmark/igpu-perf-test.yaml b/python/llm/test/benchmark/igpu-perf-test.yaml index aaa40b79..92720d26 100644 --- a/python/llm/test/benchmark/igpu-perf-test.yaml +++ b/python/llm/test/benchmark/igpu-perf-test.yaml @@ -18,7 +18,7 @@ num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) in_out_pairs: - '32-32' - - '512-64' + # - '512-64' # - '1024-128' test_api: # - "transformer_int4"