From 1012507a404303be64f0052c41bc82ec7f73de92 Mon Sep 17 00:00:00 2001
From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com>
Date: Tue, 5 Dec 2023 10:59:28 +0800
Subject: [PATCH] [LLM] Fix performance tests (#9596)

* Fix missing key for cpu_embedding

* Remove 512 as it stuck for now

* Small fix
---
 python/llm/test/benchmark/arc-perf-test.yaml             | 2 +-
 python/llm/test/benchmark/arc-perf-transformers-434.yaml | 3 ++-
 python/llm/test/benchmark/core-perf-test.yaml            | 3 ++-
 python/llm/test/benchmark/cpu-perf-test.yaml             | 3 +++
 python/llm/test/benchmark/igpu-perf-test-434.yaml        | 2 +-
 python/llm/test/benchmark/igpu-perf-test.yaml            | 2 +-
 6 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml
index adbd4c5a..82f64d04 100644
--- a/python/llm/test/benchmark/arc-perf-test.yaml
+++ b/python/llm/test/benchmark/arc-perf-test.yaml
@@ -26,4 +26,4 @@ in_out_pairs:
   - '2048-256'
 test_api:
   - "transformer_int4_gpu"  # on Intel GPU
-
+cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
diff --git a/python/llm/test/benchmark/arc-perf-transformers-434.yaml b/python/llm/test/benchmark/arc-perf-transformers-434.yaml
index 8f4c24d9..1b97a044 100644
--- a/python/llm/test/benchmark/arc-perf-transformers-434.yaml
+++ b/python/llm/test/benchmark/arc-perf-transformers-434.yaml
@@ -11,4 +11,5 @@ in_out_pairs:
   - '1024-128'
   - '2048-256'
 test_api:
-  - "transformer_int4_gpu"  # on Intel GPU
\ No newline at end of file
+  - "transformer_int4_gpu"  # on Intel GPU
+cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
diff --git a/python/llm/test/benchmark/core-perf-test.yaml b/python/llm/test/benchmark/core-perf-test.yaml
index deb2ca53..c71f8c4b 100644
--- a/python/llm/test/benchmark/core-perf-test.yaml
+++ b/python/llm/test/benchmark/core-perf-test.yaml
@@ -25,4 +25,5 @@ test_api:
   # - "transformer_int4_gpu"  # on Intel GPU
   # - "optimize_model_gpu"  # on Intel GPU
   # - "deepspeed_transformer_int4_cpu" # on Intel SPR Server
-
+  # - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
+cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
diff --git a/python/llm/test/benchmark/cpu-perf-test.yaml b/python/llm/test/benchmark/cpu-perf-test.yaml
index 2f543dc9..25fa058d 100644
--- a/python/llm/test/benchmark/cpu-perf-test.yaml
+++ b/python/llm/test/benchmark/cpu-perf-test.yaml
@@ -16,3 +16,6 @@ test_api:
   # - "ipex_fp16_gpu" # on Intel GPU
   # - "transformer_int4_gpu"  # on Intel GPU
   # - "optimize_model_gpu"  # on Intel GPU
+  # - "deepspeed_transformer_int4_cpu" # on Intel SPR Server
+  # - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory)
+cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
diff --git a/python/llm/test/benchmark/igpu-perf-test-434.yaml b/python/llm/test/benchmark/igpu-perf-test-434.yaml
index 101462c7..05ce879a 100644
--- a/python/llm/test/benchmark/igpu-perf-test-434.yaml
+++ b/python/llm/test/benchmark/igpu-perf-test-434.yaml
@@ -7,7 +7,7 @@ num_beams: 1 # default to greedy search
 low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
 in_out_pairs:
   - '32-32'
-  - '512-64'
+  # - '512-64'
   # - '1024-128'
 test_api:
   # - "transformer_int4"
diff --git a/python/llm/test/benchmark/igpu-perf-test.yaml b/python/llm/test/benchmark/igpu-perf-test.yaml
index aaa40b79..92720d26 100644
--- a/python/llm/test/benchmark/igpu-perf-test.yaml
+++ b/python/llm/test/benchmark/igpu-perf-test.yaml
@@ -18,7 +18,7 @@ num_beams: 1 # default to greedy search
 low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
 in_out_pairs:
   - '32-32'
-  - '512-64'
+  # - '512-64'
   # - '1024-128'
 test_api:
   # - "transformer_int4"