diff --git a/python/llm/src/bigdl/llm/transformers/convert.py b/python/llm/src/bigdl/llm/transformers/convert.py
index 6d1c2d5e..5c973c6e 100644
--- a/python/llm/src/bigdl/llm/transformers/convert.py
+++ b/python/llm/src/bigdl/llm/transformers/convert.py
@@ -299,6 +299,9 @@ def _optimize_pre(model):
         logger.info("Only HuggingFace Transformers models are currently "
                     "supported for further optimizations")
         return model
+    # for RWKV models (verified with RWKV/rwkv-4-world-7b)
+    if model.config.model_type == "rwkv":
+        model.rwkv._rescale_layers()
     # process NormHead module in Baichuan2 7B and 13B
     if model.config.model_type == "baichuan" and model.config.vocab_size == 125696:
         # NormHead do normalization on the weights just once at inference time.
diff --git a/python/llm/test/benchmark/32-igpu-perf-test-434.yaml b/python/llm/test/benchmark/32-igpu-perf-test-434.yaml
index 28ff8ba8..d485ece0 100644
--- a/python/llm/test/benchmark/32-igpu-perf-test-434.yaml
+++ b/python/llm/test/benchmark/32-igpu-perf-test-434.yaml
@@ -1,12 +1,14 @@
 repo_id:
   - 'mistralai/Mistral-7B-Instruct-v0.1'
 local_model_hub: 'path to your local model hub'
-warm_up: 3
-num_trials: 5
+warm_up: 1
+num_trials: 3
 num_beams: 1 # default to greedy search
 low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
 in_out_pairs:
   - '32-32'
+  - '32-256'
+  - '32-512'
   # - '1024-128'
 test_api:
   # - "transformer_int4"
diff --git a/python/llm/test/benchmark/32-igpu-perf-test.yaml b/python/llm/test/benchmark/32-igpu-perf-test.yaml
index 420d5b56..81ec620c 100644
--- a/python/llm/test/benchmark/32-igpu-perf-test.yaml
+++ b/python/llm/test/benchmark/32-igpu-perf-test.yaml
@@ -11,13 +11,16 @@ repo_id:
   - 'tiiuae/falcon-7b-instruct-with-patch'
   - 'mosaicml/mpt-7b-chat'
   - 'liuhaotian/llava-v1.5-7b'
+  - 'RWKV/rwkv-4-world-7b'
 local_model_hub: 'path to your local model hub'
-warm_up: 3
-num_trials: 5
+warm_up: 1
+num_trials: 3
 num_beams: 1 # default to greedy search
 low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
 in_out_pairs:
   - '32-32'
+  - '32-256'
+  - '32-512'
   # - '1024-128'
 test_api:
   # - "transformer_int4"
diff --git a/python/llm/test/benchmark/512-igpu-perf-test.yaml b/python/llm/test/benchmark/512-igpu-perf-test.yaml
index 0cede39f..91ae96b2 100644
--- a/python/llm/test/benchmark/512-igpu-perf-test.yaml
+++ b/python/llm/test/benchmark/512-igpu-perf-test.yaml
@@ -11,6 +11,7 @@ repo_id:
   - 'tiiuae/falcon-7b-instruct-with-patch'
   - 'mosaicml/mpt-7b-chat'
   # - 'liuhaotian/llava-v1.5-7b'
+  - 'RWKV/rwkv-4-world-7b'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
 num_trials: 3