diff --git a/python/llm/dev/benchmark/all-in-one/README.md b/python/llm/dev/benchmark/all-in-one/README.md
index 56bfc730..24b22c7f 100644
--- a/python/llm/dev/benchmark/all-in-one/README.md
+++ b/python/llm/dev/benchmark/all-in-one/README.md
@@ -1,7 +1,7 @@
 # All in One Benchmark Test
 All in one benchmark test allows users to test all the benchmarks and record them in a result CSV. Users can provide models and related information in `config.yaml`.
 
-Before running, make sure to have [bigdl-llm](../../../README.md) installed.
+Before running, make sure to have [bigdl-llm](../../../README.md) and [bigdl-nano](../../../../nano/README.md) installed.
 
 ## Config
 Config YAML file has following format
@@ -28,4 +28,10 @@ test_api:
 run `python run.py`, this will output results to `results.csv`.
 
 For SPR performance, run `bash run-spr.sh`.
-For ARC performance, run `bash run-arc.sh`
+> **Note**
+>
+> In `run-spr.sh`, we set the optimal environment variables via `source bigdl-nano-init -c`; the `-c` flag disables jemalloc. Enabling jemalloc may lead to increased latency after multiple trials.
+>
+> The value of `OMP_NUM_THREADS` should match the number of CPU cores specified by `numactl -C`.
+
+For ARC performance, run `bash run-arc.sh`.
diff --git a/python/llm/dev/benchmark/all-in-one/run-spr.sh b/python/llm/dev/benchmark/all-in-one/run-spr.sh
index dd96962d..790ec242 100644
--- a/python/llm/dev/benchmark/all-in-one/run-spr.sh
+++ b/python/llm/dev/benchmark/all-in-one/run-spr.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
-
+source bigdl-nano-init -c
+export OMP_NUM_THREADS=48
+export TRANSFORMERS_OFFLINE=1
 
 # set following parameters according to the actual specs of the test machine
 numactl -C 0-47 -m 0 python $(dirname "$0")/run.py
\ No newline at end of file
diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
index 0383f0a2..a985b233 100644
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -116,8 +116,8 @@ def run_transformer_int4(repo_id,
         model = AutoModel.from_pretrained(model_path, load_in_4bit=True, trust_remote_code=True, torch_dtype='auto')
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     else:
-        model = AutoModelForCausalLM.from_pretrained(model_path, load_in_4bit=True)
-        tokenizer = AutoTokenizer.from_pretrained(model_path)
+        model = AutoModelForCausalLM.from_pretrained(model_path, load_in_4bit=True, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     end = time.perf_counter()
     print(">> loading of model costs {}s".format(end - st))
 
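
Reviewer note: the README addition ties `OMP_NUM_THREADS` to the core list passed to `numactl -C`. A minimal sketch of how the two values could be kept in sync, assuming a single contiguous core range like the `0-47` hard-coded in `run-spr.sh` (the `CORES` variable and the arithmetic are illustrative, not part of the PR):

```bash
#!/bin/bash
# Illustrative only: derive OMP_NUM_THREADS from one contiguous core range
# so the thread count always matches the cores pinned by numactl -C.
CORES="0-47"                                    # same range passed to numactl -C
FIRST=${CORES%-*}                               # "0"
LAST=${CORES#*-}                                # "47"
export OMP_NUM_THREADS=$(( LAST - FIRST + 1 ))  # 48 threads for cores 0-47
numactl -C "$CORES" -m 0 python run.py
```

This keeps the two settings from drifting apart when the range is edited for a machine with a different core count.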
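
The `run.py` hunk adds `trust_remote_code=True` to the non-ChatGLM branch so that models whose repos ship custom modeling code can also load. A standalone sketch of that 4-bit load path, using the same bigdl-llm transformers-style API the benchmark already imports (the `model_path` value is a placeholder, not from the PR):

```python
# Illustrative sketch of the 4-bit load path touched by the run.py hunk.
from bigdl.llm.transformers import AutoModelForCausalLM  # bigdl-llm drop-in class
from transformers import AutoTokenizer

model_path = "your/model-path"  # placeholder: local dir or hub repo id

# load_in_4bit=True converts the relevant layers to INT4 on load;
# trust_remote_code=True allows repos with custom modeling code to load,
# so only enable it for model sources you trust.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path,
                                          trust_remote_code=True)
```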