diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
index 16b825dc..00d6161c 100644
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -365,12 +365,17 @@ def run_transformer_int4_gpu(repo_id,
     # Load model in 4 bit,
     # which convert the relevant layers in the model into INT4 format
     st = time.perf_counter()
-    if repo_id in CHATGLM_IDS:
-        model = AutoModel.from_pretrained(model_path, load_in_low_bit=low_bit, optimize_model=True,
-                                          trust_remote_code=True, use_cache=True).eval()
+    origin_repo_id = repo_id.replace("-4bit", "")
+    if origin_repo_id in CHATGLM_IDS:
+        if "4bit" in repo_id:
+            model = AutoModel.load_low_bit(model_path, optimize_model=True,
+                                           trust_remote_code=True, use_cache=True).eval()
+        else:
+            model = AutoModel.from_pretrained(model_path, load_in_low_bit=low_bit, optimize_model=True,
+                                              trust_remote_code=True, use_cache=True).eval()
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
         model = model.to('xpu')
-    elif repo_id in LLAMA_IDS:
+    elif origin_repo_id in LLAMA_IDS:
         model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True,
                                                      use_cache=True).eval()
         tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml
index fdf7adac..61b8d454 100644
--- a/python/llm/test/benchmark/arc-perf-test.yaml
+++ b/python/llm/test/benchmark/arc-perf-test.yaml
@@ -2,6 +2,7 @@ repo_id:
   - 'meta-llama/Llama-2-7b-chat-hf'
   - 'meta-llama/Llama-2-13b-chat-hf'
   - 'THUDM/chatglm2-6b'
+  - 'THUDM/chatglm3-6b-4bit'
   - 'tiiuae/falcon-7b-instruct-with-patch'
   - 'mosaicml/mpt-7b-chat'
   - 'redpajama/gptneox-7b-redpajama-bf16'
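
The patch teaches run_transformer_int4_gpu to recognize repo ids carrying a "-4bit" suffix (such as the new 'THUDM/chatglm3-6b-4bit' entry in arc-perf-test.yaml): the suffix is stripped so the model still matches CHATGLM_IDS/LLAMA_IDS, and the weights are loaded from an already-converted low-bit checkpoint via AutoModel.load_low_bit instead of being re-quantized on every run with from_pretrained(load_in_low_bit=...). Below is a minimal sketch, assuming the bigdl.llm.transformers API already used in this benchmark script, of how such a pre-converted checkpoint could be produced once with save_low_bit and re-loaded afterwards; the paths and the "sym_int4" precision are illustrative assumptions, not part of this patch.

    # Minimal sketch (assumption: bigdl.llm.transformers, as used by the benchmark script).
    from bigdl.llm.transformers import AutoModel
    from transformers import AutoTokenizer

    origin_path = "THUDM/chatglm3-6b"      # original checkpoint (hypothetical path)
    low_bit_path = "./chatglm3-6b-4bit"    # directory for the converted model (hypothetical)

    # One-off conversion: quantize the relevant layers to 4 bit and persist the result.
    model = AutoModel.from_pretrained(origin_path, load_in_low_bit="sym_int4",
                                      optimize_model=True, trust_remote_code=True)
    model.save_low_bit(low_bit_path)
    AutoTokenizer.from_pretrained(origin_path, trust_remote_code=True).save_pretrained(low_bit_path)

    # Later runs (the path the patched benchmark takes for "-4bit" repo ids)
    # load the saved low-bit weights directly and skip the conversion step.
    model = AutoModel.load_low_bit(low_bit_path, optimize_model=True,
                                   trust_remote_code=True, use_cache=True).eval()
    tokenizer = AutoTokenizer.from_pretrained(low_bit_path, trust_remote_code=True)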