diff --git a/python/llm/test/benchmark/harness_nightly/accuracy_regression.py b/python/llm/test/benchmark/harness_nightly/accuracy_regression.py
deleted file mode 100644
index ed793956..00000000
--- a/python/llm/test/benchmark/harness_nightly/accuracy_regression.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#
-# Copyright 2016 The BigDL Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import json
-import sys
-import logging
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-def main(res_path, golden_path):
-    print(res_path, golden_path)
-    with open(res_path, "r") as f:
-        results = json.load(f)['results']
-    print(results)
-
-    model_name, device, precision, task = res_path.split('/')[-5:-1]
-
-    with open(golden_path, "r") as f:
-        golden_results = json.load(f)[model_name][device][precision]
-    print(golden_results)
-
-    identical = True
-    for task in results.keys():
-
-        if task not in golden_results:
-            identical = False
-            logger.error(f"Task {task} should be updated to golden results.")
-            continue
-        task_results = results[task]
-        task_golden = golden_results[task]
-        for m in task_results.keys():
-            if m in task_golden and abs(task_results[m] - task_golden[m]) > 0.001:
-                if not m.endswith("_stderr"):
-                    identical = False
-                    logger.error(f"Different on metric '{m}' [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")
-                else:
-                    logger.warning(f"Diff on {m} [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")
-    if identical:
-        logger.info("Accuracy values are identical to golden results.")
-    else:
-        raise RuntimeError("Accuracy has changed, please check if any accuracy issue or update golden accuracy value.")
-
-main(*sys.argv[1:3])
\ No newline at end of file
diff --git a/python/llm/test/benchmark/harness_nightly/golden_results.json b/python/llm/test/benchmark/harness_nightly/golden_results.json
deleted file mode 100644
index 15300011..00000000
--- a/python/llm/test/benchmark/harness_nightly/golden_results.json
+++ /dev/null
@@ -1,57 +0,0 @@
-{
-    "stablelm-3b-4e1t": {"xpu": {
-        "mixed_fp4": {
-            "truthfulqa_mc": {"mc1": 0.24357405140758873,"mc1_stderr": 0.015026354824910782,"mc2": 0.37399115063281224,"mc2_stderr": 0.013684003173581748},
-            "arc_challenge": {
-                "acc": 0.40102389078498296,
-                "acc_stderr": 0.014322255790719869,
-                "acc_norm": 0.44283276450511944,
-                "acc_norm_stderr": 0.014515573873348897
-            }
-        },
-        "fp8": {
-            "truthfulqa_mc": {
-                "mc1": 0.24479804161566707,
-                "mc1_stderr": 0.01505186948671501,
-                "mc2": 0.3747170112957169,
-                "mc2_stderr": 0.013516983188729865
-            },
-            "arc_challenge": {
-                "acc": 0.41552901023890787,
-                "acc_stderr": 0.014401366641216377,
-                "acc_norm": 0.46245733788395904,
-                "acc_norm_stderr": 0.014570144495075581
-            }
-        }
-    }},
-"Mistral-7B-v0.1": {"xpu": {
-        "mixed_fp4": {
-            "truthfulqa_mc": {
-                "mc1": 0.2741738066095471,
-                "mc1_stderr": 0.015616518497219374,
-                "mc2": 0.4090424865843113,
-                "mc2_stderr": 0.014068835265546585
-            },
-            "arc_challenge": {
-                "acc": 0.5674061433447098,
-                "acc_stderr": 0.014478005694182528,
-                "acc_norm": 0.6023890784982935,
-                "acc_norm_stderr": 0.01430175222327954
-            }
-        },
-        "fp8": {
-            "truthfulqa_mc": {
-                "mc1": 0.2802937576499388,
-                "mc1_stderr": 0.015723139524608763,
-                "mc2": 0.4253576013662111,
-                "mc2_stderr": 0.014199215617062957
-            },
-            "arc_challenge": {
-                "acc": 0.5622866894197952,
-                "acc_stderr": 0.014497573881108283,
-                "acc_norm": 0.6032423208191127,
-                "acc_norm_stderr": 0.014296513020180646
-            }
-        }
-    }}
-}