From ab1a8339901ceccb73671f3c5c5f1b1a9aa7cf89 Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Mon, 19 Jun 2023 10:25:51 +0800
Subject: [PATCH] LLM: add basic uts related to inference (#8346)

---
 .github/workflows/llm_unit_tests_linux.yml    | 10 +++
 python/llm/test/inference/test_call_models.py | 64 +++++++++++++++++++
 python/llm/test/run-llm-inference-tests.sh    | 18 ++++++
 3 files changed, 92 insertions(+)
 create mode 100644 python/llm/test/inference/test_call_models.py
 create mode 100644 python/llm/test/run-llm-inference-tests.sh

diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml
index 79e6ad3b..35e1b333 100644
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -94,6 +94,16 @@ jobs:
           wget --no-verbose $llm_ftp_url/${BLOOM_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
         fi
 
+      - name: Run LLM inference test
+        shell: bash
+        run: |
+          source $CONDA_HOME/bin/activate bigdl-init-llm
+          $CONDA_HOME/bin/conda info
+          bash python/llm/test/run-llm-inference-tests.sh
+          source $CONDA_HOME/bin/deactivate
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
       # new test steps should be added here
 
       - name: Run LLM cli test
diff --git a/python/llm/test/inference/test_call_models.py b/python/llm/test/inference/test_call_models.py
new file mode 100644
index 00000000..88b77cfc
--- /dev/null
+++ b/python/llm/test/inference/test_call_models.py
@@ -0,0 +1,64 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from bigdl.llm.models import Llama, Bloom, Gptneox
+from bigdl.llm.utils import get_avx_flags
+import pytest
+from unittest import TestCase
+import os
+
+
+class Test_Models_Basics(TestCase):
+
+    def setUp(self):
+        self.llama_model_path = os.environ.get('LLAMA_INT4_CKPT_PATH')
+        self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH')
+        self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH')
+
+    def test_llama_completion_success(self):
+        llm = Llama(self.llama_model_path)
+        output = llm("What is the capital of France?", max_tokens=32, stream=False)
+        # assert "Paris" in output['choices'][0]['text']
+
+    def test_llama_completion_with_stream_success(self):
+        llm = Llama(self.llama_model_path)
+        output = llm("What is the capital of France?", max_tokens=32, stream=True)
+
+    def test_bloom_completion_success(self):
+        llm = Bloom(self.bloom_model_path)
+        output = llm("What is the capital of France?", max_tokens=32, stream=False)
+        # avx = get_avx_flags()
+        # if avx == "_avx512":
+        #     # For platforms without avx512, the current text completion may output gibberish
+        #     assert "Paris" in output['choices'][0]['text']
+
+    def test_bloom_completion_with_stream_success(self):
+        llm = Bloom(self.bloom_model_path)
+        output = llm("What is the capital of France?", max_tokens=32, stream=True)
+
+    def test_gptneox_completion_success(self):
+        llm = Gptneox(self.gptneox_model_path)
+        output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=False)
+        # assert "Paris" in output['choices'][0]['text']
+
+    def test_gptneox_completion_with_stream_success(self):
+        llm = Gptneox(self.gptneox_model_path)
+        output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=True)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])
diff --git a/python/llm/test/run-llm-inference-tests.sh b/python/llm/test/run-llm-inference-tests.sh
new file mode 100644
index 00000000..9d535eb8
--- /dev/null
+++ b/python/llm/test/run-llm-inference-tests.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
+export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
+export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/inference
+
+set -e
+
+echo "# Start testing inference"
+start=$(date "+%s")
+
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
+
+now=$(date "+%s")
+time=$((now-start))
+
+echo "Bigdl-llm tests finished"
+echo "Time used:$time seconds"