LLM: add basic unit tests (UTs) related to inference (#8346)

2023-06-19 10:25:51 +08:00 · 2023-06-19 10:25:51 +08:00 · ab1a833990
commit ab1a833990
parent daae7bd4e4
3 changed files with 92 additions and 0 deletions
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@ -94,6 +94,16 @@ jobs:
            wget --no-verbose $llm_ftp_url/${BLOOM_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
          fi

+      # Activates the bigdl-init-llm conda environment, prints the conda
+      # configuration for debugging, and runs the inference test wrapper script
+      # with ANALYTICS_ZOO_ROOT set to the checked-out workspace.
+      - name: Run LLM inference test
+        shell: bash
+        run: |
+          source $CONDA_HOME/bin/activate bigdl-init-llm
+          $CONDA_HOME/bin/conda info
+          bash python/llm/test/run-llm-inference-tests.sh
+          source $CONDA_HOME/bin/deactivate
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
      # new test steps should be added here

      - name: Run LLM cli test
--- a/python/llm/test/inference/test_call_models.py
+++ b/python/llm/test/inference/test_call_models.py
@ -0,0 +1,64 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from bigdl.llm.models import Llama, Bloom, Gptneox
+from bigdl.llm.utils import get_avx_flags
+import pytest
+from unittest import TestCase
+import os
+
+
+class Test_Models_Basics(TestCase):
+    """Smoke tests that construct each model wrapper and call it once.
+
+    Checkpoint paths come from the ``LLAMA_INT4_CKPT_PATH``,
+    ``BLOOM_INT4_CKPT_PATH`` and ``GPTNEOX_INT4_CKPT_PATH`` environment
+    variables. No test asserts on the generated text: a test passes as long
+    as constructing and calling the model raises no exception.
+    """
+
+    def setUp(self):
+        """Read the three checkpoint paths from the environment."""
+        # os.environ.get returns None for an unset variable, so a missing
+        # variable only surfaces later, when the model constructor receives None.
+        self.llama_model_path = os.environ.get('LLAMA_INT4_CKPT_PATH')
+        self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH')
+        self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH')
+
+    def test_llama_completion_success(self):
+        """Call the Llama model once with ``stream=False``."""
+        llm = Llama(self.llama_model_path)
+        output = llm("What is the capital of France?", max_tokens=32, stream=False)
+        # Content check is currently disabled:
+        # assert "Paris" in output['choices'][0]['text']
+
+    def test_llama_completion_with_stream_success(self):
+        """Call the Llama model once with ``stream=True``."""
+        llm = Llama(self.llama_model_path)
+        # NOTE(review): the returned value is never iterated. If stream=True
+        # yields a lazy generator, no tokens are produced here - TODO confirm.
+        output = llm("What is the capital of France?", max_tokens=32, stream=True)
+
+    def test_bloom_completion_success(self):
+        """Call the Bloom model once with ``stream=False``."""
+        llm = Bloom(self.bloom_model_path)
+        output = llm("What is the capital of France?", max_tokens=32, stream=False)
+        # Content check is currently disabled:
+        # avx = get_avx_flags()
+        # if avx == "_avx512":
+        #     # For platforms without avx512, the current text completion may output gibberish
+        #     assert "Paris" in output['choices'][0]['text']
+
+    def test_bloom_completion_with_stream_success(self):
+        """Call the Bloom model once with ``stream=True``."""
+        llm = Bloom(self.bloom_model_path)
+        # NOTE(review): the returned value is never iterated. If stream=True
+        # yields a lazy generator, no tokens are produced here - TODO confirm.
+        output = llm("What is the capital of France?", max_tokens=32, stream=True)
+
+    def test_gptneox_completion_success(self):
+        """Call the Gptneox model once with ``stream=False``."""
+        llm = Gptneox(self.gptneox_model_path)
+        output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=False)
+        # Content check is currently disabled:
+        # assert "Paris" in output['choices'][0]['text']
+
+    def test_gptneox_completion_with_stream_success(self):
+        """Call the Gptneox model once with ``stream=True``."""
+        llm = Gptneox(self.gptneox_model_path)
+        # NOTE(review): the returned value is never iterated. If stream=True
+        # yields a lazy generator, no tokens are produced here - TODO confirm.
+        output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=True)
+
+
+if __name__ == '__main__':
+    # Propagate pytest's exit code so that running this file directly reports
+    # test failures to the calling shell instead of always exiting with 0.
+    raise SystemExit(pytest.main([__file__]))
--- a/python/llm/test/run-llm-inference-tests.sh
+++ b/python/llm/test/run-llm-inference-tests.sh
@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Runs the LLM inference unit tests with pytest and prints the elapsed time.
+# Requires ANALYTICS_ZOO_ROOT to point at the repository root.
+
+# Abort with a clear message when the root is unset or empty, rather than
+# pointing pytest at /python/llm/test/inference.
+export ANALYTICS_ZOO_ROOT="${ANALYTICS_ZOO_ROOT:?ANALYTICS_ZOO_ROOT must be set to the repository root}"
+export LLM_HOME="${ANALYTICS_ZOO_ROOT}/python/llm/src"
+export LLM_INFERENCE_TEST_DIR="${ANALYTICS_ZOO_ROOT}/python/llm/test/inference"
+
+# Stop at the first failing command so a pytest failure fails the script.
+set -e
+
+echo "# Start testing inference"
+start=$(date "+%s")
+
+# Quoted so that a checkout path containing spaces stays a single argument.
+python -m pytest -s "${LLM_INFERENCE_TEST_DIR}"
+
+now=$(date "+%s")
+time=$((now-start))
+
+echo "Bigdl-llm tests finished"
+echo "Time used:$time seconds"