LLM first transformers UT (#8514)

* ut
* transformers api first ut
* name
* dir issue
* use chatglm instead of chatglm2
* omp
* set omp in sh
* source
* taskset
* test
* test omp
* add test
2023-07-20 10:16:27 +08:00 · 2023-07-20 10:16:27 +08:00 · 411d896636
commit 411d896636
parent cad78740a7
3 changed files with 68 additions and 1 deletions
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -41,6 +41,9 @@ env:
  BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
  STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin

+  LLM_DIR: ./llm
+  ORIGINAL_CHATGLM_6B_PATH: ./llm/chatglm-6b/
+
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
  llm-cpp-build:
@@ -73,6 +76,7 @@ jobs:
        uses: ./.github/actions/llm/setup-llm-env
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+          OMP_NUM_THREADS: 24

      - name: Download ckpt models
        run: |
@@ -92,6 +96,10 @@ jobs:
            echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
            wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
          fi
+          if [ ! -d $ORIGINAL_CHATGLM_6B_PATH ]; then
+            echo "Directory $ORIGINAL_CHATGLM_6B_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/${ORIGINAL_CHATGLM_6B_PATH:1} -P $LLM_DIR
+          fi

      - name: Run LLM cli test
        uses: ./.github/actions/llm/cli-test
--- a/python/llm/test/inference/test_transformers_api.py
+++ b/python/llm/test/inference/test_transformers_api.py
@@ -0,0 +1,55 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from bigdl.llm.models import Llama, Bloom, Gptneox, Starcoder
+from bigdl.llm.utils import get_avx_flags
+import unittest
+import os
+
+import time
+import torch
+from bigdl.llm.transformers import AutoModelForCausalLM, AutoModel
+from transformers import LlamaTokenizer, AutoTokenizer
+
+class TestTransformersAPI(unittest.TestCase):  # smoke test: no assertions, passes if nothing raises
+    # Store the thread count from THREAD_NUM (default 2); no test in this class reads self.n_threads.
+    def setUp(self):        
+        thread_num = os.environ.get('THREAD_NUM')
+        if thread_num is not None:
+            self.n_threads = int(thread_num)  # int() raises ValueError on a non-integer string
+        else:
+            self.n_threads = 2
+    # Load the model at ORIGINAL_CHATGLM_6B_PATH with load_in_4bit=True, generate, then print output and timing.
+    def test_transformers_int4(self):
+        model_path = os.environ.get('ORIGINAL_CHATGLM_6B_PATH')  # None when the variable is unset
+        model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        input_str = "晚上睡不着应该怎么办"
+        # The timed span covers encode, generate and decode; model/tokenizer loading above is excluded.
+        with torch.inference_mode():
+            st = time.time()
+            input_ids = tokenizer.encode(input_str, return_tensors="pt")
+            output = model.generate(input_ids, do_sample=False, max_new_tokens=32)  # sampling disabled
+            output_str = tokenizer.decode(output[0], skip_special_tokens=True)
+            end = time.time()
+        print('Prompt:', input_str)
+        print('Output:', output_str)
+        print(f'Inference time: {end-st} s')    
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/python/llm/test/run-llm-inference-tests.sh
+++ b/python/llm/test/run-llm-inference-tests.sh
@@ -4,12 +4,16 @@ export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
 export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
 export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/inference

+source bigdl-nano-init
+
 set -e

 echo "# Start testing inference"
 start=$(date "+%s")

-python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k "not test_transformers_int4"
+export OMP_NUM_THREADS=24
+taskset -c 0-23 python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k test_transformers_int4

 now=$(date "+%s")
 time=$((now-start))