LLM first transformers UT (#8514)

* ut * transformers api first ut * name * dir issue * use chatglm instead of chatglm2 * omp * set omp in sh * source * taskset * test * test omp * add test
2023-07-20 10:16:27 +08:00 · 2023-07-20 10:16:27 +08:00 · 411d896636
commit 411d896636
parent cad78740a7
3 changed files with 68 additions and 1 deletions
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -41,6 +41,9 @@ env:
  BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
  STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
  LLM_DIR: ./llm
  ORIGINAL_CHATGLM_6B_PATH: ./llm/chatglm-6b/
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
  llm-cpp-build:
@@ -73,6 +76,7 @@ jobs:
        uses: ./.github/actions/llm/setup-llm-env
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
          OMP_NUM_THREADS: 24
      - name: Download ckpt models
        run: |
@@ -92,6 +96,10 @@ jobs:
            echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
            wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
          fi
          if [ ! -d $ORIGINAL_CHATGLM_6B_PATH ]; then
            echo "Directory $ORIGINAL_CHATGLM_6B_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/${ORIGINAL_CHATGLM_6B_PATH:1} -P $LLM_DIR
          fi
      - name: Run LLM cli test
        uses: ./.github/actions/llm/cli-test
--- a/python/llm/test/inference/test_transformers_api.py
+++ b/python/llm/test/inference/test_transformers_api.py
@@ -0,0 +1,55 @@
 #
 # Copyright 2016 The BigDL Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 from bigdl.llm.models import Llama, Bloom, Gptneox, Starcoder
 from bigdl.llm.utils import get_avx_flags
 import unittest
 import os
 import time
 import torch
 from bigdl.llm.transformers import AutoModelForCausalLM, AutoModel
 from transformers import LlamaTokenizer, AutoTokenizer
 class TestTransformersAPI(unittest.TestCase):
    """Smoke test for loading a model in 4-bit through the transformers-style API.

    The model checkpoint directory is read from the ``ORIGINAL_CHATGLM_6B_PATH``
    environment variable; the optional ``THREAD_NUM`` variable is parsed into
    ``self.n_threads`` for each test.
    """

    def setUp(self):
        """Store the thread count from ``THREAD_NUM`` (default 2) on the instance."""
        self.n_threads = int(os.environ.get('THREAD_NUM', 2))

    def test_transformers_int4(self):
        """Load the model with ``load_in_4bit=True`` and run one greedy generation.

        The test is skipped when ``ORIGINAL_CHATGLM_6B_PATH`` is not set, and
        fails if the decoded generation is not a non-empty string.
        """
        model_path = os.environ.get('ORIGINAL_CHATGLM_6B_PATH')
        if not model_path:
            # Passing None on to from_pretrained would surface as a confusing
            # loader error; report the missing configuration explicitly.
            self.skipTest('ORIGINAL_CHATGLM_6B_PATH is not set')

        model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        input_str = "晚上睡不着应该怎么办"

        with torch.inference_mode():
            # Timing covers tokenization, generation and decoding together.
            st = time.time()
            input_ids = tokenizer.encode(input_str, return_tensors="pt")
            output = model.generate(input_ids, do_sample=False, max_new_tokens=32)
            output_str = tokenizer.decode(output[0], skip_special_tokens=True)
            end = time.time()

        print('Prompt:', input_str)
        print('Output:', output_str)
        print(f'Inference time: {end-st} s')

        # Without assertions the test would pass even on empty/garbage output.
        self.assertIsInstance(output_str, str)
        self.assertTrue(output_str.strip(), 'model produced an empty generation')
 # Allow running this test module directly as a script via unittest's CLI runner.
 if __name__ == '__main__':
    unittest.main()
--- a/python/llm/test/run-llm-inference-tests.sh
+++ b/python/llm/test/run-llm-inference-tests.sh
@@ -4,12 +4,16 @@ export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
 export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
 export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/inference
 source bigdl-nano-init
 set -e
 echo "# Start testing inference"
 start=$(date "+%s")
-python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k "not test_transformers_int4"
 export OMP_NUM_THREADS=24
 taskset -c 0-23 python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k test_transformers_int4
 now=$(date "+%s")
 time=$((now-start))