From 411d896636c223abdc16023ebf4c852d25f1257c Mon Sep 17 00:00:00 2001
From: Song Jiaming
Date: Thu, 20 Jul 2023 10:16:27 +0800
Subject: [PATCH] LLM first transformers UT (#8514)

* ut
* transformers api first ut
* name
* dir issue
* use chatglm instead of chatglm2
* omp
* set omp in sh
* source
* taskset
* test
* test omp
* add test
---
 .github/workflows/llm_unit_tests_linux.yml  |  8 +++
 .../test/inference/test_transformers_api.py | 55 +++++++++++++++++++
 python/llm/test/run-llm-inference-tests.sh  |  6 +-
 3 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 python/llm/test/inference/test_transformers_api.py

diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml
index eea0c139..313931b5 100644
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -41,6 +41,9 @@ env:
   BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
   STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
 
+  LLM_DIR: ./llm
+  ORIGINAL_CHATGLM_6B_PATH: ./llm/chatglm-6b/
+
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
   llm-cpp-build:
@@ -73,6 +76,7 @@ jobs:
         uses: ./.github/actions/llm/setup-llm-env
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+          OMP_NUM_THREADS: 24
 
       - name: Download ckpt models
         run: |
@@ -92,6 +96,10 @@ jobs:
            echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
            wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
          fi
+         if [ ! -d $ORIGINAL_CHATGLM_6B_PATH ]; then
+           echo "Directory $ORIGINAL_CHATGLM_6B_PATH not found. Downloading from FTP server..."
+           wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/${ORIGINAL_CHATGLM_6B_PATH:1} -P $LLM_DIR
+         fi
 
       - name: Run LLM cli test
         uses: ./.github/actions/llm/cli-test

diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py
new file mode 100644
index 00000000..f0f65fde
--- /dev/null
+++ b/python/llm/test/inference/test_transformers_api.py
@@ -0,0 +1,55 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from bigdl.llm.models import Llama, Bloom, Gptneox, Starcoder
+from bigdl.llm.utils import get_avx_flags
+import unittest
+import os
+
+import time
+import torch
+from bigdl.llm.transformers import AutoModelForCausalLM, AutoModel
+from transformers import LlamaTokenizer, AutoTokenizer
+
+class TestTransformersAPI(unittest.TestCase):
+
+    def setUp(self):
+        thread_num = os.environ.get('THREAD_NUM')
+        if thread_num is not None:
+            self.n_threads = int(thread_num)
+        else:
+            self.n_threads = 2
+
+    def test_transformers_int4(self):
+        model_path = os.environ.get('ORIGINAL_CHATGLM_6B_PATH')
+        model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        input_str = "晚上睡不着应该怎么办"
+
+        with torch.inference_mode():
+            st = time.time()
+            input_ids = tokenizer.encode(input_str, return_tensors="pt")
+            output = model.generate(input_ids, do_sample=False, max_new_tokens=32)
+            output_str = tokenizer.decode(output[0], skip_special_tokens=True)
+            end = time.time()
+            print('Prompt:', input_str)
+            print('Output:', output_str)
+            print(f'Inference time: {end-st} s')
+
+
+if __name__ == '__main__':
+    unittest.main()

diff --git a/python/llm/test/run-llm-inference-tests.sh b/python/llm/test/run-llm-inference-tests.sh
index 9d535eb8..81358339 100644
--- a/python/llm/test/run-llm-inference-tests.sh
+++ b/python/llm/test/run-llm-inference-tests.sh
@@ -4,12 +4,16 @@ export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
 export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
 export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/inference
+source bigdl-nano-init
+
 
 set -e
 
 echo "# Start testing inference"
 start=$(date "+%s")
 
-python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k "not test_transformers_int4"
+export OMP_NUM_THREADS=24
+taskset -c 0-23 python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k test_transformers_int4
 
 now=$(date "+%s")
 time=$((now-start))
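For reference, a minimal standalone sketch of the code path the new test_transformers_int4 case exercises: loading chatglm-6b through bigdl-llm's transformers-style INT4 API and running greedy generation. The fallback model path and the English prompt below are illustrative placeholders, not part of the patch.

import os
import time
import torch
from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer

# Point this at a local chatglm-6b checkout; the fallback mirrors the CI env var value.
model_path = os.environ.get("ORIGINAL_CHATGLM_6B_PATH", "./llm/chatglm-6b/")

# Same loading calls as in the test: 4-bit quantized load via bigdl-llm.
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

with torch.inference_mode():
    start = time.time()
    input_ids = tokenizer.encode("What should I do if I cannot sleep at night?", return_tensors="pt")
    output = model.generate(input_ids, do_sample=False, max_new_tokens=32)
    print(tokenizer.decode(output[0], skip_special_tokens=True))
    print(f"Inference time: {time.time() - start} s")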