diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml
index 888c7952..d74965d1 100644
--- a/.github/workflows/llm_unit_tests.yml
+++ b/.github/workflows/llm_unit_tests.yml
@@ -192,3 +192,78 @@ jobs:
           pip install -U pandas==2.0.3
           pip install -U typing_extensions==4.5.0
           bash python/llm/test/run-llm-langchain-tests.sh
+  llm-unit-test-on-arc:
+    needs: llm-cpp-build
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9"]
+    runs-on: [self-hosted, llm, perf]
+    env:
+      OMP_NUM_THREADS: 16
+      THREAD_NUM: 16
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+    steps:
+      - name: Set environment variables
+        shell: bash
+        run: |
+          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
+          echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
+          echo "FALCON_7B_ORIGIN_PATH=${ORIGIN_DIR}/falcon-7b-instruct-with-patch" >> "$GITHUB_ENV"
+          echo "MPT_7B_ORIGIN_PATH=${ORIGIN_DIR}/mpt-7b-chat" >> "$GITHUB_ENV"
+
+      - name: Checkout repo
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools
+          python -m pip install --upgrade wheel
+
+      - name: Download llm binary
+        uses: ./.github/actions/llm/download-llm-binary
+
+      - name: Run LLM install (all) test
+        uses: ./.github/actions/llm/setup-llm-env
+        with:
+          extra-dependency: "xpu"
+
+      - name: Test installed xpu version
+        shell: bash
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          bash python/llm/test/run-llm-install-tests.sh
+
+      - name: Download LLMs
+        shell: bash
+        run: |
+          if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
+          fi
+          if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then
+            echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR
+          fi
+          if [ ! -d $FALCON_7B_ORIGIN_PATH ]; then
+            echo "Directory $FALCON_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/falcon-7b-instruct-with-patch -P $ORIGIN_DIR
+          fi
+          if [ ! -d $MPT_7B_ORIGIN_PATH ]; then
+            echo "Directory $MPT_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/mpt-7b-chat -P $ORIGIN_DIR
+          fi
+
+      - name: Run LLM inference test
+        shell: bash
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          python -m pip install expecttest
+          bash python/llm/test/run-llm-inference-tests-gpu.sh
diff --git a/python/llm/test/inference_gpu/test_transformers_api.py b/python/llm/test/inference_gpu/test_transformers_api.py
new file mode 100644
index 00000000..69f2578d
--- /dev/null
+++ b/python/llm/test/inference_gpu/test_transformers_api.py
@@ -0,0 +1,52 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import os
+import pytest
+
+from bigdl.llm.transformers import AutoModelForCausalLM, AutoModel
+from transformers import LlamaTokenizer, AutoTokenizer
+
+device = os.environ['DEVICE']
+print(f'Running on {device}')
+if device == 'xpu':
+    import intel_extension_for_pytorch as ipex
+
+@pytest.mark.parametrize('prompt, answer', [
+    ('What is the capital of France?\n\n', 'Paris')
+    ])
+@pytest.mark.parametrize('Model, Tokenizer, model_path', [
+    (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH')),
+    (AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')),
+    (AutoModelForCausalLM, AutoTokenizer, os.environ.get('FALCON_7B_ORIGIN_PATH')),
+    ])
+def test_completion(Model, Tokenizer, model_path, prompt, answer):
+    tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True)
+    model = Model.from_pretrained(model_path,
+                                  load_in_4bit=True,
+                                  optimize_model=True,
+                                  trust_remote_code=True)
+    model = model.to(device)
+
+    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
+    output = model.generate(input_ids, max_new_tokens=32)
+    output_str = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    assert answer in output_str
+
+if __name__ == '__main__':
+    pytest.main([__file__])
diff --git a/python/llm/test/run-llm-inference-tests-gpu.sh b/python/llm/test/run-llm-inference-tests-gpu.sh
new file mode 100644
index 00000000..3d22c1cf
--- /dev/null
+++ b/python/llm/test/run-llm-inference-tests-gpu.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
+export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
+export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/inference_gpu
+
+export USE_XETLA=OFF
+export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export DEVICE='xpu'
+
+set -e
+
+echo "# Start testing inference"
+start=$(date "+%s")
+
+if [ -z "$THREAD_NUM" ]; then
+  THREAD_NUM=2
+fi
+export OMP_NUM_THREADS=$THREAD_NUM
+pytest ${LLM_INFERENCE_TEST_DIR} -v -s
+
+now=$(date "+%s")
+time=$((now-start))
+
+echo "Bigdl-llm gpu tests finished"
+echo "Time used: $time seconds"
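
Note: for a local run of the new GPU suite outside CI, something like the sketch below should work; it simply mirrors the steps of the llm-unit-test-on-arc job. It assumes oneAPI is installed under /opt/intel/oneapi, that the repository root is the current directory, and that the three model directories used by test_transformers_api.py have already been downloaded; the /models/... paths are placeholders, not paths from this PR.

    # Minimal local invocation mirroring the CI job above (placeholder model paths).
    source /opt/intel/oneapi/setvars.sh                                  # oneAPI env for the Arc GPU
    export ANALYTICS_ZOO_ROOT=$(pwd)                                     # repo root, as the workflow sets it
    export LLAMA2_7B_ORIGIN_PATH=/models/Llama-2-7b-chat-hf              # placeholder
    export CHATGLM2_6B_ORIGIN_PATH=/models/chatglm2-6b                   # placeholder
    export FALCON_7B_ORIGIN_PATH=/models/falcon-7b-instruct-with-patch   # placeholder
    bash python/llm/test/run-llm-inference-tests-gpu.sh                  # script exports DEVICE=xpu itself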