Enable unit test workflow on Arc (#9213)

* Add gpu workflow and a transformers API inference test
* Set device-specific env variables in script instead of workflow
* Fix status message

---------

Co-authored-by: sgwhat <ge.song@intel.com>
2023-10-25 15:17:18 +08:00 · 2023-10-25 15:17:18 +08:00 · ab40607b87
commit ab40607b87
parent 160a1e5ee7
3 changed files with 153 additions and 0 deletions
--- a/.github/workflows/llm_unit_tests.yml
+++ b/.github/workflows/llm_unit_tests.yml
@ -192,3 +192,78 @@ jobs:
          pip install -U pandas==2.0.3
          pip install -U typing_extensions==4.5.0
          bash python/llm/test/run-llm-langchain-tests.sh
  # Runs the LLM install and inference tests with the "xpu" extra on a
  # self-hosted runner once the llm-cpp-build job has finished.
  llm-unit-test-on-arc:
    needs: llm-cpp-build
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
    runs-on: [self-hosted, llm, perf]
    env:
      OMP_NUM_THREADS: 16
      THREAD_NUM: 16
      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
    steps:
      # Publish the model locations to later steps through GITHUB_ENV.
      # NOTE(review): ORIGIN_DIR is not defined in this job; presumably it is
      # provided at workflow level or by the runner -- confirm.
      - name: Set environment variables
        shell: bash
        run: |
          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
          echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
          echo "FALCON_7B_ORIGIN_PATH=${ORIGIN_DIR}/falcon-7b-instruct-with-patch" >> "$GITHUB_ENV"
          echo "MPT_7B_ORIGIN_PATH=${ORIGIN_DIR}/mpt-7b-chat" >> "$GITHUB_ENV"
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        shell: bash
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade setuptools
          python -m pip install --upgrade wheel
      - name: Download llm binary
        uses: ./.github/actions/llm/download-llm-binary
      - name: Run LLM install (all) test
        uses: ./.github/actions/llm/setup-llm-env
        with:
          extra-dependency: "xpu"
      - name: Test installed xpu version
        shell: bash
        run: |
          # Load the oneAPI environment before exercising the installed package.
          source /opt/intel/oneapi/setvars.sh
          bash python/llm/test/run-llm-install-tests.sh
      - name: Download LLMs
        shell: bash
        run: |
          # Download one model directory from the FTP server unless it is
          # already present locally.
          #   $1: expected local model directory
          #   $2: model directory name below $LLM_FTP_URL/llm
          # All expansions are quoted: an unset/empty path previously turned
          # the check into `[ ! -d ]`, which is false and silently skipped
          # the download.
          fetch_model_if_missing() {
            local model_dir="$1"
            local model_name="$2"
            if [ ! -d "$model_dir" ]; then
              echo "Directory $model_dir not found. Downloading from FTP server..."
              # Only required when a download is needed, so a runner with all
              # models already cached keeps working without these variables.
              : "${LLM_FTP_URL:?LLM_FTP_URL must be set to download models}"
              : "${ORIGIN_DIR:?ORIGIN_DIR must be set to download models}"
              wget -r -nH --no-verbose --cut-dirs=1 "$LLM_FTP_URL/llm/$model_name" -P "$ORIGIN_DIR"
            fi
          }
          fetch_model_if_missing "$LLAMA2_7B_ORIGIN_PATH" "Llama-2-7b-chat-hf"
          fetch_model_if_missing "$CHATGLM2_6B_ORIGIN_PATH" "chatglm2-6b"
          fetch_model_if_missing "$FALCON_7B_ORIGIN_PATH" "falcon-7b-instruct-with-patch"
          fetch_model_if_missing "$MPT_7B_ORIGIN_PATH" "mpt-7b-chat"
      - name: Run LLM inference test
        shell: bash
        run: |
          source /opt/intel/oneapi/setvars.sh
          python -m pip install expecttest
          bash python/llm/test/run-llm-inference-tests-gpu.sh
--- a/python/llm/test/inference_gpu/test_transformers_api.py
+++ b/python/llm/test/inference_gpu/test_transformers_api.py
@ -0,0 +1,52 @@
 #
 # Copyright 2016 The BigDL Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 import os
 import pytest
 from bigdl.llm.transformers import AutoModelForCausalLM, AutoModel
 from transformers import LlamaTokenizer, AutoTokenizer
 # Target device name for this test module, read from the DEVICE environment
 # variable; a missing variable raises KeyError when the module is imported.
 device = os.environ['DEVICE']
 print(f'Running on {device}')
 if device == 'xpu':
    # NOTE(review): `ipex` is not referenced in the visible test code;
    # presumably imported for its side effects so the 'xpu' device is
    # available -- confirm.
    import intel_extension_for_pytorch as ipex
@pytest.mark.parametrize('prompt, answer', [
    ('What is the capital of France?\n\n', 'Paris'),
])
@pytest.mark.parametrize('Model, Tokenizer, model_path', [
    (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH')),
    (AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')),
    (AutoModelForCausalLM, AutoTokenizer, os.environ.get('FALCON_7B_ORIGIN_PATH')),
])
def test_completion(Model, Tokenizer, model_path, prompt, answer):
    """Check that a generated completion contains the expected answer.

    The tokenizer and then the model are loaded from ``model_path`` (the model
    with ``load_in_4bit=True`` and ``optimize_model=True``), the model and the
    encoded prompt are moved to the module-level ``device``, at most 32 new
    tokens are generated, and the decoded text must contain ``answer``.

    Args:
        Model: class exposing ``from_pretrained`` used to load the model.
        Tokenizer: class exposing ``from_pretrained`` used to load the tokenizer.
        model_path: model location taken from the environment; ``None`` when
            the corresponding variable is not set.
        prompt: text fed to the model.
        answer: substring expected in the decoded output.
    """
    load_options = dict(load_in_4bit=True,
                        optimize_model=True,
                        trust_remote_code=True)

    text_codec = Tokenizer.from_pretrained(model_path, trust_remote_code=True)
    llm = Model.from_pretrained(model_path, **load_options).to(device)

    prompt_ids = text_codec.encode(prompt, return_tensors="pt").to(device)
    generated = llm.generate(prompt_ids, max_new_tokens=32)

    # Only the first returned sequence is decoded and inspected.
    completion = text_codec.decode(generated[0], skip_special_tokens=True)
    assert answer in completion
 # When executed directly as a script, run pytest on this file only.
 if __name__ == '__main__':
    pytest.main([__file__])
--- a/python/llm/test/run-llm-inference-tests-gpu.sh
+++ b/python/llm/test/run-llm-inference-tests-gpu.sh
@ -0,0 +1,26 @@
 #!/bin/bash
 # Run the pytest suite in python/llm/test/inference_gpu and print the elapsed
 # wall-clock time. ANALYTICS_ZOO_ROOT must name the directory that contains
 # python/llm; every path below is derived from it.

 export ANALYTICS_ZOO_ROOT="${ANALYTICS_ZOO_ROOT}"
 export LLM_HOME="${ANALYTICS_ZOO_ROOT}/python/llm/src"
 export LLM_INFERENCE_TEST_DIR="${ANALYTICS_ZOO_ROOT}/python/llm/test/inference_gpu"

 # Device-specific environment for the xpu run.
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 # Read by test_transformers_api.py through os.environ['DEVICE'].
 export DEVICE='xpu'

 # Abort on the first failing command; a pytest failure therefore skips the
 # summary lines at the end and becomes the script's exit status.
 set -e

 echo "# Start testing inference"
 start=$(date "+%s")

 # Fall back to 2 threads when THREAD_NUM is unset or empty.
 if [ -z "$THREAD_NUM" ]; then
   THREAD_NUM=2
 fi
 export OMP_NUM_THREADS=$THREAD_NUM

 # Quoted so a checkout path containing whitespace is passed as one argument.
 pytest "${LLM_INFERENCE_TEST_DIR}" -v -s

 now=$(date "+%s")
 # Named `elapsed` rather than `time` to avoid confusion with the shell keyword.
 elapsed=$((now-start))

 echo "Bigdl-llm gpu tests finished"
 echo "Time used:$elapsed seconds"