[LLM] Add Actions for downloading & converting models (#8320)

* First push: download and convert LLM models for testing (Gondolin runner, AVX2 only for now)

* Change yml file name
Yuwen Hu 2023-06-15 13:43:47 +08:00 committed by GitHub
parent bc11a2b1cd
commit b30aa49c4e
5 changed files with 103 additions and 12 deletions

View file

@@ -12,21 +12,30 @@ on:
     branches: [ main ]
     paths:
       - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_linux.yml'
+      - '.github/workflows/llm_unit_tests_linux.yml'
   pull_request:
     branches: [ main ]
     paths:
       - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_linux.yml'
+      - '.github/workflows/llm_unit_tests_linux.yml'
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-basic-linux:
+  llm-unit-test-linux:
     runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.9"]
+    env:
+      ORIGIN_DIR: ./llm/models
+      LLAMA_ORIGIN_PATH: ./llm/models/llama-7b-hf
+      GPTNEOX_ORIGIN_PATH: ./llm/models/gptneox-7b-redpajama-bf16
+      BLOOM_ORIGIN_PATH: ./llm/models/bloomz-7b1
+      INT4_CKPT_DIR: ./llm/ggml
+      LLAMA_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_llama_q4_0.bin
+      GPTNEOX_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_gptneox_q4_0.bin
+      BLOOM_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_bloom_q4_0.bin
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
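These env vars are how later steps locate the cached original models and where the INT4 checkpoints should land. A minimal sketch of how a test step can consume them; the fallback defaults below merely mirror the values above and are illustrative, not part of the commit:

import os

# Paths injected by the workflow's env: block; defaults are for local runs only.
origin_dir = os.environ.get('ORIGIN_DIR', './llm/models')
ckpt_dir = os.environ.get('INT4_CKPT_DIR', './llm/ggml')

# The download step populates origin_dir; conversion writes q4_0 .bin
# checkpoints into ckpt_dir.
os.makedirs(ckpt_dir, exist_ok=True)
print('cached models:', sorted(os.listdir(origin_dir)) if os.path.isdir(origin_dir) else [])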
@@ -54,7 +63,25 @@ jobs:
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
-      - name: Run LLM naive installation test
+      - name: Download original models
+        env:
+          FTP_USERNAME: ${{ secrets.FTP_USERNAME }}
+          FTP_PASSWORD: ${{ secrets.FTP_PASSWORD }}
+        run: |
+          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user=$FTP_USERNAME --ftp-password=$FTP_PASSWORD ftp://10.112.231.51:8821/llm/llama-7b-hf -P $ORIGIN_DIR
+          fi
+          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
+            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user=$FTP_USERNAME --ftp-password=$FTP_PASSWORD ftp://10.112.231.51:8821/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
+          fi
+          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
+            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user=$FTP_USERNAME --ftp-password=$FTP_PASSWORD ftp://10.112.231.51:8821/llm/bloomz-7b1 -P $ORIGIN_DIR
+          fi
+      - name: Run LLM basic test (native install & convert)
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
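Each download is guarded by a directory check, so a model is fetched once per self-hosted runner and reused on later runs. The same check-then-fetch pattern as a minimal Python sketch; the ensure_model helper is hypothetical, not part of this commit, and simply shells out to the wget invocation shown above:

import os
import subprocess

FTP_ROOT = 'ftp://10.112.231.51:8821/llm'  # same server the workflow mirrors from

def ensure_model(origin_path, remote_name, origin_dir):
    """Fetch a model directory from the FTP server unless it is already cached."""
    if os.path.isdir(origin_path):
        return  # cached by a previous run; skip the download
    print(f'Directory {origin_path} not found. Downloading from FTP server...')
    subprocess.run(
        ['wget', '-r', '-nH', '--no-verbose', '--cut-dirs=1',
         f"--ftp-user={os.environ['FTP_USERNAME']}",
         f"--ftp-password={os.environ['FTP_PASSWORD']}",
         f'{FTP_ROOT}/{remote_name}', '-P', origin_dir],
        check=True)

ensure_model(os.environ['LLAMA_ORIGIN_PATH'], 'llama-7b-hf', os.environ['ORIGIN_DIR'])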
@@ -71,3 +98,5 @@ jobs:
           $CONDA_HOME/bin/conda remove -n bigdl-init-llm --all
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      # new test steps should be added here

View file

@@ -12,16 +12,16 @@ on:
     branches: [ main ]
     paths:
       - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_windows.yml'
+      - '.github/workflows/llm_unit_tests_windows.yml'
   pull_request:
     branches: [ main ]
     paths:
       - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_windows.yml'
+      - '.github/workflows/llm_unit_tests_windows.yml'
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-basic-windows:
+  llm-unit-test-windows:
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -49,7 +49,7 @@ jobs:
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
-      - name: Run LLM naive installation test
+      - name: Run LLM basic test (naive installation)
         shell: bash
         run: |
           pip install requests
@@ -57,6 +57,6 @@ jobs:
           whl_name=$(ls python/llm/dist)
           pip install -i https://pypi.python.org/simple "python/llm/dist/${whl_name}[all]"
           pip install pytest
-          bash python/llm/test/run-llm-basic-tests.sh
+          bash python/llm/test/run-llm-basic-tests.sh windows
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

View file

@@ -0,0 +1,54 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pytest
+import os
+from unittest import TestCase
+from bigdl.llm.ggml import convert_model
+
+llama_model_path = os.environ.get('LLAMA_ORIGIN_PATH')
+gptneox_model_path = os.environ.get('GPTNEOX_ORIGIN_PATH')
+bloom_model_path = os.environ.get('BLOOM_ORIGIN_PATH')
+output_dir = os.environ.get('INT4_CKPT_DIR')
+
+
+class TestConvertModel(TestCase):
+    def test_convert_llama(self):
+        converted_model_path = convert_model(input_path=llama_model_path,
+                                             output_path=output_dir,
+                                             model_family='llama',
+                                             dtype='int4')
+        assert os.path.isfile(converted_model_path)
+
+    def test_convert_gptneox(self):
+        converted_model_path = convert_model(input_path=gptneox_model_path,
+                                             output_path=output_dir,
+                                             model_family='gptneox',
+                                             dtype='int4')
+        assert os.path.isfile(converted_model_path)
+
+    def test_convert_bloom(self):
+        converted_model_path = convert_model(input_path=bloom_model_path,
+                                             output_path=output_dir,
+                                             model_family='bloom',
+                                             dtype='int4')
+        assert os.path.isfile(converted_model_path)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])
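For context, convert_model is the API these tests exercise: it converts a Hugging Face checkpoint to an INT4 (q4_0) ggml binary and returns the path of the converted file. A standalone usage sketch, assuming the same env vars the workflow exports; the parameter values are taken directly from the tests above:

import os
from bigdl.llm.ggml import convert_model

# Convert the cached LLaMA checkpoint and print where the .bin file landed;
# assumes LLAMA_ORIGIN_PATH and INT4_CKPT_DIR are set as in the workflow.
ckpt_path = convert_model(input_path=os.environ['LLAMA_ORIGIN_PATH'],
                          output_path=os.environ['INT4_CKPT_DIR'],
                          model_family='llama',
                          dtype='int4')
print(f'INT4 checkpoint written to {ckpt_path}')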

View file

@@ -2,14 +2,22 @@
 export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
 export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
-export LLM_BASIC_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/packaging
+export LLM_BASIC_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/basic
 
 set -e
 
+# ipex is not installed here. Any test that needs ipex should be moved to the next pytest command.
 echo "# Start testing"
 start=$(date "+%s")
-python -m pytest -s ${LLM_BASIC_TEST_DIR}
+
+echo "test install"
+python -m pytest -s ${LLM_BASIC_TEST_DIR}/install
+
+# TODO: support tests on windows
+platform=$1
+if [[ $platform != "windows" ]]; then
+  echo "test convert model"
+  python -m pytest -s ${LLM_BASIC_TEST_DIR}/convert
+fi
 
 now=$(date "+%s")
 time=$((now-start))
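The script now splits the basic suite in two: install tests run on every platform, while convert tests are skipped when the caller passes "windows" (as the Windows workflow above does). A rough Python equivalent of that gating, for illustration only; paths assume the repo root as the working directory:

import subprocess
import sys

# Install tests always run; convert tests are skipped for the "windows"
# platform argument, mirroring the bash script above.
platform = sys.argv[1] if len(sys.argv) > 1 else ''
subprocess.run(['python', '-m', 'pytest', '-s', 'python/llm/test/basic/install'], check=True)
if platform != 'windows':
    subprocess.run(['python', '-m', 'pytest', '-s', 'python/llm/test/basic/convert'], check=True)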