[LLM] Add Actions for downloading & converting models (#8320)

* First push to downloading and converting llm models for testing (Gondolin runner, avx2 for now) * Change yml file name
2023-06-15 13:43:47 +08:00 · 2023-06-15 13:43:47 +08:00 · b30aa49c4e
commit b30aa49c4e
parent bc11a2b1cd
5 changed files with 103 additions and 12 deletions
--- a/.github/workflows/llm_unit_tests_basic_linux.yml
+++ b/.github/workflows/llm_unit_tests_basic_linux.yml
@ -12,21 +12,30 @@ on:
    branches: [ main ]
    paths:
      - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_linux.yml'
+      - '.github/workflows/llm_unit_tests_linux.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_linux.yml'
+      - '.github/workflows/llm_unit_tests_linux.yml'

 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-basic-linux:
+  llm-unit-test-linux:
    runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
+    env:
+      ORIGIN_DIR: ./llm/models
+      LLAMA_ORIGIN_PATH: ./llm/models/llama-7b-hf
+      GPTNEOX_ORIGIN_PATH: ./llm/models/gptneox-7b-redpajama-bf16
+      BLOOM_ORIGIN_PATH: ./llm/models/bloomz-7b1
+      INT4_CKPT_DIR: ./llm/ggml
+      LLAMA_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_llama_q4_0.bin
+      GPTNEOX_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_gptneox_q4_0.bin
+      BLOOM_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_bloom_q4_0.bin
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
@ -53,8 +62,26 @@ jobs:
          $CONDA_HOME/bin/conda remove -n bigdl-init-llm --all
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      - name: Download original models
+        env:  # FTP credentials are taken from the workflow secrets
+          FTP_USERNAME: ${{ secrets.FTP_USERNAME }}
+          FTP_PASSWORD: ${{ secrets.FTP_PASSWORD }}
+        run: |
+          if [ ! -d "$LLAMA_ORIGIN_PATH" ]; then  # fetch only when the directory is missing
+            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user="$FTP_USERNAME" --ftp-password="$FTP_PASSWORD" ftp://10.112.231.51:8821/llm/llama-7b-hf -P "$ORIGIN_DIR"
+          fi
+          if [ ! -d "$GPTNEOX_ORIGIN_PATH" ]; then
+            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user="$FTP_USERNAME" --ftp-password="$FTP_PASSWORD" ftp://10.112.231.51:8821/llm/gptneox-7b-redpajama-bf16 -P "$ORIGIN_DIR"
+          fi
+          if [ ! -d "$BLOOM_ORIGIN_PATH" ]; then
+            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user="$FTP_USERNAME" --ftp-password="$FTP_PASSWORD" ftp://10.112.231.51:8821/llm/bloomz-7b1 -P "$ORIGIN_DIR"
+          fi
      
-      - name: Run LLM naive installation test
+      - name: Run LLM basic test (native install & convert)
        shell: bash
        run: |
          $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
@ -71,3 +98,5 @@ jobs:
          $CONDA_HOME/bin/conda remove -n bigdl-init-llm --all
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      # new test steps should be added here
--- a/.github/workflows/llm_unit_tests_basic_windows.yml
+++ b/.github/workflows/llm_unit_tests_basic_windows.yml
@ -12,16 +12,16 @@ on:
    branches: [ main ]
    paths:
      - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_windows.yml'
+      - '.github/workflows/llm_unit_tests_windows.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_windows.yml'
+      - '.github/workflows/llm_unit_tests_windows.yml'

 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-basic-windows:
+  llm-unit-test-windows:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
@ -49,7 +49,7 @@ jobs:
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
      
-      - name: Run LLM naive installation test
+      - name: Run LLM basic test (naive installation)
        shell: bash
        run: |
          pip install requests
@ -57,6 +57,6 @@ jobs:
          whl_name=$(ls python/llm/dist)
          pip install -i https://pypi.python.org/simple "python/llm/dist/${whl_name}[all]"
          pip install pytest
-          bash python/llm/test/run-llm-basic-tests.sh
+          bash python/llm/test/run-llm-basic-tests.sh windows
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
--- a/python/llm/test/basic/convert/test_convert_model.py
+++ b/python/llm/test/basic/convert/test_convert_model.py
@ -0,0 +1,54 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import pytest
+import os
+from unittest import TestCase
+
+from bigdl.llm.ggml import convert_model
+
+
+llama_model_path = os.environ.get('LLAMA_ORIGIN_PATH')
+gptneox_model_path = os.environ.get('GPTNEOX_ORIGIN_PATH')
+bloom_model_path = os.environ.get('BLOOM_ORIGIN_PATH')
+output_dir = os.environ.get('INT4_CKPT_DIR')
+
+class TestConvertModel(TestCase):
+    """Convert the llama, gptneox and bloom models to int4 checkpoints."""
+
+    def _convert_to_int4(self, source_path, family):
+        # output_dir is the module-level value read from INT4_CKPT_DIR.
+        ckpt_path = convert_model(input_path=source_path,
+                                  output_path=output_dir,
+                                  model_family=family,
+                                  dtype='int4')
+        return ckpt_path
+
+    def test_convert_llama(self):
+        ckpt_path = self._convert_to_int4(llama_model_path, 'llama')
+        assert os.path.isfile(ckpt_path)
+
+    def test_convert_gptneox(self):
+        ckpt_path = self._convert_to_int4(gptneox_model_path, 'gptneox')
+        assert os.path.isfile(ckpt_path)
+
+    def test_convert_bloom(self):
+        ckpt_path = self._convert_to_int4(bloom_model_path, 'bloom')
+        assert os.path.isfile(ckpt_path)
+
+if __name__ == '__main__':
+    pytest.main([__file__])
--- a/python/llm/test/basic/install/test_install.py
+++ b/python/llm/test/basic/install/test_install.py
--- a/python/llm/test/run-llm-basic-tests.sh
+++ b/python/llm/test/run-llm-basic-tests.sh
@ -2,14 +2,22 @@

 export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
 export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
-export LLM_BASIC_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/packaging
+export LLM_BASIC_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/basic

 set -e

-# ipex is not installed here. Any tests needs ipex should be moved to next pytest command.
 echo "# Start testing"
 start=$(date "+%s")
-python -m pytest -s ${LLM_BASIC_TEST_DIR}
+
+echo "test install"
+python -m pytest -s ${LLM_BASIC_TEST_DIR}/install
+
+# TODO: support the model conversion tests on Windows
+platform=$1
+if [[ "$platform" != "windows" ]]; then
+  echo "test convert model"
+  python -m pytest -s ${LLM_BASIC_TEST_DIR}/convert
+fi

 now=$(date "+%s")
 time=$((now-start))