[LLM] Add Actions for downloading & converting models (#8320)

* First push: download and convert LLM models for testing (Gondolin runner, AVX2 only for now)

* Change yml file name
Yuwen Hu 2023-06-15 13:43:47 +08:00 committed by GitHub
parent bc11a2b1cd
commit b30aa49c4e
5 changed files with 103 additions and 12 deletions

View file

@@ -12,21 +12,30 @@ on:
     branches: [ main ]
     paths:
       - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_linux.yml'
+      - '.github/workflows/llm_unit_tests_linux.yml'
   pull_request:
     branches: [ main ]
     paths:
       - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_linux.yml'
+      - '.github/workflows/llm_unit_tests_linux.yml'
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-basic-linux:
+  llm-unit-test-linux:
     runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.9"]
+    env:
+      ORIGIN_DIR: ./llm/models
+      LLAMA_ORIGIN_PATH: ./llm/models/llama-7b-hf
+      GPTNEOX_ORIGIN_PATH: ./llm/models/gptneox-7b-redpajama-bf16
+      BLOOM_ORIGIN_PATH: ./llm/models/bloomz-7b1
+      INT4_CKPT_DIR: ./llm/ggml
+      LLAMA_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_llama_q4_0.bin
+      GPTNEOX_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_gptneox_q4_0.bin
+      BLOOM_INT4_CKPT_PATH: ./llm/ggml/bigdl_llm_bloom_q4_0.bin
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
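These env vars are how later steps locate the cached original models and where the INT4 checkpoints should land. A minimal sketch of how a test step can consume them; the fallback defaults below merely mirror the values above and are illustrative, not part of the commit:

import os

# Paths injected by the workflow's env: block; defaults are for local runs only.
origin_dir = os.environ.get('ORIGIN_DIR', './llm/models')
ckpt_dir = os.environ.get('INT4_CKPT_DIR', './llm/ggml')

# The download step populates origin_dir; conversion writes q4_0 .bin
# checkpoints into ckpt_dir.
os.makedirs(ckpt_dir, exist_ok=True)
print('cached models:', sorted(os.listdir(origin_dir)) if os.path.isdir(origin_dir) else [])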
@@ -54,7 +63,25 @@ jobs:
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
-      - name: Run LLM naive installation test
+      - name: Download original models
+        env:
+          FTP_USERNAME: ${{ secrets.FTP_USERNAME }}
+          FTP_PASSWORD: ${{ secrets.FTP_PASSWORD }}
+        run: |
+          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user=$FTP_USERNAME --ftp-password=$FTP_PASSWORD ftp://10.112.231.51:8821/llm/llama-7b-hf -P $ORIGIN_DIR
+          fi
+          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
+            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user=$FTP_USERNAME --ftp-password=$FTP_PASSWORD ftp://10.112.231.51:8821/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
+          fi
+          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
+            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 --ftp-user=$FTP_USERNAME --ftp-password=$FTP_PASSWORD ftp://10.112.231.51:8821/llm/bloomz-7b1 -P $ORIGIN_DIR
+          fi
+      - name: Run LLM basic test (native install & convert)
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
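Each download is guarded by a directory check, so a model is fetched once per self-hosted runner and reused on later runs. The same check-then-fetch pattern as a minimal Python sketch; the ensure_model helper is hypothetical, not part of this commit, and simply shells out to the wget invocation shown above:

import os
import subprocess

FTP_ROOT = 'ftp://10.112.231.51:8821/llm'  # same server the workflow mirrors from

def ensure_model(origin_path, remote_name, origin_dir):
    """Fetch a model directory from the FTP server unless it is already cached."""
    if os.path.isdir(origin_path):
        return  # cached by a previous run; skip the download
    print(f'Directory {origin_path} not found. Downloading from FTP server...')
    subprocess.run(
        ['wget', '-r', '-nH', '--no-verbose', '--cut-dirs=1',
         f"--ftp-user={os.environ['FTP_USERNAME']}",
         f"--ftp-password={os.environ['FTP_PASSWORD']}",
         f'{FTP_ROOT}/{remote_name}', '-P', origin_dir],
        check=True)

ensure_model(os.environ['LLAMA_ORIGIN_PATH'], 'llama-7b-hf', os.environ['ORIGIN_DIR'])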
@@ -71,3 +98,5 @@ jobs:
           $CONDA_HOME/bin/conda remove -n bigdl-init-llm --all
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      # new test steps should be added here

View file

@@ -12,16 +12,16 @@ on:
     branches: [ main ]
     paths:
       - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_windows.yml'
+      - '.github/workflows/llm_unit_tests_windows.yml'
   pull_request:
     branches: [ main ]
     paths:
       - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_basic_windows.yml'
+      - '.github/workflows/llm_unit_tests_windows.yml'
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-basic-windows:
+  llm-unit-test-windows:
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -49,7 +49,7 @@ jobs:
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
-      - name: Run LLM naive installation test
+      - name: Run LLM basic test (naive installation)
         shell: bash
         run: |
           pip install requests
@@ -57,6 +57,6 @@ jobs:
           whl_name=$(ls python/llm/dist)
           pip install -i https://pypi.python.org/simple "python/llm/dist/${whl_name}[all]"
           pip install pytest
-          bash python/llm/test/run-llm-basic-tests.sh
+          bash python/llm/test/run-llm-basic-tests.sh windows
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

View file

@@ -0,0 +1,54 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pytest
+import os
+from unittest import TestCase
+from bigdl.llm.ggml import convert_model
+
+llama_model_path = os.environ.get('LLAMA_ORIGIN_PATH')
+gptneox_model_path = os.environ.get('GPTNEOX_ORIGIN_PATH')
+bloom_model_path = os.environ.get('BLOOM_ORIGIN_PATH')
+output_dir = os.environ.get('INT4_CKPT_DIR')
+
+
+class TestConvertModel(TestCase):
+    def test_convert_llama(self):
+        converted_model_path = convert_model(input_path=llama_model_path,
+                                             output_path=output_dir,
+                                             model_family='llama',
+                                             dtype='int4')
+        assert os.path.isfile(converted_model_path)
+
+    def test_convert_gptneox(self):
+        converted_model_path = convert_model(input_path=gptneox_model_path,
+                                             output_path=output_dir,
+                                             model_family='gptneox',
+                                             dtype='int4')
+        assert os.path.isfile(converted_model_path)
+
+    def test_convert_bloom(self):
+        converted_model_path = convert_model(input_path=bloom_model_path,
+                                             output_path=output_dir,
+                                             model_family='bloom',
+                                             dtype='int4')
+        assert os.path.isfile(converted_model_path)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])
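For context, convert_model is the API these tests exercise: it converts a Hugging Face checkpoint to an INT4 (q4_0) ggml binary and returns the path of the converted file. A standalone usage sketch, assuming the same env vars the workflow exports; the parameter values are taken directly from the tests above:

import os
from bigdl.llm.ggml import convert_model

# Convert the cached LLaMA checkpoint and print where the .bin file landed;
# assumes LLAMA_ORIGIN_PATH and INT4_CKPT_DIR are set as in the workflow.
ckpt_path = convert_model(input_path=os.environ['LLAMA_ORIGIN_PATH'],
                          output_path=os.environ['INT4_CKPT_DIR'],
                          model_family='llama',
                          dtype='int4')
print(f'INT4 checkpoint written to {ckpt_path}')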

View file

@@ -2,14 +2,22 @@
 export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
 export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
-export LLM_BASIC_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/packaging
+export LLM_BASIC_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/basic
 
 set -e
 
+# ipex is not installed here. Any test that needs ipex should be moved to the next pytest command.
 echo "# Start testing"
 start=$(date "+%s")
-python -m pytest -s ${LLM_BASIC_TEST_DIR}
+
+echo "test install"
+python -m pytest -s ${LLM_BASIC_TEST_DIR}/install
+
+# TODO: support tests on windows
+platform=$1
+if [[ $platform != "windows" ]]; then
+  echo "test convert model"
+  python -m pytest -s ${LLM_BASIC_TEST_DIR}/convert
+fi
 
 now=$(date "+%s")
 time=$((now-start))
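The script now splits the basic suite in two: install tests run on every platform, while convert tests are skipped when the caller passes "windows" (as the Windows workflow above does). A rough Python equivalent of that gating, for illustration only; paths assume the repo root as the working directory:

import subprocess
import sys

# Install tests always run; convert tests are skipped for the "windows"
# platform argument, mirroring the bash script above.
platform = sys.argv[1] if len(sys.argv) > 1 else ''
subprocess.run(['python', '-m', 'pytest', '-s', 'python/llm/test/basic/install'], check=True)
if platform != 'windows':
    subprocess.run(['python', '-m', 'pytest', '-s', 'python/llm/test/basic/convert'], check=True)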