[LLM] Change default runner for LLM Linux tests to the ones with AVX512 (#8448)
* Basic change for AVX512 runner
* Remove conda channel and action rename
* Small fix
* Small fix and reduce peak convert disk space
* Define n_threads based on runner status
* Small thread num fix
* Define thread_num for cli
* test
* Add self-hosted label and other small fix
parent edf23a95be
commit 372c775cb4

5 changed files with 92 additions and 58 deletions
.github/workflows/llm-nightly-test.yml (85 changed lines)
@@ -14,7 +14,7 @@ on:
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
   llm-nightly-convert-test:
-    runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
+    runs-on: [ self-hosted, llm, AVX512, ubuntu-20.04-lts ]
     strategy:
       fail-fast: false
       matrix:
@@ -42,33 +42,11 @@ jobs:
           python -m pip install --upgrade setuptools==58.0.4
           python -m pip install --upgrade wheel

-      - name: Download original models
-        run: |
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
-          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
-            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/llama-7b-hf -P $ORIGIN_DIR
-          fi
-          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
-            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
-          fi
-          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
-            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/bloomz-7b1 -P $ORIGIN_DIR
-          fi
-          if [ ! -d $STARCODER_ORIGIN_PATH ]; then
-            echo "Directory $STARCODER_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/gpt_bigcode-santacoder -P $ORIGIN_DIR
-          fi
-
-      - name: Run LLM convert test
+      - name: Install BigDL-LLM
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
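The deleted lines above spliced a per-runner FTP endpoint out of RUNNER_REPOSITORY_URL and FTP_URI with bash substring expansion; the new runners export a ready-made LLM_FTP_URL instead. For clarity, a Python sketch of what the old three lines computed (not committed code):

    import os

    # ${RUNNER_REPOSITORY_URL:35} -> everything from character offset 35 onward,
    # lower-cased as tr '[:upper:]' '[:lower:]' did
    llm_ftp_user = os.environ["RUNNER_REPOSITORY_URL"][35:].lower()

    # ${FTP_URI:0:6} + user + ${FTP_URI:9} + port, e.g. an "ftp://<user>..." URL on port 8821
    ftp_uri = os.environ["FTP_URI"]
    llm_ftp_url = ftp_uri[:6] + llm_ftp_user + ftp_uri[9:] + ":8821"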
@@ -76,19 +54,58 @@ jobs:
           whl_name=$(ls python/llm/dist)
           pip install -i https://pypi.python.org/simple "python/llm/dist/${whl_name}[all]"
           pip install pytest
-          bash python/llm/test/run-llm-convert-tests.sh
           source $CONDA_HOME/bin/deactivate
-          $CONDA_HOME/bin/conda remove -n bigdl-init-llm --all
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      - name: Download original models & convert
+        shell: bash
+        run: |
+          source $CONDA_HOME/bin/activate bigdl-init-llm
+
+          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/llama-7b-hf -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh llama
+          rm -rf $LLAMA_ORIGIN_PATH
+
+          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
+            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh gptneox
+          rm -rf $GPTNEOX_ORIGIN_PATH
+
+          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
+            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/bloomz-7b1 -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh bloom
+          rm -rf $BLOOM_ORIGIN_PATH
+
+          if [ ! -d $STARCODER_ORIGIN_PATH ]; then
+            echo "Directory $STARCODER_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gpt_bigcode-santacoder -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh starcoder
+          rm -rf $STARCODER_ORIGIN_PATH
+
+          source $CONDA_HOME/bin/deactivate
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

       - name: Upload ckpt to ftp
         run: |
           apt-get update && apt install tnftp
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_llama_7b_q4_0.bin $LLAMA_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_redpajama_7b_q4_0.bin $GPTNEOX_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_bloom_7b_q4_0.bin $BLOOM_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_santacoder_1b_q4_0.bin $STARCODER_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_llama_7b_q4_0.bin $LLAMA_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_redpajama_7b_q4_0.bin $GPTNEOX_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_bloom_7b_q4_0.bin $BLOOM_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_santacoder_1b_q4_0.bin $STARCODER_INT4_CKPT_PATH
+
+      - name: Clean up test environment
+        shell: bash
+        run: |
+          $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
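The reworked nightly job downloads, converts, and deletes one original model at a time, so peak disk usage is bounded by a single checkpoint instead of all four at once. A Python sketch of the same pattern (the real job maps e.g. "llama" to the llama-7b-hf directory; the simplified remote names here are assumptions):

    import os
    import shutil
    import subprocess

    # ORIGIN_DIR and LLM_FTP_URL mirror the workflow environment.
    MODELS = ["llama", "gptneox", "bloom", "starcoder"]

    for model in MODELS:
        path = os.path.join(os.environ["ORIGIN_DIR"], model)
        if not os.path.isdir(path):
            # same wget flags the workflow uses
            subprocess.run(
                ["wget", "-r", "-nH", "--no-verbose", "--cut-dirs=1",
                 f"{os.environ['LLM_FTP_URL']}/llm/{model}",
                 "-P", os.environ["ORIGIN_DIR"]],
                check=True)
        subprocess.run(
            ["bash", "python/llm/test/run-llm-convert-tests.sh", model], check=True)
        shutil.rmtree(path, ignore_errors=True)  # free the space before the next model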
.github/workflows/llm_unit_tests_linux.yml (20 changed lines)
@@ -21,8 +21,8 @@ on:

 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-linux:
-    runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
+  llm-unit-test-linux-avx512:
+    runs-on: [ self-hosted, llm, AVX512, ubuntu-20.04-lts ]
     strategy:
       fail-fast: false
       matrix:
@@ -33,6 +33,7 @@ jobs:
       GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
       BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
       STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
+      THREAD_NUM: 6
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
@@ -49,7 +50,7 @@ jobs:
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
@@ -64,7 +65,7 @@ jobs:
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
@@ -79,24 +80,21 @@ jobs:
       - name: Download ckpt models
         run: |
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
           if [ ! -d $LLAMA_INT4_CKPT_PATH ]; then
             echo "Directory $LLAMA_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${LLAMA_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${LLAMA_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $GPTNEOX_INT4_CKPT_PATH ]; then
             echo "Directory $GPTNEOX_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${GPTNEOX_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${GPTNEOX_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $BLOOM_INT4_CKPT_PATH ]; then
             echo "Directory $BLOOM_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${BLOOM_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${BLOOM_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $STARCODER_INT4_CKPT_PATH ]; then
             echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi

       - name: Run LLM inference test
@@ -29,18 +29,23 @@ class Test_Models_Basics(TestCase):
         self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH')
         self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH')
         self.starcoder_model_path = os.environ.get('STARCODER_INT4_CKPT_PATH')
+        thread_num = os.environ.get('THREAD_NUM')
+        if thread_num is not None:
+            self.n_threads = int(thread_num)
+        else:
+            self.n_threads = 2

     def test_llama_completion_success(self):
-        llm = Llama(self.llama_model_path)
+        llm = Llama(self.llama_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_llama_completion_with_stream_success(self):
-        llm = Llama(self.llama_model_path)
+        llm = Llama(self.llama_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=True)

     def test_bloom_completion_success(self):
-        llm = Bloom(self.bloom_model_path)
+        llm = Bloom(self.bloom_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=False)
         # avx = get_avx_flags()
         # if avx == "_avx512":
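The setUp change above honors the runner's THREAD_NUM (the workflow exports THREAD_NUM: 6 on the AVX512 machines) and falls back to 2 threads elsewhere. An equivalent, more compact form of the same logic (a sketch, not the committed code):

    import os

    # Same behavior as the if/else above: use THREAD_NUM when set, default to 2.
    n_threads = int(os.environ.get('THREAD_NUM', '2'))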
@@ -48,25 +53,25 @@ class Test_Models_Basics(TestCase):
         # assert "Paris" in output['choices'][0]['text']

     def test_bloom_completion_with_stream_success(self):
-        llm = Bloom(self.bloom_model_path)
+        llm = Bloom(self.bloom_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=True)

     def test_gptneox_completion_success(self):
-        llm = Gptneox(self.gptneox_model_path)
+        llm = Gptneox(self.gptneox_model_path, n_threads=self.n_threads)
         output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_gptneox_completion_with_stream_success(self):
-        llm = Gptneox(self.gptneox_model_path)
+        llm = Gptneox(self.gptneox_model_path, n_threads=self.n_threads)
         output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=True)

     def test_starcoder_completion_success(self):
-        llm = Starcoder(self.starcoder_model_path)
+        llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads)
         output = llm("def print_hello_world(", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_starcoder_completion_with_stream_success(self):
-        llm = Starcoder(self.starcoder_model_path)
+        llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads)
         output = llm("def print_hello_world(", max_tokens=32, stream=True)
@@ -27,6 +27,11 @@ class Test_Models_Basics(TestCase):
         self.llama_model_path = os.environ.get('LLAMA_INT4_CKPT_PATH')
         self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH')
         self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH')
+        thread_num = os.environ.get('THREAD_NUM')
+        if thread_num is not None:
+            self.n_threads = int(thread_num)
+        else:
+            self.n_threads = 2

     def test_langchain_llm_embedding_llama(self):
@@ -49,7 +54,7 @@ class Test_Models_Basics(TestCase):
         llm = BigdlLLM(
             model_path=self.llama_model_path,
             max_tokens=32,
-            n_threads=2)
+            n_threads=self.n_threads)
         question = "What is AI?"
         result = llm(question)
@@ -58,7 +63,7 @@ class Test_Models_Basics(TestCase):
             model_path=self.gptneox_model_path,
             model_family="gptneox",
             max_tokens=32,
-            n_threads=2)
+            n_threads=self.n_threads)
         question = "What is AI?"
         result = llm(question)
@@ -67,7 +72,7 @@ class Test_Models_Basics(TestCase):
             model_path=self.bloom_model_path,
             model_family="bloom",
             max_tokens=32,
-            n_threads=2)
+            n_threads=self.n_threads)
         question = "What is AI?"
         result = llm(question)
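All three LangChain-wrapper constructors now thread the runner-derived value through n_threads instead of a hard-coded 2. For reference, the same pattern outside the test harness (a sketch; the import location and checkpoint path are assumptions, not from this commit):

    import os
    from bigdl.llm.langchain.llms import BigdlLLM  # assumed import path

    llm = BigdlLLM(
        model_path="/models/bigdl_llm_llama_7b_q4_0.bin",  # placeholder checkpoint
        max_tokens=32,
        n_threads=int(os.environ.get('THREAD_NUM', '2')),
    )
    print(llm("What is AI?"))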
python/llm/test/run-llm-convert-tests.sh
@@ -9,7 +9,16 @@ set -e
 echo "# Start testing convert"
 start=$(date "+%s")

-python -m pytest -s ${LLM_CONVERT_TEST_DIR}
+# separate convert process to save disk space
+if [[ $1 == "llama" ]]; then
+    python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_llama"
+elif [[ $1 == "gptneox" ]]; then
+    python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_gptneox"
+elif [[ $1 == "bloom" ]]; then
+    python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_bloom"
+elif [[ $1 == "starcoder" ]]; then
+    python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_starcoder"
+fi

 now=$(date "+%s")
 time=$((now-start))
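Dispatching on the first CLI argument lets the nightly job convert one model family per invocation (e.g. bash python/llm/test/run-llm-convert-tests.sh llama) and delete its originals before the next family starts. The same dispatch expressed in Python terms (a sketch; the committed script stays bash):

    import os
    import subprocess
    import sys

    # Map the CLI argument to the pytest -k filter used above.
    TESTS = {
        "llama": "test_convert_llama",
        "gptneox": "test_convert_gptneox",
        "bloom": "test_convert_bloom",
        "starcoder": "test_convert_starcoder",
    }

    family = sys.argv[1]
    subprocess.run(
        ["python", "-m", "pytest", "-s",
         os.path.join(os.environ["LLM_CONVERT_TEST_DIR"], "test_convert_model.py"),
         "-k", TESTS[family]],
        check=True)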