diff --git a/.github/workflows/llm-nightly-test.yml b/.github/workflows/llm-nightly-test.yml
index 7dbef3b9..cca96aa6 100644
--- a/.github/workflows/llm-nightly-test.yml
+++ b/.github/workflows/llm-nightly-test.yml
@@ -14,7 +14,7 @@ on:
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
   llm-nightly-convert-test:
-    runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
+    runs-on: [ self-hosted, llm, AVX512, ubuntu-20.04-lts ]
     strategy:
       fail-fast: false
       matrix:
@@ -41,34 +41,12 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install --upgrade setuptools==58.0.4
           python -m pip install --upgrade wheel
-
-      - name: Download original models
-        run: |
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
-          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
-            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/llama-7b-hf -P $ORIGIN_DIR
-          fi
-          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
-            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
-          fi
-          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
-            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/bloomz-7b1 -P $ORIGIN_DIR
-          fi
-          if [ ! -d $STARCODER_ORIGIN_PATH ]; then
-            echo "Directory $STARCODER_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/gpt_bigcode-santacoder -P $ORIGIN_DIR
-          fi
-      - name: Run LLM convert test
+      - name: Install BigDL-LLM
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
@@ -76,19 +54,58 @@ jobs:
           whl_name=$(ls python/llm/dist)
           pip install -i https://pypi.python.org/simple "python/llm/dist/${whl_name}[all]"
           pip install pytest
-          bash python/llm/test/run-llm-convert-tests.sh
           source $CONDA_HOME/bin/deactivate
-          $CONDA_HOME/bin/conda remove -n bigdl-init-llm --all
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      - name: Download original models & convert
+        shell: bash
+        run: |
+          source $CONDA_HOME/bin/activate bigdl-init-llm
+
+          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/llama-7b-hf -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh llama
+          rm -rf $LLAMA_ORIGIN_PATH
+
+          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
+            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh gptneox
+          rm -rf $GPTNEOX_ORIGIN_PATH
+
+          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
+            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/bloomz-7b1 -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh bloom
+          rm -rf $BLOOM_ORIGIN_PATH
+
+          if [ ! -d $STARCODER_ORIGIN_PATH ]; then
+            echo "Directory $STARCODER_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gpt_bigcode-santacoder -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh starcoder
+          rm -rf $STARCODER_ORIGIN_PATH
+
+          source $CONDA_HOME/bin/deactivate
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

       - name: Upload ckpt to ftp
         run: |
           apt-get update && apt install tnftp
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_llama_7b_q4_0.bin $LLAMA_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_redpajama_7b_q4_0.bin $GPTNEOX_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_bloom_7b_q4_0.bin $BLOOM_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_santacoder_1b_q4_0.bin $STARCODER_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_llama_7b_q4_0.bin $LLAMA_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_redpajama_7b_q4_0.bin $GPTNEOX_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_bloom_7b_q4_0.bin $BLOOM_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_santacoder_1b_q4_0.bin $STARCODER_INT4_CKPT_PATH
+
+      - name: Clean up test environment
+        shell: bash
+        run: |
+          $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
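The "Download original models & convert" step above repeats the same download, convert, delete sequence once per model family so that only one original model occupies the runner's disk at a time. A minimal bash sketch of that pattern as a loop; the `MODEL_DIRS` mapping and the assumption that each `*_ORIGIN_PATH` equals `$ORIGIN_DIR/<ftp dir name>` (which matches the `wget -P` target above) are illustrative, not part of the patch:

```bash
#!/bin/bash
# Hypothetical loop-based equivalent of the per-model convert step above.
declare -A MODEL_DIRS=(
  [llama]="llama-7b-hf"
  [gptneox]="gptneox-7b-redpajama-bf16"
  [bloom]="bloomz-7b1"
  [starcoder]="gpt_bigcode-santacoder"
)
for family in llama gptneox bloom starcoder; do
  origin="$ORIGIN_DIR/${MODEL_DIRS[$family]}"
  if [ ! -d "$origin" ]; then
    echo "Directory $origin not found. Downloading from FTP server..."
    wget -r -nH --no-verbose --cut-dirs=1 "$LLM_FTP_URL/llm/${MODEL_DIRS[$family]}" -P "$ORIGIN_DIR"
  fi
  bash python/llm/test/run-llm-convert-tests.sh "$family"
  rm -rf "$origin"  # free disk space before the next model is downloaded
done
```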
diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml
index 6a127993..02f2b535 100644
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -21,8 +21,8 @@ on:
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-linux:
-    runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
+  llm-unit-test-linux-avx512:
+    runs-on: [ self-hosted, llm, AVX512, ubuntu-20.04-lts ]
     strategy:
       fail-fast: false
       matrix:
@@ -33,6 +33,7 @@ jobs:
       GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
       BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
       STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
+      THREAD_NUM: 6
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
@@ -49,7 +50,7 @@
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
@@ -64,7 +65,7 @@
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
@@ -79,24 +80,21 @@ jobs:
       - name: Download ckpt models
         run: |
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
           if [ ! -d $LLAMA_INT4_CKPT_PATH ]; then
             echo "Directory $LLAMA_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${LLAMA_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${LLAMA_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $GPTNEOX_INT4_CKPT_PATH ]; then
             echo "Directory $GPTNEOX_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${GPTNEOX_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${GPTNEOX_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $BLOOM_INT4_CKPT_PATH ]; then
             echo "Directory $BLOOM_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${BLOOM_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${BLOOM_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $STARCODER_INT4_CKPT_PATH ]; then
             echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi

       - name: Run LLM inference test
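Both jobs now target runners labeled `AVX512` rather than the generic `Gondolin` pool. As a sketch (not part of the patch), a candidate self-hosted host can be checked for AVX-512 support before it is given that label; `avx512f` is the foundation CPUID flag exposed in `/proc/cpuinfo` on Linux:

```bash
# Quick sanity check on a prospective self-hosted runner (Linux only).
if grep -q '\bavx512f\b' /proc/cpuinfo; then
  echo "AVX-512 available: safe to apply the AVX512 runner label"
else
  echo "AVX-512 not available on this host" >&2
  exit 1
fi
```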
diff --git a/python/llm/test/inference/test_call_models.py b/python/llm/test/inference/test_call_models.py
index 40613132..04f53214 100644
--- a/python/llm/test/inference/test_call_models.py
+++ b/python/llm/test/inference/test_call_models.py
@@ -29,18 +29,23 @@ class Test_Models_Basics(TestCase):
         self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH')
         self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH')
         self.starcoder_model_path = os.environ.get('STARCODER_INT4_CKPT_PATH')
+        thread_num = os.environ.get('THREAD_NUM')
+        if thread_num is not None:
+            self.n_threads = int(thread_num)
+        else:
+            self.n_threads = 2

     def test_llama_completion_success(self):
-        llm = Llama(self.llama_model_path)
+        llm = Llama(self.llama_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_llama_completion_with_stream_success(self):
-        llm = Llama(self.llama_model_path)
+        llm = Llama(self.llama_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=True)

     def test_bloom_completion_success(self):
-        llm = Bloom(self.bloom_model_path)
+        llm = Bloom(self.bloom_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=False)
         # avx = get_avx_flags()
         # if avx == "_avx512":
@@ -48,25 +53,25 @@ class Test_Models_Basics(TestCase):
         #     assert "Paris" in output['choices'][0]['text']

     def test_bloom_completion_with_stream_success(self):
-        llm = Bloom(self.bloom_model_path)
+        llm = Bloom(self.bloom_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=True)

     def test_gptneox_completion_success(self):
-        llm = Gptneox(self.gptneox_model_path)
+        llm = Gptneox(self.gptneox_model_path, n_threads=self.n_threads)
         output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_gptneox_completion_with_stream_success(self):
-        llm = Gptneox(self.gptneox_model_path)
+        llm = Gptneox(self.gptneox_model_path, n_threads=self.n_threads)
         output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=True)

     def test_starcoder_completion_success(self):
-        llm = Starcoder(self.starcoder_model_path)
+        llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads)
         output = llm("def print_hello_world(", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_starcoder_completion_with_stream_success(self):
-        llm = Starcoder(self.starcoder_model_path)
+        llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads)
         output = llm("def print_hello_world(", max_tokens=32, stream=True)
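The tests now read `THREAD_NUM` once in `setUp` and fall back to 2 threads when it is unset, so the same file works on CI (where the workflow exports `THREAD_NUM: 6`) and on a developer machine. A hypothetical local run that mirrors the CI configuration; the checkpoint paths below are the CI values from the workflow env and would point at wherever the int4 checkpoints live locally:

```bash
# Mirror the CI thread count and checkpoint locations, then run the tests.
export THREAD_NUM=6
export LLAMA_INT4_CKPT_PATH=./llm/ggml-actions/stable/bigdl_llm_llama_7b_q4_0.bin
export GPTNEOX_INT4_CKPT_PATH=./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
export BLOOM_INT4_CKPT_PATH=./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
export STARCODER_INT4_CKPT_PATH=./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
python -m pytest -s python/llm/test/inference/test_call_models.py
```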
A:", max_tokens=32, stream=False) # assert "Paris" in output['choices'][0]['text'] def test_gptneox_completion_with_stream_success(self): - llm = Gptneox(self.gptneox_model_path) + llm = Gptneox(self.gptneox_model_path, n_threads=self.n_threads) output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=True) def test_starcoder_completion_success(self): - llm = Starcoder(self.starcoder_model_path) + llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads) output = llm("def print_hello_world(", max_tokens=32, stream=False) # assert "Paris" in output['choices'][0]['text'] def test_starcoder_completion_with_stream_success(self): - llm = Starcoder(self.starcoder_model_path) + llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads) output = llm("def print_hello_world(", max_tokens=32, stream=True) diff --git a/python/llm/test/langchain/test_langchain.py b/python/llm/test/langchain/test_langchain.py index 3b72e100..1d221225 100644 --- a/python/llm/test/langchain/test_langchain.py +++ b/python/llm/test/langchain/test_langchain.py @@ -27,7 +27,12 @@ class Test_Models_Basics(TestCase): self.llama_model_path = os.environ.get('LLAMA_INT4_CKPT_PATH') self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH') self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH') - + thread_num = os.environ.get('THREAD_NUM') + if thread_num is not None: + self.n_threads = int(thread_num) + else: + self.n_threads = 2 + def test_langchain_llm_embedding_llama(self): bigdl_embeddings = BigdlLLMEmbeddings( @@ -49,7 +54,7 @@ class Test_Models_Basics(TestCase): llm = BigdlLLM( model_path=self.llama_model_path, max_tokens=32, - n_threads=2) + n_threads=self.n_threads) question = "What is AI?" result = llm(question) @@ -58,7 +63,7 @@ class Test_Models_Basics(TestCase): model_path=self.gptneox_model_path, model_family="gptneox", max_tokens=32, - n_threads=2) + n_threads=self.n_threads) question = "What is AI?" result = llm(question) @@ -67,7 +72,7 @@ class Test_Models_Basics(TestCase): model_path=self.bloom_model_path, model_family="bloom", max_tokens=32, - n_threads=2) + n_threads=self.n_threads) question = "What is AI?" result = llm(question) diff --git a/python/llm/test/run-llm-convert-tests.sh b/python/llm/test/run-llm-convert-tests.sh index 42ffe148..3396435c 100644 --- a/python/llm/test/run-llm-convert-tests.sh +++ b/python/llm/test/run-llm-convert-tests.sh @@ -9,7 +9,16 @@ set -e echo "# Start testing convert" start=$(date "+%s") -python -m pytest -s ${LLM_CONVERT_TEST_DIR} +# separate convert process to save disk space +if [[ $1 == "llama" ]]; then + python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_llama" +elif [[ $1 == "gptneox" ]]; then + python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_gptneox" +elif [[ $1 == "bloom" ]]; then + python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_bloom" +elif [[ $1 == "starcoder" ]]; then + python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_starcoder" +fi now=$(date "+%s") time=$((now-start))