From 6a902b892e018e8f974e77518e51eaa116b6052b Mon Sep 17 00:00:00 2001
From: xingyuan li <108672484+hoshibara@users.noreply.github.com>
Date: Mon, 28 Aug 2023 17:41:18 +0900
Subject: [PATCH] [LLM] Add amx build step (#8822)

* add amx build step
---
 .../llm/download-llm-binary/action.yml |  2 +
 .github/workflows/llm-binary-build.yml | 70 +++++++++++++++++++
 python/llm/setup.py                    |  5 ++
 3 files changed, 77 insertions(+)

diff --git a/.github/actions/llm/download-llm-binary/action.yml b/.github/actions/llm/download-llm-binary/action.yml
index 15257ea0..17ff4e3a 100644
--- a/.github/actions/llm/download-llm-binary/action.yml
+++ b/.github/actions/llm/download-llm-binary/action.yml
@@ -15,6 +15,7 @@ runs:
         mv linux-avx512/* python/llm/llm-binary/
         mv linux-avxvnni/* python/llm/llm-binary/
         mv linux-avx/* python/llm/llm-binary/
+        mv linux-amx/* python/llm/llm-binary/
         mv windows-avx2/* python/llm/llm-binary/
         mv windows-avx2-vnni/* python/llm/llm-binary/
         mv windows-avx/* python/llm/llm-binary/
@@ -22,6 +23,7 @@
         rm -rf linux-avx512 || true
         rm -rf linux-avxvnni || true
         rm -rf linux-avx || true
+        rm -rf linux-amx || true
         rm -rf windows-avx2 || true
         rm -rf windows-avx2-vnni || true
         rm -rf windows-avx || true
diff --git a/.github/workflows/llm-binary-build.yml b/.github/workflows/llm-binary-build.yml
index 996aebd9..f9add655 100644
--- a/.github/workflows/llm-binary-build.yml
+++ b/.github/workflows/llm-binary-build.yml
@@ -215,6 +215,76 @@ jobs:
           make clean
           conda remove -n python39 --all -y
 
+  check-linux-amx-artifact:
+    runs-on: ubuntu-latest
+    outputs:
+      if-exists: ${{steps.check_artifact.outputs.exists}}
+    steps:
+      - name: Check if built
+        id: check_artifact
+        uses: xSAVIKx/artifact-exists-action@v0
+        with:
+          name: linux-amx
+
+  linux-build-amx:
+    runs-on: [self-hosted, amx, almalinux8]
+    needs: check-linux-amx-artifact
+    if: needs.check-linux-amx-artifact.outputs.if-exists == 'false'
+    steps:
+      - name: Set access token
+        run: |
+          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
+      - name: Install Build Environment
+        shell: bash
+        run: |
+          export http_proxy=${HTTP_PROXY}
+          export https_proxy=${HTTPS_PROXY}
+          yum install -y gcc-toolset-12 cmake git
+          conda remove -n python39 --all -y
+          conda create -n python39 python=3.9 -y
+      - uses: actions/checkout@v3
+        with:
+          repository: "intel-analytics/llm.cpp"
+          ref: ${{ inputs.llmcpp-ref }}
+          token: ${{ env.github_access_token }}
+          submodules: "recursive"
+      - name: Build amx binary
+        shell: bash
+        run: |
+          scl enable gcc-toolset-12 "cmake -DONLYAVX=OFF -DONLYAVX2=OFF -B build"
+          scl enable gcc-toolset-12 "cmake --build build --config Release"
+          # build chatglm
+          source activate python39 || conda activate python39
+          cd src/chatglm
+          scl enable gcc-toolset-12 "cmake -B build"
+          scl enable gcc-toolset-12 "cmake --build build --config Release"
+      - name: Move amx release binary
+        shell: bash
+        run: |
+          mkdir amx_release
+          mv build/quantize-bloom amx_release/quantize-bloom_amx
+          mv build/libbloom.so amx_release/libbloom_amx.so
+          mv build/quantize-llama amx_release/quantize-llama_amx
+          mv build/libllama.so amx_release/libllama_amx.so
+          mv build/quantize-gptneox amx_release/quantize-gptneox_amx
+          mv build/libgptneox.so amx_release/libgptneox_amx.so
+          mv build/quantize-starcoder amx_release/quantize-starcoder_amx
+          mv build/libstarcoder.so amx_release/libstarcoder_amx.so
+          # chatglm binary files
+          mv src/chatglm/build/main amx_release/main-chatglm_amx
+          # mv src/chatglm/build/_C.cpython-39-x86_64-linux-gnu.so amx_release/chatglm_C.cpython-39-x86_64-linux-gnu.so
+      - name: Archive amx build files
+        uses: actions/upload-artifact@v3
+        with:
+          name: linux-amx
+          path: |
+            amx_release
+      - name: Clean up test environment
+        shell: bash
+        run: |
+          make clean
+          conda remove -n python39 --all -y
+
   check-windows-avx2-artifact:
     runs-on: ubuntu-latest
     outputs:
diff --git a/python/llm/setup.py b/python/llm/setup.py
index 4d749c92..22e0e74f 100644
--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@@ -102,6 +102,10 @@ linux_binarys = [
     "libgptneox_avx512.so",
     "libbloom_avx512.so",
     "libstarcoder_avx512.so",
+    "libllama_amx.so",
+    "libgptneox_amx.so",
+    "libbloom_amx.so",
+    "libstarcoder_amx.so",
     "quantize-llama",
     "quantize-gptneox",
     "quantize-bloom",
@@ -116,6 +120,7 @@
     "main-starcoder",
 
     "main-chatglm_vnni",
+    "main-chatglm_amx",
     "chatglm_C.cpython-39-x86_64-linux-gnu.so",
 ]
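Note (not part of the patch above): the new linux-build-amx job relies on the self-hosted runner label "amx" in runs-on: [self-hosted, amx, almalinux8]. A minimal bash sketch of how such a runner could be checked for Intel AMX support before being given that label is shown below; it assumes a Linux kernel new enough to expose the AMX CPU flags in /proc/cpuinfo, and the script name and messages are hypothetical.

    #!/usr/bin/env bash
    # Hypothetical pre-check for a candidate self-hosted runner: verify the host
    # CPU actually reports Intel AMX before labeling it "amx" for linux-build-amx.
    if grep -qE 'amx_tile|amx_int8|amx_bf16' /proc/cpuinfo; then
        echo "AMX flags present; this host can build and run the *_amx binaries"
    else
        echo "No AMX flags found; do not label this runner 'amx'" >&2
        exit 1
    fi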