diff --git a/.github/actions/llm/download-llm-binary/action.yml b/.github/actions/llm/download-llm-binary/action.yml index 45705cbe..6d77560d 100644 --- a/.github/actions/llm/download-llm-binary/action.yml +++ b/.github/actions/llm/download-llm-binary/action.yml @@ -14,6 +14,8 @@ runs: mv linux-avx2/* python/llm/llm-binary/ mv linux-avx512/* python/llm/llm-binary/ mv windows-avx2/* python/llm/llm-binary/ + mv windows-avx2-vnni/* python/llm/llm-binary/ rm -rf linux-avx2 || true rm -rf linux-avx512 || true rm -rf windows-avx2 || true + rm -rf windows-avx2-vnni || true diff --git a/.github/workflows/llm-binary-build.yml b/.github/workflows/llm-binary-build.yml index c00bd3a8..75d7e805 100644 --- a/.github/workflows/llm-binary-build.yml +++ b/.github/workflows/llm-binary-build.yml @@ -12,7 +12,7 @@ on: paths: - ".github/workflows/llm-binary-build.yml" workflow_dispatch: - + workflow_call: # A workflow run is made up of one or more jobs that can run sequentially or in parallel @@ -141,3 +141,57 @@ jobs: name: windows-avx2 path: | build/Release + + windows-build-avx2-vnni: + runs-on: [self-hosted, Windows] + steps: + - name: Set access token + run: | + echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $env:GITHUB_ENV + echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" + - uses: actions/checkout@v3 + with: + repository: "intel-analytics/llm.cpp" + token: ${{ env.github_access_token }} + - name: Download w64devkit + shell: powershell + run: | + cd .. + if (-not (Test-Path ./w64devkit.zip) -or ((Get-FileHash -Path w64devkit.zip -Algorithm SHA256).Hash.ToUpper() -ne '2862F388E1720B40026F2FD95C6100A9932E3B14FB13AAC4F225A02B11E31CA9'.ToUpper())) { + Invoke-WebRequest -Uri https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip -OutFile ./w64devkit.zip + } + if (Test-Path ./w64devkit) { rm -r -fo w64devkit } + Add-Type -AssemblyName System.IO.Compression.FileSystem ; [System.IO.Compression.ZipFile]::ExtractToDirectory("$PWD/w64devkit.zip", "$PWD") + echo "w64devkit_path=$PWD/w64devkit/bin" >> $env:GITHUB_ENV + + - name: Build binary + shell: powershell + run: | + $env:Path = '${{ env.w64devkit_path }};' + $env:Path + make + - name: Move release binary + shell: powershell + run: | + if (Test-Path ./release) { rm -r -fo release } + mkdir release + mv build/main-bloom.exe release/main-bloom_vnni.exe + mv build/quantize-bloom.exe release/quantize-bloom_vnni.exe + mv build/libbloom.dll release/libbloom_vnni.dll + + mv build/main-llama.exe release/main-llama_vnni.exe + mv build/quantize-llama.exe release/quantize-llama_vnni.exe + mv build/libllama.dll release/libllama_vnni.dll + + mv build/main-gptneox.exe release/main-gptneox_vnni.exe + mv build/quantize-gptneox.exe release/quantize-gptneox_vnni.exe + mv build/libgptneox.dll release/libgptneox_vnni.dll + + mv build/main-starcoder.exe release/main-starcoder_vnni.exe + mv build/quantize-starcoder.exe release/quantize-starcoder_vnni.exe + mv build/libstarcoder.dll release/libstarcoder_vnni.dll + - name: Archive build files + uses: actions/upload-artifact@v3 + with: + name: windows-avx2-vnni + path: | + release diff --git a/python/llm/README.md b/python/llm/README.md index 5453d7b9..d4116274 100644 --- a/python/llm/README.md +++ b/python/llm/README.md @@ -189,11 +189,15 @@ The native code/lib in `bigdl-llm` has been built using the following tools; in | Model family | Platform | Compiler | GLIBC | | ------------ | -------- | ------------------ | ----- | -| llama | Linux | GCC 9.3.1 | 2.17 | +| llama | Linux | GCC 11.2.1 | 2.17 | | llama | Windows | MSVC 19.36.32532.0 | | -| gptneox | Linux | GCC 9.3.1 | 2.17 | +| llama | Windows | GCC 13.1.0 | | +| gptneox | Linux | GCC 11.2.1 | 2.17 | | gptneox | Windows | MSVC 19.36.32532.0 | | -| bloom | Linux | GCC 9.4.0 | 2.29 | +| gptneox | Windows | GCC 13.1.0 | | +| bloom | Linux | GCC 11.2.1 | 2.29 | | bloom | Windows | MSVC 19.36.32532.0 | | -| starcoder | Linux | GCC 9.4.0 | 2.29 | +| bloom | Windows | GCC 13.1.0 | | +| starcoder | Linux | GCC 11.2.1 | 2.29 | | starcoder | Windows | MSVC 19.36.32532.0 | | +| starcoder | Windows | GCC 13.1.0 | |