diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 8d1abf64..c914e43c 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -164,7 +164,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.9"]
-    runs-on: [self-hosted, llm, spr-perf]
+    runs-on: [self-hosted, llm, spr01-perf]
     env:
       OMP_NUM_THREADS: 16
       THREAD_NUM: 16
@@ -185,6 +185,8 @@
         python -m pip install --upgrade omegaconf
         python -m pip install --upgrade pandas
         python -m pip install --upgrade einops
+        python -m pip install --upgrade tiktoken
+        python -m pip install --upgrade transformers_stream_generator
 
       # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests
       # - name: Download llm binary
@@ -213,13 +215,19 @@
         cd python/llm/dev/benchmark/all-in-one
         export http_proxy=${HTTP_PROXY}
         export https_proxy=${HTTPS_PROXY}
+        source bigdl-llm-init -t
+        export OMP_NUM_THREADS=48
         # hide time info
         sed -i 's/str(end - st)/"xxxxxx"/g' run.py
         python run.py
-        cp ./*.csv /mnt/disk1/nightly_perf_cpu/
+        cp ./*.csv /models/nightly_perf_cpu
         cd ../../../test/benchmark
         python -m pip install pandas==1.5.3
-        python csv_to_html.py -f /mnt/disk1/nightly_perf_cpu/
+        python csv_to_html.py -f /models/nightly_perf_cpu
+        cd /models/nightly_perf_cpu
+        for f in *.html; do
+          curl -T "$f" ${LLM_FTP_URL}/llm/nightly_perf/nightly_perf_cpu/
+        done
 
   llm-performance-test-on-core:
     if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests
diff --git a/python/llm/test/benchmark/cpu-perf-test.yaml b/python/llm/test/benchmark/cpu-perf-test.yaml
index 502c693d..aa9158bd 100644
--- a/python/llm/test/benchmark/cpu-perf-test.yaml
+++ b/python/llm/test/benchmark/cpu-perf-test.yaml
@@ -1,6 +1,12 @@
 repo_id:
   - 'meta-llama/Llama-2-7b-chat-hf'
-local_model_hub: '/mnt/disk1/models'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'THUDM/chatglm2-6b'
+  - 'THUDM/chatglm3-6b'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'Qwen/Qwen-14B-Chat'
+local_model_hub: '/models'
 warm_up: 1
 num_trials: 3
 num_beams: 1 # default to greedy search
@@ -9,6 +15,7 @@ batch_size: 1 # default to 1
 in_out_pairs:
   - '32-32'
   - '1024-128'
+  - '2048-256'
 test_api:
   - "transformer_int4"
   # - "native_int4"
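
Below the patch, a minimal sketch of how the expanded cpu-perf-test.yaml matrix could be consumed. It assumes the all-in-one run.py loads the config with OmegaConf (which this workflow installs) and sweeps every repo_id against every in_out_pairs entry; the actual harness logic may differ.

    from omegaconf import OmegaConf

    # Load the benchmark matrix defined in the config patched above.
    conf = OmegaConf.load('python/llm/test/benchmark/cpu-perf-test.yaml')

    for repo_id in conf.repo_id:
        for pair in conf.in_out_pairs:
            # Each pair is encoded as '<input_tokens>-<output_tokens>',
            # e.g. the newly added '2048-256'.
            in_len, out_len = (int(x) for x in pair.split('-'))
            # Each model/pair combination runs conf.warm_up warm-up trials
            # followed by conf.num_trials measured trials.
            print(f"{repo_id}: in={in_len}, out={out_len}, "
                  f"batch_size={conf.batch_size}, num_beams={conf.num_beams}")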