Arc stable version performance regression test (#9785)
* add arc stable version regression test * empty gpu mem between different models * trigger ci * comment spr test * trigger ci * address kai's comments and disable ci * merge fp8 and int4 * disable ci
This commit is contained in:
parent
40edb7b5d7
commit
5cfb4c4f5b
4 changed files with 47 additions and 25 deletions
|
|
@ -32,7 +32,7 @@ jobs:
|
|||
OMP_NUM_THREADS: 16
|
||||
THREAD_NUM: 16
|
||||
ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
|
||||
CSV_SAVE_PATH: ${{ github.event.schedule && '/mnt/disk1/nightly_perf_gpu/' || '/mnt/disk1/pr_perf_gpu/' }}
|
||||
CSV_SAVE_PATH: '/mnt/disk1/stable_version_perf_gpu/'
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
@ -69,36 +69,41 @@ jobs:
|
|||
source /home/arda/intel/oneapi/setvars.sh
|
||||
bash python/llm/test/run-llm-install-tests.sh
|
||||
|
||||
- name: Test on xpu
|
||||
- name: Test on xpu (int4)
|
||||
shell: bash
|
||||
run: |
|
||||
source /home/arda/intel/oneapi/setvars.sh
|
||||
export USE_XETLA=OFF
|
||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||
mv python/llm/test/benchmark/stable-version-arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||
mv python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||
cd python/llm/dev/benchmark/all-in-one
|
||||
# hide time info
|
||||
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
|
||||
# change csv name
|
||||
sed -i 's/{today}/{today}_test1/g' run.py
|
||||
python run.py
|
||||
# upgrade transformers for model Mistral-7B-v0.1
|
||||
python -m pip install transformers==4.34.0
|
||||
mv ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
|
||||
# change csv name
|
||||
sed -i 's/test1/test2/g' run.py
|
||||
python run.py
|
||||
python ../../../test/benchmark/concat_csv.py
|
||||
cp ./*.csv $CSV_SAVE_PATH
|
||||
cp ./*.csv $CSV_SAVE_PATH/int4
|
||||
rm ./*.csv
|
||||
cd ../../../test/benchmark
|
||||
python -m pip install pandas==1.5.3
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH
|
||||
cd ../../dev/benchmark/all-in-one/
|
||||
python ../../../test/benchmark/check_results.py -n 45
|
||||
if [ ${{ github.event.schedule}} ]; then
|
||||
curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
|
||||
fi
|
||||
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/int4 -b $CSV_SAVE_PATH/int4/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
|
||||
- name: Test on xpu (fp8)
|
||||
shell: bash
|
||||
run: |
|
||||
source /home/arda/intel/oneapi/setvars.sh
|
||||
export USE_XETLA=OFF
|
||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||
mv python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||
cd python/llm/dev/benchmark/all-in-one
|
||||
# hide time info
|
||||
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/fp8
|
||||
rm ./*.csv
|
||||
cd ../../../test/benchmark
|
||||
python -m pip install pandas==1.5.3
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/fp8 -b $CSV_SAVE_PATH/fp8/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
|
||||
|
||||
llm-performance-test-on-spr:
|
||||
needs: llm-cpp-build
|
||||
strategy:
|
||||
|
|
|
|||
|
|
@ -413,8 +413,11 @@ def run_transformer_int4_gpu(repo_id,
|
|||
thread = threading.Thread(target=run_model_in_thread, args=(model, in_out, tokenizer, result, warm_up, num_beams, input_ids, out_len, actual_in_len, num_trials, reserved_mem_list))
|
||||
thread.start()
|
||||
thread.join()
|
||||
del model
|
||||
model.to('cpu')
|
||||
torch.xpu.synchronize()
|
||||
torch.xpu.empty_cache()
|
||||
del model
|
||||
gc.collect()
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
repo_id:
|
||||
- 'meta-llama/Llama-2-7b-chat-hf'
|
||||
- 'THUDM/chatglm2-6b'
|
||||
- 'THUDM/chatglm3-6b'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat'
|
||||
- 'Qwen/Qwen-7B-Chat'
|
||||
local_model_hub: '/mnt/disk1/models'
|
||||
warm_up: 1
|
||||
num_trials: 3
|
||||
num_beams: 1 # default to greedy search
|
||||
low_bit: 'fp8' # this config tests 'fp8'; the default is 'sym_int4' (i.e. symmetric int4)
|
||||
in_out_pairs:
|
||||
- '32-32'
|
||||
- '1024-128'
|
||||
- '2048-256'
|
||||
test_api:
|
||||
- "transformer_int4_gpu" # on Intel GPU
|
||||
cpu_embedding: False # whether to put the embedding on CPU (only available now for gpu win related test_api)
|
||||
|
|
@ -16,7 +16,3 @@ in_out_pairs:
|
|||
test_api:
|
||||
- "transformer_int4_gpu" # on Intel GPU
|
||||
cpu_embedding: False # whether to put the embedding on CPU (only available now for gpu win related test_api)
|
||||
exclude:
|
||||
- 'fnlp/moss-moon-003-sft:1024'
|
||||
- 'fnlp/moss-moon-003-sft:2048'
|
||||
- 'bigscience/bloomz-7b1:2048'
|
||||
Loading…
Reference in a new issue