Arc Stable version test (#10087)
* add batch_size in stable version test * add batch_size in excludes * add excludes for batch_size * fix ci * trigger regression test * fix xpu version * disable ci * address kai's comment --------- Co-authored-by: Ariadne <wyn2000330@126.com>
This commit is contained in:
parent
33b9e7744d
commit
36c9442c6d
4 changed files with 97 additions and 14 deletions
|
|
@ -61,47 +61,87 @@ jobs:
|
|||
- name: Run LLM install (all) test
|
||||
uses: ./.github/actions/llm/setup-llm-env
|
||||
with:
|
||||
extra-dependency: "xpu"
|
||||
extra-dependency: "xpu_2.1"
|
||||
|
||||
- name: Test installed xpu version
|
||||
shell: bash
|
||||
run: |
|
||||
source /home/arda/intel/oneapi/setvars.sh
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
bash python/llm/test/run-llm-install-tests.sh
|
||||
|
||||
- name: Test on xpu (int4)
|
||||
shell: bash
|
||||
run: |
|
||||
source /home/arda/intel/oneapi/setvars.sh
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
export USE_XETLA=OFF
|
||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||
mv python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||
cd python/llm/dev/benchmark/all-in-one
|
||||
# hide time info
|
||||
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
|
||||
# batch_size = 1
|
||||
sed -i '/batch_size/c\batch_size: 1' config.yaml
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/int4
|
||||
cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_1
|
||||
rm ./*.csv
|
||||
# batch_size = 2
|
||||
sed -i '/batch_size/c\batch_size: 2' config.yaml
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_2
|
||||
rm ./*.csv
|
||||
# batch_size = 4
|
||||
sed -i '/batch_size/c\batch_size: 4' config.yaml
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_4
|
||||
rm ./*.csv
|
||||
# batch_size = 8
|
||||
sed -i '/batch_size/c\batch_size: 8' config.yaml
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_8
|
||||
rm ./*.csv
|
||||
cd ../../../test/benchmark
|
||||
python -m pip install pandas==1.5.3
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/int4 -b $CSV_SAVE_PATH/int4/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_1 -b $CSV_SAVE_PATH/int4/batch_size_1/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_2 -b $CSV_SAVE_PATH/int4/batch_size_2/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_4 -b $CSV_SAVE_PATH/int4/batch_size_4/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_8 -b $CSV_SAVE_PATH/int4/batch_size_8/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
|
||||
- name: Test on xpu (fp8)
|
||||
shell: bash
|
||||
run: |
|
||||
source /home/arda/intel/oneapi/setvars.sh
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
export USE_XETLA=OFF
|
||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||
mv python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||
cd python/llm/dev/benchmark/all-in-one
|
||||
# hide time info
|
||||
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
|
||||
# batch_size = 1
|
||||
sed -i '/batch_size/c\batch_size: 1' config.yaml
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/fp8
|
||||
cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_1
|
||||
rm ./*.csv
|
||||
# batch_size = 2
|
||||
sed -i '/batch_size/c\batch_size: 2' config.yaml
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_2
|
||||
rm ./*.csv
|
||||
# batch_size = 4
|
||||
sed -i '/batch_size/c\batch_size: 4' config.yaml
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_4
|
||||
rm ./*.csv
|
||||
# batch_size = 8
|
||||
sed -i '/batch_size/c\batch_size: 8' config.yaml
|
||||
python run.py
|
||||
cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_8
|
||||
rm ./*.csv
|
||||
cd ../../../test/benchmark
|
||||
python -m pip install pandas==1.5.3
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/fp8 -b $CSV_SAVE_PATH/fp8/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_1 -b $CSV_SAVE_PATH/fp8/batch_size_1/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_2 -b $CSV_SAVE_PATH/fp8/batch_size_2/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_4 -b $CSV_SAVE_PATH/fp8/batch_size_4/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_8 -b $CSV_SAVE_PATH/fp8/batch_size_8/transformer_int4_gpu-results-1baseline.csv -t 5.0
|
||||
|
||||
llm-stress-test-on-arc:
|
||||
needs: llm-perf-regression-test-on-arc
|
||||
|
|
@ -143,18 +183,18 @@ jobs:
|
|||
- name: Run LLM install (all) test
|
||||
uses: ./.github/actions/llm/setup-llm-env
|
||||
with:
|
||||
extra-dependency: "xpu"
|
||||
extra-dependency: "xpu_2.1"
|
||||
|
||||
- name: Test installed xpu version
|
||||
shell: bash
|
||||
run: |
|
||||
source /home/arda/intel/oneapi/setvars.sh
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
bash python/llm/test/run-llm-install-tests.sh
|
||||
|
||||
- name: Test on xpu (int4)
|
||||
shell: bash
|
||||
run: |
|
||||
source /home/arda/intel/oneapi/setvars.sh
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
export USE_XETLA=OFF
|
||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||
mv python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||
|
|
@ -171,7 +211,7 @@ jobs:
|
|||
- name: Test on xpu (fp8)
|
||||
shell: bash
|
||||
run: |
|
||||
source /home/arda/intel/oneapi/setvars.sh
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
export USE_XETLA=OFF
|
||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||
mv python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||
|
|
|
|||
|
|
@ -956,7 +956,8 @@ if __name__ == '__main__':
|
|||
if excludes:
|
||||
for in_out in conf['in_out_pairs']:
|
||||
model_id_input = model + ':' + in_out.split('-')[0]
|
||||
if model_id_input in excludes:
|
||||
model_id_input_batch_size = model_id_input + ':' + str(conf['batch_size'])
|
||||
if model_id_input in excludes or model_id_input_batch_size in excludes:
|
||||
in_out_pairs.remove(in_out)
|
||||
run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'],
|
||||
conf['low_bit'], conf['cpu_embedding'], conf['batch_size'])
|
||||
|
|
|
|||
|
|
@ -12,8 +12,31 @@ low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4)
|
|||
batch_size: 1 # default to 1
|
||||
in_out_pairs:
|
||||
- '32-32'
|
||||
- '512-256'
|
||||
- '1024-128'
|
||||
- '2048-256'
|
||||
test_api:
|
||||
- "transformer_int4_gpu" # on Intel GPU
|
||||
cpu_embedding: False # whether put embedding to CPU (only available now for gpu win related test_api)
|
||||
exclude:
|
||||
- 'meta-llama/Llama-2-7b-chat-hf:2048:4'
|
||||
- 'meta-llama/Llama-2-7b-chat-hf:512:8'
|
||||
- 'meta-llama/Llama-2-7b-chat-hf:1024:8'
|
||||
- 'meta-llama/Llama-2-7b-chat-hf:2048:8'
|
||||
- 'THUDM/chatglm2-6b:2048:8'
|
||||
- 'THUDM/chatglm3-6b:2048:8'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:512:8'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
|
||||
- 'Qwen/Qwen-7B-Chat:2048:1'
|
||||
- 'Qwen/Qwen-7B-Chat:1024:2'
|
||||
- 'Qwen/Qwen-7B-Chat:2048:2'
|
||||
- 'Qwen/Qwen-7B-Chat:512:4'
|
||||
- 'Qwen/Qwen-7B-Chat:1024:4'
|
||||
- 'Qwen/Qwen-7B-Chat:2048:4'
|
||||
- 'Qwen/Qwen-7B-Chat:512:8'
|
||||
- 'Qwen/Qwen-7B-Chat:1024:8'
|
||||
- 'Qwen/Qwen-7B-Chat:2048:8'
|
||||
|
|
|
|||
|
|
@ -12,8 +12,27 @@ low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
|||
batch_size: 1 # default to 1
|
||||
in_out_pairs:
|
||||
- '32-32'
|
||||
- '512-256'
|
||||
- '1024-128'
|
||||
- '2048-256'
|
||||
test_api:
|
||||
- "transformer_int4_gpu" # on Intel GPU
|
||||
cpu_embedding: False # whether put embedding to CPU (only available now for gpu win related test_api)
|
||||
exclude:
|
||||
- 'meta-llama/Llama-2-7b-chat-hf:2048:4'
|
||||
- 'meta-llama/Llama-2-7b-chat-hf:1024:8'
|
||||
- 'meta-llama/Llama-2-7b-chat-hf:2048:8'
|
||||
- 'THUDM/chatglm2-6b:2048:8'
|
||||
- 'THUDM/chatglm3-6b:2048:8'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:512:8'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
|
||||
- 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
|
||||
- 'Qwen/Qwen-7B-Chat:2048:2'
|
||||
- 'Qwen/Qwen-7B-Chat:1024:4'
|
||||
- 'Qwen/Qwen-7B-Chat:2048:4'
|
||||
- 'Qwen/Qwen-7B-Chat:512:8'
|
||||
- 'Qwen/Qwen-7B-Chat:1024:8'
|
||||
- 'Qwen/Qwen-7B-Chat:2048:8'
|
||||
|
|
|
|||
Loading…
Reference in a new issue