Arc Stable version test (#10087)
* add batch_size in stable version test
* add batch_size in excludes
* add excludes for batch_size
* fix ci
* trigger regression test
* fix xpu version
* disable ci
* address kai's comment

---------

Co-authored-by: Ariadne <wyn2000330@126.com>
parent 33b9e7744d
commit 36c9442c6d

4 changed files with 97 additions and 14 deletions
@@ -61,47 +61,87 @@ jobs:
       - name: Run LLM install (all) test
         uses: ./.github/actions/llm/setup-llm-env
         with:
-          extra-dependency: "xpu"
+          extra-dependency: "xpu_2.1"

       - name: Test installed xpu version
         shell: bash
         run: |
-          source /home/arda/intel/oneapi/setvars.sh
+          source /opt/intel/oneapi/setvars.sh
           bash python/llm/test/run-llm-install-tests.sh

       - name: Test on xpu (int4)
         shell: bash
         run: |
-          source /home/arda/intel/oneapi/setvars.sh
+          source /opt/intel/oneapi/setvars.sh
           export USE_XETLA=OFF
           export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
           mv python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
           cd python/llm/dev/benchmark/all-in-one
           # hide time info
           sed -i 's/str(end - st)/"xxxxxx"/g' run.py
+          # batch_size = 1
+          sed -i '/batch_size/c\batch_size: 1' config.yaml
           python run.py
-          cp ./*.csv $CSV_SAVE_PATH/int4
+          cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_1
+          rm ./*.csv
+          # batch_size = 2
+          sed -i '/batch_size/c\batch_size: 2' config.yaml
+          python run.py
+          cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_2
+          rm ./*.csv
+          # batch_size = 4
+          sed -i '/batch_size/c\batch_size: 4' config.yaml
+          python run.py
+          cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_4
+          rm ./*.csv
+          # batch_size = 8
+          sed -i '/batch_size/c\batch_size: 8' config.yaml
+          python run.py
+          cp ./*.csv $CSV_SAVE_PATH/int4/batch_size_8
           rm ./*.csv
           cd ../../../test/benchmark
           python -m pip install pandas==1.5.3
-          python csv_to_html.py -f $CSV_SAVE_PATH/int4 -b $CSV_SAVE_PATH/int4/transformer_int4_gpu-results-1baseline.csv -t 5.0
+          python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_1 -b $CSV_SAVE_PATH/int4/batch_size_1/transformer_int4_gpu-results-1baseline.csv -t 5.0
+          python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_2 -b $CSV_SAVE_PATH/int4/batch_size_2/transformer_int4_gpu-results-1baseline.csv -t 5.0
+          python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_4 -b $CSV_SAVE_PATH/int4/batch_size_4/transformer_int4_gpu-results-1baseline.csv -t 5.0
+          python csv_to_html.py -f $CSV_SAVE_PATH/int4/batch_size_8 -b $CSV_SAVE_PATH/int4/batch_size_8/transformer_int4_gpu-results-1baseline.csv -t 5.0

       - name: Test on xpu (fp8)
         shell: bash
         run: |
-          source /home/arda/intel/oneapi/setvars.sh
+          source /opt/intel/oneapi/setvars.sh
           export USE_XETLA=OFF
           export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
           mv python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml python/llm/dev/benchmark/all-in-one/config.yaml
           cd python/llm/dev/benchmark/all-in-one
           # hide time info
           sed -i 's/str(end - st)/"xxxxxx"/g' run.py
+          # batch_size = 1
+          sed -i '/batch_size/c\batch_size: 1' config.yaml
           python run.py
-          cp ./*.csv $CSV_SAVE_PATH/fp8
+          cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_1
+          rm ./*.csv
+          # batch_size = 2
+          sed -i '/batch_size/c\batch_size: 2' config.yaml
+          python run.py
+          cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_2
+          rm ./*.csv
+          # batch_size = 4
+          sed -i '/batch_size/c\batch_size: 4' config.yaml
+          python run.py
+          cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_4
+          rm ./*.csv
+          # batch_size = 8
+          sed -i '/batch_size/c\batch_size: 8' config.yaml
+          python run.py
+          cp ./*.csv $CSV_SAVE_PATH/fp8/batch_size_8
           rm ./*.csv
           cd ../../../test/benchmark
           python -m pip install pandas==1.5.3
-          python csv_to_html.py -f $CSV_SAVE_PATH/fp8 -b $CSV_SAVE_PATH/fp8/transformer_int4_gpu-results-1baseline.csv -t 5.0
+          python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_1 -b $CSV_SAVE_PATH/fp8/batch_size_1/transformer_int4_gpu-results-1baseline.csv -t 5.0
+          python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_2 -b $CSV_SAVE_PATH/fp8/batch_size_2/transformer_int4_gpu-results-1baseline.csv -t 5.0
+          python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_4 -b $CSV_SAVE_PATH/fp8/batch_size_4/transformer_int4_gpu-results-1baseline.csv -t 5.0
+          python csv_to_html.py -f $CSV_SAVE_PATH/fp8/batch_size_8 -b $CSV_SAVE_PATH/fp8/batch_size_8/transformer_int4_gpu-results-1baseline.csv -t 5.0

   llm-stress-test-on-arc:
     needs: llm-perf-regression-test-on-arc
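Note: the added workflow lines above use `sed -i '/batch_size/c\batch_size: N' config.yaml` to overwrite the `batch_size:` line of the all-in-one config before each benchmark run. A minimal Python sketch of that same line replacement, for illustration only (the `set_batch_size` helper and the bare `config.yaml` path are assumptions, not part of the workflow):

# Illustrative sketch: mirror `sed -i '/batch_size/c\batch_size: N' config.yaml`
# by rewriting the batch_size line of a config file in place.
from pathlib import Path


def set_batch_size(config_path: str, batch_size: int) -> None:
    # Replace every line containing 'batch_size' with 'batch_size: <N>'.
    path = Path(config_path)
    lines = path.read_text().splitlines()
    rewritten = [f"batch_size: {batch_size}" if "batch_size" in line else line
                 for line in lines]
    path.write_text("\n".join(rewritten) + "\n")


if __name__ == "__main__":
    # Hypothetical usage mirroring the workflow's sweep over batch sizes.
    for bs in (1, 2, 4, 8):
        set_batch_size("config.yaml", bs)
        # at this point the workflow runs `python run.py` and copies the
        # resulting CSVs into $CSV_SAVE_PATH/<precision>/batch_size_<bs>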
@@ -143,18 +183,18 @@ jobs:
       - name: Run LLM install (all) test
         uses: ./.github/actions/llm/setup-llm-env
         with:
-          extra-dependency: "xpu"
+          extra-dependency: "xpu_2.1"

       - name: Test installed xpu version
         shell: bash
         run: |
-          source /home/arda/intel/oneapi/setvars.sh
+          source /opt/intel/oneapi/setvars.sh
           bash python/llm/test/run-llm-install-tests.sh

       - name: Test on xpu (int4)
         shell: bash
         run: |
-          source /home/arda/intel/oneapi/setvars.sh
+          source /opt/intel/oneapi/setvars.sh
           export USE_XETLA=OFF
           export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
           mv python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
@@ -171,7 +211,7 @@ jobs:
       - name: Test on xpu (fp8)
         shell: bash
         run: |
-          source /home/arda/intel/oneapi/setvars.sh
+          source /opt/intel/oneapi/setvars.sh
           export USE_XETLA=OFF
           export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
           mv python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml python/llm/dev/benchmark/all-in-one/config.yaml
@@ -956,7 +956,8 @@ if __name__ == '__main__':
             if excludes:
                 for in_out in conf['in_out_pairs']:
                     model_id_input = model + ':' + in_out.split('-')[0]
-                    if model_id_input in excludes:
+                    model_id_input_batch_size = model_id_input + ':' + str(conf['batch_size'])
+                    if model_id_input in excludes or model_id_input_batch_size in excludes:
                         in_out_pairs.remove(in_out)
             run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'],
                       conf['low_bit'], conf['cpu_embedding'], conf['batch_size'])
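For context, the exclude check added above matches an entry either as `model_id:input_length` or as `model_id:input_length:batch_size`, which is the format used by the new `exclude:` lists in the YAML configs below. A standalone, non-mutating sketch of that filtering (the `filter_in_out_pairs` name and the sample data are illustrative, not part of run.py, which removes entries in place instead):

# Illustrative sketch of the exclude filtering added to run.py above.
# Exclude entries may be 'model:input_len' or 'model:input_len:batch_size'.
def filter_in_out_pairs(model, in_out_pairs, batch_size, excludes):
    kept = []
    for in_out in in_out_pairs:
        model_id_input = model + ':' + in_out.split('-')[0]
        model_id_input_batch_size = model_id_input + ':' + str(batch_size)
        if model_id_input in excludes or model_id_input_batch_size in excludes:
            continue  # skip pairs excluded for this model (and batch size)
        kept.append(in_out)
    return kept


# Hypothetical usage with an exclude list in the same format as the YAML configs:
excludes = ['Qwen/Qwen-7B-Chat:2048:8', 'meta-llama/Llama-2-7b-chat-hf:2048:4']
print(filter_in_out_pairs('Qwen/Qwen-7B-Chat', ['32-32', '2048-256'], 8, excludes))
# -> ['32-32']  (the 2048-* pair is dropped when batch_size is 8)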
@@ -12,8 +12,31 @@ low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4)
 batch_size: 1 # default to 1
 in_out_pairs:
   - '32-32'
+  - '512-256'
   - '1024-128'
   - '2048-256'
 test_api:
   - "transformer_int4_gpu"  # on Intel GPU
 cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
+exclude:
+  - 'meta-llama/Llama-2-7b-chat-hf:2048:4'
+  - 'meta-llama/Llama-2-7b-chat-hf:512:8'
+  - 'meta-llama/Llama-2-7b-chat-hf:1024:8'
+  - 'meta-llama/Llama-2-7b-chat-hf:2048:8'
+  - 'THUDM/chatglm2-6b:2048:8'
+  - 'THUDM/chatglm3-6b:2048:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:512:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
+  - 'Qwen/Qwen-7B-Chat:2048:1'
+  - 'Qwen/Qwen-7B-Chat:1024:2'
+  - 'Qwen/Qwen-7B-Chat:2048:2'
+  - 'Qwen/Qwen-7B-Chat:512:4'
+  - 'Qwen/Qwen-7B-Chat:1024:4'
+  - 'Qwen/Qwen-7B-Chat:2048:4'
+  - 'Qwen/Qwen-7B-Chat:512:8'
+  - 'Qwen/Qwen-7B-Chat:1024:8'
+  - 'Qwen/Qwen-7B-Chat:2048:8'
@@ -12,8 +12,27 @@ low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
 batch_size: 1 # default to 1
 in_out_pairs:
   - '32-32'
+  - '512-256'
   - '1024-128'
   - '2048-256'
 test_api:
   - "transformer_int4_gpu"  # on Intel GPU
 cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
+exclude:
+  - 'meta-llama/Llama-2-7b-chat-hf:2048:4'
+  - 'meta-llama/Llama-2-7b-chat-hf:1024:8'
+  - 'meta-llama/Llama-2-7b-chat-hf:2048:8'
+  - 'THUDM/chatglm2-6b:2048:8'
+  - 'THUDM/chatglm3-6b:2048:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:512:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
+  - 'Qwen/Qwen-7B-Chat:2048:2'
+  - 'Qwen/Qwen-7B-Chat:1024:4'
+  - 'Qwen/Qwen-7B-Chat:2048:4'
+  - 'Qwen/Qwen-7B-Chat:512:8'
+  - 'Qwen/Qwen-7B-Chat:1024:8'
+  - 'Qwen/Qwen-7B-Chat:2048:8'