Update perf test all-in-one config for batch_size arg (#10012)
This commit is contained in:
parent
7952bbc919
commit
1eaaace2dc
18 changed files with 18 additions and 0 deletions
|
|
@ -22,6 +22,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
test_api:
|
test_api:
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
test_api:
|
test_api:
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '2048-256'
|
- '2048-256'
|
||||||
test_api:
|
test_api:
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '2048-256'
|
- '2048-256'
|
||||||
test_api:
|
test_api:
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ warm_up: 3
|
||||||
num_trials: 5
|
num_trials: 5
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
test_api:
|
test_api:
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ warm_up: 3
|
||||||
num_trials: 5
|
num_trials: 5
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
test_api:
|
test_api:
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-512'
|
- '32-512'
|
||||||
test_api:
|
test_api:
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-512'
|
- '32-512'
|
||||||
test_api:
|
test_api:
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'fp8' # set to 'fp8' here; the usual default is 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'fp8' # set to 'fp8' here; the usual default is 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ warm_up: 10
|
||||||
num_trials: 100
|
num_trials: 100
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'fp8' # set to 'fp8' here; the usual default is 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'fp8' # set to 'fp8' here; the usual default is 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '1024-512'
|
- '1024-512'
|
||||||
- '2048-512'
|
- '2048-512'
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ warm_up: 10
|
||||||
num_trials: 100
|
num_trials: 100
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '1024-512'
|
- '1024-512'
|
||||||
- '2048-512'
|
- '2048-512'
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ warm_up: 1
|
||||||
num_trials: 3
|
num_trials: 3
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '32-32'
|
- '32-32'
|
||||||
- '1024-128'
|
- '1024-128'
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ warm_up: 3
|
||||||
num_trials: 50
|
num_trials: 50
|
||||||
num_beams: 1 # default to greedy search
|
num_beams: 1 # default to greedy search
|
||||||
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 1 # default to 1
|
||||||
in_out_pairs:
|
in_out_pairs:
|
||||||
- '1024-512'
|
- '1024-512'
|
||||||
- '2048-512'
|
- '2048-512'
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue