diff --git a/python/llm/src/bigdl/llm/transformers/convert.py b/python/llm/src/bigdl/llm/transformers/convert.py index 6d1c2d5e..5c973c6e 100644 --- a/python/llm/src/bigdl/llm/transformers/convert.py +++ b/python/llm/src/bigdl/llm/transformers/convert.py @@ -299,6 +299,9 @@ def _optimize_pre(model): logger.info("Only HuggingFace Transformers models are currently " "supported for further optimizations") return model + # for rwkv models (verified RWKV/rwkv-4-world-7b) + if model.config.model_type == "rwkv": + model.rwkv._rescale_layers() # process NormHead module in Baichuan2 7B and 13B if model.config.model_type == "baichuan" and model.config.vocab_size == 125696: # NormHead do normalization on the weights just once at inference time. diff --git a/python/llm/test/benchmark/32-igpu-perf-test-434.yaml b/python/llm/test/benchmark/32-igpu-perf-test-434.yaml index 28ff8ba8..d485ece0 100644 --- a/python/llm/test/benchmark/32-igpu-perf-test-434.yaml +++ b/python/llm/test/benchmark/32-igpu-perf-test-434.yaml @@ -1,12 +1,14 @@ repo_id: - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' -warm_up: 3 -num_trials: 5 +warm_up: 1 +num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) in_out_pairs: - '32-32' + - '32-256' + - '32-512' # - '1024-128' test_api: # - "transformer_int4" diff --git a/python/llm/test/benchmark/32-igpu-perf-test.yaml b/python/llm/test/benchmark/32-igpu-perf-test.yaml index 420d5b56..81ec620c 100644 --- a/python/llm/test/benchmark/32-igpu-perf-test.yaml +++ b/python/llm/test/benchmark/32-igpu-perf-test.yaml @@ -11,13 +11,16 @@ repo_id: - 'tiiuae/falcon-7b-instruct-with-patch' - 'mosaicml/mpt-7b-chat' - 'liuhaotian/llava-v1.5-7b' + - 'RWKV/rwkv-4-world-7b' local_model_hub: 'path to your local model hub' -warm_up: 3 -num_trials: 5 +warm_up: 1 +num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) in_out_pairs: - '32-32' + - '32-256' + - '32-512' # - '1024-128' test_api: # - "transformer_int4" diff --git a/python/llm/test/benchmark/512-igpu-perf-test.yaml b/python/llm/test/benchmark/512-igpu-perf-test.yaml index 0cede39f..91ae96b2 100644 --- a/python/llm/test/benchmark/512-igpu-perf-test.yaml +++ b/python/llm/test/benchmark/512-igpu-perf-test.yaml @@ -11,6 +11,7 @@ repo_id: - 'tiiuae/falcon-7b-instruct-with-patch' - 'mosaicml/mpt-7b-chat' # - 'liuhaotian/llava-v1.5-7b' + - 'RWKV/rwkv-4-world-7b' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3