From bfc1e2d7334b83b86b16352df3bfaac1a375501f Mon Sep 17 00:00:00 2001
From: SONG Ge <38711238+sgwhat@users.noreply.github.com>
Date: Tue, 24 Oct 2023 14:40:58 +0800
Subject: [PATCH] add fused rms optimization for chatglm model (#9256)

---
 python/llm/src/bigdl/llm/transformers/convert.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/llm/src/bigdl/llm/transformers/convert.py b/python/llm/src/bigdl/llm/transformers/convert.py
index c1902d35..5652b02f 100644
--- a/python/llm/src/bigdl/llm/transformers/convert.py
+++ b/python/llm/src/bigdl/llm/transformers/convert.py
@@ -233,6 +233,9 @@ def _optimize_post(model):
             convert_forward(model,
                             module.CoreAttention,
                             core_attn_forward_8eb45c)
+            convert_forward(model,
+                            module.RMSNorm,
+                            llama_rms_norm_forward)
         elif hasattr(model.config, 'vocab_size') and model.config.vocab_size == 130528:
             # chatglm-6b
             modeling_module_name = model.__class__.__module__
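
Note for reviewers (not part of the patch to apply): below is a minimal,
self-contained sketch of the technique this hunk relies on. `convert_forward`
and `llama_rms_norm_forward` are real helpers in convert.py, but their bodies
are not shown in this diff, so the definitions below are illustrative
assumptions, not the library's actual implementation. In particular, the real
`llama_rms_norm_forward` may dispatch to a fused kernel on supported devices;
the sketch shows only the eager RMSNorm reference semantics
(y = x * rsqrt(mean(x^2) + eps) * weight) that any fused path must match.
The `RMSNorm` class here is a hypothetical stand-in for ChatGLM2's, with an
assumed `eps` attribute name.

import torch
import torch.nn as nn


def convert_forward(m, target_cls, new_forward):
    # Walk the module tree recursively; for every submodule that is an
    # instance of target_cls, bind new_forward to it as its instance-level
    # forward, shadowing the class-defined one.
    for _, sub_m in m.named_children():
        if isinstance(sub_m, target_cls):
            sub_m.forward = new_forward.__get__(sub_m, sub_m.__class__)
        convert_forward(sub_m, target_cls, new_forward)


def rms_norm_forward(self, hidden_states):
    # Reference RMSNorm: y = x * rsqrt(mean(x^2) + eps) * weight,
    # with the reduction done in fp32 for numerical stability.
    input_dtype = hidden_states.dtype
    hidden_states = hidden_states.to(torch.float32)
    variance = hidden_states.pow(2).mean(-1, keepdim=True)
    hidden_states = hidden_states * torch.rsqrt(variance + self.eps)
    return self.weight * hidden_states.to(input_dtype)


class RMSNorm(nn.Module):
    # Hypothetical stand-in for ChatGLM2's RMSNorm module.
    def __init__(self, dim, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(dim))
        self.eps = eps

    def forward(self, x):
        return rms_norm_forward(self, x)  # placeholder; replaced below


model = nn.Sequential(RMSNorm(8))
convert_forward(model, RMSNorm, rms_norm_forward)
out = model(torch.randn(2, 8))
print(out.shape)  # torch.Size([2, 8])

Patching at the instance level (rather than rewriting the class) fits this
call site: the hunk context shows the modeling module is resolved at runtime
from model.__class__.__module__, so module.RMSNorm is not importable when
convert.py is defined, and only the live submodules of the loaded model can
be retargeted to the optimized forward.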