diff --git a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py index 34e6c2b6..6876ca75 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py +++ b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py @@ -109,7 +109,7 @@ class Test_Optimize_Gpu_Model: del opt_model gc.collect() assert all(max_diff <= lower_bound for max_diff in max_diff_tensor) - + @pytest.mark.parametrize('Name, Model, Tokenizer, model_path',TEST_MODEL_LIST) def test_dynamic_functions(self, Name, Model, Tokenizer, model_path): if Name == "Llama2-7B": @@ -128,19 +128,19 @@ class Test_Optimize_Gpu_Model: RMSNorm_layer = "model.layers.31.input_layernorm" lower_bound = 2e-6 self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, RMSNorm_layer, layer_before_RMSNorm, lower_bound) - + def Chatglm2_gpu_model(self, Name, Model, Tokenizer, model_path): layer_before_RMSNorm = "transformer.encoder.layers.26" RMSNorm_layer = "transformer.encoder.layers.27.input_layernorm" - lower_bound = 6e-6 + lower_bound = 4e-6 self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, RMSNorm_layer, layer_before_RMSNorm, lower_bound) def Mistral_gpu_model(self, Name, Model, Tokenizer, model_path): layer_before_RMSNorm = "model.layers.30" RMSNorm_layer = "model.layers.31.input_layernorm" - lower_bound = 6e-6 + lower_bound = 8e-6 self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, RMSNorm_layer, layer_before_RMSNorm, lower_bound) - + def Baichuan_gpu_model(self, Name, Model, Tokenizer, model_path): layer_before_RMSNorm = "model.layers.30" RMSNorm_layer = "model.layers.31.input_layernorm" @@ -150,5 +150,5 @@ class Test_Optimize_Gpu_Model: def Qwen_gpu_model(self, Name, Model, Tokenizer, model_path): layer_before_RMSNorm = "transformer.h.30" RMSNorm_layer = "transformer.h.31.ln_1" - lower_bound = 2e-6 - self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, RMSNorm_layer, layer_before_RMSNorm, lower_bound) \ No newline at end of file + lower_bound = 4e-6 + self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, RMSNorm_layer, layer_before_RMSNorm, lower_bound)