This commit is contained in:
Yishuo Wang 2024-09-04 18:02:49 +08:00 committed by GitHub
parent 77cb348220
commit b1408a1f1c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -151,7 +151,7 @@ class Test_Optimize_Gpu_Model:
         # currently only compare the output of the last self-attention layer.
         layer_norm = "model.layers.31.input_layernorm"
         self_attn = "model.layers.31.self_attn"
-        lower_bound = 8e-3
+        lower_bound = 2e-2
         self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)

     def Falcon_7B_gpu_model(self, Name, Model, Tokenizer, model_path):
@@ -165,7 +165,7 @@ class Test_Optimize_Gpu_Model:
         # currently only need to compare the output of one self-attention layer.
         layer_norm = "transformer.encoder.layers.27.input_layernorm"
         self_attn = "transformer.encoder.layers.27.self_attention"
-        lower_bound = 4e-2
+        lower_bound = 1e-1
         self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)

     def Mistral_gpu_model(self, Name, Model, Tokenizer, model_path):
@@ -182,7 +182,7 @@ class Test_Optimize_Gpu_Model:
         # currently only need to compare the output of one self-attention layer.
         layer_norm = "model.layers.31.input_layernorm"
         self_attn = "model.layers.31.self_attn"
-        lower_bound = 8e-3
+        lower_bound = 2e-2
         self.run_optimize_gpu_model(Name, Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)

     def Qwen_gpu_model(self, Name, Model, Tokenizer, model_path):