disable arc ut (#9825)
parent 20e9742fa0
commit f4eb5da42d
1 changed file with 27 additions and 27 deletions
@@ -186,42 +186,42 @@ class Test_Optimize_Gpu_Model:
         assert all(max_diff <= lower_bound for max_diff in max_diff_tensor)
 
 
-    def test_falcon_gpu_model(self):
-        Model = AutoModelForCausalLM
-        Tokenizer = AutoTokenizer
-        model_path = os.environ.get('FALCON_7B_ORIGIN_PATH')
-        # currently only compare the output of the last self-attention layer.
-        layer_norm = "transformer.h.31.input_layernorm"
-        self_attn = "transformer.h.31.self_attention"
-        lower_bound = 0
-        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
+    # def test_falcon_gpu_model(self):
+    #     Model = AutoModelForCausalLM
+    #     Tokenizer = AutoTokenizer
+    #     model_path = os.environ.get('FALCON_7B_ORIGIN_PATH')
+    #     # currently only compare the output of the last self-attention layer.
+    #     layer_norm = "transformer.h.31.input_layernorm"
+    #     self_attn = "transformer.h.31.self_attention"
+    #     lower_bound = 0
+    #     self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
 
 
-    def test_llama_gpu_model(self):
-        Model = AutoModelForCausalLM
-        Tokenizer = AutoTokenizer
-        model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
-        # currently only compare the output of the last self-attention layer.
-        layer_norm = "model.layers.31.input_layernorm"
-        self_attn = "model.layers.31.self_attn"
-        lower_bound = 5e-2
-        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
+    # def test_llama_gpu_model(self):
+    #     Model = AutoModelForCausalLM
+    #     Tokenizer = AutoTokenizer
+    #     model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
+    #     # currently only compare the output of the last self-attention layer.
+    #     layer_norm = "model.layers.31.input_layernorm"
+    #     self_attn = "model.layers.31.self_attn"
+    #     lower_bound = 5e-2
+    #     self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
 
-    def test_chatglm2_gpu_model(self):
-        Model = AutoModel
-        Tokenizer = AutoTokenizer
-        model_path = os.environ.get('CHATGLM2_6B_ORIGIN_PATH')
-        # currently only need to compare the output of one self-attention layer.
-        layer_norm = "transformer.encoder.layers.27.input_layernorm"
-        self_attn = "transformer.encoder.layers.27.self_attention"
-        lower_bound = 1e-3
-        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
+    # def test_chatglm2_gpu_model(self):
+    #     Model = AutoModel
+    #     Tokenizer = AutoTokenizer
+    #     model_path = os.environ.get('CHATGLM2_6B_ORIGIN_PATH')
+    #     # currently only need to compare the output of one self-attention layer.
+    #     layer_norm = "transformer.encoder.layers.27.input_layernorm"
+    #     self_attn = "transformer.encoder.layers.27.self_attention"
+    #     lower_bound = 1e-3
+    #     self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
 
 
 if __name__ == '__main__':
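Note on the change: the diff disables the three Arc GPU unit tests (the Falcon, Llama 2, and ChatGLM2 last-layer output comparisons) by commenting out each method in full. As an aside, not part of this commit: if this suite runs under pytest (the plain assert style in the tests suggests so, but that is an assumption), the same effect is usually achieved with skip markers, which keep the tests visible as "skipped" in the report instead of dropping them from collection. A minimal sketch; the skip reasons and the (#9825) reference below are illustrative, not taken from the commit:

# Sketch only, assuming a pytest runner; class and test names mirror the
# diff above, and the skip reasons are hypothetical.
import os

import pytest


class Test_Optimize_Gpu_Model:

    # mark.skip disables the test unconditionally but still reports it.
    @pytest.mark.skip(reason="Arc UT temporarily disabled (#9825)")
    def test_falcon_gpu_model(self):
        ...

    # mark.skipif leaves the test runnable where the checkpoint path is
    # configured in the environment and skips it everywhere else.
    @pytest.mark.skipif(os.environ.get('LLAMA2_7B_ORIGIN_PATH') is None,
                        reason="LLAMA2_7B_ORIGIN_PATH not set")
    def test_llama_gpu_model(self):
        ...

Either marker re-enables a test by deleting a single line, whereas a commented-out body has to be un-commented line by line.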