disable arc ut (#9825)
commit f4eb5da42d
parent 20e9742fa0
1 changed file with 27 additions and 27 deletions
@@ -186,42 +186,42 @@ class Test_Optimize_Gpu_Model:
         assert all(max_diff <= lower_bound for max_diff in max_diff_tensor)
 
 
-    def test_falcon_gpu_model(self):
+    # def test_falcon_gpu_model(self):
 
-        Model = AutoModelForCausalLM
-        Tokenizer = AutoTokenizer
-        model_path = os.environ.get('FALCON_7B_ORIGIN_PATH')
-        # currently only compare the output of the last self-attention layer.
-        layer_norm = "transformer.h.31.input_layernorm"
-        self_attn = "transformer.h.31.self_attention"
-        lower_bound = 0
+    #     Model = AutoModelForCausalLM
+    #     Tokenizer = AutoTokenizer
+    #     model_path = os.environ.get('FALCON_7B_ORIGIN_PATH')
+    #     # currently only compare the output of the last self-attention layer.
+    #     layer_norm = "transformer.h.31.input_layernorm"
+    #     self_attn = "transformer.h.31.self_attention"
+    #     lower_bound = 0
 
-        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
+    #     self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
 
 
-    def test_llama_gpu_model(self):
+    # def test_llama_gpu_model(self):
 
-        Model = AutoModelForCausalLM
-        Tokenizer = AutoTokenizer
-        model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
-        # currently only compare the output of the last self-attention layer.
-        layer_norm = "model.layers.31.input_layernorm"
-        self_attn = "model.layers.31.self_attn"
-        lower_bound = 5e-2
+    #     Model = AutoModelForCausalLM
+    #     Tokenizer = AutoTokenizer
+    #     model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
+    #     # currently only compare the output of the last self-attention layer.
+    #     layer_norm = "model.layers.31.input_layernorm"
+    #     self_attn = "model.layers.31.self_attn"
+    #     lower_bound = 5e-2
 
-        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
+    #     self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
 
-    def test_chatglm2_gpu_model(self):
+    # def test_chatglm2_gpu_model(self):
 
-        Model = AutoModel
-        Tokenizer = AutoTokenizer
-        model_path = os.environ.get('CHATGLM2_6B_ORIGIN_PATH')
-        # currently only need to compare the output of one self-attention layer.
-        layer_norm = "transformer.encoder.layers.27.input_layernorm"
-        self_attn = "transformer.encoder.layers.27.self_attention"
-        lower_bound = 1e-3
+    #     Model = AutoModel
+    #     Tokenizer = AutoTokenizer
+    #     model_path = os.environ.get('CHATGLM2_6B_ORIGIN_PATH')
+    #     # currently only need to compare the output of one self-attention layer.
+    #     layer_norm = "transformer.encoder.layers.27.input_layernorm"
+    #     self_attn = "transformer.encoder.layers.27.self_attention"
+    #     lower_bound = 1e-3
 
-        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
+    #     self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
 
 
 if __name__ == '__main__':
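The commit disables the three Arc GPU tests by commenting every line out, which removes them from test collection entirely. As a side note, a skip marker would achieve the same effect while keeping the tests visible in the run report. The sketch below is hypothetical, not part of this commit, and it assumes the suite is run with pytest; the method name `test_llama_gpu_model_conditional` is invented for illustration.

import os

import pytest


class Test_Optimize_Gpu_Model:
    # Hypothetical alternative to commenting the method out: the test is still
    # collected, and pytest reports it as skipped with the reason attached.
    @pytest.mark.skip(reason="Arc UT temporarily disabled (#9825)")
    def test_llama_gpu_model(self):
        ...

    # A conditional variant: skip only when the required model path is absent
    # from the environment, so the test runs again once the path is provided.
    @pytest.mark.skipif('LLAMA2_7B_ORIGIN_PATH' not in os.environ,
                        reason="LLAMA2_7B_ORIGIN_PATH not set")
    def test_llama_gpu_model_conditional(self):
        ...

Either form makes the disabled state show up in CI output, which can make it easier to remember to re-enable the tests later.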