chatglm2 correctness test (#9450)

* chatglm2 ut

* some update

* chatglm2 path

* fix

* add print

Xin Qiu 2023-11-15 15:44:56 +08:00 committed by GitHub
parent 24146d108f
commit 170e0072af


@@ -127,9 +127,16 @@ class Test_Optimize_Gpu_Model:
            else:
                # 'past_key_value' is of type tuple by default.
                for i, (t3, t4) in enumerate(zip(t1, t2)):
                    if model.config.architectures[0] == "ChatGLMModel" and \
                            hasattr(model.config, 'padded_vocab_size') and \
                            model.config.padded_vocab_size == 65024:
                        # chatglm2's past_key_value is expanded 16x for some speedup.
                        # We need to narrow it here.
                        t4 = t4[:, :, 15:17, :]
                    attn_output_diff.append(t3 - t4)

        max_diff_tensor = [torch.max(item).item() for item in attn_output_diff]
        print(max_diff_tensor)
        assert all(max_diff <= lower_bound for max_diff in max_diff_tensor)
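
For context, here is a minimal, self-contained sketch of what the narrowing above is checking. The shapes and the 16x pre-allocation below are assumptions for illustration, not values taken from the model:

import torch

# Assumed setup: the optimized chatglm2 path pre-allocates the KV cache
# larger than needed, so only a small window of the tensor holds live data.
reference = torch.randn(1, 2, 2, 4)    # hypothetical unoptimized past_key_value
expanded = torch.zeros(1, 2, 32, 4)    # hypothetical 16x pre-allocated cache
expanded[:, :, 15:17, :] = reference   # live entries sit at positions 15:17

narrowed = expanded[:, :, 15:17, :]    # the same slice used in the test
max_diff = torch.max(torch.abs(reference - narrowed)).item()
assert max_diff == 0.0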
@@ -158,6 +165,18 @@ class Test_Optimize_Gpu_Model:
        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)

    def test_chatglm2_gpu_model(self):
        Model = AutoModel
        Tokenizer = AutoTokenizer
        model_path = os.environ.get('CHATGLM2_6B_ORIGIN_PATH')
        # Currently we only need to compare the output of one self-attention layer.
        layer_norm = "transformer.encoder.layers.27.input_layernorm"
        self_attn = "transformer.encoder.layers.27.self_attention"
        lower_bound = 5e-5
        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)

if __name__ == '__main__':
    pytest.main([__file__])
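
To run just the new case locally, something along these lines should work; the test file name below is an assumption, and the model path is a placeholder:

import os
import pytest

# Point the test at a local chatglm2-6b checkout (placeholder path).
os.environ['CHATGLM2_6B_ORIGIN_PATH'] = '/path/to/chatglm2-6b'

# Select only the chatglm2 case; adjust the file name to the actual test module.
pytest.main(['test_optimize_model.py::Test_Optimize_Gpu_Model::test_chatglm2_gpu_model', '-s'])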