chatglm2 correctness test (#9450)
* chatglm2 ut
* some update
* chatglm2 path
* fix
* add print
This commit is contained in:
parent
24146d108f
commit
170e0072af
1 changed file with 19 additions and 0 deletions
@@ -127,9 +127,16 @@ class Test_Optimize_Gpu_Model:
         else:
             # 'past_key_value' is of type tuple as default.
             for i, (t3, t4) in enumerate(zip(t1, t2)):
+                if model.config.architectures[0] == "ChatGLMModel" and \
+                        hasattr(model.config, 'padded_vocab_size') and \
+                        model.config.padded_vocab_size == 65024:
+                    # chatglm2's past_key_value is expanded 16x for some speedup.
+                    # We need to narrow it here.
+                    t4 = t4[:, :, 15:17, :]
                 attn_output_diff.append(t3 - t4)
 
         max_diff_tensor = [torch.max(item).item() for item in attn_output_diff]
+        print(max_diff_tensor)
         assert all(max_diff <= lower_bound for max_diff in max_diff_tensor)
 
 
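For readers skimming the hunk above: the new branch narrows the optimized model's expanded past_key_value before diffing it against the reference output. A minimal sketch of that slicing, with dummy shapes that are assumptions rather than chatglm2's actual cache layout:

import torch

# Illustrative only: these shapes are assumptions, not taken from the test.
# chatglm2's optimized path keeps a past_key_value cache that is wider along
# dim 2 than the reference model's, so the optimized tensor t4 must be
# narrowed to the matching slots before it can be compared with t3.
t3 = torch.randn(32, 2, 2, 128)   # reference cache entry (assumed shape)
t4 = torch.randn(32, 2, 32, 128)  # expanded cache entry (assumed shape)

t4 = t4[:, :, 15:17, :]           # the same slice the test applies
assert t4.shape == t3.shape
max_diff = torch.max(t3 - t4).item()

The slice indices 15:17 come straight from the diff; only the surrounding shapes here are made up.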
@@ -158,6 +165,18 @@ class Test_Optimize_Gpu_Model:
 
         self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
 
+    def test_chatglm2_gpu_model(self):
+
+        Model = AutoModel
+        Tokenizer = AutoTokenizer
+        model_path = os.environ.get('CHATGLM2_6B_ORIGIN_PATH')
+        # currently we only need to compare the output of one self-attention layer.
+        layer_norm = "transformer.encoder.layers.27.input_layernorm"
+        self_attn = "transformer.encoder.layers.27.self_attention"
+        lower_bound = 5e-5
+
+        self.run_optimize_gpu_model(Model, Tokenizer, model_path, self_attn, layer_norm, lower_bound)
+
 
 if __name__ == '__main__':
     pytest.main([__file__])
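One usage note: model_path is read from os.environ.get('CHATGLM2_6B_ORIGIN_PATH'), so that variable must point at a local chatglm2-6b checkpoint before the test can load anything. Since layer_norm and self_attn are dotted module paths inside the model, here is a hedged sketch of how a test like this could capture one named layer's output for comparison (capture_layer_output is an illustrative helper, an assumption rather than the repo's actual run_optimize_gpu_model):

import torch

# Assumed helper: grab one named submodule's output via a forward hook so
# the original and optimized models can be diffed layer by layer.
def capture_layer_output(model, layer_name, inputs):
    captured = {}
    layer = model.get_submodule(layer_name)
    handle = layer.register_forward_hook(
        lambda module, args, output: captured.setdefault('out', output))
    with torch.no_grad():
        model(**inputs)
    handle.remove()
    return captured['out']

Running this capture once on the original model and once on the optimized one, then subtracting the two results, reproduces the attn_output_diff comparison shown in the first hunk.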