[LLM] fix chatglm native int4 emoji output (#8739)
This commit is contained in:
parent
77efcf7b1d
commit
f91035c298
1 changed files with 6 additions and 0 deletions
|
|
@ -235,6 +235,7 @@ class ChatGLM(GenerationMixin):
|
||||||
|
|
||||||
text = self.detokenize(output_tokens)
|
text = self.detokenize(output_tokens)
|
||||||
split_text = text[len(prompt):]
|
split_text = text[len(prompt):]
|
||||||
|
split_text.rstrip('\ufffd') # remove partial emoji (U+FFFD replacement char from an incomplete UTF-8 sequence)
|
||||||
if stop != []:
|
if stop != []:
|
||||||
for stop_word in stop:
|
for stop_word in stop:
|
||||||
split_text = split_text.split(stop_word)[0]
|
split_text = split_text.split(stop_word)[0]
|
||||||
|
|
@ -309,6 +310,11 @@ class ChatGLM(GenerationMixin):
|
||||||
print('\n')
|
print('\n')
|
||||||
break
|
break
|
||||||
text = self.detokenize(output_tokens)
|
text = self.detokenize(output_tokens)
|
||||||
|
if text.endswith('\ufffd'):
|
||||||
|
# generated new token is part of an emoji
|
||||||
|
# (some emoji consists of multiple tokens)
|
||||||
|
# continue to generate more tokens to decode this emoji
|
||||||
|
continue
|
||||||
text = text[len(history_text):]
|
text = text[len(history_text):]
|
||||||
history_text += text
|
history_text += text
|
||||||
yield {
|
yield {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue