From f91035c2982489577820a5bdba969dc5ba1dd2fb Mon Sep 17 00:00:00 2001
From: Yishuo Wang
Date: Fri, 11 Aug 2023 15:38:41 +0800
Subject: [PATCH] [LLM] fix chatglm native int4 emoji output (#8739)

---
 python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py b/python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py
index 794fc2c2..4ee9d10f 100644
--- a/python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py
+++ b/python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py
@@ -235,6 +235,7 @@ class ChatGLM(GenerationMixin):
         text = self.detokenize(output_tokens)
         split_text = text[len(prompt):]
+        split_text = split_text.rstrip('�')  # remove partial emoji (str.rstrip returns a new string)
         if stop != []:
             for stop_word in stop:
                 split_text = split_text.split(stop_word)[0]
@@ -309,6 +310,11 @@ class ChatGLM(GenerationMixin):
                 print('\n')
                 break
             text = self.detokenize(output_tokens)
+            if text.endswith('�'):
+                # generated new token is part of an emoji
+                # (some emoji consists of multiple tokens)
+                # continue to generate more tokens to decode this emoji
+                continue
             text = text[len(history_text):]
             history_text += text
             yield {