diff --git a/python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py b/python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py
index 794fc2c2..4ee9d10f 100644
--- a/python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py
+++ b/python/llm/src/bigdl/llm/ggml/model/chatglm/chatglm.py
@@ -235,6 +235,7 @@ class ChatGLM(GenerationMixin):
         text = self.detokenize(output_tokens)
         split_text = text[len(prompt):]
+        split_text = split_text.rstrip('�')  # remove partial emoji
         if stop != []:
             for stop_word in stop:
                 split_text = split_text.split(stop_word)[0]
@@ -309,6 +310,11 @@ class ChatGLM(GenerationMixin):
                 print('\n')
                 break
             text = self.detokenize(output_tokens)
+            if text.endswith('�'):
+                # generated new token is part of an emoji
+                # (some emoji consists of multiple tokens)
+                # continue to generate more tokens to decode this emoji
+                continue
             text = text[len(history_text):]
             history_text += text
             yield {