diff --git a/python/llm/portable-zip/chat.py b/python/llm/portable-zip/chat.py
index 5e463cf8..ee90ac0f 100644
--- a/python/llm/portable-zip/chat.py
+++ b/python/llm/portable-zip/chat.py
@@ -133,11 +133,12 @@ def stream_chat(model, tokenizer, kv_cache=None, max_gen_len=512, stop_words=[])
     )
 
 @torch.no_grad()
-def chatglm2_stream_chat(model, tokenizer):
+def chatglm3_stream_chat(model, tokenizer):
     chat_history = []
     past_key_values = None
     current_length = 0
-    stopping_criteria = StoppingCriteriaList([StopSequenceCriteria(HUMAN_ID, tokenizer)])
+    # https://github.com/THUDM/ChatGLM3/issues/274#issuecomment-1810160305
+    stopping_criteria = StoppingCriteriaList([StopSequenceCriteria(["<|user|>", "<|observation|>"], tokenizer)])
     max_past_length = 2048
 
     while True:
@@ -146,7 +147,14 @@ def chatglm2_stream_chat(model, tokenizer):
         if user_input == "stop":
             break
         print(Fore.BLUE+"BigDL-LLM: "+Fore.RESET, end="")
-        prompt = f"问:{user_input}\n答:"
+        # https://github.com/THUDM/ChatGLM3/blob/main/PROMPT_en.md
+        prompt = f"""
+        <|system|>
+        You are an intelligent AI assistant, named ChatGLM3. Follow the user's instructions carefully.
+        <|user|>
+        {user_input}
+        <|assistant|>
+        """
         for response, chat_history, past_key_values in model.stream_chat(tokenizer, prompt, history=chat_history,
                                                                          stopping_criteria=stopping_criteria,
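
For context on the stopping-criteria change above: a stop-sequence criterion such as the StopSequenceCriteria this diff passes ["<|user|>", "<|observation|>"] into is typically built on Hugging Face transformers' StoppingCriteria hook, which generation calls after every new token. The sketch below is a minimal illustration of that pattern, not BigDL's actual implementation; the class name StopOnSequences and its decode-and-compare logic are assumptions, and only the stop strings and the StoppingCriteriaList wiring come from the diff.

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnSequences(StoppingCriteria):
    # Hypothetical stand-in for the StopSequenceCriteria used in chat.py:
    # stop generation once the decoded output ends with any stop string.
    def __init__(self, stop_sequences, tokenizer):
        # Accept a single string or a list, mirroring both call sites
        # (the old HUMAN_ID argument and the new list of role markers).
        if isinstance(stop_sequences, str):
            stop_sequences = [stop_sequences]
        self.stop_sequences = stop_sequences
        self.tokenizer = tokenizer

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Decode everything generated so far; keep special tokens so that
        # role markers such as <|user|> survive the round-trip.
        text = self.tokenizer.decode(input_ids[0], skip_special_tokens=False)
        return any(text.endswith(s) for s in self.stop_sequences)

# Usage mirroring the diff: halt ChatGLM3 generation at its role markers.
# stopping_criteria = StoppingCriteriaList([
#     StopOnSequences(["<|user|>", "<|observation|>"], tokenizer)])

Re-decoding the full sequence on every step is O(n) per token, which is fine for an interactive chat demo; a production variant would compare only the token-id tail of input_ids against pre-tokenized stop sequences.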