From ed81baa35e1cd4c69e5afbcf8ffcd5f2bf2de642 Mon Sep 17 00:00:00 2001
From: Mingyu Wei <76120304+Mingyu-Wei@users.noreply.github.com>
Date: Mon, 8 Jan 2024 16:50:55 +0800
Subject: [PATCH] LLM: Use default typing-extension in LangChain examples
 (#9857)

* remove typing extension downgrade in readme; minor fixes of code

* fix typos in README

* change default question of docqa.py
---
 python/llm/example/CPU/LangChain/README.md    |  7 ++-----
 .../LangChain/native_int4/voiceassistant.py   |  1 +
 .../CPU/LangChain/transformers_int4/docqa.py  | 21 +++++++++++++++----
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/python/llm/example/CPU/LangChain/README.md b/python/llm/example/CPU/LangChain/README.md
index 192eb3d3..f5dc1d90 100644
--- a/python/llm/example/CPU/LangChain/README.md
+++ b/python/llm/example/CPU/LangChain/README.md
@@ -1,6 +1,6 @@
 # Langchain examples
 
-The examples here shows how to use langchain with `bigdl-llm`.
+The examples here show how to use langchain with `bigdl-llm`.
 
 ## Install bigdl-llm
 Follow the instructions in [Install](https://github.com/intel-analytics/BigDL/tree/main/python/llm#install).
@@ -11,11 +11,8 @@ Follow the instructions in [Install](https://github.com/intel-analytics/BigDL/tr
 pip install langchain==0.0.184
 pip install -U chromadb==0.3.25
 pip install -U pandas==2.0.3
-pip install -U typing_extensions==4.5.0
 ```
 
-Note that typing_extensions==4.5.0 is required, or you may encounter error `TypeError: dataclass_transform() got an unexpected keyword argument 'field_specifiers'` when running the examples. 
-
 
 ## Convert Models using bigdl-llm
 Follow the instructions in [Convert model](https://github.com/intel-analytics/BigDL/tree/main/python/llm#convert-model).
@@ -71,7 +68,7 @@ arguments info:
 When you see output says
 > listening now...
 
-Please say something through your microphone (e.g. What is AI). The programe will automatically detect when you have completed your speech and recogize them.
+Please say something through your microphone (e.g. What is AI). The program will automatically detect when you have completed your speech and recognize it.
 
 #### Known Issues
 The speech_recognition library may occasionally skip recording due to low volume. An alternative option is to save the recording in WAV format using `PyAudio` and read the file as an input. Here is an example using PyAudio:
diff --git a/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py b/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py
index d2dcd80e..80718c50 100644
--- a/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py
+++ b/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py
@@ -35,6 +35,7 @@ import argparse
 def prepare_chain(args):
 
     model_path = args.model_path
+    model_family = args.model_family
     n_threads = args.thread_num
     n_ctx = args.context_size
 
diff --git a/python/llm/example/CPU/LangChain/transformers_int4/docqa.py b/python/llm/example/CPU/LangChain/transformers_int4/docqa.py
index 93aa0675..d936bc1e 100644
--- a/python/llm/example/CPU/LangChain/transformers_int4/docqa.py
+++ b/python/llm/example/CPU/LangChain/transformers_int4/docqa.py
@@ -33,7 +33,16 @@ from langchain.callbacks.manager import CallbackManager
 from bigdl.llm.langchain.llms import TransformersLLM
 from bigdl.llm.langchain.embeddings import TransformersEmbeddings
 
-
+text_doc = '''
+BigDL seamlessly scales your data analytics & AI applications from laptop to cloud, with the following libraries:
+LLM: Low-bit (INT3/INT4/INT5/INT8) large language model library for Intel CPU/GPU
+Orca: Distributed Big Data & AI (TF & PyTorch) Pipeline on Spark and Ray
+Nano: Transparent Acceleration of Tensorflow & PyTorch Programs on Intel CPU/GPU
+DLlib: “Equivalent of Spark MLlib” for Deep Learning
+Chronos: Scalable Time Series Analysis using AutoML
+Friesian: End-to-End Recommendation Systems
+PPML: Secure Big Data and AI (with SGX Hardware Security)
+'''
 
 def main(args):
 
@@ -42,8 +51,12 @@ def main(args):
     query = args.question
 
     # split texts of input doc
-    with open(input_path) as f:
-        input_doc = f.read()
+    if input_path is None:
+        input_doc = text_doc
+    else:
+        with open(input_path) as f:
+            input_doc = f.read()
+            
     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
     texts = text_splitter.split_text(input_doc)
 
@@ -73,7 +86,7 @@ if __name__ == '__main__':
                         help='the path to transformers model')
     parser.add_argument('-i', '--input-path', type=str,
                         help='the path to the input doc.')
-    parser.add_argument('-q', '--question', type=str, default='What is AI?',
+    parser.add_argument('-q', '--question', type=str, default='What is BigDL?',
                         help='qustion you want to ask.')
     args = parser.parse_args()