From ed81baa35e1cd4c69e5afbcf8ffcd5f2bf2de642 Mon Sep 17 00:00:00 2001 From: Mingyu Wei <76120304+Mingyu-Wei@users.noreply.github.com> Date: Mon, 8 Jan 2024 16:50:55 +0800 Subject: [PATCH] LLM: Use default typing-extension in LangChain examples (#9857) * remove typing extension downgrade in readme; minor fixes of code * fix typos in README * change default question of docqa.py --- python/llm/example/CPU/LangChain/README.md | 7 ++----- .../LangChain/native_int4/voiceassistant.py | 1 + .../CPU/LangChain/transformers_int4/docqa.py | 21 +++++++++++++++---- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/python/llm/example/CPU/LangChain/README.md b/python/llm/example/CPU/LangChain/README.md index 192eb3d3..f5dc1d90 100644 --- a/python/llm/example/CPU/LangChain/README.md +++ b/python/llm/example/CPU/LangChain/README.md @@ -1,6 +1,6 @@ # Langchain examples -The examples here shows how to use langchain with `bigdl-llm`. +The examples here show how to use langchain with `bigdl-llm`. ## Install bigdl-llm Follow the instructions in [Install](https://github.com/intel-analytics/BigDL/tree/main/python/llm#install). @@ -11,11 +11,8 @@ Follow the instructions in [Install](https://github.com/intel-analytics/BigDL/tr pip install langchain==0.0.184 pip install -U chromadb==0.3.25 pip install -U pandas==2.0.3 -pip install -U typing_extensions==4.5.0 ``` -Note that typing_extensions==4.5.0 is required, or you may encounter error `TypeError: dataclass_transform() got an unexpected keyword argument 'field_specifiers'` when running the examples. - ## Convert Models using bigdl-llm Follow the instructions in [Convert model](https://github.com/intel-analytics/BigDL/tree/main/python/llm#convert-model). @@ -71,7 +68,7 @@ arguments info: When you see output says > listening now... -Please say something through your microphone (e.g. What is AI). The programe will automatically detect when you have completed your speech and recogize them. +Please say something through your microphone (e.g. What is AI). The program will automatically detect when you have completed your speech and recognize them. #### Known Issues The speech_recognition library may occasionally skip recording due to low volume. An alternative option is to save the recording in WAV format using `PyAudio` and read the file as an input. Here is an example using PyAudio: diff --git a/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py b/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py index d2dcd80e..80718c50 100644 --- a/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py +++ b/python/llm/example/CPU/LangChain/native_int4/voiceassistant.py @@ -35,6 +35,7 @@ import argparse def prepare_chain(args): model_path = args.model_path + model_family = args.model_family n_threads = args.thread_num n_ctx = args.context_size diff --git a/python/llm/example/CPU/LangChain/transformers_int4/docqa.py b/python/llm/example/CPU/LangChain/transformers_int4/docqa.py index 93aa0675..d936bc1e 100644 --- a/python/llm/example/CPU/LangChain/transformers_int4/docqa.py +++ b/python/llm/example/CPU/LangChain/transformers_int4/docqa.py @@ -33,7 +33,16 @@ from langchain.callbacks.manager import CallbackManager from bigdl.llm.langchain.llms import TransformersLLM from bigdl.llm.langchain.embeddings import TransformersEmbeddings - +text_doc = ''' +BigDL seamlessly scales your data analytics & AI applications from laptop to cloud, with the following libraries: +LLM: Low-bit (INT3/INT4/INT5/INT8) large language model library for Intel CPU/GPU +Orca: Distributed Big Data & AI (TF & PyTorch) Pipeline on Spark and Ray +Nano: Transparent Acceleration of Tensorflow & PyTorch Programs on Intel CPU/GPU +DLlib: “Equivalent of Spark MLlib” for Deep Learning +Chronos: Scalable Time Series Analysis using AutoML +Friesian: End-to-End Recommendation Systems +PPML: Secure Big Data and AI (with SGX Hardware Security) +''' def main(args): @@ -42,8 +51,12 @@ def main(args): query = args.question # split texts of input doc - with open(input_path) as f: - input_doc = f.read() + if input_path is None: + input_doc = text_doc + else: + with open(input_path) as f: + input_doc = f.read() + text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) texts = text_splitter.split_text(input_doc) @@ -73,7 +86,7 @@ if __name__ == '__main__': help='the path to transformers model') parser.add_argument('-i', '--input-path', type=str, help='the path to the input doc.') - parser.add_argument('-q', '--question', type=str, default='What is AI?', + parser.add_argument('-q', '--question', type=str, default='What is BigDL?', help='qustion you want to ask.') args = parser.parse_args()