From c113ecb929a8947a71e2dc398b638cc0c44bbac9 Mon Sep 17 00:00:00 2001
From: Shengsheng Huang
Date: Sun, 25 Jun 2023 17:38:00 +0800
Subject: [PATCH] [LLM] langchain bloom, UT's, default parameters (#8357)

* update langchain default parameters to align w/ api
* add ut's for llm and embeddings
* update inference test script to install langchain deps
* update tests workflows

---------

Co-authored-by: leonardozcm
---
 .github/workflows/llm_unit_tests_linux.yml   | 13 ++++
 .../llm/langchain/embeddings/bigdlllm.py     |  8 +-
 .../src/bigdl/llm/langchain/llms/bigdlllm.py |  4 +-
 python/llm/test/langchain/test_langchain.py  | 75 +++++++++++++++++++
 python/llm/test/run-llm-langchain-tests.sh   | 17 +++++
 5 files changed, 111 insertions(+), 6 deletions(-)
 create mode 100644 python/llm/test/langchain/test_langchain.py
 create mode 100644 python/llm/test/run-llm-langchain-tests.sh

diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml
index 35e1b333..82ee6fcc 100644
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -104,6 +104,19 @@ jobs:
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
 
+      - name: Run LLM langchain test
+        shell: bash
+        run: |
+          source $CONDA_HOME/bin/activate bigdl-init-llm
+          $CONDA_HOME/bin/conda info
+          pip install -U langchain==0.0.184
+          pip install -U chromadb==0.3.25
+          pip install -U typing_extensions==4.5.0
+          bash python/llm/test/run-llm-langchain-tests.sh
+          source $CONDA_HOME/bin/deactivate
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
       # new test steps should be added here
 
       - name: Run LLM cli test
diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
index f9c90da3..5049f9c8 100644
--- a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
@@ -64,7 +64,7 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     """
 
     model_family: str = "llama"
-    """the model family: currently supports llama, gptneox, and bloom."""
+    """the model family"""
 
     family_info = {
         'llama': {'module': "bigdl.llm.models", 'class': "Llama"},
@@ -86,7 +86,7 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     seed: int = Field(-1, alias="seed")
     """Seed. If -1, a random seed is used."""
 
-    f16_kv: bool = Field(False, alias="f16_kv")
+    f16_kv: bool = Field(True, alias="f16_kv")
     """Use half-precision for key/value cache."""
 
     logits_all: bool = Field(False, alias="logits_all")
@@ -101,11 +101,11 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     n_threads: Optional[int] = Field(2, alias="n_threads")
     """Number of threads to use."""
 
-    n_batch: Optional[int] = Field(8, alias="n_batch")
+    n_batch: Optional[int] = Field(512, alias="n_batch")
     """Number of tokens to process in parallel. Should be a number between 1 and n_ctx."""
 
-    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
+    n_gpu_layers: Optional[int] = Field(0, alias="n_gpu_layers")
     """Number of layers to be loaded into gpu memory.
Default None.""" class Config: diff --git a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py index b732311c..ecefde0d 100644 --- a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py +++ b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py @@ -113,11 +113,11 @@ class BigdlLLM(LLM): n_threads: Optional[int] = Field(2, alias="n_threads") """Number of threads to use.""" - n_batch: Optional[int] = Field(8, alias="n_batch") + n_batch: Optional[int] = Field(512, alias="n_batch") """Number of tokens to process in parallel. Should be a number between 1 and n_ctx.""" - n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers") + n_gpu_layers: Optional[int] = Field(0, alias="n_gpu_layers") """Number of layers to be loaded into gpu memory. Default None.""" suffix: Optional[str] = Field(None) diff --git a/python/llm/test/langchain/test_langchain.py b/python/llm/test/langchain/test_langchain.py new file mode 100644 index 00000000..273e3dab --- /dev/null +++ b/python/llm/test/langchain/test_langchain.py @@ -0,0 +1,75 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bigdl.llm.utils import get_avx_flags +from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings +from bigdl.llm.langchain.llms import BigdlLLM +import pytest +from unittest import TestCase +import os + + +class Test_Models_Basics(TestCase): + def setUp(self): + self.llama_model_path = os.environ.get('LLAMA_INT4_CKPT_PATH') + self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH') + self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH') + + + def test_langchain_llm_embedding_llama(self): + bigdl_embeddings = BigdlLLMEmbeddings( + model_path=self.llama_model_path, + model_family="llama") + text = "This is a test document." + query_result = bigdl_embeddings.embed_query(text) + doc_result = bigdl_embeddings.embed_documents([text]) + + def test_langchain_llm_embedding_gptneox(self): + bigdl_embeddings = BigdlLLMEmbeddings( + model_path=self.gptneox_model_path, + model_family="gptneox") + text = "This is a test document." + query_result = bigdl_embeddings.embed_query(text) + doc_result = bigdl_embeddings.embed_documents([text]) + + def test_langchain_llm_llama(self): + llm = BigdlLLM( + model_path=self.llama_model_path, + max_tokens=32, + n_threads=22) + question = "What is AI?" + result = llm(question) + + def test_langchain_llm_gptneox(self): + llm = BigdlLLM( + model_path=self.gptneox_model_path, + model_family="gptneox", + max_tokens=32, + n_threads=22) + question = "What is AI?" + result = llm(question) + + def test_langchain_llm_bloom(self): + llm = BigdlLLM( + model_path=self.bloom_model_path, + model_family="bloom", + max_tokens=32, + n_threads=22) + question = "What is AI?" 
+        result = llm(question)
+
+if __name__ == '__main__':
+    pytest.main([__file__])
\ No newline at end of file
diff --git a/python/llm/test/run-llm-langchain-tests.sh b/python/llm/test/run-llm-langchain-tests.sh
new file mode 100644
index 00000000..9a15f04e
--- /dev/null
+++ b/python/llm/test/run-llm-langchain-tests.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
+export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
+export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/langchain
+set -e
+
+echo "# Start testing langchain"
+start=$(date "+%s")
+
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
+
+now=$(date "+%s")
+time=$((now-start))
+
+echo "Bigdl-llm langchain tests finished"
+echo "Time used: $time seconds"
\ No newline at end of file
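
A usage sketch (supplementary, not part of the diff): the new field defaults
(f16_kv=True, n_batch=512, n_gpu_layers=0) appear to bring the LangChain
wrappers in line with the defaults of the underlying llama.cpp-style bindings,
which is presumably what "align w/ api" in the commit message refers to. The
snippet below shows how the changed defaults surface on BigdlLLMEmbeddings. It
reuses the LLAMA_INT4_CKPT_PATH environment variable that the new UTs read, and
assumes it points to a real converted int4 checkpoint, since the wrapper (like
the LangChain llama-cpp classes it mirrors) presumably loads the model when the
object is constructed.

import os

from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings

# Same environment variable the new UTs use for the converted checkpoint.
llama_path = os.environ.get('LLAMA_INT4_CKPT_PATH')

embeddings = BigdlLLMEmbeddings(model_path=llama_path, model_family="llama")

# The pydantic field defaults changed by this patch:
assert embeddings.f16_kv is True      # previously False
assert embeddings.n_batch == 512      # previously 8
assert embeddings.n_gpu_layers == 0   # previously None

# Embedding calls, as exercised by test_langchain_llm_embedding_llama:
query_result = embeddings.embed_query("This is a test document.")
doc_result = embeddings.embed_documents(["This is a test document."])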
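
In the same vein, a minimal sketch of the new bloom path through the LangChain
LLM wrapper, mirroring test_langchain_llm_bloom above. BLOOM_INT4_CKPT_PATH is
the environment variable the UT reads; max_tokens=32 and n_threads=22 are just
the values the UT happens to use, not required settings.

import os

from bigdl.llm.langchain.llms import BigdlLLM

# model_family defaults to "llama"; this patch adds bloom coverage alongside
# llama and gptneox in the UTs.
llm = BigdlLLM(
    model_path=os.environ.get('BLOOM_INT4_CKPT_PATH'),
    model_family="bloom",
    max_tokens=32,
    n_threads=22)

# Invoking the wrapper runs a single completion, as in the UT.
result = llm("What is AI?")
print(result)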