From c113ecb929a8947a71e2dc398b638cc0c44bbac9 Mon Sep 17 00:00:00 2001
From: Shengsheng Huang
Date: Sun, 25 Jun 2023 17:38:00 +0800
Subject: [PATCH] [LLM] langchain bloom, UT's, default parameters (#8357)

* update langchain default parameters to align w/ api
* add ut's for llm and embeddings
* update inference test script to install langchain deps
* update tests workflows

---------

Co-authored-by: leonardozcm
---
 .github/workflows/llm_unit_tests_linux.yml   | 13 ++++
 .../llm/langchain/embeddings/bigdlllm.py     |  8 +-
 .../src/bigdl/llm/langchain/llms/bigdlllm.py |  4 +-
 python/llm/test/langchain/test_langchain.py  | 75 +++++++++++++++++++
 python/llm/test/run-llm-langchain-tests.sh   | 17 +++++
 5 files changed, 111 insertions(+), 6 deletions(-)
 create mode 100644 python/llm/test/langchain/test_langchain.py
 create mode 100644 python/llm/test/run-llm-langchain-tests.sh

diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml
index 35e1b333..82ee6fcc 100644
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -104,6 +104,19 @@ jobs:
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
 
+      - name: Run LLM langchain test
+        shell: bash
+        run: |
+          source $CONDA_HOME/bin/activate bigdl-init-llm
+          $CONDA_HOME/bin/conda info
+          pip install -U langchain==0.0.184
+          pip install -U chromadb==0.3.25
+          pip install -U typing_extensions==4.5.0
+          bash python/llm/test/run-llm-langchain-tests.sh
+          source $CONDA_HOME/bin/deactivate
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
       # new test steps should be added here
 
       - name: Run LLM cli test
diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
index f9c90da3..5049f9c8 100644
--- a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
@@ -64,7 +64,7 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     """
 
     model_family: str = "llama"
-    """the model family: currently supports llama, gptneox, and bloom."""
+    """the model family"""
 
     family_info = {
         'llama': {'module': "bigdl.llm.models", 'class': "Llama"},
@@ -86,7 +86,7 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     seed: int = Field(-1, alias="seed")
     """Seed. If -1, a random seed is used."""
 
-    f16_kv: bool = Field(False, alias="f16_kv")
+    f16_kv: bool = Field(True, alias="f16_kv")
     """Use half-precision for key/value cache."""
 
     logits_all: bool = Field(False, alias="logits_all")
@@ -101,11 +101,11 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     n_threads: Optional[int] = Field(2, alias="n_threads")
     """Number of threads to use."""
 
-    n_batch: Optional[int] = Field(8, alias="n_batch")
+    n_batch: Optional[int] = Field(512, alias="n_batch")
     """Number of tokens to process in parallel. Should be a number between 1 and n_ctx."""
 
-    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
+    n_gpu_layers: Optional[int] = Field(0, alias="n_gpu_layers")
     """Number of layers to be loaded into gpu memory.
Default None.""" class Config: diff --git a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py index b732311c..ecefde0d 100644 --- a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py +++ b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py @@ -113,11 +113,11 @@ class BigdlLLM(LLM): n_threads: Optional[int] = Field(2, alias="n_threads") """Number of threads to use.""" - n_batch: Optional[int] = Field(8, alias="n_batch") + n_batch: Optional[int] = Field(512, alias="n_batch") """Number of tokens to process in parallel. Should be a number between 1 and n_ctx.""" - n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers") + n_gpu_layers: Optional[int] = Field(0, alias="n_gpu_layers") """Number of layers to be loaded into gpu memory. Default None.""" suffix: Optional[str] = Field(None) diff --git a/python/llm/test/langchain/test_langchain.py b/python/llm/test/langchain/test_langchain.py new file mode 100644 index 00000000..273e3dab --- /dev/null +++ b/python/llm/test/langchain/test_langchain.py @@ -0,0 +1,75 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bigdl.llm.utils import get_avx_flags +from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings +from bigdl.llm.langchain.llms import BigdlLLM +import pytest +from unittest import TestCase +import os + + +class Test_Models_Basics(TestCase): + def setUp(self): + self.llama_model_path = os.environ.get('LLAMA_INT4_CKPT_PATH') + self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH') + self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH') + + + def test_langchain_llm_embedding_llama(self): + bigdl_embeddings = BigdlLLMEmbeddings( + model_path=self.llama_model_path, + model_family="llama") + text = "This is a test document." + query_result = bigdl_embeddings.embed_query(text) + doc_result = bigdl_embeddings.embed_documents([text]) + + def test_langchain_llm_embedding_gptneox(self): + bigdl_embeddings = BigdlLLMEmbeddings( + model_path=self.gptneox_model_path, + model_family="gptneox") + text = "This is a test document." + query_result = bigdl_embeddings.embed_query(text) + doc_result = bigdl_embeddings.embed_documents([text]) + + def test_langchain_llm_llama(self): + llm = BigdlLLM( + model_path=self.llama_model_path, + max_tokens=32, + n_threads=22) + question = "What is AI?" + result = llm(question) + + def test_langchain_llm_gptneox(self): + llm = BigdlLLM( + model_path=self.gptneox_model_path, + model_family="gptneox", + max_tokens=32, + n_threads=22) + question = "What is AI?" + result = llm(question) + + def test_langchain_llm_bloom(self): + llm = BigdlLLM( + model_path=self.bloom_model_path, + model_family="bloom", + max_tokens=32, + n_threads=22) + question = "What is AI?" 
+        result = llm(question)
+
+if __name__ == '__main__':
+    pytest.main([__file__])
\ No newline at end of file
diff --git a/python/llm/test/run-llm-langchain-tests.sh b/python/llm/test/run-llm-langchain-tests.sh
new file mode 100644
index 00000000..9a15f04e
--- /dev/null
+++ b/python/llm/test/run-llm-langchain-tests.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
+export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
+export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/langchain
+set -e
+
+echo "# Start testing langchain"
+start=$(date "+%s")
+
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
+
+now=$(date "+%s")
+time=$((now-start))
+
+echo "Bigdl-llm langchain tests finished"
+echo "Time used: $time seconds"
\ No newline at end of file
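
A usage sketch (supplementary, not part of the diff): the new field defaults
(f16_kv=True, n_batch=512, n_gpu_layers=0) appear to bring the LangChain
wrappers in line with the defaults of the underlying llama.cpp-style bindings,
which is presumably what "align w/ api" in the commit message refers to. The
snippet below shows how the changed defaults surface on BigdlLLMEmbeddings. It
reuses the LLAMA_INT4_CKPT_PATH environment variable that the new UTs read, and
assumes it points to a real converted int4 checkpoint, since the wrapper (like
the LangChain llama-cpp classes it mirrors) presumably loads the model when the
object is constructed.

import os

from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings

# Same environment variable the new UTs use for the converted checkpoint.
llama_path = os.environ.get('LLAMA_INT4_CKPT_PATH')

embeddings = BigdlLLMEmbeddings(model_path=llama_path, model_family="llama")

# The pydantic field defaults changed by this patch:
assert embeddings.f16_kv is True      # previously False
assert embeddings.n_batch == 512      # previously 8
assert embeddings.n_gpu_layers == 0   # previously None

# Embedding calls, as exercised by test_langchain_llm_embedding_llama:
query_result = embeddings.embed_query("This is a test document.")
doc_result = embeddings.embed_documents(["This is a test document."])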
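
In the same vein, a minimal sketch of the new bloom path through the LangChain
LLM wrapper, mirroring test_langchain_llm_bloom above. BLOOM_INT4_CKPT_PATH is
the environment variable the UT reads; max_tokens=32 and n_threads=22 are just
the values the UT happens to use, not required settings.

import os

from bigdl.llm.langchain.llms import BigdlLLM

# model_family defaults to "llama"; this patch adds bloom coverage alongside
# llama and gptneox in the UTs.
llm = BigdlLLM(
    model_path=os.environ.get('BLOOM_INT4_CKPT_PATH'),
    model_family="bloom",
    max_tokens=32,
    n_threads=22)

# Invoking the wrapper runs a single completion, as in the UT.
result = llm("What is AI?")
print(result)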