[LLM] langchain bloom, UT's, default parameters (#8357)
* update langchain default parameters to align w/ api
* add ut's for llm and embeddings
* update inference test script to install langchain deps
* update tests workflows

Co-authored-by: leonardozcm <changmin.zhao@intel.com>

parent 446175cc05
commit c113ecb929

5 changed files with 111 additions and 6 deletions

.github/workflows/llm_unit_tests_linux.yml (vendored): 13 additions

@@ -104,6 +104,19 @@ jobs:
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

+      - name: Run LLM langchain test
+        shell: bash
+        run: |
+          source $CONDA_HOME/bin/activate bigdl-init-llm
+          $CONDA_HOME/bin/conda info
+          pip install -U langchain==0.0.184
+          pip install -U chromadb==0.3.25
+          pip install -U typing_extensions==4.5.0
+          bash python/llm/test/run-llm-langchain-tests.sh
+          source $CONDA_HOME/bin/deactivate
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
       # new test steps should be added here

       - name: Run LLM cli test
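The new step pins langchain==0.0.184, chromadb==0.3.25, and typing_extensions==4.5.0 before invoking the test script. A minimal sketch for verifying the same pins in a local environment, assuming Python 3.8+ for importlib.metadata; the check_pins helper and the PINS table are illustrative, not part of the repo:

    # Hypothetical helper mirroring the version pins from the workflow step above.
    from importlib.metadata import PackageNotFoundError, version

    PINS = {
        "langchain": "0.0.184",
        "chromadb": "0.3.25",
        "typing_extensions": "4.5.0",
    }

    def check_pins(pins=PINS):
        """Print whether each pinned package matches the CI version."""
        for pkg, wanted in pins.items():
            try:
                found = version(pkg)
            except PackageNotFoundError:
                print(f"{pkg}: not installed (CI pins {wanted})")
                continue
            note = "" if found == wanted else f" (CI pins {wanted})"
            print(f"{pkg}: {found}{note}")

    if __name__ == "__main__":
        check_pins()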

@@ -64,7 +64,7 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     """

     model_family: str = "llama"
-    """the model family: currently supports llama, gptneox, and bloom."""
+    """the model family"""

     family_info = {
         'llama': {'module': "bigdl.llm.models", 'class': "Llama"},
@@ -86,7 +86,7 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     seed: int = Field(-1, alias="seed")
     """Seed. If -1, a random seed is used."""

-    f16_kv: bool = Field(False, alias="f16_kv")
+    f16_kv: bool = Field(True, alias="f16_kv")
     """Use half-precision for key/value cache."""

     logits_all: bool = Field(False, alias="logits_all")
@@ -101,11 +101,11 @@ class BigdlLLMEmbeddings(BaseModel, Embeddings):
     n_threads: Optional[int] = Field(2, alias="n_threads")
     """Number of threads to use."""

-    n_batch: Optional[int] = Field(8, alias="n_batch")
+    n_batch: Optional[int] = Field(512, alias="n_batch")
     """Number of tokens to process in parallel.
     Should be a number between 1 and n_ctx."""

-    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
+    n_gpu_layers: Optional[int] = Field(0, alias="n_gpu_layers")
     """Number of layers to be loaded into gpu memory. Default None."""

     class Config:
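The net effect for BigdlLLMEmbeddings: f16_kv now defaults to True, n_batch to 512, and n_gpu_layers to 0, matching the underlying native API, so callers only pass these fields to deviate from it. A minimal sketch of the call site under the new defaults, mirroring the committed tests; the checkpoint path is a placeholder and the overrides are purely illustrative:

    # Minimal sketch; the model path below is a placeholder, not a real checkpoint.
    from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings

    embeddings = BigdlLLMEmbeddings(
        model_path="/path/to/ggml-llama-q4_0.bin",  # placeholder
        model_family="llama",
        n_threads=4,    # default is 2
        n_batch=256,    # override the new 512 default if needed
    )
    text = "This is a test document."
    query_vec = embeddings.embed_query(text)       # single-query embedding
    doc_vecs = embeddings.embed_documents([text])  # batch embedding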

@@ -113,11 +113,11 @@ class BigdlLLM(LLM):
     n_threads: Optional[int] = Field(2, alias="n_threads")
     """Number of threads to use."""

-    n_batch: Optional[int] = Field(8, alias="n_batch")
+    n_batch: Optional[int] = Field(512, alias="n_batch")
     """Number of tokens to process in parallel.
     Should be a number between 1 and n_ctx."""

-    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
+    n_gpu_layers: Optional[int] = Field(0, alias="n_gpu_layers")
     """Number of layers to be loaded into gpu memory. Default None."""

     suffix: Optional[str] = Field(None)
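BigdlLLM picks up the same n_batch and n_gpu_layers defaults. A minimal sketch of the wrapper inside a standard langchain chain, assuming langchain==0.0.184 as pinned in the workflow above; the checkpoint path is a placeholder:

    # Minimal sketch; assumes langchain==0.0.184 and a local int4 checkpoint.
    from langchain import LLMChain, PromptTemplate
    from bigdl.llm.langchain.llms import BigdlLLM

    llm = BigdlLLM(
        model_path="/path/to/ggml-bloom-q4_0.bin",  # placeholder
        model_family="bloom",  # the family this PR's tests newly exercise
        max_tokens=32,
        n_threads=22,
    )
    prompt = PromptTemplate(
        template="Question: {question}\nAnswer:",
        input_variables=["question"],
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    print(chain.run("What is AI?"))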
							
								
								
									

python/llm/test/langchain/test_langchain.py (new file): 75 additions

@@ -0,0 +1,75 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from bigdl.llm.utils import get_avx_flags
from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
from bigdl.llm.langchain.llms import BigdlLLM
import pytest
from unittest import TestCase
import os


class Test_Models_Basics(TestCase):
    def setUp(self):
        self.llama_model_path = os.environ.get('LLAMA_INT4_CKPT_PATH')
        self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH')
        self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH')


    def test_langchain_llm_embedding_llama(self):
        bigdl_embeddings = BigdlLLMEmbeddings(
            model_path=self.llama_model_path,
            model_family="llama")
        text = "This is a test document."
        query_result = bigdl_embeddings.embed_query(text)
        doc_result = bigdl_embeddings.embed_documents([text])

    def test_langchain_llm_embedding_gptneox(self):
        bigdl_embeddings = BigdlLLMEmbeddings(
            model_path=self.gptneox_model_path,
            model_family="gptneox")
        text = "This is a test document."
        query_result = bigdl_embeddings.embed_query(text)
        doc_result = bigdl_embeddings.embed_documents([text])

    def test_langchain_llm_llama(self):
        llm = BigdlLLM(
            model_path=self.llama_model_path,
            max_tokens=32,
            n_threads=22)
        question = "What is AI?"
        result = llm(question)

    def test_langchain_llm_gptneox(self):
        llm = BigdlLLM(
            model_path=self.gptneox_model_path,
            model_family="gptneox",
            max_tokens=32,
            n_threads=22)
        question = "What is AI?"
        result = llm(question)

    def test_langchain_llm_bloom(self):
        llm = BigdlLLM(
            model_path=self.bloom_model_path,
            model_family="bloom",
            max_tokens=32,
            n_threads=22)
        question = "What is AI?"
        result = llm(question)

if __name__ == '__main__':
    pytest.main([__file__])
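The tests resolve checkpoint locations from LLAMA_INT4_CKPT_PATH, GPTNEOX_INT4_CKPT_PATH, and BLOOM_INT4_CKPT_PATH in setUp. A minimal sketch for running the module directly outside CI; the paths are placeholders for real int4 checkpoints:

    # Minimal sketch; the checkpoint paths are placeholders.
    import os
    import pytest

    os.environ["LLAMA_INT4_CKPT_PATH"] = "/ckpts/ggml-llama-q4_0.bin"
    os.environ["GPTNEOX_INT4_CKPT_PATH"] = "/ckpts/ggml-gptneox-q4_0.bin"
    os.environ["BLOOM_INT4_CKPT_PATH"] = "/ckpts/ggml-bloom-q4_0.bin"

    pytest.main(["-s", "python/llm/test/langchain/test_langchain.py"])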
							
								
								
									

python/llm/test/run-llm-langchain-tests.sh (new file): 17 additions

@@ -0,0 +1,17 @@
#!/bin/bash

export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/langchain
set -e

echo "# Start testing inference"
start=$(date "+%s")

python -m pytest -s ${LLM_INFERENCE_TEST_DIR}

now=$(date "+%s")
time=$((now-start))

echo "Bigdl-llm langchain tests finished"
echo "Time used:$time seconds"