From b7db21414e46f948248fb6cf10bbf46ee1fbb669 Mon Sep 17 00:00:00 2001
From: hxsz1997 <45651968+hxsz1997@users.noreply.github.com>
Date: Thu, 7 Mar 2024 10:06:16 +0800
Subject: [PATCH] Update llamaindex ut (#10338)

* add test_llamaindex of gpu

* add llamaindex gpu tests bash

* add llamaindex cpu tests bash

* update name of Run LLM langchain GPU test

* import llama_index in llamaindex gpu ut

* update the dependency of test_llamaindex

* add Run LLM llamaindex GPU test

* modify import dependency of llamaindex cpu test

* add Run LLM llamaindex test

* update llama_model_path

* delete unused model path

* add LLAMA2_7B_ORIGIN_PATH in llamaindex cpu test
---
 .github/workflows/llm_unit_tests.yml          | 27 ++++++-
 python/llm/test/llamaindex/test_llamaindex.py | 21 +-----
 .../test/llamaindex_gpu/test_llamaindex.py    | 70 +++++++++++++++++++
 .../llm/test/run-llm-llamaindex-tests-gpu.sh  | 21 ++++++
 python/llm/test/run-llm-llamaindex-tests.sh   | 18 +++++
 5 files changed, 136 insertions(+), 21 deletions(-)
 create mode 100644 python/llm/test/llamaindex_gpu/test_llamaindex.py
 create mode 100644 python/llm/test/run-llm-llamaindex-tests-gpu.sh
 create mode 100644 python/llm/test/run-llm-llamaindex-tests.sh

diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml
index fc37e464..e6d01a6c 100644
--- a/.github/workflows/llm_unit_tests.yml
+++ b/.github/workflows/llm_unit_tests.yml
@@ -102,6 +102,7 @@ jobs:
           echo "ORIGINAL_REPLIT_CODE_PATH=${ORIGIN_DIR}/replit-code-v1-3b" >> "$GITHUB_ENV"
           echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV"
           echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV"
+          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
 
           echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_7b_q4_0.bin" >> "$GITHUB_ENV"
           echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_redpajama_7b_q4_0.bin" >> "$GITHUB_ENV"
@@ -190,6 +191,10 @@ jobs:
             echo "wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/common_voice -P $DATASET_DIR"
             wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/common_voice -P $DATASET_DIR
           fi
+          if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
+          fi
 
       - name: Run LLM cli test (Linux)
         if: runner.os == 'Linux'
@@ -209,6 +214,12 @@ jobs:
           pip install -U chromadb==0.3.25
           pip install -U pandas==2.0.3
           bash python/llm/test/run-llm-langchain-tests.sh
+      - name: Run LLM llamaindex test
+        shell: bash
+        run: |
+          pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
+          pip install transformers==4.31.0
+          bash python/llm/test/run-llm-llamaindex-tests.sh
   llm-unit-test-on-arc:
     needs: [setup-python-version, llm-cpp-build]
     strategy:
@@ -347,7 +358,7 @@ jobs:
           fi
           bash python/llm/test/run-llm-example-tests-gpu.sh
 
-      - name: Run LLM langchain test
+      - name: Run LLM langchain GPU test
         shell: bash
         run: |
           pip install -U langchain==0.0.184
@@ -360,3 +371,17 @@ jobs:
             source /home/arda/intel/oneapi/setvars.sh
           fi
           bash python/llm/test/run-llm-langchain-tests-gpu.sh
+
+      - name: Run LLM llamaindex GPU test
+        shell: bash
+        run: |
+          pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
+          # Specific oneapi position on arc ut test machines
+          if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
+            pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
+            source /opt/intel/oneapi/setvars.sh
+          elif [[ '${{ matrix.pytorch-version }}' == '2.0' ]]; then
+            pip install --pre --upgrade bigdl-llm[xpu_2.0] -f https://developer.intel.com/ipex-whl-stable-xpu
+            source /home/arda/intel/oneapi/setvars.sh
+          fi
+          bash python/llm/test/run-llm-llamaindex-tests-gpu.sh
\ No newline at end of file
diff --git a/python/llm/test/llamaindex/test_llamaindex.py b/python/llm/test/llamaindex/test_llamaindex.py
index 6df7293e..c0ebf4c3 100644
--- a/python/llm/test/llamaindex/test_llamaindex.py
+++ b/python/llm/test/llamaindex/test_llamaindex.py
@@ -14,22 +14,6 @@
 # limitations under the License.
 #
 
-from bigdl.llm.langchain.llms import TransformersLLM, TransformersPipelineLLM, \
-    LlamaLLM, BloomLLM
-from bigdl.llm.langchain.embeddings import TransformersEmbeddings, LlamaEmbeddings, \
-    BloomEmbeddings
-
-
-from langchain.document_loaders import WebBaseLoader
-from langchain.indexes import VectorstoreIndexCreator
-
-
-from langchain.chains.question_answering import load_qa_chain
-from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT,
-                                                     QA_PROMPT)
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.vectorstores import Chroma
-
 import pytest
 from unittest import TestCase
 import os
@@ -37,10 +21,7 @@ from bigdl.llm.llamaindex.llms import BigdlLLM
 
 class Test_LlamaIndex_Transformers_API(TestCase):
     def setUp(self):
-        self.auto_model_path = os.environ.get('ORIGINAL_CHATGLM2_6B_PATH')
-        self.auto_causal_model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH')
-        self.llama_model_path = os.environ.get('LLAMA_ORIGIN_PATH')
-        self.bloom_model_path = os.environ.get('BLOOM_ORIGIN_PATH')
+        self.llama_model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
         thread_num = os.environ.get('THREAD_NUM')
         if thread_num is not None:
             self.n_threads = int(thread_num)
diff --git a/python/llm/test/llamaindex_gpu/test_llamaindex.py b/python/llm/test/llamaindex_gpu/test_llamaindex.py
new file mode 100644
index 00000000..920ce534
--- /dev/null
+++ b/python/llm/test/llamaindex_gpu/test_llamaindex.py
@@ -0,0 +1,70 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch
+import pytest
+from unittest import TestCase
+import os
+from bigdl.llm.llamaindex.llms import BigdlLLM
+
+class Test_LlamaIndex_Transformers_API(TestCase):
+    def setUp(self):
+        self.llama_model_path = os.environ.get('LLAMA2_7B_ORIGIN_PATH')
+        thread_num = os.environ.get('THREAD_NUM')
+        if thread_num is not None:
+            self.n_threads = int(thread_num)
+        else:
+            self.n_threads = 2
+
+    def completion_to_prompt(completion):
+        return f"<|system|>\n\n<|user|>\n{completion}\n<|assistant|>\n"
+
+    def messages_to_prompt(messages):
+        prompt = ""
+        for message in messages:
+            if message.role == "system":
+                prompt += f"<|system|>\n{message.content}\n"
+            elif message.role == "user":
+                prompt += f"<|user|>\n{message.content}\n"
+            elif message.role == "assistant":
+                prompt += f"<|assistant|>\n{message.content}\n"
+
+        # ensure we start with a system prompt, insert blank if needed
+        if not prompt.startswith("<|system|>\n"):
+            prompt = "<|system|>\n\n" + prompt
+
+        # add final assistant prompt
+        prompt = prompt + "<|assistant|>\n"
+        return prompt
+
+    def test_bigdl_llm(self):
+        llm = BigdlLLM(
+            model_name=self.llama_model_path,
+            tokenizer_name=self.llama_model_path,
+            context_window=512,
+            max_new_tokens=32,
+            model_kwargs={},
+            generate_kwargs={"temperature": 0.7, "do_sample": False},
+            messages_to_prompt=self.messages_to_prompt,
+            completion_to_prompt=self.completion_to_prompt,
+            device_map="xpu",
+        )
+        res = llm.complete("What is AI?")
+        assert res!=None
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])
\ No newline at end of file
diff --git a/python/llm/test/run-llm-llamaindex-tests-gpu.sh b/python/llm/test/run-llm-llamaindex-tests-gpu.sh
new file mode 100644
index 00000000..387aa9cc
--- /dev/null
+++ b/python/llm/test/run-llm-llamaindex-tests-gpu.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
+export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
+export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/llamaindex_gpu
+
+export USE_XETLA=OFF
+export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+
+set -e
+
+echo "# Start testing inference"
+start=$(date "+%s")
+
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
+
+now=$(date "+%s")
+time=$((now-start))
+
+echo "Bigdl-llm llamaindex gpu tests finished"
+echo "Time used:$time seconds"
\ No newline at end of file
diff --git a/python/llm/test/run-llm-llamaindex-tests.sh b/python/llm/test/run-llm-llamaindex-tests.sh
new file mode 100644
index 00000000..125532a1
--- /dev/null
+++ b/python/llm/test/run-llm-llamaindex-tests.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
+export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
+export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/llamaindex
+
+set -e
+
+echo "# Start testing inference"
+start=$(date "+%s")
+
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
+
+now=$(date "+%s")
+time=$((now-start))
+
+echo "Bigdl-llm llamaindex tests finished"
+echo "Time used:$time seconds"
\ No newline at end of file