From 444b11af224dcd89aa76df6d9d03a4a4fce8c9de Mon Sep 17 00:00:00 2001 From: "Keyan (Kyrie) Zhang" <79576162+Zhangky11@users.noreply.github.com> Date: Fri, 15 Mar 2024 01:31:01 -0700 Subject: [PATCH] Add LangChain upstream ut test for ipynb (#10387) * Add LangChain upstream ut test for ipynb * Integrate unit test for LangChain upstream ut and ipynb into one file * Modify file name * Remove LangChain version update in unit test * Move Langchain upstream ut job to arc * Modify path in .yml file * Modify path in llm_unit_tests.yml * Avoid create directory repeatedly --- .github/workflows/llm_unit_tests.yml | 15 +++++++++++- .../llm/test/run-langchain-upstream-tests.sh | 23 +++++++++++++++++++ .../llm/test/run-llm-langchain-tests-gpu.sh | 6 ----- 3 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 python/llm/test/run-langchain-upstream-tests.sh diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index c8db1b7d..783606c7 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -103,6 +103,7 @@ jobs: echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV" echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV" echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV" + echo "VICUNA_7B_1_3_ORIGIN_PATH=${ORIGIN_DIR}/vicuna-7b-v1.3" >> "$GITHUB_ENV" echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_7b_q4_0.bin" >> "$GITHUB_ENV" echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_redpajama_7b_q4_0.bin" >> "$GITHUB_ENV" @@ -195,6 +196,10 @@ jobs: echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR fi + if [ ! -d $VICUNA_7B_1_3_ORIGIN_PATH ]; then + echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..." 
+ wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/vicuna-7b-v1.3 -P $ORIGIN_DIR + fi - name: Run LLM cli test (Linux) if: runner.os == 'Linux' @@ -248,6 +253,7 @@ jobs: echo "MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-Instruct-v0.1" >> "$GITHUB_ENV" echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Baichuan2-7B-Chat" >> "$GITHUB_ENV" echo "QWEN_7B_ORIGIN_PATH=${ORIGIN_DIR}/Qwen-7B-Chat" >> "$GITHUB_ENV" + echo "VICUNA_7B_1_3_ORIGIN_PATH=${ORIGIN_DIR}/vicuna-7b-v1.3" >> "$GITHUB_ENV" - name: Checkout repo uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 @@ -328,6 +334,10 @@ jobs: echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR fi + if [ ! -d $VICUNA_7B_1_3_ORIGIN_PATH ]; then + echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..." + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/vicuna-7b-v1.3 -P $ORIGIN_DIR + fi - name: Run LLM inference test shell: bash @@ -363,7 +373,6 @@ jobs: pip install -U langchain==0.0.184 pip install -U chromadb==0.3.25 pip install -U pandas==2.0.3 - pip install -U langchain-community==0.0.27 # Specific oneapi position on arc ut test machines if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then source /opt/intel/oneapi/setvars.sh @@ -371,6 +380,10 @@ jobs: source /home/arda/intel/oneapi/setvars.sh fi bash python/llm/test/run-llm-langchain-tests-gpu.sh + + pip install -U langchain + pip install -U langchain-community + bash python/llm/test/run-langchain-upstream-tests.sh - name: Run LLM llamaindex GPU test shell: bash diff --git a/python/llm/test/run-langchain-upstream-tests.sh b/python/llm/test/run-langchain-upstream-tests.sh new file mode 100644 index 00000000..f130df3e --- /dev/null +++ b/python/llm/test/run-langchain-upstream-tests.sh @@ -0,0 +1,23 @@ +export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT} +export 
LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
+export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/langchain_gpu
+
+export VICUNA_7B_1_3_ORIGIN_PATH=${VICUNA_7B_1_3_ORIGIN_PATH}
+
+set -e
+
+rm -rf ${LLM_INFERENCE_TEST_DIR}/langchain_upstream
+echo ">>> Testing LangChain upstream unit test"
+mkdir ${LLM_INFERENCE_TEST_DIR}/langchain_upstream
+wget https://raw.githubusercontent.com/langchain-ai/langchain/master/libs/community/tests/integration_tests/llms/test_bigdl.py -P ${LLM_INFERENCE_TEST_DIR}/langchain_upstream
+sed -i "s,model_id=\"[^\"]*\",model_id=\"$VICUNA_7B_1_3_ORIGIN_PATH\",g" ${LLM_INFERENCE_TEST_DIR}/langchain_upstream/test_bigdl.py
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR}/langchain_upstream
+
+echo ">>> Testing LangChain upstream ipynb"
+wget https://raw.githubusercontent.com/langchain-ai/langchain/master/docs/docs/integrations/llms/bigdl.ipynb -P ${LLM_INFERENCE_TEST_DIR}/langchain_upstream
+mv ${LLM_INFERENCE_TEST_DIR}/langchain_upstream/bigdl.ipynb ${LLM_INFERENCE_TEST_DIR}/langchain_upstream/langchain_example.ipynb
+bash ./apps/ipynb2py.sh ${LLM_INFERENCE_TEST_DIR}/langchain_upstream/langchain_example
+sed -i '/^get_ipython/d' ${LLM_INFERENCE_TEST_DIR}/langchain_upstream/langchain_example.py
+sed -i "s,model_id=\"[^\"]*\",model_id=\"$VICUNA_7B_1_3_ORIGIN_PATH\",g" ${LLM_INFERENCE_TEST_DIR}/langchain_upstream/langchain_example.py
+python ${LLM_INFERENCE_TEST_DIR}/langchain_upstream/langchain_example.py
+rm -rf ${LLM_INFERENCE_TEST_DIR}/langchain_upstream
diff --git a/python/llm/test/run-llm-langchain-tests-gpu.sh b/python/llm/test/run-llm-langchain-tests-gpu.sh
index 130fd400..bff28198 100644
--- a/python/llm/test/run-llm-langchain-tests-gpu.sh
+++ b/python/llm/test/run-llm-langchain-tests-gpu.sh
@@ -15,12 +15,6 @@ start=$(date "+%s")
 
 python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
 
-mkdir ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
-wget 
https://raw.githubusercontent.com/langchain-ai/langchain/master/libs/community/tests/integration_tests/llms/test_bigdl.py -P ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
-sed -i "s,model_id=\"[^\"]*\",model_id=\"$LLAMA2_7B_ORIGIN_PATH\",g" ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir/test_bigdl.py
-python -m pytest -s ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
-rm -rf ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
-
 now=$(date "+%s")
 time=$((now-start))