Add LangChain upstream ut test for ipynb (#10387)

* Add LangChain upstream unit test for ipynb * Integrate the LangChain upstream unit test and the ipynb test into one file * Modify file name * Remove LangChain version update in unit test * Move LangChain upstream unit-test job to arc * Modify path in .yml file * Modify path in llm_unit_tests.yml * Avoid creating the directory repeatedly
2024-03-15 01:31:01 -07:00 · 2024-03-15 01:31:01 -07:00 · 444b11af22
commit 444b11af22
parent ca372f6dab
3 changed files with 37 additions and 7 deletions
--- a/.github/workflows/llm_unit_tests.yml
+++ b/.github/workflows/llm_unit_tests.yml
@ -103,6 +103,7 @@ jobs:
          echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV"
          echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV"
          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
          echo "VICUNA_7B_1_3_ORIGIN_PATH=${ORIGIN_DIR}/vicuna-7b-v1.3" >> "$GITHUB_ENV"
          echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_7b_q4_0.bin" >> "$GITHUB_ENV"
          echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_redpajama_7b_q4_0.bin" >> "$GITHUB_ENV"
@ -195,6 +196,10 @@ jobs:
            echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
          fi
          # Fetch the Vicuna-7B v1.3 model directory only when it does not already exist.
          # The expansions are quoted so that an empty/unset path makes `-d ""` false
          # (triggering the download) instead of collapsing the test to `[ ! -d ]`,
          # which is always false and would silently skip the download.
          if [ ! -d "$VICUNA_7B_1_3_ORIGIN_PATH" ]; then
            echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 "$LLM_FTP_URL/llm/vicuna-7b-v1.3" -P "$ORIGIN_DIR"
          fi
      - name: Run LLM cli test (Linux)
        if: runner.os == 'Linux' 
@ -248,6 +253,7 @@ jobs:
          echo "MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-Instruct-v0.1" >> "$GITHUB_ENV"
          echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Baichuan2-7B-Chat" >> "$GITHUB_ENV"
          echo "QWEN_7B_ORIGIN_PATH=${ORIGIN_DIR}/Qwen-7B-Chat" >> "$GITHUB_ENV"
          echo "VICUNA_7B_1_3_ORIGIN_PATH=${ORIGIN_DIR}/vicuna-7b-v1.3" >> "$GITHUB_ENV"
      - name: Checkout repo
        uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
@ -328,6 +334,10 @@ jobs:
            echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR
          fi
          # Fetch the Vicuna-7B v1.3 model directory only when it does not already exist.
          # The expansions are quoted so that an empty/unset path makes `-d ""` false
          # (triggering the download) instead of collapsing the test to `[ ! -d ]`,
          # which is always false and would silently skip the download.
          if [ ! -d "$VICUNA_7B_1_3_ORIGIN_PATH" ]; then
            echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 "$LLM_FTP_URL/llm/vicuna-7b-v1.3" -P "$ORIGIN_DIR"
          fi
      - name: Run LLM inference test
        shell: bash
@ -363,7 +373,6 @@ jobs:
          pip install -U langchain==0.0.184
          pip install -U chromadb==0.3.25
          pip install -U pandas==2.0.3
          pip install -U langchain-community==0.0.27
          # Specific oneapi position on arc ut test machines
          if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
            source /opt/intel/oneapi/setvars.sh
@ -371,6 +380,10 @@ jobs:
            source /home/arda/intel/oneapi/setvars.sh
          fi
          bash python/llm/test/run-llm-langchain-tests-gpu.sh
          pip install -U langchain
          pip install -U langchain-community
          bash python/llm/test/run-langchain-upstream-tests.sh
      - name: Run LLM llamaindex GPU test
        shell: bash
--- a/python/llm/test/run-langchain-upstream-tests.sh
+++ b/python/llm/test/run-langchain-upstream-tests.sh
@ -0,0 +1,23 @@
 # Runs LangChain's upstream BigDL integration test and its example notebook
 # against a local Vicuna-7B v1.3 checkpoint.
 #
 # Required environment:
 #   ANALYTICS_ZOO_ROOT         - used as the base for every path below
 #   VICUNA_7B_1_3_ORIGIN_PATH  - model path substituted for the upstream model_id
 #
 # Abort on the first failing command.
 set -e
 # Fail fast with a clear message instead of building paths from an empty root
 # or patching an empty model_id into the downloaded files.
 : "${ANALYTICS_ZOO_ROOT:?ANALYTICS_ZOO_ROOT must be set}"
 : "${VICUNA_7B_1_3_ORIGIN_PATH:?VICUNA_7B_1_3_ORIGIN_PATH must be set}"
 export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT}
 export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src
 export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/langchain_gpu
 export VICUNA_7B_1_3_ORIGIN_PATH=${VICUNA_7B_1_3_ORIGIN_PATH}
 # Scratch directory holding everything downloaded from upstream.
 UPSTREAM_DIR="${LLM_INFERENCE_TEST_DIR}/langchain_upstream"
 # Remove the scratch directory on every exit path; with `set -e` a trailing
 # `rm -rf` would never run after a failed test.
 trap 'rm -rf "${UPSTREAM_DIR}"' EXIT
 # Start from a clean directory so leftovers from an earlier run are not picked up.
 rm -rf "${UPSTREAM_DIR}"
 mkdir -p "${UPSTREAM_DIR}"
 echo ">>> Testing LangChain upstream unit test"
 # The test is fetched from upstream master on every run (intentionally unpinned).
 wget https://raw.githubusercontent.com/langchain-ai/langchain/master/libs/community/tests/integration_tests/llms/test_bigdl.py -P "${UPSTREAM_DIR}"
 # Point every model_id="..." in the downloaded test at the local checkpoint.
 sed -i "s,model_id=\"[^\"]*\",model_id=\"$VICUNA_7B_1_3_ORIGIN_PATH\",g" "${UPSTREAM_DIR}/test_bigdl.py"
 python -m pytest -s "${UPSTREAM_DIR}"
 echo ">>> Testing LangChain upstream ipynb"
 wget https://raw.githubusercontent.com/langchain-ai/langchain/master/docs/docs/integrations/llms/bigdl.ipynb -P "${UPSTREAM_DIR}"
 mv "${UPSTREAM_DIR}/bigdl.ipynb" "${UPSTREAM_DIR}/langchain_example.ipynb"
 # The converter is addressed via ANALYTICS_ZOO_ROOT rather than ./apps so the
 # script does not depend on the caller's working directory.
 # NOTE(review): assumes ANALYTICS_ZOO_ROOT is the repository root that contains apps/ - confirm.
 bash "${ANALYTICS_ZOO_ROOT}/apps/ipynb2py.sh" "${UPSTREAM_DIR}/langchain_example"
 # Drop lines starting with get_ipython, which only work inside IPython.
 sed -i '/^get_ipython/d' "${UPSTREAM_DIR}/langchain_example.py"
 sed -i "s,model_id=\"[^\"]*\",model_id=\"$VICUNA_7B_1_3_ORIGIN_PATH\",g" "${UPSTREAM_DIR}/langchain_example.py"
 python "${UPSTREAM_DIR}/langchain_example.py"
--- a/python/llm/test/run-llm-langchain-tests-gpu.sh
+++ b/python/llm/test/run-llm-langchain-tests-gpu.sh
@ -15,12 +15,6 @@ start=$(date "+%s")
 python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
 mkdir ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
 wget https://raw.githubusercontent.com/langchain-ai/langchain/master/libs/community/tests/integration_tests/llms/test_bigdl.py -P ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
 sed -i "s,model_id=\"[^\"]*\",model_id=\"$LLAMA2_7B_ORIGIN_PATH\",g" ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir/test_bigdl.py
 python -m pytest -s ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
 rm -rf ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
 now=$(date "+%s")
 time=$((now-start))