Add LangChain upstream unit test for ipynb (#10387)

* Add LangChain upstream unit test for ipynb

* Integrate the LangChain upstream unit test and the ipynb test into one file

* Modify file name

* Remove LangChain version update in unit test

* Move LangChain upstream UT job to Arc

* Modify path in .yml file

* Modify path in llm_unit_tests.yml

* Avoid creating the directory repeatedly
This commit is contained in:
Keyan (Kyrie) Zhang 2024-03-15 01:31:01 -07:00 committed by GitHub
parent ca372f6dab
commit 444b11af22
3 changed files with 37 additions and 7 deletions

View file

@ -103,6 +103,7 @@ jobs:
echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV" echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV"
echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV" echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV"
echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV" echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
echo "VICUNA_7B_1_3_ORIGIN_PATH=${ORIGIN_DIR}/vicuna-7b-v1.3" >> "$GITHUB_ENV"
echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_7b_q4_0.bin" >> "$GITHUB_ENV" echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_7b_q4_0.bin" >> "$GITHUB_ENV"
echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_redpajama_7b_q4_0.bin" >> "$GITHUB_ENV" echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_redpajama_7b_q4_0.bin" >> "$GITHUB_ENV"
@ -195,6 +196,10 @@ jobs:
echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..." echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
fi fi
if [ ! -d $VICUNA_7B_1_3_ORIGIN_PATH ]; then
echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/vicuna-7b-v1.3 -P $ORIGIN_DIR
fi
- name: Run LLM cli test (Linux) - name: Run LLM cli test (Linux)
if: runner.os == 'Linux' if: runner.os == 'Linux'
@ -248,6 +253,7 @@ jobs:
echo "MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-Instruct-v0.1" >> "$GITHUB_ENV" echo "MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-Instruct-v0.1" >> "$GITHUB_ENV"
echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Baichuan2-7B-Chat" >> "$GITHUB_ENV" echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Baichuan2-7B-Chat" >> "$GITHUB_ENV"
echo "QWEN_7B_ORIGIN_PATH=${ORIGIN_DIR}/Qwen-7B-Chat" >> "$GITHUB_ENV" echo "QWEN_7B_ORIGIN_PATH=${ORIGIN_DIR}/Qwen-7B-Chat" >> "$GITHUB_ENV"
echo "VICUNA_7B_1_3_ORIGIN_PATH=${ORIGIN_DIR}/vicuna-7b-v1.3" >> "$GITHUB_ENV"
- name: Checkout repo - name: Checkout repo
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
@ -328,6 +334,10 @@ jobs:
echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR
fi fi
if [ ! -d $VICUNA_7B_1_3_ORIGIN_PATH ]; then
echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/vicuna-7b-v1.3 -P $ORIGIN_DIR
fi
- name: Run LLM inference test - name: Run LLM inference test
shell: bash shell: bash
@ -363,7 +373,6 @@ jobs:
pip install -U langchain==0.0.184 pip install -U langchain==0.0.184
pip install -U chromadb==0.3.25 pip install -U chromadb==0.3.25
pip install -U pandas==2.0.3 pip install -U pandas==2.0.3
pip install -U langchain-community==0.0.27
# Specific oneapi position on arc ut test machines # Specific oneapi position on arc ut test machines
if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
source /opt/intel/oneapi/setvars.sh source /opt/intel/oneapi/setvars.sh
@ -371,6 +380,10 @@ jobs:
source /home/arda/intel/oneapi/setvars.sh source /home/arda/intel/oneapi/setvars.sh
fi fi
bash python/llm/test/run-llm-langchain-tests-gpu.sh bash python/llm/test/run-llm-langchain-tests-gpu.sh
pip install -U langchain
pip install -U langchain-community
bash python/llm/test/run-langchain-upstream-tests.sh
- name: Run LLM llamaindex GPU test - name: Run LLM llamaindex GPU test
shell: bash shell: bash

View file

@ -0,0 +1,23 @@
#!/usr/bin/env bash
#
# Runs two checks against files downloaded from the LangChain repository:
#   1. the upstream BigDL integration test (test_bigdl.py), via pytest;
#   2. the upstream BigDL example notebook (bigdl.ipynb), converted to a
#      Python script and executed.
# In both files every model_id="..." is rewritten to point at the local
# checkpoint given by VICUNA_7B_1_3_ORIGIN_PATH.
#
# Required environment:
#   ANALYTICS_ZOO_ROOT         root used to derive LLM_HOME and the test dir
#   VICUNA_7B_1_3_ORIGIN_PATH  model path substituted into the downloaded files
#
# Notes:
#   * Both downloads track the upstream "master" branch, so an upstream change
#     can alter what this script tests.
#   * apps/ipynb2py.sh is invoked through a relative path, so the script must
#     be started from a directory that contains apps/ipynb2py.sh.

set -euo pipefail

# Fail early with a clear message instead of building paths from empty values
# (the scratch directory below is removed with `rm -rf`).
: "${ANALYTICS_ZOO_ROOT:?ANALYTICS_ZOO_ROOT must be set}"
: "${VICUNA_7B_1_3_ORIGIN_PATH:?VICUNA_7B_1_3_ORIGIN_PATH must be set}"

export ANALYTICS_ZOO_ROOT
export LLM_HOME="${ANALYTICS_ZOO_ROOT}/python/llm/src"
export LLM_INFERENCE_TEST_DIR="${ANALYTICS_ZOO_ROOT}/python/llm/test/langchain_gpu"
export VICUNA_7B_1_3_ORIGIN_PATH

readonly UPSTREAM_RAW_URL="https://raw.githubusercontent.com/langchain-ai/langchain/master"
readonly WORK_DIR="${LLM_INFERENCE_TEST_DIR}/langchain_upstream"

# Remove the scratch directory holding the downloaded upstream files.
cleanup() {
  rm -rf -- "${WORK_DIR:?}"
}

# Rewrite every model_id="..." in the given file to use the local Vicuna path.
# Arguments: $1 - file to edit in place
use_local_model() {
  local target=$1
  local escaped_path
  # Escape the characters that are special in the replacement part of the
  # sed expression below: the ',' delimiter, '&' and '\'.
  escaped_path=$(printf '%s' "${VICUNA_7B_1_3_ORIGIN_PATH}" | sed 's/[,&\\]/\\&/g')
  sed -i "s,model_id=\"[^\"]*\",model_id=\"${escaped_path}\",g" -- "${target}"
}

# Clean up on every exit path, so a failed run does not leave stale files.
trap cleanup EXIT

# Start from a clean slate in case an earlier run was killed before cleanup.
cleanup

echo ">>> Testing LangChain upstream unit test"
mkdir -p -- "${WORK_DIR}"
wget "${UPSTREAM_RAW_URL}/libs/community/tests/integration_tests/llms/test_bigdl.py" -P "${WORK_DIR}"
use_local_model "${WORK_DIR}/test_bigdl.py"
python -m pytest -s "${WORK_DIR}"

echo ">>> Testing LangChain upstream ipynb"
wget "${UPSTREAM_RAW_URL}/docs/docs/integrations/llms/bigdl.ipynb" -P "${WORK_DIR}"
mv -- "${WORK_DIR}/bigdl.ipynb" "${WORK_DIR}/langchain_example.ipynb"
# The converter receives the notebook path without its extension; the steps
# below rely on it producing langchain_example.py in the same directory.
bash ./apps/ipynb2py.sh "${WORK_DIR}/langchain_example"
# Drop lines starting with get_ipython, which only work inside IPython.
sed -i '/^get_ipython/d' -- "${WORK_DIR}/langchain_example.py"
use_local_model "${WORK_DIR}/langchain_example.py"
python "${WORK_DIR}/langchain_example.py"

View file

@ -15,12 +15,6 @@ start=$(date "+%s")
python -m pytest -s ${LLM_INFERENCE_TEST_DIR} python -m pytest -s ${LLM_INFERENCE_TEST_DIR}
mkdir ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
wget https://raw.githubusercontent.com/langchain-ai/langchain/master/libs/community/tests/integration_tests/llms/test_bigdl.py -P ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
sed -i "s,model_id=\"[^\"]*\",model_id=\"$LLAMA2_7B_ORIGIN_PATH\",g" ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir/test_bigdl.py
python -m pytest -s ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
rm -rf ${LLM_INFERENCE_TEST_DIR}/tmp_wget_dir
now=$(date "+%s") now=$(date "+%s")
time=$((now-start)) time=$((now-start))