From d638b93dfec79f656d8d1b92ca221c9b993eba0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cheen=20Hau=2C=20=E4=BF=8A=E8=B1=AA?= <33478814+chtanch@users.noreply.github.com>
Date: Wed, 1 Nov 2023 09:39:53 +0800
Subject: [PATCH] Add test script and workflow for qlora fine-tuning (#9295)

* Add test script and workflow for qlora fine-tuning

* Test fix export model

* Download dataset

* Fix export model issue

* Reduce number of training steps

* Rename script

* Correction
---
Review notes (ignored when the patch is applied):
- NOTE(review): the line structure and the YAML indentation of the context
  lines were reconstructed from a whitespace-collapsed copy of this patch;
  confirm them against llm_unit_tests.yml before applying.
- The new test script now validates its required environment variables,
  quotes every path, runs under `set -euo pipefail`, and checks that the
  expected checkpoint directory exists before the export step.
- The workflow file now ends with a trailing newline.
- The post-image blob id on the workflow `index` line predates these
  revisions; the `index` line of the new script was dropped for that reason.

 .github/workflows/llm_unit_tests.yml         | 19 +++++++-
 python/llm/test/run-llm-example-tests-gpu.sh | 49 ++++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 python/llm/test/run-llm-example-tests-gpu.sh

diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml
index 698f0720..bb0e9336 100644
--- a/.github/workflows/llm_unit_tests.yml
+++ b/.github/workflows/llm_unit_tests.yml
@@ -207,6 +207,9 @@ jobs:
       - name: Set environment variables
         shell: bash
         run: |
+          echo "DATASET_DIR=${ORIGIN_DIR}/../datasets" >> "$GITHUB_ENV"
+          echo "ABIRATE_ENGLISH_QUOTES_PATH=${ORIGIN_DIR}/../datasets/abirate_english_quotes" >> "$GITHUB_ENV"
+
           echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
           echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
           echo "FALCON_7B_ORIGIN_PATH=${ORIGIN_DIR}/falcon-7b-instruct-with-patch" >> "$GITHUB_ENV"
@@ -241,7 +244,7 @@ jobs:
           source /opt/intel/oneapi/setvars.sh
           bash python/llm/test/run-llm-install-tests.sh
 
-      - name: Download LLMs
+      - name: Download LLMs and datasets
         shell: bash
         run: |
           if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
@@ -260,6 +263,13 @@ jobs:
             echo "Directory $MPT_7B_ORIGIN_PATH not found. Downloading from FTP server..."
             wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/mpt-7b-chat -P $ORIGIN_DIR
           fi
+          if [ ! -d $DATASET_DIR ]; then
+            mkdir -p $DATASET_DIR
+          fi
+          if [ ! -d $ABIRATE_ENGLISH_QUOTES_PATH ]; then
+            echo "Directory $ABIRATE_ENGLISH_QUOTES_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/abirate_english_quotes -P $DATASET_DIR
+          fi
 
       - name: Run LLM inference test
         shell: bash
@@ -267,3 +277,10 @@ jobs:
           source /opt/intel/oneapi/setvars.sh
           python -m pip install expecttest einops
           bash python/llm/test/run-llm-inference-tests-gpu.sh
+
+      - name: Run LLM example tests
+        shell: bash
+        run: |
+          python -m pip install transformers==4.34.0 peft==0.5.0 accelerate==0.23.0
+          source /opt/intel/oneapi/setvars.sh
+          bash python/llm/test/run-llm-example-tests-gpu.sh
diff --git a/python/llm/test/run-llm-example-tests-gpu.sh b/python/llm/test/run-llm-example-tests-gpu.sh
new file mode 100644
--- /dev/null
+++ b/python/llm/test/run-llm-example-tests-gpu.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Test for the GPU QLoRA fine-tuning example: runs qlora_finetuning.py with
+# reduced step counts, then runs export_merged_model.py on the saved checkpoint.
+#
+# Required environment variables:
+#   ANALYTICS_ZOO_ROOT           base directory containing python/llm/example/...
+#   LLAMA2_7B_ORIGIN_PATH        value passed as --repo-id-or-model-path
+#   ABIRATE_ENGLISH_QUOTES_PATH  value passed as --dataset
+
+set -euo pipefail
+
+: "${ANALYTICS_ZOO_ROOT:?ANALYTICS_ZOO_ROOT must be set}"
+: "${LLAMA2_7B_ORIGIN_PATH:?LLAMA2_7B_ORIGIN_PATH must be set}"
+: "${ABIRATE_ENGLISH_QUOTES_PATH:?ABIRATE_ENGLISH_QUOTES_PATH must be set}"
+export ANALYTICS_ZOO_ROOT
+
+example_dir="${ANALYTICS_ZOO_ROOT}/python/llm/example/GPU/QLoRA-FineTuning"
+checkpoint_dir="${PWD}/outputs/checkpoint-2"
+
+echo "# Start testing qlora fine-tuning"
+start=$(date "+%s")
+
+# Shrink max_steps/save_steps/logging_steps so the run is short; the export
+# step below expects outputs/checkpoint-2. NOTE: this edits the example in place.
+sed -i 's/max_steps=200/max_steps=2/; s/save_steps=100/save_steps=2/; s/logging_steps=20/logging_steps=1/' \
+  "${example_dir}/qlora_finetuning.py"
+
+python "${example_dir}/qlora_finetuning.py" \
+  --repo-id-or-model-path "${LLAMA2_7B_ORIGIN_PATH}" \
+  --dataset "${ABIRATE_ENGLISH_QUOTES_PATH}"
+
+# Fail with a clear message if the expected checkpoint is missing (for
+# example because the sed patterns above no longer match the example).
+if [[ ! -d "${checkpoint_dir}" ]]; then
+  echo "Expected checkpoint not found: ${checkpoint_dir}" >&2
+  exit 1
+fi
+
+python "${example_dir}/export_merged_model.py" \
+  --repo-id-or-model-path "${LLAMA2_7B_ORIGIN_PATH}" \
+  --adapter_path "${checkpoint_dir}" \
+  --output_path "${checkpoint_dir}-merged"
+
+now=$(date "+%s")
+time=$((now-start))
+
+echo "qlora fine-tuning test finished"
+echo "Time used:$time seconds"