From 650b82fa6e24fbfa69f4513b0885ddac3a60cd78 Mon Sep 17 00:00:00 2001 From: Song Jiaming Date: Tue, 25 Jul 2023 11:22:36 +0800 Subject: [PATCH] [LLM] add CausalLM and Speech UT (#8597) --- .github/workflows/llm_unit_tests_linux.yml | 16 ++++++ .../test/inference/test_transformers_api.py | 53 ++++++++++++++++--- python/llm/test/run-llm-inference-tests.sh | 4 +- 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml index 99dea304..f774c31b 100644 --- a/.github/workflows/llm_unit_tests_linux.yml +++ b/.github/workflows/llm_unit_tests_linux.yml @@ -43,6 +43,10 @@ env: LLM_DIR: ./llm ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/ + ORIGINAL_REPLIT_CODE_PATH: ./llm/replit-code-v1-3b/ + ORIGINAL_WHISPER_TINY_PATH: ./llm/whisper-tiny/ + SPEECH_DATASET_PATH: ./llm/datasets/librispeech_asr_dummy + # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: @@ -100,6 +104,18 @@ jobs: echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR fi + if [ ! -d $ORIGINAL_REPLIT_CODE_PATH ]; then + echo "Directory $ORIGINAL_REPLIT_CODE_PATH not found. Downloading from FTP server..." + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_REPLIT_CODE_PATH:2} -P $LLM_DIR + fi + if [ ! -d $ORIGINAL_WHISPER_TINY_PATH ]; then + echo "Directory $ORIGINAL_WHISPER_TINY_PATH not found. Downloading from FTP server..." + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_WHISPER_TINY_PATH:2} -P $LLM_DIR + fi + if [ ! -d $SPEECH_DATASET_PATH ]; then + echo "Directory $SPEECH_DATASET_PATH not found. Downloading from FTP server..." 
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${SPEECH_DATASET_PATH:2} -P $LLM_DIR
+          fi

      - name: Run LLM cli test
        uses: ./.github/actions/llm/cli-test
diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py
index 9ddc888e..ce6619dd 100644
--- a/python/llm/test/inference/test_transformers_api.py
+++ b/python/llm/test/inference/test_transformers_api.py
@@ -17,10 +17,10 @@

 import unittest
 import os
-
+import pytest
 import time
 import torch
-from bigdl.llm.transformers import AutoModel
+from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM, AutoModelForSpeechSeq2Seq
 from transformers import AutoTokenizer

 class TestTransformersAPI(unittest.TestCase):
@@ -32,11 +32,11 @@ class TestTransformersAPI(unittest.TestCase):
         else:
             self.n_threads = 2

-    def test_transformers_int4(self):
+    def test_transformers_auto_model_int4(self):
         model_path = os.environ.get('ORIGINAL_CHATGLM2_6B_PATH')
         model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-        input_str = "晚上睡不着应该怎么办"
+        input_str = "Tell me the capital of France.\n\n"

         with torch.inference_mode():
             st = time.time()
@@ -46,8 +46,49 @@
         end = time.time()
         print('Prompt:', input_str)
         print('Output:', output_str)
-        print(f'Inference time: {end-st} s')
+        print(f'Inference time: {end-st} s')
+        res = 'Paris' in output_str
+        self.assertTrue(res)

+    def test_transformers_auto_model_for_causal_lm_int4(self):
+        model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH')
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        input_str = 'def hello():\n print("hello world")\n'
+        model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
+        with torch.inference_mode():
+
+            st = time.time()
+            input_ids = tokenizer.encode(input_str, return_tensors="pt")
+            output = model.generate(input_ids, do_sample=False, max_new_tokens=32)
+            output_str = tokenizer.decode(output[0], skip_special_tokens=True)
+            end = time.time()
+            print('Prompt:', input_str)
+            print('Output:', output_str)
+            print(f'Inference time: {end-st} s')
+        res = '\nhello()' in output_str
+        self.assertTrue(res)
+
+
+    def test_transformers_auto_model_for_speech_seq2seq_int4(self):
+        from transformers import WhisperProcessor, WhisperForConditionalGeneration
+        from datasets import load_from_disk
+        model_path = os.environ.get('ORIGINAL_WHISPER_TINY_PATH')
+        dataset_path = os.environ.get('SPEECH_DATASET_PATH')
+        processor = WhisperProcessor.from_pretrained(model_path)
+        ds = load_from_disk(dataset_path)
+        sample = ds[0]["audio"]
+        input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features
+        model = AutoModelForSpeechSeq2Seq.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
+        with torch.inference_mode():
+            st = time.time()
+            predicted_ids = model.generate(input_features)
+            # decode token ids to text
+            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
+            end = time.time()
+            print('Output:', transcription)
+            print(f'Inference time: {end-st} s')
+        res = 'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.' in transcription[0]
+        self.assertTrue(res)

 if __name__ == '__main__':
-    unittest.main()
+    pytest.main([__file__])
diff --git a/python/llm/test/run-llm-inference-tests.sh b/python/llm/test/run-llm-inference-tests.sh
index 816101fd..13e5a3ae 100644
--- a/python/llm/test/run-llm-inference-tests.sh
+++ b/python/llm/test/run-llm-inference-tests.sh
@@ -9,13 +9,13 @@ set -e
 echo "# Start testing inference"
 start=$(date "+%s")

-python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k "not test_transformers_int4"
+python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k "not test_transformers"

 if [ -z "$THREAD_NUM" ]; then
   THREAD_NUM=2
 fi
 export OMP_NUM_THREADS=$THREAD_NUM
-taskset -c 0-$((THREAD_NUM - 1)) python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k test_transformers_int4
+taskset -c 0-$((THREAD_NUM - 1)) python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k test_transformers

 now=$(date "+%s")
 time=$((now-start))
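
For local verification, a minimal sketch of the equivalent invocation outside CI, assuming the model and dataset directories from the workflow env block above have already been downloaded under ./llm and that the environment variable names mirror the workflow; THREAD_NUM=2 is an arbitrary example value, not a requirement:

    export LLM_INFERENCE_TEST_DIR=python/llm/test/inference
    export ORIGINAL_CHATGLM2_6B_PATH=./llm/chatglm2-6b/
    export ORIGINAL_REPLIT_CODE_PATH=./llm/replit-code-v1-3b/
    export ORIGINAL_WHISPER_TINY_PATH=./llm/whisper-tiny/
    export SPEECH_DATASET_PATH=./llm/datasets/librispeech_asr_dummy

    # Non-transformers tests run unpinned; the transformers tests are pinned
    # to THREAD_NUM cores, mirroring run-llm-inference-tests.sh.
    THREAD_NUM=2
    export OMP_NUM_THREADS=$THREAD_NUM
    python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k "not test_transformers"
    taskset -c 0-$((THREAD_NUM - 1)) python -m pytest -s ${LLM_INFERENCE_TEST_DIR} -k test_transformers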