diff --git a/python/llm/dev/test/run-example-tests.sh b/python/llm/dev/test/run-example-tests.sh
index 33cb6f91..26dad2cc 100644
--- a/python/llm/dev/test/run-example-tests.sh
+++ b/python/llm/dev/test/run-example-tests.sh
@@ -1,14 +1,17 @@
-# LLAMA2 example test
-if [ ! -d $ORIGINAL_LLAMA2_PATH ]; then
-  echo "Directory $ORIGINAL_LLAMA2_PATH not found. Downloading from FTP server..."
-  wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_LLAMA2_PATH:2} -P $LLM_DIR
-fi
-
 if [ -z "$THREAD_NUM" ]; then
   THREAD_NUM=2
 fi
 export OMP_NUM_THREADS=$THREAD_NUM
 
+######## LLAMA2
+# transformers
+export ORIGINAL_LLAMA2_PATH=./llm/Llama-2-7b-chat-hf/
+if [ ! -d $ORIGINAL_LLAMA2_PATH ]; then
+  echo "Directory $ORIGINAL_LLAMA2_PATH not found. Downloading from FTP server..."
+  wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_LLAMA2_PATH:2} -P $LLM_DIR
+fi
+
+echo ">>> Testing LLAMA2 transformers API"
 std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_int4/llama2/generate.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH)
 echo "the output of the example is: "
 echo $std
@@ -16,3 +19,67 @@ if [[ ! $std == *"AI is a term"* ]]; then
   echo "The expected output is not met."
   return 1
 fi
+# transformers low-bit
+echo ">>> Testing LLAMA2 transformers API sym_int4"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"But her parents were always telling her to stay close to home"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+echo ">>> Testing LLAMA2 transformers API sym_int5"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit sym_int5)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+echo ">>> Testing LLAMA2 transformers API sym_int8"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit sym_int8)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+echo ">>> Testing LLAMA2 transformers API asym_int4"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit asym_int4)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+echo ">>> Testing LLAMA2 transformers API asym_int5"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit asym_int5)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+
+########## ChatGLM2
+export ORIGINAL_CHATGLM2_PATH=./llm/chatglm2-6b/
+if [ ! -d $ORIGINAL_CHATGLM2_PATH ]; then
+  echo "Directory $ORIGINAL_CHATGLM2_PATH not found. Downloading from FTP server..."
+  wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_PATH:2} -P $LLM_DIR
+fi
+
+echo ">>> Testing ChatGLM2 transformers API"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_int4/chatglm2/generate.py --repo-id-or-model-path $ORIGINAL_CHATGLM2_PATH)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"AI指的是人工智能"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+
+
+
diff --git a/python/llm/example/transformers/transformers_low_bit/README.md b/python/llm/example/transformers/transformers_low_bit/README.md
index 46cd9406..6a992c85 100644
--- a/python/llm/example/transformers/transformers_low_bit/README.md
+++ b/python/llm/example/transformers/transformers_low_bit/README.md
@@ -13,7 +13,7 @@ pip install --pre --upgrade bigdl-llm[all]
 
 ## Run Example
 ```bash
-python ./transformers_low_bit_pipeline.py --model-path decapoda-research/llama-7b-hf --low-bit sym_int5 --save-path ./llama-7b-sym_int5
+python ./transformers_low_bit_pipeline.py --repo-id-or-model-path decapoda-research/llama-7b-hf --low-bit sym_int5 --save-path ./llama-7b-sym_int5
 ```
 arguments info:
 - `--repo-id-or-model-path`: str value, argument defining the huggingface repo id for the large language model to be downloaded, or the path to the huggingface checkpoint folder, the value is 'decapoda-research/llama-7b-hf' by default.
diff --git a/python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py b/python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py
index d2ae7cab..9cf9cffb 100644
--- a/python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py
+++ b/python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py
@@ -20,7 +20,7 @@ from transformers import LlamaTokenizer, TextGenerationPipeline
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Transformer save_load example')
-    parser.add_argument('--model-path', type=str, default="decapoda-research/llama-7b-hf",
+    parser.add_argument('--repo-id-or-model-path', type=str, default="decapoda-research/llama-7b-hf",
                         help='The huggingface repo id for the large language model to be downloaded'
                              ', or the path to the huggingface checkpoint folder')
     parser.add_argument('--low-bit', type=str, default="sym_int4",
@@ -31,7 +31,7 @@ if __name__ == '__main__':
     parser.add_argument('--load-path', type=str, default=None,
                         help='The path to load the low-bit model.')
     args = parser.parse_args()
-    model_path = args.model_path
+    model_path = args.repo_id_or_model_path
     low_bit = args.low_bit
     load_path = args.load_path
     if load_path:
@@ -40,8 +40,8 @@ if __name__ == '__main__':
     else:
         # load_in_low_bit in bigdl.llm.transformers will convert
         # the relevant layers in the model into corresponding int X format
-        model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit)
-        tokenizer = LlamaTokenizer.from_pretrained(model_path)
+        model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True)
+        tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
     pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer, max_new_tokens=32)
     input_str = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun"
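
For reference, below is a minimal sketch of the low-bit save/load round trip that the new tests exercise. The `bigdl.llm.transformers` import, `load_in_low_bit`, and the pipeline setup are taken from the diff above; `save_low_bit()`/`load_low_bit()` are assumed from the example's `--save-path`/`--load-path` options and may not match the installed bigdl-llm release exactly.

```python
# Minimal sketch of the save/load round trip the test script exercises.
# from_pretrained(load_in_low_bit=...) appears in the diff above;
# save_low_bit()/load_low_bit() are ASSUMED from the example's
# --save-path/--load-path options and may differ in bigdl-llm itself.
from bigdl.llm.transformers import AutoModelForCausalLM
from transformers import LlamaTokenizer, TextGenerationPipeline

model_path = "decapoda-research/llama-7b-hf"  # or a local checkpoint folder
save_path = "./llama-7b-sym_int5"

# Quantize the relevant layers to sym_int5 while loading.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_low_bit="sym_int5",
                                             trust_remote_code=True)
tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Persist the quantized weights so later runs skip the conversion step
# (assumed API, mirroring the example's --save-path handling).
model.save_low_bit(save_path)
tokenizer.save_pretrained(save_path)

# Reload the saved low-bit model and generate; the CI script above checks
# the output for an expected substring such as "She wanted to go to places".
model = AutoModelForCausalLM.load_low_bit(save_path)
pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer, max_new_tokens=32)
input_str = ("Once upon a time, there existed a little girl who liked to have "
             "adventures. She wanted to go to places and meet new people, and have fun")
print(pipeline(input_str)[0]["generated_text"])
```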