[LLM] chatglm example and transformers low-bit examples (#8751)

Song Jiaming 2023-08-16 11:41:44 +08:00 committed by GitHub
parent 8805186f2f
commit c1f9af6d97
3 changed files with 78 additions and 11 deletions


@@ -1,14 +1,17 @@
-# LLAMA2 example test
-if [ ! -d $ORIGINAL_LLAMA2_PATH ]; then
-  echo "Directory $ORIGINAL_LLAMA2_PATH not found. Downloading from FTP server..."
-  wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_LLAMA2_PATH:2} -P $LLM_DIR
-fi
 if [ -z "$THREAD_NUM" ]; then
   THREAD_NUM=2
 fi
 export OMP_NUM_THREADS=$THREAD_NUM
 
+######## LLAMA2
+# transformers
+export ORIGINAL_LLAMA2_PATH=./llm/Llama-2-7b-chat-hf/
+if [ ! -d $ORIGINAL_LLAMA2_PATH ]; then
+  echo "Directory $ORIGINAL_LLAMA2_PATH not found. Downloading from FTP server..."
+  wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_LLAMA2_PATH:2} -P $LLM_DIR
+fi
+echo ">>> Testing LLAMA2 transformers API"
 std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_int4/llama2/generate.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH)
 echo "the output of the example is: "
 echo $std
@@ -16,3 +19,67 @@ if [[ ! $std == *"AI is a term"* ]]; then
 echo "The expected output is not met."
 return 1
 fi
+
+# transformers low-bit
+echo ">>> Testing LLAMA2 transformers API sym_int4"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"But her parents were always telling her to stay close to home"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+echo ">>> Testing LLAMA2 transformers API sym_int5"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit sym_int5)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+echo ">>> Testing LLAMA2 transformers API sym_int8"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit sym_int8)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+echo ">>> Testing LLAMA2 transformers API asym_int4"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit asym_int4)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+echo ">>> Testing LLAMA2 transformers API asym_int5"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit asym_int5)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
+
+########## ChatGLM2
+export ORIGINAL_CHATGLM2_PATH=./llm/chatglm2-6b/
+if [ ! -d $ORIGINAL_CHATGLM2_PATH ]; then
+  echo "Directory $ORIGINAL_CHATGLM2_PATH not found. Downloading from FTP server..."
+  wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_PATH:2} -P $LLM_DIR
+fi
+
+echo ">>> Testing ChatGLM2 transformers API"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_int4/chatglm2/generate.py --repo-id-or-model-path $ORIGINAL_CHATGLM2_PATH)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"AI指的是人工智能"* ]]; then
+  echo "The expected output is not met."
+  return 1
+fi
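
For reference, the `generate.py` examples this test script drives use bigdl-llm's transformers-style API. A minimal sketch of that flow, assuming the standard `load_in_4bit` entry point; the model path and prompt below are illustrative, not the exact example code:

```python
# Sketch of the transformers_int4 flow exercised by the script above.
# The local path and the prompt are illustrative assumptions.
from bigdl.llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "./llm/Llama-2-7b-chat-hf/"  # or a huggingface repo id

# load_in_4bit=True quantizes the relevant layers to 4-bit on load
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

input_ids = tokenizer.encode("What is AI?", return_tensors="pt")
output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```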


@@ -13,7 +13,7 @@ pip install --pre --upgrade bigdl-llm[all]
 ## Run Example
 ```bash
-python ./transformers_low_bit_pipeline.py --model-path decapoda-research/llama-7b-hf --low-bit sym_int5 --save-path ./llama-7b-sym_int5
+python ./transformers_low_bit_pipeline.py --repo-id-or-model-path decapoda-research/llama-7b-hf --low-bit sym_int5 --save-path ./llama-7b-sym_int5
 ```
 arguments info:
 - `--repo-id-or-model-path`: str value, argument defining the huggingface repo id for the large language model to be downloaded, or the path to the huggingface checkpoint folder, the value is 'decapoda-research/llama-7b-hf' by default.
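
The README's `--low-bit` flag maps onto the `load_in_low_bit` argument of bigdl-llm's `from_pretrained`; a minimal sketch of that mapping, using the README's default repo id (the precision value here is one of the ones the test script above exercises):

```python
from bigdl.llm.transformers import AutoModelForCausalLM

# sym_int4 is the default; sym_int5 / sym_int8 / asym_int4 / asym_int5
# are the other precisions covered by the test script in this commit.
model = AutoModelForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_low_bit="sym_int5",
    trust_remote_code=True,
)
```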


@@ -20,7 +20,7 @@ from transformers import LlamaTokenizer, TextGenerationPipeline
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Transformer save_load example')
-    parser.add_argument('--model-path', type=str, default="decapoda-research/llama-7b-hf",
+    parser.add_argument('--repo-id-or-model-path', type=str, default="decapoda-research/llama-7b-hf",
                         help='The huggingface repo id for the large language model to be downloaded'
                              ', or the path to the huggingface checkpoint folder')
     parser.add_argument('--low-bit', type=str, default="sym_int4",
@@ -31,7 +31,7 @@ if __name__ == '__main__':
     parser.add_argument('--load-path', type=str, default=None,
                         help='The path to load the low-bit model.')
     args = parser.parse_args()
-    model_path = args.model_path
+    model_path = args.repo_id_or_model_path
     low_bit = args.low_bit
     load_path = args.load_path
     if load_path:
@@ -40,8 +40,8 @@ if __name__ == '__main__':
     else:
         # load_in_low_bit in bigdl.llm.transformers will convert
         # the relevant layers in the model into corresponding int X format
-        model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit)
-        tokenizer = LlamaTokenizer.from_pretrained(model_path)
+        model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True)
+        tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
     pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer, max_new_tokens=32)
     input_str = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun"