[LLM] chatglm example and transformers low-bit examples (#8751)

parent 8805186f2f
commit c1f9af6d97

3 changed files with 78 additions and 11 deletions
Changed file 1 of 3: the LLM example test script.

@@ -1,14 +1,17 @@
-# LLAMA2 example test
-if [ ! -d $ORIGINAL_LLAMA2_PATH ]; then
-echo "Directory $ORIGINAL_LLAMA2_PATH not found. Downloading from FTP server..."
-wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_LLAMA2_PATH:2} -P $LLM_DIR
-fi
-
 if [ -z "$THREAD_NUM" ]; then
 THREAD_NUM=2
 fi
 export OMP_NUM_THREADS=$THREAD_NUM
 
+######## LLAMA2
+# transformers
+export ORIGINAL_LLAMA2_PATH=./llm/Llama-2-7b-chat-hf/
+if [ ! -d $ORIGINAL_LLAMA2_PATH ]; then
+echo "Directory $ORIGINAL_LLAMA2_PATH not found. Downloading from FTP server..."
+wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_LLAMA2_PATH:2} -P $LLM_DIR
+fi
+
+echo ">>> Testing LLAMA2 transformers API"
 std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_int4/llama2/generate.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH)
 echo "the output of the example is: "
 echo $std
@@ -16,3 +19,67 @@ if [[ ! $std == *"AI is a term"* ]]; then
 echo "The expected output is not met."
 return 1
 fi
+# transformers low-bit
+echo ">>> Testing LLAMA2 transformers API sym_int4"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"But her parents were always telling her to stay close to home"* ]]; then
+echo "The expected output is not met."
+return 1
+fi
+
+echo ">>> Testing LLAMA2 transformers API sym_int5"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit sym_int5)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+echo "The expected output is not met."
+return 1
+fi
+echo ">>> Testing LLAMA2 transformers API sym_int8"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit sym_int8)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+echo "The expected output is not met."
+return 1
+fi
+echo ">>> Testing LLAMA2 transformers API asym_int4"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit asym_int4)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+echo "The expected output is not met."
+return 1
+fi
+
+echo ">>> Testing LLAMA2 transformers API asym_int5"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_low_bit/transformers_low_bit_pipeline.py --repo-id-or-model-path $ORIGINAL_LLAMA2_PATH --low-bit asym_int5)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"She wanted to go to places and meet new people"* ]]; then
+echo "The expected output is not met."
+return 1
+fi
+
+
+########## ChatGLM2
+export ORIGINAL_CHATGLM2_PATH=./llm/chatglm2-6b/
+if [ ! -d $ORIGINAL_CHATGLM2_PATH ]; then
+echo "Directory $ORIGINAL_CHATGLM2_PATH not found. Downloading from FTP server..."
+wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_PATH:2} -P $LLM_DIR
+fi
+
+echo ">>> Testing ChatGLM2 transformers API"
+std=$(taskset -c 0-$((THREAD_NUM - 1)) python python/llm/example/transformers/transformers_int4/chatglm2/generate.py --repo-id-or-model-path $ORIGINAL_CHATGLM2_PATH)
+echo "the output of the example is: "
+echo $std
+if [[ ! $std == *"AI指的是人工智能"* ]]; then
+echo "The expected output is not met."
+return 1
+fi
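All of the new low-bit tests drive the same example script and vary only the --low-bit value. The loop below is a minimal sketch of what one sweep exercises, written against the bigdl-llm API that the changed pipeline script itself uses; the checkpoint path and prompt are illustrative, not part of the commit.

```python
# Sketch only: assumes bigdl-llm is installed and a Llama-2 checkpoint
# exists at the path below (the CI script downloads its own copy).
from bigdl.llm.transformers import AutoModelForCausalLM
from transformers import LlamaTokenizer

model_path = "./llm/Llama-2-7b-chat-hf/"
for low_bit in ("sym_int4", "sym_int5", "sym_int8", "asym_int4", "asym_int5"):
    # load_in_low_bit converts the relevant layers to the requested format
    model = AutoModelForCausalLM.from_pretrained(
        model_path, load_in_low_bit=low_bit, trust_remote_code=True)
    tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
    inputs = tokenizer("Once upon a time, there existed a little girl",
                       return_tensors="pt")
    output_ids = model.generate(**inputs, max_new_tokens=32)
    print(low_bit, tokenizer.decode(output_ids[0], skip_special_tokens=True))
```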
Changed file 2 of 3: the README for the transformers low-bit example.

@@ -13,7 +13,7 @@ pip install --pre --upgrade bigdl-llm[all]
 
 ## Run Example
 ```bash
-python ./transformers_low_bit_pipeline.py --model-path decapoda-research/llama-7b-hf --low-bit sym_int5 --save-path ./llama-7b-sym_int5
+python ./transformers_low_bit_pipeline.py --repo-id-or-model-path decapoda-research/llama-7b-hf --low-bit sym_int5 --save-path ./llama-7b-sym_int5
 ```
 arguments info:
 - `--repo-id-or-model-path`: str value, argument defining the huggingface repo id for the large language model to be downloaded, or the path to the huggingface checkpoint folder, the value is 'decapoda-research/llama-7b-hf' by default.
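The renamed flag sits alongside --save-path and --load-path, which let the example quantize once and then reload the saved low-bit checkpoint without re-quantizing. A minimal sketch of that round trip, assuming bigdl-llm's save_low_bit/load_low_bit serialization API (the diff itself shows only the argparse flags):

```python
# Sketch only: save_low_bit/load_low_bit are assumed from bigdl-llm.
from bigdl.llm.transformers import AutoModelForCausalLM

# Quantize from the original checkpoint and persist the low-bit weights...
model = AutoModelForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf", load_in_low_bit="sym_int5")
model.save_low_bit("./llama-7b-sym_int5")

# ...so later runs can load the saved copy directly.
model = AutoModelForCausalLM.load_low_bit("./llama-7b-sym_int5")
```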
Changed file 3 of 3: the transformers_low_bit_pipeline.py example.

@@ -20,7 +20,7 @@ from transformers import LlamaTokenizer, TextGenerationPipeline
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Transformer save_load example')
-    parser.add_argument('--model-path', type=str, default="decapoda-research/llama-7b-hf",
+    parser.add_argument('--repo-id-or-model-path', type=str, default="decapoda-research/llama-7b-hf",
                         help='The huggingface repo id for the large language model to be downloaded'
                              ', or the path to the huggingface checkpoint folder')
     parser.add_argument('--low-bit', type=str, default="sym_int4",
@@ -31,7 +31,7 @@ if __name__ == '__main__':
     parser.add_argument('--load-path', type=str, default=None,
                         help='The path to load the low-bit model.')
     args = parser.parse_args()
-    model_path = args.model_path
+    model_path = args.repo_id_or_model_path
     low_bit = args.low_bit
     load_path = args.load_path
     if load_path:
@@ -40,8 +40,8 @@ if __name__ == '__main__':
     else:
         # load_in_low_bit in bigdl.llm.transformers will convert
         # the relevant layers in the model into corresponding int X format
-        model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit)
-        tokenizer = LlamaTokenizer.from_pretrained(model_path)
+        model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, trust_remote_code=True)
+        tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer, max_new_tokens=32)
     input_str = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun"
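A TextGenerationPipeline built this way is invoked like any Hugging Face pipeline. The hunk ends at input_str, so the call below is a hedged sketch of the step that produces the output the test script checks for:

```python
# Sketch only: the generation call itself is not shown in this hunk.
output = pipeline(input_str)[0]["generated_text"]
print(f"Prompt: {input_str}")
print(f"Output: {output}")
```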