update benchmark_utils readme (#8925)

* update readme

* address code review comments
Xin Qiu 2023-09-08 10:30:26 +08:00 committed by GitHub
parent ea6d4148e9
commit ea0853c0b5


@@ -1,24 +1,19 @@
 # Benchmark tool for transformers int4 (separate 1st token and rest)
-`benchmark_util.py` is used to provide a simple benchmark tool for transformer int4 model to calculate 1st token performance and the rest on CPU.
-`gpu_benchmark_util.py` is used to provide a simple benchmark tool for transformer int4 model to calculate 1st token performance and the rest on GPU.
+`benchmark_util.py` provides a simple benchmark tool for transformers int4 models: it measures 1st token performance and the performance of the remaining tokens separately, on both CPU and GPU.
 ## CPU Usage
 Just put this file into your benchmark directory, and then wrap your transformer int4 model with `BenchmarkWrapper` (`model = BenchmarkWrapper(model)`).
 Take `chatglm-6b` as an example:
 ```python
 import torch
-import os
 from bigdl.llm.transformers import AutoModel
 from transformers import AutoTokenizer
-import time
-import numpy as np
 from benchmark_util import BenchmarkWrapper
 model_path ='THUDM/chatglm-6b'
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
-model = BenchmarkWrapper(model)
+model = BenchmarkWrapper(model, do_print=True)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 prompt = "今天睡不着怎么办"
@@ -38,18 +33,15 @@ Just put this file into your benchmark directory, and then wrap your transformer
 Take `chatglm-6b` as an example:
 ```python
 import torch
-import os
 import intel_extension_for_pytorch as ipex
 from bigdl.llm.transformers import AutoModel
 from transformers import AutoTokenizer
-import time
-import numpy as np
-from gpu_benchmark_util import BenchmarkWrapper
+from benchmark_util import BenchmarkWrapper
 model_path ='THUDM/chatglm-6b'
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
-model = model.half().to('xpu')
-model = BenchmarkWrapper(model)
+model = model.to('xpu')
+model = BenchmarkWrapper(model, do_print=True)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 prompt = "今天睡不着怎么办"