update benchmark_utils readme (#8925)

* update readme

* meet code review
This commit is contained in:
Xin Qiu 2023-09-08 10:30:26 +08:00 committed by GitHub
parent ea6d4148e9
commit ea0853c0b5

View file

@ -1,24 +1,19 @@
# Benchmark tool for transformers int4 (separate 1st token and rest)
`benchmark_util.py` provides a simple benchmark tool for a transformers int4 model, measuring 1st-token performance and the performance of the remaining tokens on CPU.
`gpu_benchmark_util.py` is used to provide a simple benchmark tool for transformer int4 model to calculate 1st token performance and the rest on GPU.
`benchmark_util.py` provides a simple benchmark tool for a transformers int4 model, measuring 1st-token performance and the performance of the remaining tokens on both CPU and GPU.
## CPU Usage
Just put this file into your benchmark directory, then wrap your transformers int4 model with `BenchmarkWrapper` (`model = BenchmarkWrapper(model, do_print=True)`).
Take `chatglm-6b` as an example:
```python
import torch
import os
from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer
import time
import numpy as np
from benchmark_util import BenchmarkWrapper
model_path ='THUDM/chatglm-6b'
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
model = BenchmarkWrapper(model)
model = BenchmarkWrapper(model, do_print=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
prompt = "今天睡不着怎么办"
@ -38,18 +33,15 @@ Just put this file into your benchmark directory, and then wrap your transformer
Take `chatglm-6b` as an example:
```python
import torch
import os
import intel_extension_for_pytorch as ipex
from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer
import time
import numpy as np
from gpu_benchmark_util import BenchmarkWrapper
from benchmark_util import BenchmarkWrapper
model_path ='THUDM/chatglm-6b'
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
model = model.half().to('xpu')
model = BenchmarkWrapper(model)
model = model.to('xpu')
model = BenchmarkWrapper(model, do_print=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
prompt = "今天睡不着怎么办"