update benchmark_utils readme (#8925)
* update readme
* meet code review
This commit is contained in:
parent ea6d4148e9
commit ea0853c0b5

1 changed file with 5 additions and 13 deletions
````diff
@@ -1,24 +1,19 @@
 # Benchmark tool for transformers int4 (separate 1st token and rest)
 
-`benchmark_util.py` provides a simple benchmark tool for transformer int4 models, measuring 1st token performance and the rest separately on CPU.
-
-`gpu_benchmark_util.py` provides a simple benchmark tool for transformer int4 models, measuring 1st token performance and the rest separately on GPU.
+`benchmark_util.py` provides a simple benchmark tool for transformer int4 models, measuring 1st token performance and the rest separately on both CPU and GPU.
 
 ## CPU Usage
 
 Just put this file into your benchmark directory, and then wrap your transformer int4 model with `BenchmarkWrapper` (`model = BenchmarkWrapper(model)`).
 Take `chatglm-6b` as an example:
 ```python
 import torch
-import os
 from bigdl.llm.transformers import AutoModel
 from transformers import AutoTokenizer
-import time
-import numpy as np
 from benchmark_util import BenchmarkWrapper
 
 model_path = 'THUDM/chatglm-6b'
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
-model = BenchmarkWrapper(model)
+model = BenchmarkWrapper(model, do_print=True)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 prompt = "今天睡不着怎么办"  # "What should I do if I can't sleep tonight?"
````
````diff
@@ -38,18 +33,15 @@ Just put this file into your benchmark directory, and then wrap your transformer
 Take `chatglm-6b` as an example:
 ```python
 import torch
-import os
 import intel_extension_for_pytorch as ipex
 from bigdl.llm.transformers import AutoModel
 from transformers import AutoTokenizer
-import time
-import numpy as np
-from gpu_benchmark_util import BenchmarkWrapper
+from benchmark_util import BenchmarkWrapper
 
 model_path = 'THUDM/chatglm-6b'
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
-model = model.half().to('xpu')
-model = BenchmarkWrapper(model)
+model = model.to('xpu')
+model = BenchmarkWrapper(model, do_print=True)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 prompt = "今天睡不着怎么办"  # "What should I do if I can't sleep tonight?"
 
````
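The GPU example is cut off at the same point. A plausible continuation follows, assuming an IPEX XPU build where `torch.xpu.synchronize()` is available: inputs must live on the same `'xpu'` device as the model, and because XPU kernels execute asynchronously, a synchronize is needed before wall-clock timings are meaningful.

```python
# Hypothetical continuation of the GPU example above; not shown in the diff.
with torch.inference_mode():
    # Inputs must be on the same device ('xpu') as the model.
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to('xpu')
    output = model.generate(input_ids, max_new_tokens=32)  # warmup run
    torch.xpu.synchronize()  # XPU ops are asynchronous; wait for completion
    output = model.generate(input_ids, max_new_tokens=32)  # timed run
    torch.xpu.synchronize()
    print(tokenizer.decode(output[0].cpu(), skip_special_tokens=True))
```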