From ea0853c0b5e2f13e6ff5c550eee60fbb15d315bc Mon Sep 17 00:00:00 2001 From: Xin Qiu Date: Fri, 8 Sep 2023 10:30:26 +0800 Subject: [PATCH] update benchmark_utils readme (#8925) * update readme * meet code review --- python/llm/dev/benchmark/README.md | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/python/llm/dev/benchmark/README.md b/python/llm/dev/benchmark/README.md index 2cdba0fc..89c2fa6c 100644 --- a/python/llm/dev/benchmark/README.md +++ b/python/llm/dev/benchmark/README.md @@ -1,24 +1,19 @@ # Benchmark tool for transformers int4 (separate 1st token and rest) -`benchmark_util.py` is used to provide a simple benchmark tool for transformer int4 model to calculate 1st token performance and the rest on CPU. - -`gpu_benchmark_util.py` is used to provide a simple benchmark tool for transformer int4 model to calculate 1st token performance and the rest on GPU. +`benchmark_util.py` is used to provide a simple benchmark tool for transformer int4 models to calculate the 1st token performance and the rest on CPU and GPU. ## CPU Usage Just put this file into your benchmark directory, and then wrap your transformer int4 model with `BenchmarkWrapper` (`model = BenchmarkWrapper(model)`). 
Take `chatglm-6b` as an example: ```python import torch -import os from bigdl.llm.transformers import AutoModel from transformers import AutoTokenizer -import time -import numpy as np from benchmark_util import BenchmarkWrapper model_path ='THUDM/chatglm-6b' model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True) -model = BenchmarkWrapper(model) +model = BenchmarkWrapper(model, do_print=True) tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) prompt = "今天睡不着怎么办" @@ -38,18 +33,15 @@ Just put this file into your benchmark directory, and then wrap your transformer Take `chatglm-6b` as an example: ```python import torch -import os import intel_extension_for_pytorch as ipex from bigdl.llm.transformers import AutoModel from transformers import AutoTokenizer -import time -import numpy as np -from gpu_benchmark_util import BenchmarkWrapper +from benchmark_util import BenchmarkWrapper model_path ='THUDM/chatglm-6b' model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True) -model = model.half().to('xpu') -model = BenchmarkWrapper(model) +model = model.to('xpu') +model = BenchmarkWrapper(model, do_print=True) tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) prompt = "今天睡不着怎么办"