From 7e2742cace7bba496183c8b09f74a9ad530c4e3e Mon Sep 17 00:00:00 2001
From: Yishuo Wang
Date: Thu, 8 Dec 2022 11:23:48 +0800
Subject: [PATCH] Nano: Update examples and tutorials (#6888)

---
 docs/readthedocs/source/doc/Nano/Howto/install_in_colab.md | 2 +-
 .../source/doc/Nano/QuickStart/pytorch_onnxruntime.md      | 4 ++--
 .../source/doc/Nano/QuickStart/pytorch_openvino.md         | 4 ++--
 .../source/doc/Nano/QuickStart/pytorch_quantization_inc.md | 7 ++++---
 .../doc/Nano/QuickStart/pytorch_quantization_inc_onnx.md   | 5 +++--
 .../doc/Nano/QuickStart/pytorch_quantization_openvino.md   | 3 ++-
 6 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/docs/readthedocs/source/doc/Nano/Howto/install_in_colab.md b/docs/readthedocs/source/doc/Nano/Howto/install_in_colab.md
index c1ce8bb1..caf59cbd 100644
--- a/docs/readthedocs/source/doc/Nano/Howto/install_in_colab.md
+++ b/docs/readthedocs/source/doc/Nano/Howto/install_in_colab.md
@@ -56,7 +56,7 @@ To enable OpenVINO acceleration, or use POT for quantization, you need to instal
 ```eval_rst
 .. note::
-    If you meet ``ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject`` when using ``Trainer.trace`` or ``Trainer.quantize`` function, you could try to solve it by upgrading ``numpy`` through:
+    If you meet ``ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject`` when using ``InferenceOptimizer.trace`` or ``InferenceOptimizer.quantize`` function, you could try to solve it by upgrading ``numpy`` through:
 
     .. code-block:: python
 
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_onnxruntime.md b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_onnxruntime.md
index 59ed3bbd..3f7a6b52 100644
--- a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_onnxruntime.md
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_onnxruntime.md
@@ -78,8 +78,8 @@ When you're ready, you can simply append the following part to enable your ONNXR
 # you have run `trainer.fit` before trace
 # Model has `example_input_array` set
 # Model is a LightningModule with any dataloader attached.
-from bigdl.nano.pytorch import Trainer
-ort_model = Trainer.trace(model_ft, accelerator="onnxruntime", input_sample=torch.rand(1, 3, 224, 224))
+from bigdl.nano.pytorch import InferenceOptimizer
+ort_model = InferenceOptimizer.trace(model_ft, accelerator="onnxruntime", input_sample=torch.rand(1, 3, 224, 224))
 
 # The usage is almost the same with any PyTorch module
 y_hat = ort_model(x)
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_openvino.md b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_openvino.md
index 7163594e..894dd917 100644
--- a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_openvino.md
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_openvino.md
@@ -78,8 +78,8 @@ When you're ready, you can simply append the following part to enable your OpenV
 # The argument `input_sample` is not required in the following cases:
 # you have run `trainer.fit` before trace
 # The Model has `example_input_array` set
-from bigdl.nano.pytorch import Trainer
-ov_model = Trainer.trace(model_ft, accelerator="openvino", input_sample=torch.rand(1, 3, 224, 224))
+from bigdl.nano.pytorch import InferenceOptimizer
+ov_model = InferenceOptimizer.trace(model_ft, accelerator="openvino", input_sample=torch.rand(1, 3, 224, 224))
 
 # The usage is almost the same with any PyTorch module
 y_hat = ov_model(x)
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_inc.md b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_inc.md
index ec4b695c..5d0d3864 100644
--- a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_inc.md
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_inc.md
@@ -74,12 +74,13 @@ y_hat.argmax(dim=1)
 ```
 
 ### Step 3: Quantization using Intel Neural Compressor
-Quantization is widely used to compress models to a lower precision, which not only reduces the model size but also accelerates inference. BigDL-Nano provides `Trainer.quantize()` API for users to quickly obtain a quantized model with accuracy control by specifying a few arguments.
+Quantization is widely used to compress models to a lower precision, which not only reduces the model size but also accelerates inference. BigDL-Nano provides `InferenceOptimizer.quantize()` API for users to quickly obtain a quantized model with accuracy control by specifying a few arguments.
 
-Without extra accelerator, `Trainer.quantize()` returns a pytorch module with desired precision and accuracy. You can add quantization as below:
+Without extra accelerator, `InferenceOptimizer.quantize()` returns a pytorch module with desired precision and accuracy. You can add quantization as below:
 ```python
+from bigdl.nano.pytorch import InferenceOptimizer
 from torchmetrics.functional import accuracy
-q_model = trainer.quantize(model, calib_dataloader=train_dataloader, metric=accuracy)
+q_model = InferenceOptimizer.quantize(model, calib_data=train_dataloader, metric=accuracy)
 
 # run simple prediction
 y_hat = q_model(x)
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_inc_onnx.md b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_inc_onnx.md
index d6fb3767..8ba8a04b 100644
--- a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_inc_onnx.md
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_inc_onnx.md
@@ -73,12 +73,13 @@ y_hat.argmax(dim=1)
 ```
 
 ### Step 3: Quantization with ONNXRuntime accelerator
-With the ONNXRuntime accelerator, `Trainer.quantize()` will return a model with compressed precision but running inference in the ONNXRuntime engine.
+With the ONNXRuntime accelerator, `InferenceOptimizer.quantize()` will return a model with compressed precision but running inference in the ONNXRuntime engine.
 you can add quantization as below:
 ```python
+from bigdl.nano.pytorch import InferenceOptimizer
 from torchmetrics.functional import accuracy
-ort_q_model = trainer.quantize(model, accelerator='onnxruntime', calib_dataloader=train_dataloader, metric=accuracy)
+ort_q_model = InferenceOptimizer.quantize(model, accelerator='onnxruntime', calib_data=train_dataloader, metric=accuracy)
 
 # run simple prediction
 y_hat = ort_q_model(x)
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_openvino.md b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_openvino.md
index b14f686c..6d3de952 100644
--- a/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_openvino.md
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/pytorch_quantization_openvino.md
@@ -76,8 +76,9 @@ y_hat.argmax(dim=1)
 ### Step 3: Quantization using Post-training Optimization Tools
 Accelerator='openvino' means using OpenVINO POT to do quantization. The quantization can be added as below:
 ```python
+from bigdl.nano.pytorch import InferenceOptimizer
 from torchmetrics import Accuracy
-ov_q_model = trainer.quantize(model, accelerator="openvino", calib_dataloader=data_loader)
+ov_q_model = InferenceOptimizer.quantize(model, accelerator="openvino", calib_data=data_loader)
 
 # run simple prediction
 batch = torch.stack([data_set[0][0], data_set[1][0]])
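
For reference, a minimal consolidated sketch (not taken from the patch) of the renamed `InferenceOptimizer` API that these docs now use, assuming BigDL-Nano is installed with its PyTorch inference dependencies (ONNXRuntime and Intel Neural Compressor). The `resnet18` stand-in and the random calibration tensors are placeholders for the fine-tuned model and dataloaders built earlier in each quickstart.

```python
# Sketch only: placeholder model and data instead of the quickstarts' fine-tuned model.
import torch
from torch.utils.data import DataLoader, TensorDataset
from torchvision.models import resnet18
from torchmetrics.functional import accuracy
from bigdl.nano.pytorch import InferenceOptimizer

model_ft = resnet18(num_classes=10)
calib_set = TensorDataset(torch.rand(8, 3, 224, 224), torch.randint(0, 10, (8,)))
train_dataloader = DataLoader(calib_set, batch_size=4)

# FP32 inference accelerated with ONNXRuntime (replaces Trainer.trace)
ort_model = InferenceOptimizer.trace(model_ft, accelerator="onnxruntime",
                                     input_sample=torch.rand(1, 3, 224, 224))

# INT8 quantization with Intel Neural Compressor (replaces Trainer.quantize);
# note the calib_dataloader -> calib_data rename shown in the diff
q_model = InferenceOptimizer.quantize(model_ft, calib_data=train_dataloader,
                                      metric=accuracy)

# Both wrappers are called like ordinary PyTorch modules
x = torch.rand(2, 3, 224, 224)
y_hat = q_model(x).argmax(dim=1)
```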