From 5506f60732120a22199f27da98dde6ec4b64ca9c Mon Sep 17 00:00:00 2001
From: Mingzhi Hu <49382651+y199387@users.noreply.github.com>
Date: Tue, 9 Aug 2022 09:03:45 +0800
Subject: [PATCH] Nano: Add TensorFlow step by step tutorials (#5156)

* Add tutorial notebook
* Add md
* Test on readthedocs
* Fix markdown
* fix md
* update notebooks
* update requirements version in doc
* update
* add and update tutorial
* add unit test for tensorflow tutorial
* reduce test time
* reduce test time
* update shell
* update action
* Update tutorial
* reduce ut time
* reduce ut time
* reduce ut time
* reduce ut time
* reduce ut time
* Update
* Fix shell
* update
* update
* rollback requirements
* Update
* Update

Co-authored-by: pinggao187
---
 docs/readthedocs/requirements-doc.txt         |   2 +-
 .../source/doc/Nano/QuickStart/index.md       |  29 ++++
 .../Nano/QuickStart/tensorflow_embedding.md   | 130 ++++++++++++++++++
 .../tensorflow_quantization_quickstart.md     |  88 ++++++++++++
 .../QuickStart/tensorflow_train_quickstart.md | 130 ++++++++++++++++++
 5 files changed, 378 insertions(+), 1 deletion(-)
 create mode 100644 docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_embedding.md
 create mode 100644 docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_quantization_quickstart.md
 create mode 100644 docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_train_quickstart.md

diff --git a/docs/readthedocs/requirements-doc.txt b/docs/readthedocs/requirements-doc.txt
index 8c76e926..44e3a367 100644
--- a/docs/readthedocs/requirements-doc.txt
+++ b/docs/readthedocs/requirements-doc.txt
@@ -37,4 +37,4 @@ optuna==2.10.0
 ConfigSpace==0.5.0
 sphinx-design==0.2.0
 sphinx-external-toc==0.3.0
-nbsphinx==0.8.9
\ No newline at end of file
+nbsphinx==0.8.9
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/index.md b/docs/readthedocs/source/doc/Nano/QuickStart/index.md
index 7417a562..110dbd92 100644
--- a/docs/readthedocs/source/doc/Nano/QuickStart/index.md
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/index.md
@@ -15,6 +15,13 @@

 ---------------------------

+- [**BigDL-Nano TensorFlow Training Quickstart**](./tensorflow_train_quickstart.html)
+
+    > ![](../../../../image/GitHub-Mark-32px.png)[View source on GitHub][Nano_tensorflow_training]
+
+    In this guide we will describe how to accelerate TensorFlow Keras applications on training workloads with BigDL-Nano.
+
+---------------------------

 - [**BigDL-Nano PyTorch ONNXRuntime Acceleration Quickstart**](./pytorch_onnxruntime.html)

     > ![](../../../../image/GitHub-Mark-32px.png)[View source on GitHub][Nano_pytorch_onnxruntime]

@@ -56,8 +63,27 @@

 ---------------------------

+- [**BigDL-Nano TensorFlow Quantization with INC Quickstart**](./tensorflow_quantization_quickstart.html)
+
+    > ![](../../../../image/GitHub-Mark-32px.png)[View source on GitHub][Nano_tensorflow_quantization_inc]
+
+    In this guide we will demonstrate how to apply post-training quantization on a Keras model with BigDL-Nano.
+
+
+---------------------------
+
+- [**BigDL-Nano TensorFlow SparseEmbedding and SparseAdam**](./tensorflow_embedding.html)
+
+    > ![](../../../../image/GitHub-Mark-32px.png)[View source on GitHub][Nano_tensorflow_embedding]
+
+    In this guide we demonstrate how to use SparseEmbedding and SparseAdam to obtain stronger performance with sparse gradients.
+
+
+-------------------------
+
+
 - [**BigDL-Nano Hyperparameter Tuning (Tensorflow Sequential/Functional API) Quickstart**](../Tutorials/seq_and_func.html)
+
     > ![](../../../../image/colab_logo_32px.png)[Run in Google Colab][Nano_hpo_tf_seq_func_colab]  ![](../../../../image/GitHub-Mark-32px.png)[View source on GitHub][Nano_hpo_tf_seq_func]
@@ -75,11 +101,14 @@
 [Nano_pytorch_training]: 
 [Nano_pytorch_nano]: 
+[Nano_tensorflow_training]: 
 [Nano_pytorch_onnxruntime]: 
 [Nano_pytorch_openvino]: 
 [Nano_pytorch_Quantization_inc]: 
 [Nano_pytorch_quantization_inc_onnx]: 
 [Nano_pytorch_quantization_openvino]: 
+[Nano_tensorflow_quantization_inc]: 
+[Nano_tensorflow_embedding]: 
 [Nano_hpo_tf_seq_func]: 
 [Nano_hpo_tf_seq_func_colab]: 
 [Nano_hpo_tf_subclassing]: 
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_embedding.md b/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_embedding.md
new file mode 100644
index 00000000..a9659cc6
--- /dev/null
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_embedding.md
@@ -0,0 +1,130 @@
+# BigDL-Nano TensorFlow SparseEmbedding and SparseAdam
+**In this guide we demonstrate how to use `SparseEmbedding` and `SparseAdam` to obtain stronger performance with sparse gradients.**
+
+### **Step 0: Prepare Environment**
+
+We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
+
+```bash
+conda create -n py37 python==3.7.10 setuptools==58.0.4
+conda activate py37
+# nightly built version
+pip install --pre --upgrade bigdl-nano[tensorflow]
+# set env variables for your conda environment
+source bigdl-nano-init
+pip install tensorflow-datasets
+```
+
+### **Step 1: Import BigDL-Nano**
+The optimizations in BigDL-Nano are delivered through BigDL-Nano’s `Model` and `Sequential` classes. For most cases, you can just replace `tf.keras.Model` with `bigdl.nano.tf.keras.Model` and `tf.keras.Sequential` with `bigdl.nano.tf.keras.Sequential` to benefit from BigDL-Nano.
+```python
+from bigdl.nano.tf.keras import Model, Sequential
+```
+
+### **Step 2: Load the Data**
+We demonstrate with imdb_reviews, a large movie review dataset.
+```python
+import tensorflow_datasets as tfds
+(raw_train_ds, raw_val_ds, raw_test_ds), info = tfds.load(
+    "imdb_reviews",
+    split=['train[:80%]', 'train[80%:]', 'test'],
+    as_supervised=True,
+    batch_size=32,
+    shuffle_files=False,
+    with_info=True
+)
+```
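+
+Before cleaning the text in the next step, you may want to peek at one raw review. The snippet below is an optional, illustrative sketch (it is not required for the rest of the guide); it simply shows that the raw reviews contain HTML `<br />` tags:
+```python
+# Optional sketch: print part of the first raw review in a batch.
+# Note the embedded "<br />" HTML tags that Step 3 strips out.
+for text_batch, label_batch in raw_train_ds.take(1):
+    print(text_batch.numpy()[0][:200])
+    print("label:", label_batch.numpy()[0])
+```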
+
+### **Step 3: Prepare the Data**
+In particular, we remove `<br />` tags.
+```python
+import tensorflow as tf
+from tensorflow.keras.layers import TextVectorization
+import string
+import re
+
+def custom_standardization(input_data):
+    lowercase = tf.strings.lower(input_data)
+    stripped_html = tf.strings.regex_replace(lowercase, "<br />", " ")
+    return tf.strings.regex_replace(
+        stripped_html, f"[{re.escape(string.punctuation)}]", ""
+    )
+
+max_features = 20000
+embedding_dim = 128
+sequence_length = 500
+
+vectorize_layer = TextVectorization(
+    standardize=custom_standardization,
+    max_tokens=max_features,
+    output_mode="int",
+    output_sequence_length=sequence_length,
+)
+
+# Let's make a text-only dataset (no labels):
+text_ds = raw_train_ds.map(lambda x, y: x)
+# Let's call `adapt`:
+vectorize_layer.adapt(text_ds)
+
+def vectorize_text(text, label):
+    text = tf.expand_dims(text, -1)
+    return vectorize_layer(text), label
+
+
+# Vectorize the data.
+train_ds = raw_train_ds.map(vectorize_text)
+val_ds = raw_val_ds.map(vectorize_text)
+test_ds = raw_test_ds.map(vectorize_text)
+
+# Do async prefetching / buffering of the data for best performance on GPU.
+train_ds = train_ds.cache().prefetch(buffer_size=10)
+val_ds = val_ds.cache().prefetch(buffer_size=10)
+test_ds = test_ds.cache().prefetch(buffer_size=10)
+```
+
+### **Step 4: Build Model**
+`bigdl.nano.tf.keras.Embedding` is a slightly modified version of the `tf.keras.layers.Embedding` layer; it only applies the regularizer to the output of the embedding layer, so that the gradient with respect to the embeddings is sparse. `bigdl.nano.tf.optimizers.SparseAdam` is a variant of the `Adam` optimizer that handles sparse updates more efficiently.
+Here we create two models: one using the normal `Embedding` layer and `Adam` optimizer, and the other using `SparseEmbedding` and `SparseAdam` (a sketch of the dense baseline is included after the training step below).
+```python
+from tensorflow.keras import layers
+from bigdl.nano.tf.keras.layers import Embedding
+from bigdl.nano.tf.optimizers import SparseAdam
+
+inputs = tf.keras.Input(shape=(None,), dtype="int64")
+
+# The Embedding layer can only be used as the first layer in a model;
+# you need to provide the input shape (a single shape, not including the batch dimension).
+x = Embedding(max_features, embedding_dim)(inputs)
+x = layers.Dropout(0.5)(x)
+
+# Conv1D + global max pooling
+x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
+x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
+x = layers.GlobalMaxPooling1D()(x)
+
+# We add a vanilla hidden layer:
+x = layers.Dense(128, activation="relu")(x)
+x = layers.Dropout(0.5)(x)
+
+# We project onto a single unit output layer, and squash it with a sigmoid:
+predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x)
+
+model = Model(inputs, predictions)
+
+# Compile the model with binary crossentropy loss and a SparseAdam optimizer.
+model.compile(loss="binary_crossentropy", optimizer=SparseAdam(), metrics=["accuracy"])
+```
+
+### **Step 5: Training**
+```python
+# Fit the model using the train and val datasets.
+model.fit(train_ds, validation_data=val_ds, epochs=3)
+
+model.evaluate(test_ds)
+```
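+
+For the dense baseline mentioned in Step 4, a minimal sketch could look like the following. This is illustrative only: it assumes the standard `tf.keras.layers.Embedding` layer and `tf.keras.optimizers.Adam`, and the name `model_dense` is not used elsewhere in this guide.
+```python
+# Optional sketch: the same architecture with a standard (dense-gradient) Embedding
+# layer and the standard Adam optimizer, for comparison with the sparse version above.
+def make_dense_model():
+    inputs = tf.keras.Input(shape=(None,), dtype="int64")
+    x = tf.keras.layers.Embedding(max_features, embedding_dim)(inputs)
+    x = layers.Dropout(0.5)(x)
+    x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
+    x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
+    x = layers.GlobalMaxPooling1D()(x)
+    x = layers.Dense(128, activation="relu")(x)
+    x = layers.Dropout(0.5)(x)
+    outputs = layers.Dense(1, activation="sigmoid")(x)
+    dense_model = Model(inputs, outputs)
+    dense_model.compile(loss="binary_crossentropy",
+                        optimizer=tf.keras.optimizers.Adam(),
+                        metrics=["accuracy"])
+    return dense_model
+
+model_dense = make_dense_model()
+model_dense.fit(train_ds, validation_data=val_ds, epochs=3)
+model_dense.evaluate(test_ds)
+```
+Comparing the epoch time and accuracy of `model_dense` with the sparse model above shows the effect of `SparseEmbedding` and `SparseAdam`.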
+
+You can find the detailed training results [here](https://github.com/intel-analytics/BigDL/blob/main/python/nano/notebooks/tensorflow/tutorial/tensorflow_embedding.ipynb)
\ No newline at end of file
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_quantization_quickstart.md b/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_quantization_quickstart.md
new file mode 100644
index 00000000..f94182e0
--- /dev/null
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_quantization_quickstart.md
@@ -0,0 +1,88 @@
+# BigDL-Nano TensorFlow Quantization Quickstart
+**In this guide we will demonstrate how to apply post-training quantization on a Keras model with BigDL-Nano in 4 simple steps.**
+
+### **Step 0: Prepare Environment**
+
+We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
+
+```bash
+conda create -n py37 python==3.7.10 setuptools==58.0.4
+conda activate py37
+# nightly built version
+pip install --pre --upgrade bigdl-nano[tensorflow]
+# set env variables for your conda environment
+source bigdl-nano-init
+```
+
+By default, [Intel Neural Compressor](https://github.com/intel/neural-compressor) is not installed with BigDL-Nano. So if you decide to use it as your quantization backend, you'll need to install it first:
+```bash
+pip install neural-compressor==1.11.0
+```
+
+BigDL-Nano provides several APIs which can help users easily apply optimizations on inference pipelines to improve latency and throughput. The Keras `Model` (`bigdl.nano.tf.keras.Model`) and `Sequential` (`bigdl.nano.tf.keras.Sequential`) classes provide the APIs for all the optimizations you need for inference.
+
+```python
+from bigdl.nano.tf.keras import Model, Sequential
+```
+
+### Step 1: Loading Data
+
+Here we load the data from tensorflow_datasets. [Imagenette](https://github.com/fastai/imagenette) is a subset of 10 easily classified classes from the ImageNet dataset.
+
+```python
+import tensorflow_datasets as tfds
+DATANAME = 'imagenette/320px-v2'
+(train_ds, test_ds), info = tfds.load(DATANAME, data_dir='../data/',
+                                      split=['train', 'validation'],
+                                      with_info=True,
+                                      as_supervised=True)
+```
+
+#### Prepare Inputs
+Here we resize the input images to a uniform `img_size` and one-hot encode the labels.
+
+```python
+import tensorflow as tf
+img_size = 224
+num_classes = info.features['label'].num_classes
+train_ds = train_ds.map(lambda img, label: (tf.image.resize(img, (img_size, img_size)), tf.one_hot(label, num_classes))).batch(32)
+test_ds = test_ds.map(lambda img, label: (tf.image.resize(img, (img_size, img_size)), tf.one_hot(label, num_classes))).batch(32)
+```
+
+### Step 2: Build Model
+Here we initialize the ResNet50 from `tf.keras.applications` with pre-trained ImageNet weights.
+```python
+from tensorflow.keras.applications import ResNet50
+from tensorflow.keras import layers
+inputs = tf.keras.layers.Input(shape=(224, 224, 3))
+x = tf.cast(inputs, tf.float32)
+x = tf.keras.applications.resnet50.preprocess_input(x)
+backbone = ResNet50(weights='imagenet')
+backbone.trainable = False
+x = backbone(x)
+x = layers.Dense(512, activation='relu')(x)
+outputs = layers.Dense(num_classes, activation='softmax')(x)
+
+model = Model(inputs=inputs, outputs=outputs)
+model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
+
+# fit
+model.fit(train_ds, epochs=1)
+```
+
+### Step 3: Quantization with Intel Neural Compressor
+[`Model.quantize()`](https://bigdl.readthedocs.io/en/latest/doc/PythonAPI/Nano/tensorflow.html#bigdl.nano.tf.keras.Model) returns a Keras model with the desired precision and accuracy. Taking ResNet50 as an example, you can add quantization as below.
+
+```python
+from tensorflow.keras.metrics import CategoricalAccuracy
+q_model = model.quantize(calib_dataset=test_ds,
+                         metric=CategoricalAccuracy(),
+                         tuning_strategy='basic'
+                         )
+```
+The quantized model can be called to do inference like a normal Keras model.
+```python
+# run simple prediction with transparent acceleration
+for img, _ in test_ds:
+    q_model(img)
+```
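+
+To check that quantization preserved accuracy, you can reuse the same metric on the test set. This is an optional, illustrative sketch rather than part of the quickstart; it only assumes that `q_model` can be called on a batch like a normal Keras model, as described above.
+```python
+# Optional sketch: accuracy of the quantized model on the (one-hot labelled) test set.
+acc = CategoricalAccuracy()
+for img, label in test_ds:
+    acc.update_state(label, q_model(img))
+print("quantized model accuracy:", float(acc.result()))
+```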
diff --git a/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_train_quickstart.md b/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_train_quickstart.md
new file mode 100644
index 00000000..b681e236
--- /dev/null
+++ b/docs/readthedocs/source/doc/Nano/QuickStart/tensorflow_train_quickstart.md
@@ -0,0 +1,130 @@
+# BigDL-Nano TensorFlow Training Quickstart
+**In this guide we will describe how to accelerate TensorFlow Keras applications on training workloads using BigDL-Nano in 5 simple steps.**
+
+### **Step 0: Prepare Environment**
+
+We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
+
+```bash
+conda create -n py37 python==3.7.10 setuptools==58.0.4
+conda activate py37
+# nightly built version
+pip install --pre --upgrade bigdl-nano[tensorflow]
+# set env variables for your conda environment
+source bigdl-nano-init
+pip install tensorflow-datasets
+```
+
+### **Step 1: Import BigDL-Nano**
+The optimizations in BigDL-Nano are delivered through BigDL-Nano’s `Model` and `Sequential` classes. For most cases, you can just replace `tf.keras.Model` with `bigdl.nano.tf.keras.Model` and `tf.keras.Sequential` with `bigdl.nano.tf.keras.Sequential` to benefit from BigDL-Nano.
+```python
+from bigdl.nano.tf.keras import Model, Sequential
+```
+
+### **Step 2: Load the Data**
+Here we load the data from tensorflow_datasets (hereafter [TFDS](https://www.tensorflow.org/datasets)). The [Stanford Dogs](http://vision.stanford.edu/aditya86/ImageNetDogs/main.html) dataset contains images of 120 breeds of dogs from around the world. There are 20,580 images, out of which 12,000 are used for training and 8,580 for testing.
+```python
+import tensorflow_datasets as tfds
+(ds_train, ds_test), ds_info = tfds.load(
+    "stanford_dogs",
+    data_dir="../data/",
+    split=['train', 'test'],
+    with_info=True,
+    as_supervised=True
+)
+```
+#### Prepare Inputs
+Since the dataset includes images of various sizes, we need to resize them to a common size. The labels are one-hot encoded and the dataset is batched.
+```python
+import tensorflow as tf
+img_size = 224
+num_classes = ds_info.features['label'].num_classes
+batch_size = 64
+def preprocessing(img, label):
+    return tf.image.resize(img, (img_size, img_size)), tf.one_hot(label, num_classes)
+AUTOTUNE = tf.data.AUTOTUNE
+ds_train = ds_train.cache().repeat().shuffle(1000).map(preprocessing).batch(batch_size, drop_remainder=True).prefetch(AUTOTUNE)
+ds_test = ds_test.map(preprocessing).batch(batch_size, drop_remainder=True).prefetch(AUTOTUNE)
+```
+
+### **Step 3: Build Model**
+BigDL-Nano's `Model` (`bigdl.nano.tf.keras.Model`) and `Sequential` (`bigdl.nano.tf.keras.Sequential`) classes have APIs identical to `tf.keras.Model` and `tf.keras.Sequential`.
+Here we initialize the model with pre-trained ImageNet weights, and we fine-tune it on the Stanford Dogs dataset.
+```python
+from tensorflow.keras import layers
+from tensorflow.keras.applications import EfficientNetB0
+data_augmentation = Sequential([
+        layers.RandomRotation(factor=0.15),
+        layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
+        layers.RandomFlip(),
+        layers.RandomContrast(factor=0.1),
+    ])
+def make_model(learning_rate=1e-2):
+    inputs = layers.Input(shape=(img_size, img_size, 3))
+
+    x = data_augmentation(inputs)
+    backbone = EfficientNetB0(include_top=False, input_tensor=x)
+
+    backbone.trainable = False
+
+    x = layers.GlobalAveragePooling2D(name='avg_pool')(backbone.output)
+    x = layers.BatchNormalization()(x)
+
+    top_dropout_rate = 0.2
+    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
+    outputs = layers.Dense(num_classes, activation="softmax", name="pred")(x)
+
+    model = Model(inputs, outputs, name='EfficientNet')
+    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
+    model.compile(
+        loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']
+    )
+    return model

+def unfreeze_model(model):
+    for layer in model.layers[-20:]:
+        if not isinstance(layer, layers.BatchNormalization):
+            layer.trainable = True
+
+    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
+    model.compile(
+        loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']
+    )
+```
+
+### **Step 4: Training**
+```python
+steps_per_epoch = ds_info.splits['train'].num_examples // batch_size
+model_default = make_model()
+
+model_default.fit(ds_train,
+                  epochs=15,
+                  validation_data=ds_test,
+                  steps_per_epoch=steps_per_epoch)
+unfreeze_model(model_default)
+his_default = model_default.fit(ds_train,
+                                epochs=10,
+                                validation_data=ds_test,
+                                steps_per_epoch=steps_per_epoch)
+```
+#### Multi-Instance Training
+BigDL-Nano makes it very easy to conduct multi-instance training correctly. You can just set the `num_processes` parameter in the `fit` method of your `Model` or `Sequential` object, and BigDL-Nano will launch that number of processes to perform data-parallel training.
+```python
+model_multi = make_model()
+
+model_multi.fit(ds_train,
+                epochs=15,
+                validation_data=ds_test,
+                steps_per_epoch=steps_per_epoch,
+                num_processes=4,
+                backend='multiprocessing')
+unfreeze_model(model_multi)
+his_multi = model_multi.fit(ds_train,
+                            epochs=10,
+                            validation_data=ds_test,
+                            steps_per_epoch=steps_per_epoch,
+                            num_processes=4,
+                            backend='multiprocessing')
+```
+
+You can find the detailed training results [here](https://github.com/intel-analytics/BigDL/blob/main/python/nano/notebooks/tensorflow/tutorial/tensorflow_fit.ipynb)
\ No newline at end of file