Update orca docs (#3278)
* update orca docs
* update colab
* update
* meet comments
* update
* update
parent bc8d5733e8
commit 0334eda6fe
10 changed files with 91 additions and 56 deletions
````diff
@@ -77,7 +77,7 @@ Most AI projects start with a Python notebook running on a single laptop; howeve
 First, initialize [Orca Context](https://bigdl.readthedocs.io/en/latest/doc/Orca/Overview/orca-context.html):
 
 ```python
-from bigdl.orca import init_orca_context
+from bigdl.orca import init_orca_context, OrcaContext
 
 # cluster_mode can be "local", "k8s" or "yarn"
 sc = init_orca_context(cluster_mode="yarn", cores=4, memory="10g", num_nodes=2)
````
````diff
@@ -88,6 +88,7 @@ Next, perform [data-parallel processing in Orca](https://bigdl.readthedocs.io/en
 ```python
 from pyspark.sql.functions import array
 
+spark = OrcaContext.get_spark_session()
 df = spark.read.parquet(file_path)
 df = df.withColumn('user', array('user')) \
        .withColumn('item', array('item'))
````
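Reviewer note: taken together, the two hunks above form the opening example of the updated page. A minimal runnable sketch of that pattern, assuming local mode and a placeholder parquet path:

```python
from bigdl.orca import init_orca_context, OrcaContext, stop_orca_context
from pyspark.sql.functions import array

# cluster_mode can be "local", "k8s" or "yarn"
sc = init_orca_context(cluster_mode="local", cores=4, memory="10g")
spark = OrcaContext.get_spark_session()

df = spark.read.parquet("/path/to/data.parquet")  # placeholder path
df = df.withColumn('user', array('user')) \
       .withColumn('item', array('item'))
df.show(5)

stop_orca_context()
```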
````diff
@@ -12,7 +12,7 @@ TensorFlow Dataset:
 ```python
 import tensorflow as tf
 import tensorflow_datasets as tfds
-from zoo.orca.learn.tf.estimator import Estimator
+from bigdl.orca.learn.tf.estimator import Estimator
 
 def preprocess(data):
     data['image'] = tf.cast(data["image"], tf.float32) / 255.
````
````diff
@@ -30,7 +30,7 @@ Pytorch DataLoader:
 ```python
 import torch
 from torchvision import datasets, transforms
-from zoo.orca.learn.pytorch import Estimator
+from bigdl.orca.learn.pytorch import Estimator
 
 train_loader = torch.utils.data.DataLoader(
         datasets.MNIST("/tmp/mnist", train=True, download=True,
@@ -41,7 +41,7 @@ train_loader = torch.utils.data.DataLoader(
         batch_size=batch_size, shuffle=True)
 
 est = Estimator.from_torch(model=torch_model, optimizer=torch_optim, loss=torch_criterion)
-zoo_estimator.fit(data=train_loader)
+est.fit(data=train_loader)
 ```
 
 Under the hood, Orca will automatically replicate the _TensorFlow Dataset_ or _PyTorch DataLoader_ pipeline on each node in the cluster, shard the input data, and execute the data pipelines using Apache Spark and/or Ray distributedly.
````
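Reviewer note: the snippet above leaves `torch_model`, `torch_optim`, `torch_criterion`, and `batch_size` to the surrounding page. A self-contained sketch of the same pattern, with a toy model assumed purely for illustration (any `nn.Module` works), after `init_orca_context` has been called:

```python
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from bigdl.orca.learn.pytorch import Estimator

batch_size = 64
train_loader = torch.utils.data.DataLoader(
        datasets.MNIST("/tmp/mnist", train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=batch_size, shuffle=True)

# Toy model, optimizer, and loss assumed for illustration only.
torch_model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
torch_optim = torch.optim.Adam(torch_model.parameters(), lr=1e-3)
torch_criterion = nn.CrossEntropyLoss()

est = Estimator.from_torch(model=torch_model, optimizer=torch_optim, loss=torch_criterion)
est.fit(data=train_loader)
```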
````diff
@@ -101,7 +101,7 @@ est.fit(data=df,
 
 ```python
 import numpy as np
-from zoo.orca.data import XShards
+from bigdl.orca.data import XShards
 
 train_images = np.random.random((20, 3, 224, 224))
 train_label_images = np.zeros(20)
````
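Reviewer note: the hunk cuts off before the arrays are wrapped into shards. A sketch of the likely continuation, assuming the `XShards.partition` API documented elsewhere in Orca:

```python
import numpy as np
from bigdl.orca.data import XShards

train_images = np.random.random((20, 3, 224, 224))
train_label_images = np.zeros(20)

# Partition the in-memory numpy arrays into a distributed XShards;
# "x" and "y" are the conventional keys for features and labels.
train_shards = XShards.partition({"x": train_images, "y": train_label_images})
```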
````diff
@@ -122,7 +122,7 @@ The user may use `XShards` to efficiently process large-size Pandas Dataframes i
 
 First, the user can read CSV, JSON or Parquet files (stored on local disk, HDFS, AWS S3, etc.) to obtain an `XShards` of Pandas Dataframe, as shown below:
 ```python
-from zoo.orca.data.pandas import read_csv
+from bigdl.orca.data.pandas import read_csv
 csv_path = "/path/to/csv_file_or_folder"
 shard = read_csv(csv_path)
 ```
````
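Reviewer note: each shard obtained from `read_csv` is a plain Pandas DataFrame, so per-shard processing is ordinary Pandas code. A sketch, assuming the `transform_shard` API and a hypothetical `label` column:

```python
from bigdl.orca.data.pandas import read_csv

shard = read_csv("/path/to/csv_file_or_folder")  # placeholder path

def drop_missing_labels(df):
    # Ordinary Pandas applied to one shard; "label" is a hypothetical column.
    return df.dropna(subset=["label"])

shard = shard.transform_shard(drop_missing_labels)
```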
````diff
@@ -163,7 +163,7 @@ View the related [Python API doc]() for more details.
 
 The user may create an MXNet `Estimator` as follows:
 ```python
-from zoo.orca.learn.mxnet import Estimator, create_config
+from bigdl.orca.learn.mxnet import Estimator, create_config
 
 def get_model(config):
     net = LeNet() # a mxnet.gluon.Block
````
````diff
@@ -200,10 +200,10 @@ View the related [Python API doc]() for more details.
 
 The user may create a BigDL `Estimator` as follows:
 ```python
-from bigdl.nn.criterion import *
-from bigdl.nn.layer import *
-from bigdl.optim.optimizer import *
-from zoo.orca.learn.bigdl import Estimator
+from bigdl.dllib.nn.criterion import *
+from bigdl.dllib.nn.layer import *
+from bigdl.dllib.optim.optimizer import *
+from bigdl.orca.learn.bigdl import Estimator
 
 linear_model = Sequential().add(Linear(2, 2))
 mse_criterion = MSECriterion()
````
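Reviewer note: this hunk stops just before the Estimator is constructed. A sketch of the likely continuation, assuming the `Estimator.from_bigdl` factory and the dllib `SGD` optimizer:

```python
from bigdl.dllib.nn.criterion import MSECriterion
from bigdl.dllib.nn.layer import Linear, Sequential
from bigdl.dllib.optim.optimizer import SGD
from bigdl.orca.learn.bigdl import Estimator

linear_model = Sequential().add(Linear(2, 2))
mse_criterion = MSECriterion()

# Assumed continuation: wrap the BigDL model and criterion in an Orca Estimator.
est = Estimator.from_bigdl(model=linear_model, loss=mse_criterion, optimizer=SGD())
```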
````diff
@@ -230,7 +230,7 @@ View the related [Python API doc]() for more details.
 
 The user may create an OpenVINO `Estimator` as follows:
 ```python
-from zoo.orca.learn.openvino import Estimator
+from bigdl.orca.learn.openvino import Estimator
 
 model_path = "The/file_path/to/the/OpenVINO_IR_xml_file"
 est = Estimator.from_openvino(model_path=model_path)
````
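Reviewer note: OpenVINO Estimators are inference-only, so the natural next call is `predict`. A sketch, assuming a numpy batch whose shape matches the IR model's input layer:

```python
import numpy as np
from bigdl.orca.learn.openvino import Estimator

est = Estimator.from_openvino(model_path="The/file_path/to/the/OpenVINO_IR_xml_file")

# Hypothetical input batch; the shape must match the OpenVINO IR input.
batch = np.random.random((4, 3, 224, 224)).astype(np.float32)
predictions = est.predict(batch)
```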
````diff
@@ -10,7 +10,7 @@
 An Orca program usually starts with the initialization of `OrcaContext` as follows:
 
 ```python
-from zoo.orca import init_orca_context
+from bigdl.orca import init_orca_context
 
 init_orca_context(...)
 ```
````
````diff
@@ -47,7 +47,7 @@ Under the hood, `OrcaContext` will automatically provision Apache Spark and/or R
 Users can easily retrieve `SparkContext` and `RayContext`, the main entry point for Spark and Ray respectively, via `OrcaContext`:
 
 ```python
-from zoo.orca import OrcaContext
+from bigdl.orca import OrcaContext
 
 sc = OrcaContext.get_spark_context()
 ray_ctx = OrcaContext.get_ray_context()
````
````diff
@@ -75,7 +75,7 @@ Users can make extra configurations when using the functionalities of Project Or
 After the Orca program finishes, the user can call `stop_orca_context` to release resources and shut down the underlying Spark and/or Ray execution engine.
 
 ```python
-from zoo.orca import stop_orca_context
+from bigdl.orca import stop_orca_context
 
 stop_orca_context()
 ```
````
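Reviewer note: the three hunks in this file together describe the full `OrcaContext` lifecycle. A minimal sketch, assuming local mode:

```python
from bigdl.orca import init_orca_context, stop_orca_context, OrcaContext

# Initialize: provisions Spark (and Ray, when requested) under the hood.
init_orca_context(cluster_mode="local", cores=4, memory="10g")

# Retrieve the underlying entry points when you need them directly.
sc = OrcaContext.get_spark_context()
spark = OrcaContext.get_spark_session()

# ... run the Orca program ...

# Release resources and shut down the execution engine.
stop_orca_context()
```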
````diff
@@ -1,14 +1,43 @@
 # The Orca Library
 
+## 1. Overview
+
 Most AI projects start with a Python notebook running on a single laptop; however, one usually needs to go through a mountain of pains to scale it to handle larger data set in a distributed fashion. The  _**Orca**_ library seamlessly scales out your single node Python notebook across large clusters (so as to process distributed Big Data).
 
+## 2. Install
+We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the Python environment.
+```bash
+conda create -n py37 python=3.7  # "py37" is conda environment name, you can use any name you like.
+conda activate py37
+pip install bigdl-orca
+```
+
+When installing bigdl-orca with pip, you can specify the extras key `[ray]` to install the additional
+dependencies essential for running [RayOnSpark](../../Ray/Overview/ray.md).
+```bash
+pip install bigdl-orca[ray]
+```
+
+You can install the bigdl-orca nightly release version using:
+```bash
+pip install --pre --upgrade bigdl-orca
+pip install --pre --upgrade bigdl-orca[ray]
+```
+
+## 3. Run
+
+This section uses TensorFlow 1.15, and you should install TensorFlow before running this example:
+```bash
+pip install tensorflow==1.15
+```
+
 First, initialize [Orca Context](orca-context.md):
 
 ```python
-from zoo.orca import init_orca_context
+from bigdl.orca import init_orca_context, OrcaContext
 
 # cluster_mode can be "local", "k8s" or "yarn"
-sc = init_orca_context(cluster_mode="yarn", cores=4, memory="10g", num_nodes=2)
+sc = init_orca_context(cluster_mode="local", cores=4, memory="10g", num_nodes=1)
 ```
 
 Next, perform [data-parallel processing in Orca](data-parallel-processing.md) (supporting standard Spark Dataframes, TensorFlow Dataset, PyTorch DataLoader, Pandas, etc.):
````
````diff
@@ -16,6 +45,7 @@ Next, perform [data-parallel processing in Orca](data-parallel-processing.md) (s
 ```python
 from pyspark.sql.functions import array
 
+spark = OrcaContext.get_spark_session()
 df = spark.read.parquet(file_path)
 df = df.withColumn('user', array('user')) \
        .withColumn('item', array('item'))
````
````diff
@@ -25,7 +55,7 @@ Finally, use [sklearn-style Estimator APIs in Orca](distributed-training-inferen
 
 ```python
 from tensorflow import keras
-from zoo.orca.learn.tf.estimator import Estimator
+from bigdl.orca.learn.tf.estimator import Estimator
 
 user = keras.layers.Input(shape=[1])
 item = keras.layers.Input(shape=[1])
````
````diff
@@ -44,5 +74,7 @@ est.fit(data=df,
         label_cols=['label'])
 ```
 
+## Get Started
+
 See [TensorFlow](../QuickStart/orca-tf-quickstart.md) and [PyTorch](../QuickStart/orca-pytorch-quickstart.md) quickstart for more details.
 
````
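Reviewer note: for anyone tracing the "Run" example across these hunks, the pieces compose roughly as follows. A sketch assuming TensorFlow 1.15, a toy two-tower model, and a placeholder parquet path; only the imports, the DataFrame prep, and the `fit` signature are taken from the diff itself:

```python
from bigdl.orca import init_orca_context, OrcaContext
from bigdl.orca.learn.tf.estimator import Estimator
from pyspark.sql.functions import array
from tensorflow import keras

sc = init_orca_context(cluster_mode="local", cores=4, memory="10g")

spark = OrcaContext.get_spark_session()
df = spark.read.parquet("/path/to/data.parquet")  # placeholder path
df = df.withColumn('user', array('user')) \
       .withColumn('item', array('item'))

# Toy two-tower model assumed for illustration.
user = keras.layers.Input(shape=[1])
item = keras.layers.Input(shape=[1])
merged = keras.layers.concatenate([user, item], axis=1)
output = keras.layers.Dense(1, activation="sigmoid")(merged)
model = keras.models.Model(inputs=[user, item], outputs=output)
model.compile(optimizer="adam", loss="binary_crossentropy")

est = Estimator.from_keras(keras_model=model)
est.fit(data=df,
        batch_size=64,   # illustrative values
        epochs=2,
        feature_cols=['user', 'item'],
        label_cols=['label'])
```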
````diff
@@ -2,7 +2,7 @@
 
 ---
 
-[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb)
+[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb)
 
 ---
 
````
````diff
@@ -14,9 +14,9 @@
 We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
 
 ```bash
-conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
-conda activate zoo
-pip install analytics_zoo-${VERSION} # install either version 0.9 or latest nightly build
+conda create -n py37 python=3.7  # "py37" is conda environment name, you can use any name you like.
+conda activate py37
+pip install bigdl-orca
 pip install tensorflow==1.15.0
 pip install tensorflow-datasets==2.1.0
 pip install psutil
````
````diff
@@ -26,7 +26,7 @@ pip install scikit-learn
 
 ### **Step 1: Init Orca Context**
 ```python
-from zoo.orca import init_orca_context, stop_orca_context
+from bigdl.orca import init_orca_context, stop_orca_context
 
 if cluster_mode == "local":  # For local machine
     init_orca_context(cluster_mode="local", cores=4, memory="10g")
````
````diff
@@ -60,7 +60,7 @@ model = keras.Sequential(
      ]
 )
 
-model.compile(optimizer=tf.keras.optimizers.RMSprop(),
+model.compile(optimizer=keras.optimizers.RMSprop(),
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])
 ```
````
````diff
@@ -77,6 +77,7 @@ def preprocess(data):
     return data['image'], data['label']
 
 # get DataSet
+dataset_dir = "./mnist_data"
 mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
 mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)
 
````
````diff
@@ -89,7 +90,7 @@ mnist_test = mnist_test.map(preprocess)
 First, create an Estimator.
 
 ```python
-from zoo.orca.learn.tf.estimator import Estimator
+from bigdl.orca.learn.tf.estimator import Estimator
 
 est = Estimator.from_keras(keras_model=model)
 ```
````
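Reviewer note: the next step of this quickstart (not touched by the diff) fits the Estimator on the tf.data pipelines built above. A sketch with illustrative values, assuming `evaluate` accepts the same dataset:

```python
est.fit(data=mnist_train,
        batch_size=320,
        epochs=5,
        validation_data=mnist_test)

result = est.evaluate(mnist_test)
print(result)
```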
````diff
@@ -2,7 +2,7 @@
 
 ---
 
-[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb)
+[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb)
 
 ---
 
````
````diff
@@ -13,15 +13,15 @@
 [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) is needed to prepare the Python environment for running this example. Please refer to the [install guide](../../UserGuide/python.md) for more details.
 
 ```bash
-conda create -n zoo python=3.7 # zoo is conda environment name, you can use any name you like.
-conda activate zoo
-pip install analytics-zoo[ray]
+conda create -n py37 python=3.7  # "py37" is conda environment name, you can use any name you like.
+conda activate py37
+pip install bigdl-orca[ray]
 pip install torch==1.7.1 torchvision==0.8.2
 ```
 
 ### **Step 1: Init Orca Context**
 ```python
-from zoo.orca import init_orca_context, stop_orca_context
+from bigdl.orca import init_orca_context, stop_orca_context
 
 if cluster_mode == "local":  # For local machine
     init_orca_context(cores=4, memory="10g")
````
````diff
@@ -114,8 +114,8 @@ def test_loader_creator(config, batch_size):
 First, create an Estimator:
 
 ```python
-from zoo.orca.learn.pytorch import Estimator
-from zoo.orca.learn.metrics import Accuracy
+from bigdl.orca.learn.pytorch import Estimator
+from bigdl.orca.learn.metrics import Accuracy
 
 est = Estimator.from_torch(model=model_creator, optimizer=optim_creator, loss=criterion, metrics=[Accuracy()],
                            backend="torch_distributed")
````
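Reviewer note: with the `torch_distributed` backend, the Estimator consumes creator functions rather than live loaders. A sketch of the fit/evaluate calls this quickstart goes on to make; `train_loader_creator` is assumed to be defined symmetrically to the `test_loader_creator` shown in the hunk header, and the values are illustrative:

```python
# Creator functions receive (config, batch_size) and return a DataLoader.
est.fit(data=train_loader_creator, epochs=10, batch_size=64)

result = est.evaluate(data=test_loader_creator, batch_size=64)
print(result)
```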
````diff
@@ -2,7 +2,7 @@
 
 ---
 
-[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb)
+[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb)
 
 ---
 
````
````diff
@@ -14,9 +14,9 @@
 
 
 ```bash
-conda create -n zoo python=3.7 # zoo is conda environment name, you can use any name you like.
-conda activate zoo
-pip install analytics-zoo # install either version 0.9 or latest nightly build
+conda create -n py37 python=3.7  # "py37" is conda environment name, you can use any name you like.
+conda activate py37
+pip install bigdl-orca
 pip install torch==1.7.1 torchvision==0.8.2
 pip install six cloudpickle
 pip install jep==3.9.0
````
````diff
@@ -24,7 +24,7 @@ pip install jep==3.9.0
 
 ### **Step 1: Init Orca Context**
 ```python
-from zoo.orca import init_orca_context, stop_orca_context
+from bigdl.orca import init_orca_context, stop_orca_context
 
 if cluster_mode == "local":  # For local machine
     init_orca_context(cores=4, memory="10g")
````
````diff
@@ -105,15 +105,15 @@ test_loader = torch.utils.data.DataLoader(
     batch_size=test_batch_size, shuffle=False)
 ```
 
-Alternatively, we can also use a [Data Creator Function](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist_data_creator_func.ipynb) or [Orca XShards](../Overview/data-parallel-processing) as the input data, especially when the data size is very large.
+Alternatively, we can also use a [Data Creator Function](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist_data_creator_func.ipynb) or [Orca XShards](../Overview/data-parallel-processing) as the input data, especially when the data size is very large.
 
 ### **Step 4: Fit with Orca Estimator**
 
 First, create an Estimator:
 
 ```python
-from zoo.orca.learn.pytorch import Estimator
-from zoo.orca.learn.metrics import Accuracy
+from bigdl.orca.learn.pytorch import Estimator
+from bigdl.orca.learn.metrics import Accuracy
 
 est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion, metrics=[Accuracy()])
 ```
````
````diff
@@ -121,7 +121,7 @@ est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion, metrics=
 Next, fit and evaluate using the Estimator:
 
 ```python
-from zoo.orca.learn.trigger import EveryEpoch
+from bigdl.orca.learn.trigger import EveryEpoch
 
 est.fit(data=train_loader, epochs=10, validation_data=test_loader,
         checkpoint_trigger=EveryEpoch())
````
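Reviewer note: the quickstart then evaluates on the held-out loader. A sketch, assuming `evaluate` mirrors `fit` and reports the `Accuracy()` metric passed to `Estimator.from_torch` above:

```python
result = est.evaluate(data=test_loader)
print(result)
```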
````diff
@@ -2,7 +2,7 @@
 
 ---
 
-[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb)
+[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb)
 
 ---
 
````
````diff
@@ -14,17 +14,17 @@ We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-g
 
 
 ```bash
-conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
-conda activate zoo
-pip install analytics-zoo # install either version 0.9 or latest nightly build
-pip install tensorflow==1.15.0
+conda create -n py37 python=3.7  # "py37" is conda environment name, you can use any name you like.
+conda activate py37
+pip install bigdl-orca
+pip install tensorflow==1.15
 pip install tensorflow-datasets==2.0
 pip install psutil
 ```
 
 ### **Step 1: Init Orca Context**
 ```python
-from zoo.orca import init_orca_context, stop_orca_context
+from bigdl.orca import init_orca_context, stop_orca_context
 
 if cluster_mode == "local":  # For local machine
     init_orca_context(cluster_mode="local", cores=4, memory="10g")
````
````diff
@@ -83,6 +83,7 @@ def preprocess(data):
     return data['image'], data['label']
 
 # get DataSet
+dataset_dir = "./mnist_data"
 mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
 mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)
 
````
````diff
@@ -95,7 +96,7 @@ mnist_test = mnist_test.map(preprocess)
 First, create an Estimator.
 
 ```python
-from zoo.orca.learn.tf.estimator import Estimator
+from bigdl.orca.learn.tf.estimator import Estimator
 
 est = Estimator.from_graph(inputs=images,
                            outputs=logits,
````
````diff
@@ -107,7 +108,7 @@ est = Estimator.from_graph(inputs=images,
 
 Next, fit and evaluate using the Estimator.
 ```python
-est.fit(data=train_dataset,
+est.fit(data=mnist_train,
         batch_size=320,
         epochs=5,
         validation_data=mnist_test)
````
````diff
@@ -2,7 +2,7 @@
 
 ---
 
-[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb)
+[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb)  [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb)
 
 ---
 
````
````diff
@@ -13,15 +13,15 @@
 We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
 
 ```bash
-conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
-conda activate zoo
-pip install analytics-zoo[ray] # install either version 0.9 or latest nightly build
+conda create -n py37 python=3.7  # "py37" is conda environment name, you can use any name you like.
+conda activate py37
+pip install bigdl-orca[ray]
 pip install tensorflow==2.3.0
 ```
 
 ### **Step 1: Init Orca Context**
 ```python
-from zoo.orca import init_orca_context, stop_orca_context
+from bigdl.orca import init_orca_context, stop_orca_context
 
 if cluster_mode == "local":  # For local machine
     init_orca_context(cluster_mode="local", cores=4, memory="10g")
````
````diff
@@ -96,7 +96,7 @@ def val_data_creator(config, batch_size):
 First, create an Estimator.
 
 ```python
-from zoo.orca.learn.tf2 import Estimator
+from bigdl.orca.learn.tf2 import Estimator
 
 est = Estimator.from_keras(model_creator=model_creator, workers_per_node=2)
 ```
````
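Reviewer note: the TF2 Estimator takes creator functions for the data as well as the model. A sketch of the `fit` call this quickstart goes on to make; `train_data_creator` is assumed to be defined symmetrically to the `val_data_creator` shown in the hunk header, and batch size and epochs are illustrative:

```python
stats = est.fit(train_data_creator,
                epochs=5,
                batch_size=320,
                steps_per_epoch=60000 // 320,
                validation_data=val_data_creator,
                validation_steps=10000 // 320)

est.shutdown()
print(stats)
```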
````diff
@@ -118,6 +118,6 @@ est.shutdown()
 print(stats)
 ```
 
-That's it, the same code can run seamlessly in your local laptop and the distribute K8s or Hadoop cluster.
+That's it, the same code can run seamlessly on your local laptop and on a distributed K8s or Hadoop cluster.
 
 **Note:** You should call `stop_orca_context()` when your program finishes.
````