Update orca docs (#3278)
* update orca docs * update colab * update * meet comments * update * update
This commit is contained in:
parent
bc8d5733e8
commit
0334eda6fe
10 changed files with 91 additions and 56 deletions
|
|
@ -77,7 +77,7 @@ Most AI projects start with a Python notebook running on a single laptop; howeve
|
||||||
First, initialize [Orca Context](https://bigdl.readthedocs.io/en/latest/doc/Orca/Overview/orca-context.html):
|
First, initialize [Orca Context](https://bigdl.readthedocs.io/en/latest/doc/Orca/Overview/orca-context.html):
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from bigdl.orca import init_orca_context
|
from bigdl.orca import init_orca_context, OrcaContext
|
||||||
|
|
||||||
# cluster_mode can be "local", "k8s" or "yarn"
|
# cluster_mode can be "local", "k8s" or "yarn"
|
||||||
sc = init_orca_context(cluster_mode="yarn", cores=4, memory="10g", num_nodes=2)
|
sc = init_orca_context(cluster_mode="yarn", cores=4, memory="10g", num_nodes=2)
|
||||||
|
|
@ -88,6 +88,7 @@ Next, perform [data-parallel processing in Orca](https://bigdl.readthedocs.io/en
|
||||||
```python
|
```python
|
||||||
from pyspark.sql.functions import array
|
from pyspark.sql.functions import array
|
||||||
|
|
||||||
|
spark = OrcaContext.get_spark_session()
|
||||||
df = spark.read.parquet(file_path)
|
df = spark.read.parquet(file_path)
|
||||||
df = df.withColumn('user', array('user')) \
|
df = df.withColumn('user', array('user')) \
|
||||||
.withColumn('item', array('item'))
|
.withColumn('item', array('item'))
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ TensorFlow Dataset:
|
||||||
```python
|
```python
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import tensorflow_datasets as tfds
|
import tensorflow_datasets as tfds
|
||||||
from zoo.orca.learn.tf.estimator import Estimator
|
from bigdl.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
def preprocess(data):
|
def preprocess(data):
|
||||||
data['image'] = tf.cast(data["image"], tf.float32) / 255.
|
data['image'] = tf.cast(data["image"], tf.float32) / 255.
|
||||||
|
|
@ -30,7 +30,7 @@ Pytorch DataLoader:
|
||||||
```python
|
```python
|
||||||
import torch
|
import torch
|
||||||
from torchvision import datasets, transforms
|
from torchvision import datasets, transforms
|
||||||
from zoo.orca.learn.pytorch import Estimator
|
from bigdl.orca.learn.pytorch import Estimator
|
||||||
|
|
||||||
train_loader = torch.utils.data.DataLoader(
|
train_loader = torch.utils.data.DataLoader(
|
||||||
datasets.MNIST("/tmp/mnist", train=True, download=True,
|
datasets.MNIST("/tmp/mnist", train=True, download=True,
|
||||||
|
|
@ -41,7 +41,7 @@ train_loader = torch.utils.data.DataLoader(
|
||||||
batch_size=batch_size, shuffle=True)
|
batch_size=batch_size, shuffle=True)
|
||||||
|
|
||||||
est = Estimator.from_torch(model=torch_model, optimizer=torch_optim, loss=torch_criterion)
|
est = Estimator.from_torch(model=torch_model, optimizer=torch_optim, loss=torch_criterion)
|
||||||
zoo_estimator.fit(data=train_loader)
|
est.fit(data=train_loader)
|
||||||
```
|
```
|
||||||
|
|
||||||
Under the hood, Orca will automatically replicate the _TensorFlow Dataset_ or _PyTorch DataLoader_ pipeline on each node in the cluster, shard the input data, and execute the data pipelines in a distributed fashion using Apache Spark and/or Ray.
|
Under the hood, Orca will automatically replicate the _TensorFlow Dataset_ or _PyTorch DataLoader_ pipeline on each node in the cluster, shard the input data, and execute the data pipelines in a distributed fashion using Apache Spark and/or Ray.
|
||||||
|
|
@ -101,7 +101,7 @@ est.fit(data=df,
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from zoo.orca.data import XShards
|
from bigdl.orca.data import XShards
|
||||||
|
|
||||||
train_images = np.random.random((20, 3, 224, 224))
|
train_images = np.random.random((20, 3, 224, 224))
|
||||||
train_label_images = np.zeros(20)
|
train_label_images = np.zeros(20)
|
||||||
|
|
@ -122,7 +122,7 @@ The user may use `XShards` to efficiently process large-size Pandas Dataframes i
|
||||||
|
|
||||||
First, the user can read CSV, JSON or Parquet files (stored on local disk, HDFS, AWS S3, etc.) to obtain an `XShards` of Pandas Dataframe, as shown below:
|
First, the user can read CSV, JSON or Parquet files (stored on local disk, HDFS, AWS S3, etc.) to obtain an `XShards` of Pandas Dataframe, as shown below:
|
||||||
```python
|
```python
|
||||||
from zoo.orca.data.pandas import read_csv
|
from bigdl.orca.data.pandas import read_csv
|
||||||
csv_path = "/path/to/csv_file_or_folder"
|
csv_path = "/path/to/csv_file_or_folder"
|
||||||
shard = read_csv(csv_path)
|
shard = read_csv(csv_path)
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -163,7 +163,7 @@ View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
The user may create an MXNet `Estimator` as follows:
|
The user may create an MXNet `Estimator` as follows:
|
||||||
```python
|
```python
|
||||||
from zoo.orca.learn.mxnet import Estimator, create_config
|
from bigdl.orca.learn.mxnet import Estimator, create_config
|
||||||
|
|
||||||
def get_model(config):
|
def get_model(config):
|
||||||
net = LeNet() # a mxnet.gluon.Block
|
net = LeNet() # a mxnet.gluon.Block
|
||||||
|
|
@ -200,10 +200,10 @@ View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
The user may create a BigDL `Estimator` as follows:
|
The user may create a BigDL `Estimator` as follows:
|
||||||
```python
|
```python
|
||||||
from bigdl.nn.criterion import *
|
from bigdl.dllib.nn.criterion import *
|
||||||
from bigdl.nn.layer import *
|
from bigdl.dllib.nn.layer import *
|
||||||
from bigdl.optim.optimizer import *
|
from bigdl.dllib.optim.optimizer import *
|
||||||
from zoo.orca.learn.bigdl import Estimator
|
from bigdl.orca.learn.bigdl import Estimator
|
||||||
|
|
||||||
linear_model = Sequential().add(Linear(2, 2))
|
linear_model = Sequential().add(Linear(2, 2))
|
||||||
mse_criterion = MSECriterion()
|
mse_criterion = MSECriterion()
|
||||||
|
|
@ -230,7 +230,7 @@ View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
The user may create an OpenVINO `Estimator` as follows:
|
The user may create an OpenVINO `Estimator` as follows:
|
||||||
```python
|
```python
|
||||||
from zoo.orca.learn.openvino import Estimator
|
from bigdl.orca.learn.openvino import Estimator
|
||||||
|
|
||||||
model_path = "The/file_path/to/the/OpenVINO_IR_xml_file"
|
model_path = "The/file_path/to/the/OpenVINO_IR_xml_file"
|
||||||
est = Estimator.from_openvino(model_path=model_path)
|
est = Estimator.from_openvino(model_path=model_path)
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@
|
||||||
An Orca program usually starts with the initialization of `OrcaContext` as follows:
|
An Orca program usually starts with the initialization of `OrcaContext` as follows:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca import init_orca_context
|
from bigdl.orca import init_orca_context
|
||||||
|
|
||||||
init_orca_context(...)
|
init_orca_context(...)
|
||||||
```
|
```
|
||||||
|
|
@ -47,7 +47,7 @@ Under the hood, `OrcaContext` will automatically provision Apache Spark and/or R
|
||||||
Users can easily retrieve `SparkContext` and `RayContext`, the main entry point for Spark and Ray respectively, via `OrcaContext`:
|
Users can easily retrieve `SparkContext` and `RayContext`, the main entry point for Spark and Ray respectively, via `OrcaContext`:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca import OrcaContext
|
from bigdl.orca import OrcaContext
|
||||||
|
|
||||||
sc = OrcaContext.get_spark_context()
|
sc = OrcaContext.get_spark_context()
|
||||||
ray_ctx = OrcaContext.get_ray_context()
|
ray_ctx = OrcaContext.get_ray_context()
|
||||||
|
|
@ -75,7 +75,7 @@ Users can make extra configurations when using the functionalities of Project Or
|
||||||
After the Orca program finishes, the user can call `stop_orca_context` to release resources and shut down the underlying Spark and/or Ray execution engine.
|
After the Orca program finishes, the user can call `stop_orca_context` to release resources and shut down the underlying Spark and/or Ray execution engine.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca import stop_orca_context
|
from bigdl.orca import stop_orca_context
|
||||||
|
|
||||||
stop_orca_context()
|
stop_orca_context()
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,43 @@
|
||||||
# The Orca Library
|
# The Orca Library
|
||||||
|
|
||||||
|
## 1. Overview
|
||||||
|
|
||||||
Most AI projects start with a Python notebook running on a single laptop; however, one usually needs to go through a mountain of pains to scale it to handle larger data sets in a distributed fashion. The _**Orca**_ library seamlessly scales out your single node Python notebook across large clusters (so as to process distributed Big Data).
|
Most AI projects start with a Python notebook running on a single laptop; however, one usually needs to go through a mountain of pains to scale it to handle larger data sets in a distributed fashion. The _**Orca**_ library seamlessly scales out your single node Python notebook across large clusters (so as to process distributed Big Data).
|
||||||
|
|
||||||
|
## 2. Install
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the Python environment.
|
||||||
|
```bash
|
||||||
|
conda create -n py37 python=3.7 # "py37" is conda environment name, you can use any name you like.
|
||||||
|
conda activate py37
|
||||||
|
pip install bigdl-orca
|
||||||
|
```
|
||||||
|
|
||||||
|
When installing bigdl-orca with pip, you can specify the extras key `[ray]` to also install the additional dependencies
|
||||||
|
essential for running [RayOnSpark](../../Ray/Overview/ray.md):
|
||||||
|
```bash
|
||||||
|
pip install bigdl-orca[ray]
|
||||||
|
```
|
||||||
|
|
||||||
|
You can install the nightly release version of bigdl-orca using:
|
||||||
|
```bash
|
||||||
|
pip install --pre --upgrade bigdl-orca
|
||||||
|
pip install --pre --upgrade bigdl-orca[ray]
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Run
|
||||||
|
|
||||||
|
This section uses TensorFlow 1.15, and you should install TensorFlow before running this example:
|
||||||
|
```bash
|
||||||
|
pip install tensorflow==1.15
|
||||||
|
```
|
||||||
|
|
||||||
First, initialize [Orca Context](orca-context.md):
|
First, initialize [Orca Context](orca-context.md):
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca import init_orca_context
|
from bigdl.orca import init_orca_context, OrcaContext
|
||||||
|
|
||||||
# cluster_mode can be "local", "k8s" or "yarn"
|
# cluster_mode can be "local", "k8s" or "yarn"
|
||||||
sc = init_orca_context(cluster_mode="yarn", cores=4, memory="10g", num_nodes=2)
|
sc = init_orca_context(cluster_mode="local", cores=4, memory="10g", num_nodes=1)
|
||||||
```
|
```
|
||||||
|
|
||||||
Next, perform [data-parallel processing in Orca](data-parallel-processing.md) (supporting standard Spark Dataframes, TensorFlow Dataset, PyTorch DataLoader, Pandas, etc.):
|
Next, perform [data-parallel processing in Orca](data-parallel-processing.md) (supporting standard Spark Dataframes, TensorFlow Dataset, PyTorch DataLoader, Pandas, etc.):
|
||||||
|
|
@ -16,6 +45,7 @@ Next, perform [data-parallel processing in Orca](data-parallel-processing.md) (s
|
||||||
```python
|
```python
|
||||||
from pyspark.sql.functions import array
|
from pyspark.sql.functions import array
|
||||||
|
|
||||||
|
spark = OrcaContext.get_spark_session()
|
||||||
df = spark.read.parquet(file_path)
|
df = spark.read.parquet(file_path)
|
||||||
df = df.withColumn('user', array('user')) \
|
df = df.withColumn('user', array('user')) \
|
||||||
.withColumn('item', array('item'))
|
.withColumn('item', array('item'))
|
||||||
|
|
@ -25,7 +55,7 @@ Finally, use [sklearn-style Estimator APIs in Orca](distributed-training-inferen
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from tensorflow import keras
|
from tensorflow import keras
|
||||||
from zoo.orca.learn.tf.estimator import Estimator
|
from bigdl.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
user = keras.layers.Input(shape=[1])
|
user = keras.layers.Input(shape=[1])
|
||||||
item = keras.layers.Input(shape=[1])
|
item = keras.layers.Input(shape=[1])
|
||||||
|
|
@ -44,5 +74,7 @@ est.fit(data=df,
|
||||||
label_cols=['label'])
|
label_cols=['label'])
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 4. Get Started
|
||||||
|
|
||||||
See [TensorFlow](../QuickStart/orca-tf-quickstart.md) and [PyTorch](../QuickStart/orca-pytorch-quickstart.md) quickstart for more details.
|
See [TensorFlow](../QuickStart/orca-tf-quickstart.md) and [PyTorch](../QuickStart/orca-pytorch-quickstart.md) quickstart for more details.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb)
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -14,9 +14,9 @@
|
||||||
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
conda create -n py37 python=3.7 # "py37" is conda environment name, you can use any name you like.
|
||||||
conda activate zoo
|
conda activate py37
|
||||||
pip install analytics_zoo-${VERSION} # install either version 0.9 or latest nightly build
|
pip install bigdl-orca
|
||||||
pip install tensorflow==1.15.0
|
pip install tensorflow==1.15.0
|
||||||
pip install tensorflow-datasets==2.1.0
|
pip install tensorflow-datasets==2.1.0
|
||||||
pip install psutil
|
pip install psutil
|
||||||
|
|
@ -26,7 +26,7 @@ pip install scikit-learn
|
||||||
|
|
||||||
### **Step 1: Init Orca Context**
|
### **Step 1: Init Orca Context**
|
||||||
```python
|
```python
|
||||||
from zoo.orca import init_orca_context, stop_orca_context
|
from bigdl.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
if cluster_mode == "local": # For local machine
|
if cluster_mode == "local": # For local machine
|
||||||
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
||||||
|
|
@ -60,7 +60,7 @@ model = keras.Sequential(
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
model.compile(optimizer=tf.keras.optimizers.RMSprop(),
|
model.compile(optimizer=keras.optimizers.RMSprop(),
|
||||||
loss='sparse_categorical_crossentropy',
|
loss='sparse_categorical_crossentropy',
|
||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
```
|
```
|
||||||
|
|
@ -77,6 +77,7 @@ def preprocess(data):
|
||||||
return data['image'], data['label']
|
return data['image'], data['label']
|
||||||
|
|
||||||
# get DataSet
|
# get DataSet
|
||||||
|
dataset_dir = "./mnist_data"
|
||||||
mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
|
mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
|
||||||
mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)
|
mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)
|
||||||
|
|
||||||
|
|
@ -89,7 +90,7 @@ mnist_test = mnist_test.map(preprocess)
|
||||||
First, create an Estimator.
|
First, create an Estimator.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca.learn.tf.estimator import Estimator
|
from bigdl.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
est = Estimator.from_keras(keras_model=model)
|
est = Estimator.from_keras(keras_model=model)
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb)
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -13,15 +13,15 @@
|
||||||
[Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) is needed to prepare the Python environment for running this example. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
[Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) is needed to prepare the Python environment for running this example. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
conda create -n zoo python=3.7 # zoo is conda environment name, you can use any name you like.
|
conda create -n py37 python=3.7 # "py37" is conda environment name, you can use any name you like.
|
||||||
conda activate zoo
|
conda activate py37
|
||||||
pip install analytics-zoo[ray]
|
pip install bigdl-orca[ray]
|
||||||
pip install torch==1.7.1 torchvision==0.8.2
|
pip install torch==1.7.1 torchvision==0.8.2
|
||||||
```
|
```
|
||||||
|
|
||||||
### **Step 1: Init Orca Context**
|
### **Step 1: Init Orca Context**
|
||||||
```python
|
```python
|
||||||
from zoo.orca import init_orca_context, stop_orca_context
|
from bigdl.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
if cluster_mode == "local": # For local machine
|
if cluster_mode == "local": # For local machine
|
||||||
init_orca_context(cores=4, memory="10g")
|
init_orca_context(cores=4, memory="10g")
|
||||||
|
|
@ -114,8 +114,8 @@ def test_loader_creator(config, batch_size):
|
||||||
First, Create an Estimator
|
First, Create an Estimator
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca.learn.pytorch import Estimator
|
from bigdl.orca.learn.pytorch import Estimator
|
||||||
from zoo.orca.learn.metrics import Accuracy
|
from bigdl.orca.learn.metrics import Accuracy
|
||||||
|
|
||||||
est = Estimator.from_torch(model=model_creator, optimizer=optim_creator, loss=criterion, metrics=[Accuracy()],
|
est = Estimator.from_torch(model=model_creator, optimizer=optim_creator, loss=criterion, metrics=[Accuracy()],
|
||||||
backend="torch_distributed")
|
backend="torch_distributed")
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb)
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -14,9 +14,9 @@
|
||||||
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
conda create -n zoo python=3.7 # zoo is conda environment name, you can use any name you like.
|
conda create -n py37 python=3.7 # "py37" is conda environment name, you can use any name you like.
|
||||||
conda activate zoo
|
conda activate py37
|
||||||
pip install analytics-zoo # install either version 0.9 or latest nightly build
|
pip install bigdl-orca
|
||||||
pip install torch==1.7.1 torchvision==0.8.2
|
pip install torch==1.7.1 torchvision==0.8.2
|
||||||
pip install six cloudpickle
|
pip install six cloudpickle
|
||||||
pip install jep==3.9.0
|
pip install jep==3.9.0
|
||||||
|
|
@ -24,7 +24,7 @@ pip install jep==3.9.0
|
||||||
|
|
||||||
### **Step 1: Init Orca Context**
|
### **Step 1: Init Orca Context**
|
||||||
```python
|
```python
|
||||||
from zoo.orca import init_orca_context, stop_orca_context
|
from bigdl.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
if cluster_mode == "local": # For local machine
|
if cluster_mode == "local": # For local machine
|
||||||
init_orca_context(cores=4, memory="10g")
|
init_orca_context(cores=4, memory="10g")
|
||||||
|
|
@ -105,15 +105,15 @@ test_loader = torch.utils.data.DataLoader(
|
||||||
batch_size=test_batch_size, shuffle=False)
|
batch_size=test_batch_size, shuffle=False)
|
||||||
```
|
```
|
||||||
|
|
||||||
Alternatively, we can also use a [Data Creator Function](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist_data_creator_func.ipynb) or [Orca XShards](../Overview/data-parallel-processing) as the input data, especially when the data size is very large)
|
Alternatively, we can also use a [Data Creator Function](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist_data_creator_func.ipynb) or [Orca XShards](../Overview/data-parallel-processing) as the input data, especially when the data size is very large.
|
||||||
|
|
||||||
### **Step 4: Fit with Orca Estimator**
|
### **Step 4: Fit with Orca Estimator**
|
||||||
|
|
||||||
First, Create an Estimator
|
First, Create an Estimator
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca.learn.pytorch import Estimator
|
from bigdl.orca.learn.pytorch import Estimator
|
||||||
from zoo.orca.learn.metrics import Accuracy
|
from bigdl.orca.learn.metrics import Accuracy
|
||||||
|
|
||||||
est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion, metrics=[Accuracy()])
|
est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion, metrics=[Accuracy()])
|
||||||
```
|
```
|
||||||
|
|
@ -121,7 +121,7 @@ est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion, metrics=
|
||||||
Next, fit and evaluate using the Estimator
|
Next, fit and evaluate using the Estimator
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca.learn.trigger import EveryEpoch
|
from bigdl.orca.learn.trigger import EveryEpoch
|
||||||
|
|
||||||
est.fit(data=train_loader, epochs=10, validation_data=test_loader,
|
est.fit(data=train_loader, epochs=10, validation_data=test_loader,
|
||||||
checkpoint_trigger=EveryEpoch())
|
checkpoint_trigger=EveryEpoch())
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb)
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -14,17 +14,17 @@ We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-g
|
||||||
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
conda create -n py37 python=3.7 # "py37" is conda environment name, you can use any name you like.
|
||||||
conda activate zoo
|
conda activate py37
|
||||||
pip install analytics-zoo # install either version 0.9 or latest nightly build
|
pip install bigdl-orca
|
||||||
pip install tensorflow==1.15.0
|
pip install tensorflow==1.15
|
||||||
pip install tensorflow-datasets==2.0
|
pip install tensorflow-datasets==2.0
|
||||||
pip install psutil
|
pip install psutil
|
||||||
```
|
```
|
||||||
|
|
||||||
### **Step 1: Init Orca Context**
|
### **Step 1: Init Orca Context**
|
||||||
```python
|
```python
|
||||||
from zoo.orca import init_orca_context, stop_orca_context
|
from bigdl.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
if cluster_mode == "local": # For local machine
|
if cluster_mode == "local": # For local machine
|
||||||
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
||||||
|
|
@ -83,6 +83,7 @@ def preprocess(data):
|
||||||
return data['image'], data['label']
|
return data['image'], data['label']
|
||||||
|
|
||||||
# get DataSet
|
# get DataSet
|
||||||
|
dataset_dir = "./mnist_data"
|
||||||
mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
|
mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
|
||||||
mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)
|
mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)
|
||||||
|
|
||||||
|
|
@ -95,7 +96,7 @@ mnist_test = mnist_test.map(preprocess)
|
||||||
First, create an Estimator.
|
First, create an Estimator.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca.learn.tf.estimator import Estimator
|
from bigdl.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
est = Estimator.from_graph(inputs=images,
|
est = Estimator.from_graph(inputs=images,
|
||||||
outputs=logits,
|
outputs=logits,
|
||||||
|
|
@ -107,7 +108,7 @@ est = Estimator.from_graph(inputs=images,
|
||||||
|
|
||||||
Next, fit and evaluate using the Estimator.
|
Next, fit and evaluate using the Estimator.
|
||||||
```python
|
```python
|
||||||
est.fit(data=train_dataset,
|
est.fit(data=mnist_train,
|
||||||
batch_size=320,
|
batch_size=320,
|
||||||
epochs=5,
|
epochs=5,
|
||||||
validation_data=mnist_test)
|
validation_data=mnist_test)
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb)
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -13,15 +13,15 @@
|
||||||
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
conda create -n py37 python=3.7 # "py37" is conda environment name, you can use any name you like.
|
||||||
conda activate zoo
|
conda activate py37
|
||||||
pip install analytics-zoo[ray] # install either version 0.9 or latest nightly build
|
pip install bigdl-orca[ray]
|
||||||
pip install tensorflow==2.3.0
|
pip install tensorflow==2.3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
### **Step 1: Init Orca Context**
|
### **Step 1: Init Orca Context**
|
||||||
```python
|
```python
|
||||||
from zoo.orca import init_orca_context, stop_orca_context
|
from bigdl.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
if cluster_mode == "local": # For local machine
|
if cluster_mode == "local": # For local machine
|
||||||
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
||||||
|
|
@ -96,7 +96,7 @@ def val_data_creator(config, batch_size):
|
||||||
First, create an Estimator.
|
First, create an Estimator.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from zoo.orca.learn.tf2 import Estimator
|
from bigdl.orca.learn.tf2 import Estimator
|
||||||
|
|
||||||
est = Estimator.from_keras(model_creator=model_creator, workers_per_node=2)
|
est = Estimator.from_keras(model_creator=model_creator, workers_per_node=2)
|
||||||
```
|
```
|
||||||
|
|
@ -118,6 +118,6 @@ est.shutdown()
|
||||||
print(stats)
|
print(stats)
|
||||||
```
|
```
|
||||||
|
|
||||||
That's it, the same code can run seamlessly in your local laptop and the distribute K8s or Hadoop cluster.
|
That's it, the same code can run seamlessly on your local laptop and on a distributed K8s or Hadoop cluster.
|
||||||
|
|
||||||
**Note:** You should call `stop_orca_context()` when your program finishes.
|
**Note:** You should call `stop_orca_context()` when your program finishes.
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue