parent
							
								
									274eddfb1a
								
							
						
					
					
						commit
						6a4c40fc1f
					
				
					 2 changed files with 36 additions and 35 deletions
				
			
		| 
						 | 
					@ -10,12 +10,13 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 0: Prepare Environment
 | 
					### Step 0: Prepare Environment
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) is needed to prepare the Python environment for running this example. Please refer to the [install guide](../../UserGuide/python.md) for more details.
 | 
					We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../Overview/install.md) for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```bash
 | 
					```bash
 | 
				
			||||||
conda create -n py37 python=3.7  # "py37" is conda environment name, you can use any name you like.
 | 
					conda create -n py37 python=3.7  # "py37" is conda environment name, you can use any name you like.
 | 
				
			||||||
conda activate py37
 | 
					conda activate py37
 | 
				
			||||||
pip install --pre --upgrade bigdl-orca 
 | 
					
 | 
				
			||||||
 | 
					pip install bigdl-orca 
 | 
				
			||||||
pip install torch torchvision
 | 
					pip install torch torchvision
 | 
				
			||||||
pip install tqdm
 | 
					pip install tqdm
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
| 
						 | 
					@ -24,18 +25,17 @@ pip install tqdm
 | 
				
			||||||
```python
 | 
					```python
 | 
				
			||||||
from bigdl.orca import init_orca_context, stop_orca_context
 | 
					from bigdl.orca import init_orca_context, stop_orca_context
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cluster_mode = "local"
 | 
					 | 
				
			||||||
if cluster_mode == "local":  # For local machine
 | 
					if cluster_mode == "local":  # For local machine
 | 
				
			||||||
    init_orca_context(cores=4, memory="10g")
 | 
					    init_orca_context(cores=4, memory="4g")
 | 
				
			||||||
elif cluster_mode == "k8s":  # For K8s cluster
 | 
					elif cluster_mode == "k8s":  # For K8s cluster
 | 
				
			||||||
    init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
 | 
					    init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="4g", master=..., container_image=...)
 | 
				
			||||||
elif cluster_mode == "yarn":  # For Hadoop/YARN cluster
 | 
					elif cluster_mode == "yarn":  # For Hadoop/YARN cluster
 | 
				
			||||||
    init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
 | 
					    init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="4g")
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This is the only place where you need to specify local or distributed mode. View [Orca Context](../Overview/orca-context.md) for more details.
 | 
					This is the only place where you need to specify local or distributed mode. View [Orca Context](../Overview/orca-context.md) for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](../../UserGuide/hadoop.md) for more details.
 | 
					Please check the tutorials if you want to run on [Kubernetes](../Tutorial/k8s.md) or [Hadoop/YARN](../Tutorial/yarn.md) clusters.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 2: Define the Model
 | 
					### Step 2: Define the Model
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -63,8 +63,11 @@ class LeNet(nn.Module):
 | 
				
			||||||
        x = F.relu(self.fc1(x))
 | 
					        x = F.relu(self.fc1(x))
 | 
				
			||||||
        x = self.fc2(x)
 | 
					        x = self.fc2(x)
 | 
				
			||||||
        return F.log_softmax(x, dim=1)
 | 
					        return F.log_softmax(x, dim=1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					loss = nn.NLLLoss()
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
After defining your model, you need to define a *Model Creator Function* that takes the parameter `config` and returns an instance of your model, and a *Optimizer Creator Function* that has two parameters `model` and `config` and returns a PyTorch optimizer.
 | 
					
 | 
				
			||||||
 | 
					You need to define a *Model Creator Function* that takes the parameter `config` and returns an instance of your PyTorch model, and an *Optimizer Creator Function* that takes two parameters `model` and `config` and returns an instance of your PyTorch optimizer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```python
 | 
					```python
 | 
				
			||||||
def model_creator(config):
 | 
					def model_creator(config):
 | 
				
			||||||
| 
						 | 
					@ -72,18 +75,16 @@ def model_creator(config):
 | 
				
			||||||
    return model
 | 
					    return model
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def optim_creator(model, config):
 | 
					def optim_creator(model, config):
 | 
				
			||||||
    return torch.optim.Adam(model.parameters(), lr=0.001)
 | 
					    return torch.optim.Adam(model.parameters(), lr=config.get("lr", 0.001))
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 3: Define Train Dataset
 | 
					### Step 3: Define Train Dataset
 | 
				
			||||||
 | 
					
 | 
				
			||||||
You can define the dataset using a *Data Creator Function* that has two parameters `config` and `batch_size` and returns a [Pytorch DataLoader](https://pytorch.org/docs/stable/data.html). Orca also supports [Spark DataFrames](../Overview/data-parallel-processing.html#spark-dataframes) and [XShards](../Overview/data-parallel-processing.html#xshards-distributed-data-parallel-python-processing).
 | 
					You can define the dataset using a *Data Creator Function* that has two parameters `config` and `batch_size` and returns a [PyTorch DataLoader](https://pytorch.org/docs/stable/data.html). Orca also supports [Spark DataFrame](./spark-dataframe.md) and [Orca XShards](./xshards-pandas.md).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```python
 | 
					```python
 | 
				
			||||||
import torch
 | 
					 | 
				
			||||||
from torchvision import datasets, transforms
 | 
					from torchvision import datasets, transforms
 | 
				
			||||||
 | 
					
 | 
				
			||||||
batch_size = 64
 | 
					 | 
				
			||||||
dir = '/tmp/dataset'
 | 
					dir = '/tmp/dataset'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def train_loader_creator(config, batch_size):
 | 
					def train_loader_creator(config, batch_size):
 | 
				
			||||||
| 
						 | 
					@ -109,22 +110,24 @@ def test_loader_creator(config, batch_size):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 4: Fit with Orca Estimator
 | 
					### Step 4: Fit with Orca Estimator
 | 
				
			||||||
 | 
					
 | 
				
			||||||
First, Create an Estimator
 | 
					First, Create an Orca Estimator for PyTorch.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```python
 | 
					```python
 | 
				
			||||||
from bigdl.orca.learn.pytorch import Estimator 
 | 
					from bigdl.orca.learn.pytorch import Estimator 
 | 
				
			||||||
from bigdl.orca.learn.metrics import Accuracy
 | 
					from bigdl.orca.learn.metrics import Accuracy
 | 
				
			||||||
 | 
					
 | 
				
			||||||
est = Estimator.from_torch(model=model_creator, optimizer=optim_creator, loss=nn.NLLLoss(), metrics=[Accuracy()], use_tqdm=True)
 | 
					est = Estimator.from_torch(model=model_creator, optimizer=optim_creator, loss=loss,
 | 
				
			||||||
 | 
					                           metrics=[Accuracy()], use_tqdm=True)
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Next, fit and evaluate using the Estimator
 | 
					Next, fit and evaluate using the Estimator.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```python
 | 
					```python
 | 
				
			||||||
est.fit(data=train_loader_creator, epochs=1, batch_size=batch_size)
 | 
					batch_size = 64
 | 
				
			||||||
result = est.evaluate(data=test_loader_creator, batch_size=batch_size)
 | 
					
 | 
				
			||||||
for r in result:
 | 
					train_stats = est.fit(data=train_loader_creator, epochs=1, batch_size=batch_size)
 | 
				
			||||||
    print(r, ":", result[r])
 | 
					eval_stats = est.evaluate(data=test_loader_creator, batch_size=batch_size)
 | 
				
			||||||
 | 
					print(eval_stats)
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 5: Save and Load the Model
 | 
					### Step 5: Save and Load the Model
 | 
				
			||||||
| 
						 | 
					@ -141,3 +144,5 @@ est.load("mnist_model")
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
**Note:** You should call `stop_orca_context()` when your application finishes.
 | 
					**Note:** You should call `stop_orca_context()` when your application finishes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					That's it! The same code can run seamlessly on your local laptop and scale to [Kubernetes](../Tutorial/k8s.md) or [Hadoop/YARN](../Tutorial/yarn.md) clusters.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -27,7 +27,7 @@ from bigdl.orca import init_orca_context, stop_orca_context
 | 
				
			||||||
if cluster_mode == "local":  # For local machine
 | 
					if cluster_mode == "local":  # For local machine
 | 
				
			||||||
    init_orca_context(cluster_mode="local", cores=4, memory="4g")
 | 
					    init_orca_context(cluster_mode="local", cores=4, memory="4g")
 | 
				
			||||||
elif cluster_mode == "k8s":  # For K8s cluster
 | 
					elif cluster_mode == "k8s":  # For K8s cluster
 | 
				
			||||||
    init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="4g")
 | 
					    init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="4g", master=..., container_image=...)
 | 
				
			||||||
elif cluster_mode == "yarn":  # For Hadoop/YARN cluster
 | 
					elif cluster_mode == "yarn":  # For Hadoop/YARN cluster
 | 
				
			||||||
    init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="4g")
 | 
					    init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="4g")
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
| 
						 | 
					@ -38,7 +38,7 @@ Please check the tutorials if you want to run on [Kubernetes](../Tutorial/k8s.md
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 2: Define the Model
 | 
					### Step 2: Define the Model
 | 
				
			||||||
 | 
					
 | 
				
			||||||
You can then define the Keras model in the _Creator Function_ using the standard TensorFlow 2 Keras APIs.
 | 
					You can then define and compile the Keras model in the _Creator Function_ using the standard TensorFlow 2 Keras APIs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```python
 | 
					```python
 | 
				
			||||||
import tensorflow as tf
 | 
					import tensorflow as tf
 | 
				
			||||||
| 
						 | 
					@ -74,7 +74,6 @@ def preprocess(x, y):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def train_data_creator(config, batch_size):
 | 
					def train_data_creator(config, batch_size):
 | 
				
			||||||
    (train_feature, train_label), _ = tf.keras.datasets.mnist.load_data()
 | 
					    (train_feature, train_label), _ = tf.keras.datasets.mnist.load_data()
 | 
				
			||||||
 | 
					 | 
				
			||||||
    dataset = tf.data.Dataset.from_tensor_slices((train_feature, train_label))
 | 
					    dataset = tf.data.Dataset.from_tensor_slices((train_feature, train_label))
 | 
				
			||||||
    dataset = dataset.repeat()
 | 
					    dataset = dataset.repeat()
 | 
				
			||||||
    dataset = dataset.map(preprocess)
 | 
					    dataset = dataset.map(preprocess)
 | 
				
			||||||
| 
						 | 
					@ -84,7 +83,6 @@ def train_data_creator(config, batch_size):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def val_data_creator(config, batch_size):
 | 
					def val_data_creator(config, batch_size):
 | 
				
			||||||
    _, (val_feature, val_label) = tf.keras.datasets.mnist.load_data()
 | 
					    _, (val_feature, val_label) = tf.keras.datasets.mnist.load_data()
 | 
				
			||||||
 | 
					 | 
				
			||||||
    dataset = tf.data.Dataset.from_tensor_slices((val_feature, val_label))
 | 
					    dataset = tf.data.Dataset.from_tensor_slices((val_feature, val_label))
 | 
				
			||||||
    dataset = dataset.repeat()
 | 
					    dataset = dataset.repeat()
 | 
				
			||||||
    dataset = dataset.map(preprocess)
 | 
					    dataset = dataset.map(preprocess)
 | 
				
			||||||
| 
						 | 
					@ -105,17 +103,15 @@ est = Estimator.from_keras(model_creator=model_creator, workers_per_node=2)
 | 
				
			||||||
Next, fit and evaluate using the Estimator. 
 | 
					Next, fit and evaluate using the Estimator. 
 | 
				
			||||||
```python
 | 
					```python
 | 
				
			||||||
batch_size = 320
 | 
					batch_size = 320
 | 
				
			||||||
stats = est.fit(train_data_creator,
 | 
					train_stats = est.fit(train_data_creator,
 | 
				
			||||||
                      epochs=5,
 | 
					                      epochs=5,
 | 
				
			||||||
                      batch_size=batch_size,
 | 
					                      batch_size=batch_size,
 | 
				
			||||||
                      steps_per_epoch=60000 // batch_size,
 | 
					                      steps_per_epoch=60000 // batch_size,
 | 
				
			||||||
                      validation_data=val_data_creator,
 | 
					                      validation_data=val_data_creator,
 | 
				
			||||||
                      validation_steps=10000 // batch_size)
 | 
					                      validation_steps=10000 // batch_size)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
stats = est.evaluate(val_data_creator, num_steps=10000 // batch_size)
 | 
					eval_stats = est.evaluate(val_data_creator, num_steps=10000 // batch_size)
 | 
				
			||||||
print(stats)
 | 
					print(eval_stats)
 | 
				
			||||||
 | 
					 | 
				
			||||||
est.shutdown()
 | 
					 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 5: Save and Load the Model
 | 
					### Step 5: Save and Load the Model
 | 
				
			||||||
| 
						 | 
					@ -146,6 +142,6 @@ est.save("lenet_model.h5", save_format='h5')
 | 
				
			||||||
est.load("lenet_model.h5")
 | 
					est.load("lenet_model.h5")
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
That's it, the same code can run seamlessly on your local laptop and scale to [Kubernetes](../Tutorial/k8s.md) or [Hadoop/YARN](../Tutorial/yarn.md) clusters.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Note:** You should call `stop_orca_context()` when your program finishes.
 | 
					**Note:** You should call `stop_orca_context()` when your program finishes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					That's it! The same code can run seamlessly on your local laptop and scale to [Kubernetes](../Tutorial/k8s.md) or [Hadoop/YARN](../Tutorial/yarn.md) clusters.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue