tf quickstart yarn integration test (#3671)

This commit is contained in:
Le-Zheng 2021-12-08 02:10:21 +00:00 committed by GitHub
parent edaef4aa4f
commit aa157429d9
2 changed files with 6 additions and 5 deletions

View file

@ -30,15 +30,17 @@ from bigdl.orca import init_orca_context, stop_orca_context
if cluster_mode == "local": # For local machine if cluster_mode == "local": # For local machine
init_orca_context(cluster_mode="local", cores=4, memory="10g") init_orca_context(cluster_mode="local", cores=4, memory="10g")
dataset_dir = "~/tensorflow_datasets"
elif cluster_mode == "k8s": # For K8s cluster elif cluster_mode == "k8s": # For K8s cluster
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1) init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
elif cluster_mode == "yarn": # For Hadoop/YARN cluster elif cluster_mode == "yarn": # For Hadoop/YARN cluster
init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1) init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
dataset_dir = "hdfs:///tensorflow_datasets"
``` ```
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details. This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details. **Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details. To use `tensorflow_datasets` on HDFS, you should also correctly set `HADOOP_HOME`, `HADOOP_HDFS_HOME`, `LD_LIBRARY_PATH`, etc. For more details, please refer to the [TensorFlow on Hadoop documentation](https://github.com/tensorflow/docs/blob/r1.11/site/en/deploy/hadoop.md).
### **Step 2: Define the Model** ### **Step 2: Define the Model**
@ -77,7 +79,6 @@ def preprocess(data):
return data['image'], data['label'] return data['image'], data['label']
# get DataSet # get DataSet
dataset_dir = "./mnist_data"
mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir) mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir) mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)

View file

@ -12,7 +12,6 @@
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details. We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
```bash ```bash
conda create -n py37 python=3.7 # "py37" is conda environment name, you can use any name you like. conda create -n py37 python=3.7 # "py37" is conda environment name, you can use any name you like.
conda activate py37 conda activate py37
@ -28,15 +27,17 @@ from bigdl.orca import init_orca_context, stop_orca_context
if cluster_mode == "local": # For local machine if cluster_mode == "local": # For local machine
init_orca_context(cluster_mode="local", cores=4, memory="10g") init_orca_context(cluster_mode="local", cores=4, memory="10g")
dataset_dir = "~/tensorflow_datasets"
elif cluster_mode == "k8s": # For K8s cluster elif cluster_mode == "k8s": # For K8s cluster
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1) init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
elif cluster_mode == "yarn": # For Hadoop/YARN cluster elif cluster_mode == "yarn": # For Hadoop/YARN cluster
init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1) init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
dataset_dir = "hdfs:///tensorflow_datasets"
``` ```
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details. This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details. **Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details. To use `tensorflow_datasets` on HDFS, you should also correctly set `HADOOP_HOME`, `HADOOP_HDFS_HOME`, `LD_LIBRARY_PATH`, etc. For more details, please refer to the [TensorFlow on Hadoop documentation](https://github.com/tensorflow/docs/blob/r1.11/site/en/deploy/hadoop.md).
### **Step 2: Define the Model** ### **Step 2: Define the Model**
@ -83,7 +84,6 @@ def preprocess(data):
return data['image'], data['label'] return data['image'], data['label']
# get DataSet # get DataSet
dataset_dir = "./mnist_data"
mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir) mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir) mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)