From 8911c693024889471625500ebb72f9f18948d91a Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Mon, 6 Feb 2023 11:24:07 +0800
Subject: [PATCH] Update k8s tutorial and script (#7356)

* debug

* update train

* update remote dir

* remove print

* update train

* update
---
 .../source/doc/Orca/Tutorial/k8s.md  | 18 +++++++++---------
 .../source/doc/Orca/Tutorial/yarn.md | 14 +++++++-------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/docs/readthedocs/source/doc/Orca/Tutorial/k8s.md b/docs/readthedocs/source/doc/Orca/Tutorial/k8s.md
index d3246bbc..839c2d66 100644
--- a/docs/readthedocs/source/doc/Orca/Tutorial/k8s.md
+++ b/docs/readthedocs/source/doc/Orca/Tutorial/k8s.md
@@ -234,7 +234,7 @@ cp /path/to/fashion-mnist/data/fashion/* /bigdl/nfsdata/dataset/FashionMNIST/raw
 gzip -d /bigdl/nfsdata/dataset/FashionMNIST/raw/*
 ```
 
-In the given example, you can specify the argument `--remote_dir` to be the directory on NFS for the Fashion-MNIST dataset. The directory should contain `FashionMNIST/raw/train-images-idx3-ubyte` and `FashionMNIST/raw/t10k-images-idx3`.
+In the given example, you can specify the argument `--data_dir` to be the directory on NFS for the Fashion-MNIST dataset. The directory should contain `FashionMNIST/raw/train-images-idx3-ubyte` and `FashionMNIST/raw/t10k-images-idx3-ubyte`.
 
 ---
 
@@ -304,7 +304,7 @@ See [here](#init-orca-context) for the runtime configurations.
 #### 6.1.1 K8s-Client
 Run the example with the following command by setting the cluster_mode to "k8s-client":
 ```bash
-python train.py --cluster_mode k8s-client --remote_dir file:///bigdl/nfsdata/dataset
+python train.py --cluster_mode k8s-client --data_dir /bigdl/nfsdata/dataset
 ```
 
 
@@ -333,7 +333,7 @@ conda pack -o environment.tar.gz
 ```
 Run the example with the following command by setting the cluster_mode to "k8s-cluster":
 ```bash
-python /bigdl/nfsdata/train.py --cluster_mode k8s-cluster --remote_dir /bigdl/nfsdata/dataset
+python /bigdl/nfsdata/train.py --cluster_mode k8s-cluster --data_dir /bigdl/nfsdata/dataset
 ```
 
 
@@ -395,7 +395,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.executor.extraClassPath=${BIGDL_HOME}/jars/* \
     --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
     --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/bigdl/nfsdata \
-    train.py --cluster_mode spark-submit --remote_dir /bigdl/nfsdata/dataset
+    train.py --cluster_mode spark-submit --data_dir /bigdl/nfsdata/dataset
 ```
 
 In the `spark-submit` script:
@@ -447,7 +447,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/bigdl/nfsdata \
     --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
     --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/bigdl/nfsdata \
-    file:///bigdl/nfsdata/train.py --cluster_mode spark-submit --remote_dir /bigdl/nfsdata/dataset
+    file:///bigdl/nfsdata/train.py --cluster_mode spark-submit --data_dir /bigdl/nfsdata/dataset
 ```
 
 In the `spark-submit` script:
@@ -544,7 +544,7 @@ spec:
             --conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/* \
             local:///bigdl/nfsdata/train.py
             --cluster_mode spark-submit
-            --remote_dir file:///bigdl/nfsdata/dataset
+            --data_dir file:///bigdl/nfsdata/dataset
             "]
         securityContext:
          privileged: true
@@ -688,7 +688,7 @@ spec:
             --conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/* \
             local:///bigdl/nfsdata/train.py
             --cluster_mode spark-submit
-            --remote_dir file:///bigdl/nfsdata/dataset
+            --data_dir file:///bigdl/nfsdata/dataset
             "]
         securityContext:
          privileged: true
@@ -861,7 +861,7 @@ spec:
             --conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/* \
             local:///bigdl/nfsdata/train.py
             --cluster_mode spark-submit
-            --remote_dir file:///bigdl/nfsdata/dataset
+            --data_dir file:///bigdl/nfsdata/dataset
             "]
         securityContext:
          privileged: true
@@ -999,7 +999,7 @@ spec:
             --conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/* \
             local:///bigdl/nfsdata/train.py
             --cluster_mode spark-submit
-            --remote_dir file:///bigdl/nfsdata/dataset
+            --data_dir file:///bigdl/nfsdata/dataset
             "]
         securityContext:
          privileged: true
diff --git a/docs/readthedocs/source/doc/Orca/Tutorial/yarn.md b/docs/readthedocs/source/doc/Orca/Tutorial/yarn.md
index 050fe511..a5c2d9cc 100644
--- a/docs/readthedocs/source/doc/Orca/Tutorial/yarn.md
+++ b/docs/readthedocs/source/doc/Orca/Tutorial/yarn.md
@@ -127,7 +127,7 @@ Then upload it to a distributed storage. Sample command to upload data to HDFS i
 ```bash
 hdfs dfs -put /path/to/local/data/FashionMNIST hdfs://path/to/remote/data
 ```
-In the given example, you can specify the argument `--remote_dir` to be the directory on a distributed storage for the Fashion-MNIST dataset. The directory should contain `FashionMNIST/raw/train-images-idx3-ubyte` and `FashionMNIST/raw/t10k-images-idx3`.
+In the given example, you can specify the argument `--data_dir` to be the directory on a distributed storage for the Fashion-MNIST dataset. The directory should contain `FashionMNIST/raw/train-images-idx3-ubyte` and `FashionMNIST/raw/t10k-images-idx3-ubyte`.
 
 ---
 ## 4. Prepare Custom Modules
@@ -192,14 +192,14 @@ See [here](#init-orca-context) for the runtime configurations.
 #### 5.1.1 Yarn Client
 Run the example with the following command by setting the cluster_mode to "yarn-client":
 ```bash
-python train.py --cluster_mode yarn-client --remote_dir hdfs://path/to/remote/data
+python train.py --cluster_mode yarn-client --data_dir hdfs://path/to/remote/data
 ```
 
 
 #### 5.1.2 Yarn Cluster
 Run the example with the following command by setting the cluster_mode to "yarn-cluster":
 ```bash
-python train.py --cluster_mode yarn-cluster --remote_dir hdfs://path/to/remote/data
+python train.py --cluster_mode yarn-cluster --data_dir hdfs://path/to/remote/data
 ```
 
 
@@ -254,7 +254,7 @@ bigdl-submit \
     --archives /path/to/environment.tar.gz#environment \
     --conf spark.pyspark.driver.python=/path/to/python \
     --conf spark.pyspark.python=environment/bin/python \
-    train.py --cluster_mode bigdl-submit --remote_dir hdfs://path/to/remote/data
+    train.py --cluster_mode bigdl-submit --data_dir hdfs://path/to/remote/data
 ```
 In the `bigdl-submit` script:
 * `--master`: the spark master, set it to "yarn".
@@ -277,7 +277,7 @@ bigdl-submit \
     --archives /path/to/environment.tar.gz#environment \
     --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=environment/bin/python \
     --conf spark.executorEnv.PYSPARK_PYTHON=environment/bin/python \
-    train.py --cluster_mode bigdl-submit --remote_dir hdfs://path/to/remote/data
+    train.py --cluster_mode bigdl-submit --data_dir hdfs://path/to/remote/data
 ```
 In the `bigdl-submit` script:
 * `--master`: the spark master, set it to "yarn".
@@ -344,7 +344,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.pyspark.python=environment/bin/python \
     --py-files ${BIGDL_HOME}/python/bigdl-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,model.py \
     --jars ${BIGDL_HOME}/jars/bigdl-assembly-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
-    train.py --cluster_mode spark-submit --remote_dir hdfs://path/to/remote/data
+    train.py --cluster_mode spark-submit --data_dir hdfs://path/to/remote/data
 ```
 In the `spark-submit` script:
 * `--master`: the spark master, set it to "yarn".
@@ -368,7 +368,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.executorEnv.PYSPARK_PYTHON=environment/bin/python \
     --py-files ${BIGDL_HOME}/python/bigdl-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,model.py \
     --jars ${BIGDL_HOME}/jars/bigdl-assembly-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
-    train.py --cluster_mode spark-submit --remote_dir hdfs://path/to/remote/data
+    train.py --cluster_mode spark-submit --data_dir hdfs://path/to/remote/data
 ```
 In the `spark-submit` script:
 * `--master`: the spark master, set it to "yarn".
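
This patch only renames the example's command-line flag from `--remote_dir` to `--data_dir` in the launch commands of the k8s and yarn tutorials; the `train.py` script that consumes the flag is not part of the diff. For reference, below is a minimal sketch of argument parsing that would match the patched commands. The parser layout, help strings, and defaults are illustrative assumptions, not the actual BigDL Orca example code:

```python
# Hypothetical sketch only: mirrors the flags used in the tutorial commands
# above, but is NOT the train.py shipped with BigDL Orca.
import argparse

parser = argparse.ArgumentParser(description="Orca Fashion-MNIST example (sketch)")
parser.add_argument("--cluster_mode", type=str, default="local",
                    help='Run mode, e.g. "k8s-client", "k8s-cluster", '
                         '"yarn-client", "yarn-cluster", "bigdl-submit" '
                         'or "spark-submit"')
parser.add_argument("--data_dir", type=str, default="./dataset",
                    help="Directory containing FashionMNIST/raw, e.g. an NFS "
                         "mount such as /bigdl/nfsdata/dataset or an HDFS URL "
                         "such as hdfs://path/to/remote/data")
args = parser.parse_args()

# A dataset loader would then look for files such as
# <data_dir>/FashionMNIST/raw/train-images-idx3-ubyte under this directory.
print(f"cluster_mode={args.cluster_mode}, data_dir={args.data_dir}")
```

With a parser like this, `--data_dir` accepts a local or NFS path (e.g. `/bigdl/nfsdata/dataset`) as well as a distributed-storage URL (e.g. `hdfs://path/to/remote/data`), matching the commands in both tutorials.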