[PPML] Fix BigDL Graphene Azure scripts (#5767)

* update

* update submit scripts
Jiao Wang 2022-09-15 18:47:38 -07:00 committed by GitHub
parent c49b52a95b
commit a1c9182515


@@ -298,9 +298,9 @@ export TF_MKL_ALLOC_MAX_BYTES=10737418240 && \
--conf spark.driver.defaultJavaOptions="-Dlog4j.configuration=/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/log4j2.xml" \
--conf spark.executor.defaultJavaOptions="-Dlog4j.configuration=/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/log4j2.xml" \
--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
---conf spark.kubernetes.container.image=intelanalytics/bigdl-ppml-trusted-big-data-ml-python-graphene:2.1.1-SNAPSHOT \
---conf spark.kubernetes.driver.podTemplateFile=/ppml/trusted-big-data-ml/spark-driver-template-kv.yaml \
---conf spark.kubernetes.executor.podTemplateFile=/ppml/trusted-big-data-ml/spark-executor-template-kv.yaml \
+--conf spark.kubernetes.container.image=intelanalytics/bigdl-ppml-trusted-big-data-ml-python-graphene:2.1.0-SNAPSHOT \
+--conf spark.kubernetes.driver.podTemplateFile=/ppml/trusted-big-data-ml/azure/spark-driver-template-az.yaml \
+--conf spark.kubernetes.executor.podTemplateFile=/ppml/trusted-big-data-ml/azure/spark-executor-template-az.yaml \
--conf spark.kubernetes.executor.deleteOnTermination=false \
--conf spark.network.timeout=10000000 \
--conf spark.executor.heartbeatInterval=10000000 \
@@ -345,7 +345,7 @@ export TF_MKL_ALLOC_MAX_BYTES=10737418240 && \
--conf spark.bigdl.kms.key.data=$DATA_KEY_PATH \
--class $SPARK_JOB_MAIN_CLASS \
--verbose \
-local://$SPARK_EXTRA_JAR_PATH \
+$SPARK_EXTRA_JAR_PATH \
$ARGS
```
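The submit script above leaves several environment variables to be filled in by the user ($SPARK_EXTRA_JAR_PATH, $SPARK_JOB_MAIN_CLASS, $DATA_KEY_PATH). As a minimal sketch, assuming they are simply exported in the shell before the script runs, the setup might look like the following; every value here is a hypothetical placeholder, not taken from the original docs:

```
# Hypothetical placeholders -- point these at your own application jar, main class, and data key file.
export SPARK_EXTRA_JAR_PATH=/ppml/trusted-big-data-ml/work/data/my-spark-job.jar
export SPARK_JOB_MAIN_CLASS=com.example.MySparkJob
export DATA_KEY_PATH=/ppml/trusted-big-data-ml/work/keys/encryptedDataKey
```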
@@ -378,7 +378,7 @@ Generate primary key and data key, then save to file system.
The example code for generating the primary key and data key is shown below:
```
-java -cp '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/lib/bigdl-ppml-spark_3.1.2-2.1.0-SNAPSHOT-jar-with-dependencies.jar:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
+java -cp '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
-Xmx10g \
com.intel.analytics.bigdl.ppml.examples.GenerateKeys \
--kmsType AzureKeyManagementService \
@@ -392,7 +392,7 @@ Encrypt data with specified BigDL `AzureKeyManagementService`
The example code for encrypting data is shown below:
```
-java -cp '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/lib/bigdl-ppml-spark_3.1.2-2.1.0-SNAPSHOT-jar-with-dependencies.jar:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
+java -cp '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
-Xmx10g \
com.intel.analytics.bigdl.ppml.examples.tpch.EncryptFiles \
--kmsType AzureKeyManagementService \
@@ -432,7 +432,7 @@ OUTPUT_DIR=xxx/output
export TF_MKL_ALLOC_MAX_BYTES=10737418240 && \
/opt/jdk8/bin/java \
--cp '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/lib/bigdl-ppml-spark_3.1.2-2.1.0-SNAPSHOT-jar-with-dependencies.jar:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
+-cp '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
-Xmx10g \
-Dbigdl.mklNumThreads=1 \
org.apache.spark.deploy.SparkSubmit \
@@ -448,9 +448,9 @@ export TF_MKL_ALLOC_MAX_BYTES=10737418240 && \
--conf spark.driver.defaultJavaOptions="-Dlog4j.configuration=/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/log4j2.xml" \
--conf spark.executor.defaultJavaOptions="-Dlog4j.configuration=/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/log4j2.xml" \
--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
---conf spark.kubernetes.container.image=intelanalytics/bigdl-ppml-trusted-big-data-ml-python-graphene:2.1.1-SNAPSHOT \
---conf spark.kubernetes.driver.podTemplateFile=/ppml/trusted-big-data-ml/spark-driver-template-kv.yaml \
---conf spark.kubernetes.executor.podTemplateFile=/ppml/trusted-big-data-ml/spark-executor-template-kv.yaml \
+--conf spark.kubernetes.container.image=intelanalytics/bigdl-ppml-trusted-big-data-ml-python-graphene:2.1.0-SNAPSHOT \
+--conf spark.kubernetes.driver.podTemplateFile=/ppml/trusted-big-data-ml/azure/spark-driver-template-az.yaml \
+--conf spark.kubernetes.executor.podTemplateFile=/ppml/trusted-big-data-ml/azure/spark-executor-template-az.yaml \
--conf spark.kubernetes.executor.deleteOnTermination=false \
--conf spark.network.timeout=10000000 \
--conf spark.executor.heartbeatInterval=10000000 \
@@ -495,7 +495,7 @@ export TF_MKL_ALLOC_MAX_BYTES=10737418240 && \
--conf spark.bigdl.kms.key.data=$DATA_KEY_PATH \
--class $SPARK_JOB_MAIN_CLASS \
--verbose \
-/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/lib/bigdl-ppml-spark_3.1.2-2.1.0-SNAPSHOT-jar-with-dependencies.jar \
+/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/bigdl-ppml-spark_3.1.2-2.1.0-SNAPSHOT.jar \
$INPUT_DIR $OUTPUT_DIR aes_cbc_pkcs5padding plain_text [QUERY]
```
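Reading the positional arguments of the last command: $INPUT_DIR and $OUTPUT_DIR appear to be the encrypted input and the result directory, aes_cbc_pkcs5padding and plain_text look like the input and output crypto modes, and the optional [QUERY] selects a single query to run instead of the whole suite. A hypothetical setup (the Azure Blob paths and query number below are illustrative placeholders, not from the original) might be:

```
# Hypothetical placeholders -- substitute your own storage account, container, and paths.
INPUT_DIR=wasbs://mycontainer@myaccount.blob.core.windows.net/tpch/encrypted
OUTPUT_DIR=wasbs://mycontainer@myaccount.blob.core.windows.net/tpch/output
# The spark-submit command above would then end with, e.g., query 10 only:
#   $INPUT_DIR $OUTPUT_DIR aes_cbc_pkcs5padding plain_text 10
```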