From c75ad40e67bc9346bcd29796aaaea5d6282a5a64 Mon Sep 17 00:00:00 2001
From: Le-Zheng <30695225+Le-Zheng@users.noreply.github.com>
Date: Tue, 16 Nov 2021 17:41:48 +0800
Subject: [PATCH] update orca data quickstart and ray md (#3498)

---
 .../source/doc/Ray/QuickStart/ray-quickstart.md | 14 +++++++-------
 .../source/doc/UseCase/spark-dataframe.md       |  6 +++---
 .../source/doc/UseCase/xshards-pandas.md        |  6 +++---
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/readthedocs/source/doc/Ray/QuickStart/ray-quickstart.md b/docs/readthedocs/source/doc/Ray/QuickStart/ray-quickstart.md
index e2d06d22..a9dcf607 100644
--- a/docs/readthedocs/source/doc/Ray/QuickStart/ray-quickstart.md
+++ b/docs/readthedocs/source/doc/Ray/QuickStart/ray-quickstart.md
@@ -13,9 +13,9 @@
 We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
 
 ```bash
-conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
-conda activate zoo
-pip install analytics-zoo[ray]
+conda create -n bigdl python=3.7 # "bigdl" is conda environment name, you can use any name you like.
+conda activate bigdl
+pip install bigdl-orca[ray]
 ```
 
 ### **Step 1: Initialize**
@@ -23,7 +23,7 @@ pip install analytics-zoo[ray]
 We recommend using `init_orca_context` to initiate and run Analytics Zoo on the underlying cluster. The Ray cluster would be launched automatically by specifying `init_ray_on_spark=True`.
 
 ```python
-from zoo.orca import init_orca_context
+from bigdl.orca import init_orca_context
 
 if cluster_mode == "local": # For local machine
     sc = init_orca_context(cluster_mode="local", cores=4, memory="10g", init_ray_on_spark=True)
@@ -38,7 +38,7 @@ This is the only place where you need to specify local or distributed mode.
 By default, the Ray cluster would be launched using Spark barrier execution mode, you can turn it off via the configurations of `OrcaContext`:
 
 ```python
-from zoo.orca import OrcaContext
+from bigdl.orca import OrcaContext
 OrcaContext.barrier_mode = False
 ```
 View [Orca Context](./../../Orca/Overview/orca-context.md) for more details.
@@ -50,7 +50,7 @@
 You can retrieve the information of the Ray cluster via `OrcaContext`:
 
 ```python
-from zoo.orca import OrcaContext
+from bigdl.orca import OrcaContext
 
 ray_ctx = OrcaContext.get_ray_context()
 address_info = ray_ctx.address_info # The dictionary information of the ray cluster, including node_ip_address, object_store_address, webui_url, etc.
@@ -125,7 +125,7 @@ print(ray.get(ps.get_parameters.remote()))
 **Note:** You should call `stop_orca_context()` when your program finishes:
 
 ```python
-from zoo.orca import stop_orca_context
+from bigdl.orca import stop_orca_context
 
 stop_orca_context()
 ```

diff --git a/docs/readthedocs/source/doc/UseCase/spark-dataframe.md b/docs/readthedocs/source/doc/UseCase/spark-dataframe.md
index 615cbda3..ad5cdee1 100644
--- a/docs/readthedocs/source/doc/UseCase/spark-dataframe.md
+++ b/docs/readthedocs/source/doc/UseCase/spark-dataframe.md
@@ -2,7 +2,7 @@
 
 ---
 
-![](../../../image/colab_logo_32px.png)[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_dataframe.ipynb)  ![](../../../image/GitHub-Mark-32px.png)[View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_dataframe.ipynb)
+![](../../../image/colab_logo_32px.png)[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/python/orca/colab-notebook/quickstart/ncf_dataframe.ipynb)  ![](../../../image/GitHub-Mark-32px.png)[View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/python/orca/colab-notebook/quickstart/ncf_dataframe.ipynb)
 
 ---
 
@@ -15,7 +15,7 @@ The dataset used in this guide is [movielens-1M](https://grouplens.org/datasets/
 First, read the input data into Spark Dataframes.
 
 ```python
-from zoo.orca import OrcaContext
+from bigdl.orca import OrcaContext
 spark = OrcaContext.get_spark_session()
 
 # read csv with specifying column names
@@ -82,7 +82,7 @@ def model_creator(config):
 Finally, run distributed model training/inference on the Spark Dataframes directly.
 
 ```python
-from zoo.orca.learn.tf2 import Estimator
+from bigdl.orca.learn.tf2 import Estimator
 # create an Estimator
 est = Estimator.from_keras(model_creator=model_creator)
 # the model accepts two inputs and one label

diff --git a/docs/readthedocs/source/doc/UseCase/xshards-pandas.md b/docs/readthedocs/source/doc/UseCase/xshards-pandas.md
index 9f37480f..25256200 100644
--- a/docs/readthedocs/source/doc/UseCase/xshards-pandas.md
+++ b/docs/readthedocs/source/doc/UseCase/xshards-pandas.md
@@ -2,7 +2,7 @@
 
 ---
 
-![](../../../image/colab_logo_32px.png)[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_xshards_pandas.ipynb)  ![](../../../image/GitHub-Mark-32px.png)[View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_xshards_pandas.ipynb)
+![](../../../image/colab_logo_32px.png)[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/python/orca/colab-notebook/quickstart/ncf_xshards_pandas.ipynb)  ![](../../../image/GitHub-Mark-32px.png)[View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/python/orca/colab-notebook/quickstart/ncf_xshards_pandas.ipynb)
 
 ---
 
@@ -13,7 +13,7 @@
 First, read CSV, JSON or Parquet files into an `XShards` of Pandas Dataframe (i.e., a distributed and sharded dataset where each partition contains a Pandas Dataframe), as shown below:
 
 ```python
-from zoo.orca.data.pandas import read_csv
+from bigdl.orca.data.pandas import read_csv
 full_data = read_csv(new_rating_files, sep=':', header=None,
                      names=['user', 'item', 'label'], usecols=[0, 1, 2],
                      dtype={0: np.int32, 1: np.int32, 2: np.int32})
@@ -99,7 +99,7 @@ model = NCF(embedding_size, max_user_id, max_item_id)
 Finally, directly run distributed model training/inference on the XShards of Pandas DataFrames.
 
 ```python
-from zoo.orca.learn.tf.estimator import Estimator
+from bigdl.orca.learn.tf.estimator import Estimator
 
 # create an Estimator.
 estimator = Estimator.from_graph(
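
Taken together, the renamed `bigdl.orca` snippets above compose into the following end-to-end flow for the Ray quickstart. This is a minimal sketch assembled only from the code shown in this patch, assuming `bigdl-orca[ray]` is installed and local mode is used; `barrier_mode` is set before `init_orca_context` here so that it can take effect when the Ray cluster is launched:

```python
from bigdl.orca import init_orca_context, stop_orca_context, OrcaContext

# Optional: launch the Ray cluster without Spark barrier execution mode.
OrcaContext.barrier_mode = False

# Local mode; the Ray cluster is launched automatically
# because init_ray_on_spark=True.
sc = init_orca_context(cluster_mode="local", cores=4, memory="10g",
                       init_ray_on_spark=True)

# Retrieve information about the running Ray cluster.
ray_ctx = OrcaContext.get_ray_context()
print(ray_ctx.address_info)  # node_ip_address, object_store_address, webui_url, etc.

# Always stop the context when the program finishes.
stop_orca_context()
```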
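The use-case pages follow the same pattern once their imports are renamed. The sketch below exercises the `bigdl.orca.learn.tf2` Estimator on a Spark DataFrame `df` with `user`, `item` and `label` columns; the tiny `model_creator` body and the `fit` arguments are illustrative stand-ins (the quickstart builds a real NCF model), and the graph-mode `bigdl.orca.learn.tf.estimator` Estimator changes only its import in the same way:

```python
import tensorflow as tf

from bigdl.orca.learn.tf2 import Estimator

def model_creator(config):
    # Illustrative two-input model; the quickstart's NCF model likewise
    # accepts two inputs (user, item) and one label.
    user = tf.keras.layers.Input(shape=(1,))
    item = tf.keras.layers.Input(shape=(1,))
    out = tf.keras.layers.Dense(1, activation="sigmoid")(
        tf.keras.layers.Concatenate()([user, item]))
    model = tf.keras.Model(inputs=[user, item], outputs=out)
    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model

# create an Estimator and train directly on the Spark DataFrame
est = Estimator.from_keras(model_creator=model_creator)
est.fit(data=df,
        batch_size=64,
        epochs=1,
        feature_cols=["user", "item"],  # the model accepts two inputs
        label_cols=["label"])           # and one label
```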