update orca data quickstart and ray md (#3498)
Parent: 238a434950
Commit: c75ad40e67
3 changed files with 13 additions and 13 deletions

@@ -13,9 +13,9 @@
 We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.

 ```bash
-conda create -n zoo python=3.7 # "zoo" is the conda environment name, you can use any name you like.
-conda activate zoo
-pip install analytics-zoo[ray]
+conda create -n bigdl python=3.7 # "bigdl" is the conda environment name, you can use any name you like.
+conda activate bigdl
+pip install bigdl-orca[ray]
 ```

 ### **Step 1: Initialize**
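
A quick sanity check that the renamed package imports cleanly; this is an editor's sketch under the assumption that the `bigdl-orca[ray]` install above succeeded, not part of the commit:

```python
# Sanity check for the environment set up above (assumes bigdl-orca[ray] installed).
import bigdl.orca  # the package namespace that replaces the old zoo/analytics-zoo one
import ray         # pulled in by the [ray] extra

print(ray.__version__)
```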

@@ -23,7 +23,7 @@ pip install analytics-zoo[ray]
 We recommend using `init_orca_context` to initialize and run Analytics Zoo on the underlying cluster. The Ray cluster will be launched automatically when you specify `init_ray_on_spark=True`.

 ```python
-from zoo.orca import init_orca_context
+from bigdl.orca import init_orca_context

 if cluster_mode == "local":  # For local machine
     sc = init_orca_context(cluster_mode="local", cores=4, memory="10g", init_ray_on_spark=True)
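
The hunk above shows only the local branch of the `if`; a hedged sketch of the distributed branch, with illustrative resource values that are assumptions rather than the commit's content:

```python
from bigdl.orca import init_orca_context

# Hypothetical YARN deployment; num_nodes, cores and memory are illustrative values.
sc = init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=4,
                       memory="10g", init_ray_on_spark=True)
```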

@@ -38,7 +38,7 @@ This is the only place where you need to specify local or distributed mode.
 By default, the Ray cluster is launched using Spark barrier execution mode; you can turn it off via the configurations of `OrcaContext`:

 ```python
-from zoo.orca import OrcaContext
+from bigdl.orca import OrcaContext

 OrcaContext.barrier_mode = False
 ```

@@ -50,7 +50,7 @@ View [Orca Context](./../../Orca/Overview/orca-context.md) for more details.
 You can retrieve the information of the Ray cluster via `OrcaContext`:

 ```python
-from zoo.orca import OrcaContext
+from bigdl.orca import OrcaContext

 ray_ctx = OrcaContext.get_ray_context()
 address_info = ray_ctx.address_info  # The dictionary information of the ray cluster, including node_ip_address, object_store_address, webui_url, etc.
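
With the cluster information in hand, a minimal hedged check that the launched Ray cluster actually executes tasks (the parameter-server example later in this file relies on the same mechanism):

```python
import ray

@ray.remote
def ping():
    # Executes on a Ray worker started by init_orca_context.
    return "pong"

print(ray.get(ping.remote()))  # prints "pong"
```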

@@ -125,7 +125,7 @@ print(ray.get(ps.get_parameters.remote()))
 **Note:** You should call `stop_orca_context()` when your program finishes:

 ```python
-from zoo.orca import stop_orca_context
+from bigdl.orca import stop_orca_context

 stop_orca_context()
 ```

@@ -2,7 +2,7 @@

 ---

-[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_dataframe.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_dataframe.ipynb)
+[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/python/orca/colab-notebook/quickstart/ncf_dataframe.ipynb) [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/python/orca/colab-notebook/quickstart/ncf_dataframe.ipynb)

 ---

@@ -15,7 +15,7 @@ The dataset used in this guide is [movielens-1M](https://grouplens.org/datasets/
 First, read the input data into Spark DataFrames.

 ```python
-from zoo.orca import OrcaContext
+from bigdl.orca import OrcaContext

 spark = OrcaContext.get_spark_session()
 # read csv, specifying the column names
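
The hunk cuts off at the comment above; a hedged illustration of such a read follows, with a hypothetical file layout and path that are not taken from the commit:

```python
from pyspark.sql.types import IntegerType, StructField, StructType

# Hypothetical ratings file with "user,item,label" rows and no header line.
schema = StructType([
    StructField("user", IntegerType(), False),
    StructField("item", IntegerType(), False),
    StructField("label", IntegerType(), False),
])
df = spark.read.csv("ratings.csv", header=False, schema=schema)
```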

@@ -82,7 +82,7 @@ def model_creator(config):
 Finally, run distributed model training/inference on the Spark DataFrames directly.

 ```python
-from zoo.orca.learn.tf2 import Estimator
+from bigdl.orca.learn.tf2 import Estimator

 # create an Estimator
 est = Estimator.from_keras(model_creator=model_creator)  # the model accepts two inputs and one label
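
The training call that typically follows this Estimator falls outside the hunk; a hedged sketch, where the column names and hyperparameters are assumptions:

```python
# train_df is the Spark DataFrame prepared earlier; values are illustrative.
est.fit(data=train_df,
        epochs=2,
        batch_size=256,
        feature_cols=["user", "item"],  # the model's two inputs
        label_cols=["label"])           # the single label
```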

@@ -2,7 +2,7 @@

 ---

-[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_xshards_pandas.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_xshards_pandas.ipynb)
+[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/BigDL/blob/branch-2.0/python/orca/colab-notebook/quickstart/ncf_xshards_pandas.ipynb) [View source on GitHub](https://github.com/intel-analytics/BigDL/blob/branch-2.0/python/orca/colab-notebook/quickstart/ncf_xshards_pandas.ipynb)

 ---

@@ -13,7 +13,7 @@
 First, read CSV, JSON or Parquet files into an `XShards` of Pandas DataFrames (i.e., a distributed and sharded dataset where each partition contains a Pandas DataFrame), as shown below:

 ```python
-from zoo.orca.data.pandas import read_csv
+from bigdl.orca.data.pandas import read_csv
 full_data = read_csv(new_rating_files, sep=':', header=None,
                      names=['user', 'item', 'label'], usecols=[0, 1, 2],
                      dtype={0: np.int32, 1: np.int32, 2: np.int32})
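
Each resulting shard holds a Pandas DataFrame, so per-shard preprocessing can be expressed with plain Pandas; a hedged sketch (the transformation itself is an illustrative assumption, not the commit's code):

```python
def rebase_label(df):
    # Example per-shard step: shift 1-based movielens ratings to 0-based labels.
    df["label"] = df["label"] - 1
    return df

full_data = full_data.transform_shard(rebase_label)
```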

@@ -99,7 +99,7 @@ model = NCF(embedding_size, max_user_id, max_item_id)
 Finally, directly run distributed model training/inference on the XShards of Pandas DataFrames.

 ```python
-from zoo.orca.learn.tf.estimator import Estimator
+from bigdl.orca.learn.tf.estimator import Estimator

 # create an Estimator.
 estimator = Estimator.from_graph(
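
The `from_graph` call is truncated by the hunk boundary, so its arguments are not reproduced here. Once constructed, the fit call on the XShards typically looks like the hedged sketch below (column names and values are assumptions):

```python
# train_data is the XShards prepared earlier; values are illustrative.
estimator.fit(data=train_data,
              epochs=2,
              batch_size=256,
              feature_cols=["user", "item"],
              label_cols=["label"])
```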