add docs
docs/readthedocs/Makefile  (new file, 19 lines)
@@ -0,0 +1,19 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SOURCEDIR     = source
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
docs/readthedocs/README.md  (new file, 15 lines)
@@ -0,0 +1,15 @@
# Analytics-zoo-doc Documentation

To compile the documentation, run the following commands from this directory.

```
pip install -r requirements-doc.txt
pip install -U -r requirements-rtd.txt  # important for reproducing the deployment environment
make html
open _build/html/index.html
```

To check the documentation for build errors, run the following.

```
sphinx-build -b html -d _build/doctrees source _build/html
```
docs/readthedocs/_build/.keep  (new empty placeholder file, 1 line)
@@ -0,0 +1 @@
docs/readthedocs/image/.keep  (new empty placeholder file, 1 line)
@@ -0,0 +1 @@
docs/readthedocs/image/GitHub-Mark-32px.png  (new binary file, 1.7 KiB)
docs/readthedocs/image/colab_logo_32px.png  (new binary file, 1.6 KiB)
docs/readthedocs/make.bat  (new file, 35 lines)
@@ -0,0 +1,35 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd
docs/readthedocs/requirements-doc.txt  (new file, 29 lines)
@@ -0,0 +1,29 @@
sphinx-copybutton
sphinx-version-warning
sphinx-click
sphinx-jsonschema
sphinxemoji
click
tensorflow==1.15.2
bigdl==0.12.0
ray[tune]==1.2.0
ray==1.2.0
torch==1.7.1
Pygments==2.3.1
setuptools==41.0.1
docutils==0.14
mock==1.0.1
pillow==5.4.1
sphinx==4.0.2
alabaster>=0.7,<0.8,!=0.7.5
commonmark==0.8.1
recommonmark==0.5.0
readthedocs-sphinx-ext<2.2
sphinx_rtd_theme==0.5.2
scikit-learn==0.22.2.post1
tsfresh==0.18.0
pystan==2.19.1.1
prophet
pmdarima
sphinx_markdown_tables
numpy==1.21.2
docs/readthedocs/requirements-rtd.txt  (new file, 12 lines)
@@ -0,0 +1,12 @@
Pygments==2.3.1
setuptools==41.0.1
docutils==0.14
mock==1.0.1
pillow==5.4.1
alabaster>=0.7,<0.8,!=0.7.5
commonmark==0.8.1
recommonmark==0.5.0
readthedocs-sphinx-ext<1.1
sphinx-book-theme
sphinx_rtd_theme
sphinx_markdown_tables
docs/readthedocs/source/_static/css/custom.css  (new file, 65 lines)
@@ -0,0 +1,65 @@
/* Extends the docstring signature box. */
.rst-content dl:not(.docutils) dt {
    display: block;
    padding: 10px;
    word-wrap: break-word;
    padding-right: 100px;
}

/* Lists in an admonition note do not have awkward whitespace below. */
.rst-content .admonition-note .section ul {
    margin-bottom: 0px;
}

/* Properties become blue (classmethod, staticmethod, property). */
.rst-content dl dt em.property {
    color: #2980b9;
    text-transform: uppercase;
}

.rst-content .section ol p,
.rst-content .section ul p {
    margin-bottom: 0px;
}

div.sphx-glr-bigcontainer {
    display: inline-block;
    width: 100%;
}

td.tune-colab,
th.tune-colab {
    border: 1px solid #dddddd;
    text-align: left;
    padding: 8px;
}

/* Adjustment to Sphinx Book Theme */
.table td {
    /* Remove row spacing */
    padding: 0;
}

table {
    /* Force full width for all tables */
    width: 136% !important;
}

img.inline-figure {
    /* Override the display: block for img */
    display: inherit !important;
}

#version-warning-banner {
    /* Make version warning clickable */
    z-index: 1;
}

span.rst-current-version > span.fa.fa-book {
    /* Move the book icon away from the top right
     * corner of the version flyout menu */
    margin: 10px 0px 0px 5px;
}

/* Adjustment to Version block */
.rst-versions {
    z-index: 1200 !important;
}
docs/readthedocs/source/analytics_zoo_pytext.py  (new file, 51 lines)
@@ -0,0 +1,51 @@
#!/usr/bin/env python

#
# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import re


def _process_docstring(app, what, name, obj, options, lines):
    # Rewrite markdown-style ``` fences in docstrings into rst literal blocks
    # so that autodoc renders the enclosed code verbatim.
    liter_re = re.compile(r'\s*```\s*$')

    liter_flag = False

    offset = 0
    for j in range(len(lines)):
        i = j + offset
        line = lines[i]
        # first literal block line
        if not liter_flag and liter_re.match(line):
            liter_flag = True
            lines.insert(i + 1, '')
            offset += 1
            lines[i] = '::'
        # last literal block line
        elif liter_flag and liter_re.match(line):
            liter_flag = False
            lines[i] = ''
        # regular line within literal block
        elif liter_flag:
            line = ' ' + line
            lines[i] = line
        # regular line
        else:
            lines[i] = line.lstrip()


def setup(app):
    app.connect("autodoc-process-docstring", _process_docstring)
docs/readthedocs/source/conf.py  (new file, 226 lines)
@@ -0,0 +1,226 @@
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
import glob
import shutil
import urllib

# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, '.')
sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath("../../../pyzoo/"))


# -- Project information -----------------------------------------------------
import sphinx_rtd_theme
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# html_theme = "sphinx_book_theme"
html_theme_options = {
    "repository_url": "https://github.com/intel-analytics/analytics-zoo",
    "use_repository_button": True,
    "use_issues_button": True,
    "use_edit_page_button": True,
    "path_to_docs": "doc/source",
    "home_page_in_toc": True,
}

# The suffix of source filenames.
from recommonmark.parser import CommonMarkParser
source_suffix = {'.rst': 'restructuredtext',
                 '.txt': 'markdown',
                 '.md': 'markdown',}

master_doc = 'index'

project = 'Analytics Zoo'
copyright = '2020, Analytics Zoo Authors'
author = 'Analytics Zoo Authors'

# The short X.Y version
# version = ''
# The full version, including alpha/beta/rc tags
# from zoo import __version__ as version
# release = version


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
# extensions = [
#     'sphinx.ext.autodoc',
# ]
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx_click.ext',
    'sphinx-jsonschema',
    'sphinx.ext.napoleon',
    'sphinxemoji.sphinxemoji',
    'sphinx_copybutton',
    'sphinx.ext.mathjax',
    'recommonmark',
    'sphinx_markdown_tables'
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
# exclude_patterns = []

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

exclude_patterns = ['_build']
# todo_include_todos = False


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
# html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'Analytics Zoo Documentation'


# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'analytics-zoo.tex', 'analytics-zoo Documentation',
     'analytics-zoo', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'analytics-zoo', 'analytics-zoo Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'analytics-zoo', 'analytics-zoo Documentation',
     author, 'analytics-zoo', 'One line description of project.',
     'Miscellaneous'),
]


# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be an ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

autoclass_content = 'both'
autodoc_member_order = 'bysource'

# app setup hook for AutoStructify
from recommonmark.transform import AutoStructify
def setup(app):
    app.add_config_value('recommonmark_config', {
        'auto_toc_tree_section': 'Contents',
        'enable_math': False,
        'enable_inline_math': False,
        'enable_eval_rst': True,
        'enable_auto_doc_ref': True,
    }, True)
    app.add_transform(AutoStructify)
docs/readthedocs/source/doc/Application/powered-by.md  (new file, 70 lines)
@@ -0,0 +1,70 @@
# Powered By
---

* __Alibaba__
<br>[Deploy Analytics Zoo in Aliyun EMR](https://partners-intl.aliyun.com/help/doc-detail/93155.htm)
<br>[Better Together: Privacy-Preserving Machine Learning](https://www.intel.com/content/www/us/en/artificial-intelligence/posts/alibaba-privacy-preserving-machine-learning.html)
* __Baosight__
<br>[LSTM-Based Time Series Anomaly Detection Using Analytics Zoo for Apache Spark and BigDL at Baosight](https://software.intel.com/en-us/articles/lstm-based-time-series-anomaly-detection-using-analytics-zoo-for-apache-spark-and-bigdl)
* __Burger King__
<br>[Context-Aware Fast Food Recommendation at Burger King with RayOnSpark](https://medium.com/riselab/context-aware-fast-food-recommendation-at-burger-king-with-rayonspark-2e7a6009dd2d)
<br>[How Intel and Burger King built an order recommendation system that preserves customer privacy](https://venturebeat.com/2021/04/06/how-intel-and-burger-king-built-an-order-recommendation-system-that-preserves-customer-privacy/)
* __CERN__
<br>[Deep Learning Pipelines for High Energy Physics using Apache Spark with Distributed Keras on Analytics Zoo](https://databricks.com/session_eu19/deep-learning-pipelines-for-high-energy-physics-using-apache-spark-with-distributed-keras-on-analytics-zoo)
<br>[Topology classification at CERN's Large Hadron Collider using Analytics Zoo](https://db-blog.web.cern.ch/blog/luca-canali/machine-learning-pipelines-high-energy-physics-using-apache-spark-bigdl)
<br>[Deep Learning on Apache Spark at CERN's Large Hadron Collider with Intel Technologies](https://databricks.com/session/deep-learning-on-apache-spark-at-cerns-large-hadron-collider-with-intel-technologies)
* __China Telecom__
<br>[Face Recognition Application and Practice Based on Intel Analytics Zoo: Part 1](https://mp.weixin.qq.com/s/FEiXoTDi-yy04PJ2Mlfl4A) (in Chinese)
<br>[Face Recognition Application and Practice Based on Intel Analytics Zoo: Part 2](https://mp.weixin.qq.com/s/VIyWRORTAVAAsC4v6Fi0xw) (in Chinese)
* __Cray__
<br>[A deep learning approach for precipitation nowcasting with RNN using Analytics Zoo in Cray](https://conferences.oreilly.com/strata/strata-ny-2018/public/schedule/detail/69413)
* __Dell EMC__
<br>[Build AI on PowerEdge with Domino Data Labs, Apache Spark and Analytics Zoo](https://community.emc.com/community/products/rs_for_ai/blog/2019/09/19/build-ai-on-poweredge-with-domino-data-labs-and-apache-spark)
<br>[AI-assisted Radiology Using Distributed Deep Learning on Apache Spark and Analytics Zoo](https://www.dellemc.com/resources/en-us/asset/white-papers/solutions/h17686_hornet_wp.pdf)
<br>[Using Deep Learning on Apache Spark to Diagnose Thoracic Pathology from Chest X-rays](https://databricks.com/session/using-deep-learning-on-apache-spark-to-diagnose-thoracic-pathology-from-chest-x-rays)
* __GoldWind__
<br>[Intel big data analysis + AI platform helps GoldWind to build a new energy intelligent power prediction solution](https://www.intel.cn/content/www/cn/zh/analytics/artificial-intelligence/create-power-forecasting-solutions.html)
* __Inspur__
<br>[Inspur End-to-End Smart Computing Solution with Intel Analytics Zoo](https://dpgresources.intel.com/asset-library/inspur-end-to-end-smart-computing-solution-with-intel-analytics-zoo/)
* __JD__
<br>[Object Detection and Image Feature Extraction at JD.com](https://software.intel.com/en-us/articles/building-large-scale-image-feature-extraction-with-bigdl-at-jdcom)
* __MasterCard__
<br>[Deep Learning with Analytic Zoo Optimizes Mastercard Recommender AI Service](https://software.intel.com/en-us/articles/deep-learning-with-analytic-zoo-optimizes-mastercard-recommender-ai-service)
* __Microsoft Azure__
<br>[Use Analytics Zoo to Inject AI Into Customer Service Platforms on Microsoft Azure: Part 1](https://software.intel.com/en-us/use-analytics-zoo-to-inject-ai-into-customer-service-platforms-on-microsoft-azure-part-1)
<br>[Use Analytics Zoo to Inject AI Into Customer Service Platforms on Microsoft Azure: Part 2](https://www.infoq.com/articles/analytics-zoo-qa-module/?from=timeline&isappinstalled=0)
* __Midea__
<br>[Industrial Inspection Platform in Midea and KUKA: Using Distributed TensorFlow on Analytics Zoo](https://software.intel.com/en-us/articles/industrial-inspection-platform-in-midea-and-kuka-using-distributed-tensorflow-on-analytics)
<br>[Ability to add "eyes" and "brains" to smart manufacturing](https://www.intel.cn/content/www/cn/zh/analytics/artificial-intelligence/midea-case-study.html) (in Chinese)
* __MLSListings__
<br>[Image Similarity-Based House Recommendations and Search](https://software.intel.com/content/www/us/en/develop/articles/using-bigdl-to-build-image-similarity-based-house-recommendations.html)
* __NeuSoft/BMW__
<br>[Neusoft RealSight APM partners with Intel to create an application performance management platform with active defense capabilities](https://platform.neusoft.com/2020/01/17/xw-intel.html) (in Chinese)
* __NeuSoft/Mazda__
<br>[JD, Neusoft and Intel Jointly Building Intelligent and Connected Vehicle Cloud for HaiMa (former Hainan Mazda)](https://www.neusoft.com/Products/Platforms/2472/4735110231.html)
<br>[JD, Neusoft and Intel Jointly Building Intelligent and Connected Vehicle Cloud for Hainan-Mazda](https://platform.neusoft.com/2020/06/11/jjfa-haimaqiche.html) (in Chinese)
* __Office Depot__
<br>[Real-time Product Recommendations for Office Depot Using Apache Spark and Analytics Zoo on AWS](https://software.intel.com/en-us/articles/real-time-product-recommendations-for-office-depot-using-apache-spark-and-analytics-zoo-on)
<br>[Office Depot product recommender using Analytics Zoo on AWS](https://conferences.oreilly.com/strata/strata-ca/public/schedule/detail/73079)
* __SK Telecom__
<br>[SK Telecom, Intel Build AI Pipeline to Improve Network Quality](https://networkbuilders.intel.com/solutionslibrary/sk-telecom-intel-build-ai-pipeline-to-improve-network-quality)
<br>[Vectorized Deep Learning Acceleration from Preprocessing to Inference and Training on Apache Spark in SK Telecom](https://databricks.com/session_na20/vectorized-deep-learning-acceleration-from-preprocessing-to-inference-and-training-on-apache-spark-in-sk-telecom)
<br>[Apache Spark AI Use Case in Telco: Network Quality Analysis and Prediction with Geospatial Visualization](https://databricks.com/session_eu19/apache-spark-ai-use-case-in-telco-network-quality-analysis-and-prediction-with-geospatial-visualization)
* __Talroo__
<br>[Uses Analytics Zoo and AWS to Leverage Deep Learning for Job Recommendations](https://software.intel.com/en-us/articles/talroo-uses-analytics-zoo-and-aws-to-leverage-deep-learning-for-job-recommendations)
<br>[Job recommendations leveraging deep learning using Analytics Zoo on Apache Spark and BigDL](https://conferences.oreilly.com/strata/strata-ny-2018/public/schedule/detail/69113)
* __Telefonica__
<br>[Running Analytics Zoo jobs on Telefónica Open Cloud’s MRS Service](https://medium.com/@fernando.delaiglesia/running-analytics-zoo-jobs-on-telef%C3%B3nica-open-clouds-mrs-service-2e64bc823c50)
* __Tencent__
<br>[Analytics Zoo helps Tencent Cloud improve the performance of its intelligent titanium machine learning platform](https://www.intel.cn/content/www/cn/zh/service-providers/analytics-zoo-helps-tencent-cloud-improve-ti-ml-platform-performance.html)
<br>[Tencent* Cloud Leverages Analytics Zoo to Improve Performance of TI-ONE* ML Platform](https://software.intel.com/content/www/us/en/develop/articles/tencent-cloud-leverages-analytics-zoo-to-improve-performance-of-ti-one-ml-platform.html)
<br>[Enhance Tencent's TUSI Identity Practice with Intel Analytics Zoo](https://mp.weixin.qq.com/s?__biz=MzAwNzc5NzM5Mw==&mid=2651030944&idx=1&sn=d6e06c6e14a7355971953a501689b232&chksm=808f8a5eb7f80348fc8e88c4c9e415341bf43ef6bdf3fd4f3001da89e2c9ba7fa2ed5deeb09a&mpshare=1&scene=1&srcid=0412WxM3eWdsLLoO2TYJGWbS&pass_ticket=E6l%2FfOZNKjhr05lsU7inAVCi7mAy5LFEehvEJOS2ZGdHg6%2FH%2BeBQisHA9sfXDOoy#rd) (in Chinese)
* __UC Berkeley RISELab__
<br>[RayOnSpark: Running Emerging AI Applications on Big Data Clusters with Ray and Analytics Zoo](https://medium.com/riselab/rayonspark-running-emerging-ai-applications-on-big-data-clusters-with-ray-and-analytics-zoo-923e0136ed6a)
<br>[Scalable AutoML for Time Series Prediction Using Ray and Analytics Zoo](https://medium.com/riselab/scalable-automl-for-time-series-prediction-using-ray-and-analytics-zoo-b79a6fd08139)
* __World Bank__
<br>[Using Crowdsourced Images to Create Image Recognition Models with Analytics Zoo using BigDL](https://databricks.com/session/using-crowdsourced-images-to-create-image-recognition-models-with-bigdl)
* __Yunda__
<br>[Intelligent transformation brings "quality change" to the express delivery industry](https://www.intel.cn/content/www/cn/zh/analytics/artificial-intelligence/yunda-brings-quality-change-to-the-express-delivery-industry.html) (in Chinese)
docs/readthedocs/source/doc/Application/presentations.md  (new file, 77 lines)
@@ -0,0 +1,77 @@
# Presentations
---

**Tutorial:**

- Analytics Zoo: Distributed TensorFlow and Keras on Apache Spark, [AI conference](https://conferences.oreilly.com/artificial-intelligence/ai-ca-2019/public/schedule/detail/77069), Sep 2019, San Jose ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Tutorial%20Analytics%20ZOO.pdf))

**Talks:**

- Context-aware Fast Food Recommendation with Ray on Apache Spark at Burger King, [Data + AI Summit Europe 2020](https://databricks.com/session_eu20/context-aware-fast-food-recommendation-with-ray-on-apache-spark-at-burger-king), November 2020 ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/1118%20Context-aware%20Fast%20Food%20Recommendation%20with%20Ray%20on%20Apache%20Spark%20at%20Burger%20King.pdf))

- Cluster Serving: Distributed Model Inference using Apache Flink in Analytics Zoo, [Flink Forward 2020](https://www.flink-forward.org/global-2020/conference-program#cluster-serving--distributed-model-inference-using-apache-flink-in-analytics-zoo), October 2020 ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/1020%20Cluster%20Serving%20Distributed%20Model%20Inference%20using%20Apache%20Flink%20in%20Analytics%20Zoo%20.pdf))

- Project Zouwu: Scalable AutoML for Telco Time Series Analysis using Ray and Analytics Zoo, [Ray Summit Connect 2020](https://anyscale.com/blog/videos-and-slides-for-the-fourth-ray-summit-connect-august-12-2020/), August 2020 ([slides](https://anyscale.com/wp-content/uploads/2020/08/Ding-Ding-Connect-slides.pdf))

- Cluster Serving: Distributed Model Inference using Big Data Streaming in Analytics Zoo, [OpML 2020](https://www.usenix.org/conference/opml20/presentation/song), July 2020 ([slides](https://www.usenix.org/sites/default/files/conference/protected-files/opml20_talks_43_slides_song.pdf))

- Scalable AutoML for Time Series Forecasting using Ray, [OpML 2020](https://www.usenix.org/conference/opml20/presentation/huang), July 2020 ([slides](https://www.usenix.org/sites/default/files/conference/protected-files/opml20_talks_84_slides_huang.pdf))

- Scalable AutoML for Time Series Forecasting using Ray, [Spark + AI Summit 2020](https://databricks.com/session_na20/scalable-automl-for-time-series-forecasting-using-ray), June 2020 ([slides](https://www.slideshare.net/databricks/scalable-automl-for-time-series-forecasting-using-ray))

- Running Emerging AI Applications on Big Data Platforms with Ray On Apache Spark, [Spark + AI Summit 2020](https://databricks.com/session_na20/running-emerging-ai-applications-on-big-data-platforms-with-ray-on-apache-spark), June 2020 ([slides](https://www.slideshare.net/databricks/running-emerging-ai-applications-on-big-data-platforms-with-ray-on-apache-spark))

- Vectorized Deep Learning Acceleration from Preprocessing to Inference and Training on Apache Spark in SK Telecom, [Spark + AI Summit 2020](https://databricks.com/session_na20/vectorized-deep-learning-acceleration-from-preprocessing-to-inference-and-training-on-apache-spark-in-sk-telecom), June 2020 ([slides](https://www.slideshare.net/databricks/vectorized-deep-learning-acceleration-from-preprocessing-to-inference-and-training-on-apache-spark-in-sk-telecom?from_action=save))

- Architecture and practice of big data analysis and deep learning model inference using Analytics Zoo on Flink, [Flink Forward Asia 2019](https://developer.aliyun.com/special/ffa2019-conference?spm=a2c6h.13239638.0.0.21f27955PCNMUB#), Nov 2019, Beijing ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Architecture%20and%20practice%20of%20big%20data%20analysis%20and%20deep%20learning%20model%20inference%20using%20Analytics%20Zoo%20on%20Flink(FFA2019)%20.pdf))

- Data analysis + AI platform technology and case studies, [AICon BJ 2019](https://aicon.infoq.cn/2019/beijing/), Nov 2019, Beijing ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/AICON%20AZ%20Cluster%20Serving%20Beijing%20Qiyuan_v5.pdf))

- Architectural practices for building a unified big data AI application with Analytics-Zoo, [QCon SH 2019](https://qcon.infoq.cn/2019/shanghai/presentation/1921), Oct 2019, Shanghai ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Architectural%20practices%20for%20building%20a%20unified%20big%20data%20AI%20application%20with%20Analytics-Zoo.pdf))

- Building AI to play the FIFA video game using distributed TensorFlow, [TensorFlow World](https://conferences.oreilly.com/tensorflow/tf-ca/public/schedule/detail/78309), Oct 2019, Santa Clara ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Building%20AI%20to%20play%20the%20FIFA%20video%20game%20using%20distributed%20TensorFlow.pdf))

- Deep Learning Pipelines for High Energy Physics using Apache Spark with Distributed Keras on Analytics Zoo, [Spark+AI Summit](https://databricks.com/session_eu19/deep-learning-pipelines-for-high-energy-physics-using-apache-spark-with-distributed-keras-on-analytics-zoo), Oct 2019, Amsterdam ([slides](https://www.slideshare.net/databricks/deep-learning-pipelines-for-high-energy-physics-using-apache-spark-with-distributed-keras-on-analytics-zoo))

- Apache Spark AI Use Case in Telco: Network Quality Analysis and Prediction with Geospatial Visualization, [Spark+AI Summit](https://databricks.com/session_eu19/apache-spark-ai-use-case-in-telco-network-quality-analysis-and-prediction-with-geospatial-visualization), Oct 2019, Amsterdam ([slides](https://www.slideshare.net/databricks/apache-spark-ai-use-case-in-telco-network-quality-analysis-and-prediction-with-geospatial-visualization))

- LSTM-based time series anomaly detection using Analytics Zoo for Spark and BigDL, [Strata Data conference](https://conferences.oreilly.com/strata/strata-eu/public/schedule/detail/74077), May 2019, London ([slides](https://cdn.oreillystatic.com/en/assets/1/event/292/LSTM-based%20time%20series%20anomaly%20detection%20using%20Analytics%20Zoo%20for%20Spark%20and%20BigDL%20Presentation.pptx))

- Game Playing Using AI on Apache Spark, [Spark+AI Summit](https://databricks.com/session/game-playing-using-ai-on-apache-spark), April 2019, San Francisco ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/game-playing-using-ai-on-apache-spark.pdf))

- Using Deep Learning on Apache Spark to Diagnose Thoracic Pathology from Chest X-rays in DELL EMC, [Spark+AI Summit](https://databricks.com/session/using-deep-learning-on-apache-spark-to-diagnose-thoracic-pathology-from-chest-x-rays), April 2019, San Francisco ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Using%20Deep%20Learning%20on%20Apache%20Spark%20to%20diagnose%20thoracic%20pathology%20from%20.._.pdf))

- Leveraging NLP and Deep Learning for Document Recommendation in the Cloud, [Spark+AI Summit](https://databricks.com/session/leveraging-nlp-and-deep-learning-for-document-recommendations-in-the-cloud), April 2019, San Francisco ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Leveraging%20NLP%20and%20Deep%20Learning%20for%20Document%20Recommendation%20in%20the%20Cloud.pdf))

- Analytics Zoo: Distributed Tensorflow, Keras and BigDL in production on Apache Spark, [Strata Data conference](https://conferences.oreilly.com/strata/strata-ca/public/schedule/detail/72802), March 2019, San Francisco ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Analytics%20Zoo-Distributed%20Tensorflow%2C%20Keras%20and%20BigDL%20in%20production%20on%20Apache%20Spark.pdf))

- User-based real-time product recommendations leveraging deep learning using Analytics Zoo on Apache Spark in Office Depot, [Strata Data conference](https://conferences.oreilly.com/strata/strata-ca/public/schedule/detail/73079), March 2019, San Francisco ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/User-based%20real-time%20product%20recommendations%20leveraging%20deep%20learning%20using%20Analytics%20Zoo%20on%20Apache%20Spark%20and%20BigDL%20Presentation.pdf))

- Analytics Zoo: Unifying Big Data Analytics and AI for Apache Spark, [Shanghai Apache Spark + AI meetup](https://www.meetup.com/Shanghai-Apache-Spark-AI-Meetup/events/255788956/), Nov 2018, Shanghai ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Analytics%20Zoo-Unifying%20Big%20Data%20Analytics%20and%20AI%20for%20Apache%20Spark.pdf))

- Use Intel Analytics Zoo to build an intelligent QA Bot for Microsoft Azure, [Shanghai Apache Spark + AI meetup](https://www.meetup.com/Shanghai-Apache-Spark-AI-Meetup/events/255788956/), Nov 2018, Shanghai ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Use%20Intel%20Analytics%20Zoo%20to%20build%20an%20intelligent%20QA%20Bot%20for%20Microsoft%20Azure.pdf))

- A deep learning approach for precipitation nowcasting with RNN using Analytics Zoo in Cray, [Strata Data conference](https://conferences.oreilly.com/strata/strata-ny-2018/public/schedule/detail/69413), Sep 2018, New York ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/A%20deep%20learning%20approach%20for%20precipitation%20nowcasting%20with%20RNN%20using%20Analytics%20Zoo%20on%20BigDL.pdf))

- Job recommendations leveraging deep learning using Analytics Zoo on Apache Spark in Talroo, [Strata Data conference](https://conferences.oreilly.com/strata/strata-ny-2018/public/schedule/detail/69113), Sep 2018, New York ([slides](https://cdn.oreillystatic.com/en/assets/1/event/278/Job%20recommendations%20leveraging%20deep%20learning%20using%20Analytics%20Zoo%20on%20Apache%20Spark%20and%20BigDL%20Presentation.pdf))

- Accelerating Deep Learning Training with BigDL and Drizzle on Apache Spark, [Spark + AI Summit](https://databricks.com/session/accelerating-deep-learning-training-with-bigdl-and-drizzle-on-apache-spark), June 2018, San Francisco ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Accelerating%20deep%20learning%20on%20apache%20spark%20Using%20BigDL%20with%20coarse-grained%20scheduling.pdf))

- Using Crowdsourced Images to Create Image Recognition Models with Analytics Zoo in World Bank, [Spark + AI Summit](https://databricks.com/session/using-crowdsourced-images-to-create-image-recognition-models-with-bigdl), June 2018, San Francisco ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Using%20Crowdsourced%20Images%20to%20Create%20Image%20Recognition%20Models%20with%20Analytics%20Zoo%20using%20BigDL.pdf))

- Building Deep Reinforcement Learning Applications on Apache Spark with Analytics Zoo using BigDL, [Spark + AI Summit](https://databricks.com/session/building-deep-reinforcement-learning-applications-on-apache-spark-using-bigdl), June 2018, San Francisco ([slides](https://github.com/analytics-zoo/analytics-zoo.github.io/blob/master/presentations/Building%20Deep%20Reinforcement%20Learning%20Applications%20on%20Apache%20Spark%20with%20Analytics%20Zoo%20using%20BigDL.pdf))

- Using BigDL on Apache Spark to Improve the MLS Real Estate Search Experience at Scale, [Spark + AI Summit](https://databricks.com/session/using-bigdl-on-apache-spark-to-improve-the-mls-real-estate-search-experience-at-scale), June 2018, San Francisco

- Analytics Zoo: Building Analytics and AI Pipeline for Apache Spark and BigDL, [Spark + AI Summit](https://databricks.com/session/analytics-zoo-building-analytics-and-ai-pipeline-for-apache-spark-and-bigdl), June 2018, San Francisco

- Using Siamese CNNs for removing duplicate entries from real estate listing databases, [Strata Data conference](https://conferences.oreilly.com/strata/strata-eu-2018/public/schedule/detail/65518), May 2018, London ([slides](https://cdn.oreillystatic.com/en/assets/1/event/267/Using%20Siamese%20CNNs%20for%20removing%20duplicate%20entries%20from%20real%20estate%20listing%20databases%20Presentation.pdf))

- Classifying images on Spark in World Bank, [AI conference](https://conferences.oreilly.com/artificial-intelligence/ai-ny-2018/public/schedule/detail/64939), May 2018, New York ([slides](https://cdn.oreillystatic.com/en/assets/1/event/280/Classifying%20images%20in%20Spark%20Presentation.pdf))

- Improving user-merchant propensity modeling using neural collaborative filtering and wide and deep models on Spark BigDL in Mastercard, [Strata Data conference](https://conferences.oreilly.com/strata/strata-ca-2018/public/schedule/detail/63897), March 2018, San Jose ([slides](https://cdn.oreillystatic.com/en/assets/1/event/269/Improving%20user-merchant%20propensity%20modeling%20using%20neural%20collaborative%20filtering%20and%20wide%20and%20deep%20models%20on%20Spark%20BigDL%20at%20scale%20Presentation.pdf))

- Accelerating deep learning on Apache Spark using BigDL with coarse-grained scheduling, [Strata Data conference](https://conferences.oreilly.com/strata/strata-ca-2018/public/schedule/detail/63960), March 2018, San Jose ([slides](https://cdn.oreillystatic.com/en/assets/1/event/269/Accelerating%20deep%20learning%20on%20Apache%20Spark%20using%20BigDL%20with%20coarse-grained%20scheduling%20Presentation.pptx))

- Automatic 3D MRI knee damage classification with 3D CNN using BigDL on Spark in UCSF, [Strata Data conference](https://conferences.oreilly.com/strata/strata-ca-2018/public/schedule/detail/64023), March 2018, San Jose ([slides](https://cdn.oreillystatic.com/en/assets/1/event/269/Automatic%203D%20MRI%20knee%20damage%20classification%20with%203D%20CNN%20using%20BigDL%20on%20Spark%20Presentation.pdf))
docs/readthedocs/source/doc/Chronos/Image/automl_hparams.png  (new binary file, 154 KiB)
docs/readthedocs/source/doc/Chronos/Image/automl_monitor.png  (new binary file, 177 KiB)
docs/readthedocs/source/doc/Chronos/Image/automl_scalars.png  (new binary file, 244 KiB)
docs/readthedocs/source/doc/Chronos/Image/forecast-RR.png  (new binary file, 49 KiB)
docs/readthedocs/source/doc/Chronos/Image/forecast-TS.png  (new binary file, 24 KiB)
docs/readthedocs/source/doc/Chronos/Overview/chronos.md  (new file, 529 lines)
@@ -0,0 +1,529 @@
# Chronos User Guide

### **1 Overview**
_Chronos_ is an application framework for building large-scale time series analysis applications.

You can use _Chronos_ to do:

- **Data pre/post-processing and feature generation** (using [TSDataset](#data-processing-and-feature-engineering))
- **Time Series Forecasting** (using [Standalone Forecasters](#use-standalone-forecaster-pipeline), [Auto Models](#use-auto-forecasting-model) (with HPO) or [AutoTS](#use-autots-pipeline) (full AutoML enabled pipelines))
- **Anomaly Detection** (using [Anomaly Detectors](#anomaly-detection))
- **Synthetic Data Generation** (using [Simulators](#generate-synthetic-data))

---
### **2 Install**

Install analytics-zoo with target `[automl]` to install the additional dependencies for _Chronos_.

```bash
conda create -n my_env python=3.7
conda activate my_env
pip install --pre --upgrade analytics-zoo[automl]
```
---
### **3 Initialization**

_Chronos_ uses [Orca](../../Orca/Overview/orca.md) to enable distributed training and AutoML capabilities. Initialize Orca as below when you want to:

1. Use the distributed mode of a standalone forecaster.
2. Use AutoML to tune your model in a distributed fashion.

View [Orca Context](../../Orca/Overview/orca-context.md) for more details. Note that the argument `init_ray_on_spark` must be `True` for _Chronos_.

```python
if args.cluster_mode == "local":
    init_orca_context(cluster_mode="local", cores=4, init_ray_on_spark=True)  # run in local mode
elif args.cluster_mode == "k8s":
    init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, init_ray_on_spark=True)  # run on K8s cluster
elif args.cluster_mode == "yarn":
    init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2, init_ray_on_spark=True)  # run on Hadoop YARN cluster
```
View [Quick Start](../QuickStart/chronos-autotsest-quickstart.md) for a more detailed example.

---
<span id="TSDataset"></span>
### **4 Data Processing and Feature Engineering**

Time series data is a special data formulation with its own specific operations. _Chronos_ provides [`TSDataset`](../../PythonAPI/Chronos/tsdataset.html) as a time series dataset abstraction for data processing (e.g. impute, deduplicate, resample, scale/unscale, roll sampling) and automatic feature engineering (e.g. datetime features, aggregation features). Cascade calls are supported for most of the methods. [`TSDataset`](../../PythonAPI/Chronos/tsdataset.html) can be initialized from a pandas dataframe and be directly used in `AutoTSEstimator`. It can also be converted to a pandas dataframe or numpy ndarray for Forecasters and Anomaly Detectors.

[`TSDataset`](../../PythonAPI/Chronos/tsdataset.html) is designed for general time series processing while providing many specific operations for the convenience of different tasks (e.g. forecasting, anomaly detection).

#### **4.1 Basic concepts**
A time series can be interpreted as a sequence of real values ordered by timestamp, while a time series dataset can combine one or many time series. A dataset may contain multiple time series since users may collect different time series over the same or different periods of time (e.g. an AIOps dataset may have CPU usage ratio and memory usage ratio data for two servers over a period of time; this dataset contains four time series).

In [`TSDataset`](../../PythonAPI/Chronos/tsdataset.html), we provide **2** possible dimensions to construct a high dimensional time series dataset (i.e. the **feature dimension** and the **id dimension**).
* feature dimension: Time series along this dimension might be independent or related. Though they may be related, they are assumed to have **different patterns and distributions** and to be collected over the **same period of time**. For example, the CPU usage ratio and memory usage ratio for the same server over a period of time.
* id dimension: Time series along this dimension are assumed to have the **same patterns and distributions** and might be collected over the **same or different periods of time**. For example, the CPU usage ratio for two servers over a period of time.

All the preprocessing operations will be done on each independent time series (i.e. on both the feature dimension and the id dimension), while feature scaling will only be carried out on the feature dimension.
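To make the two dimensions concrete, here is a minimal sketch (plain pandas/numpy, with illustrative column names) that builds a toy dataframe containing four time series: two ids (servers) times two feature columns.

```python
import numpy as np
import pandas as pd

# 2 ids ("Server id" 0 and 1) x 2 features ("CPU usage", "Mem usage") = 4 time series,
# all collected over the same 60-minute period.
timestamps = pd.date_range("2021-07-09 08:39", periods=60, freq="T")
df = pd.concat(
    [pd.DataFrame({"Server id": server_id,
                   "Datetime": timestamps,
                   "CPU usage": np.random.randint(50, 100, size=60),
                   "Mem usage": np.random.randint(10, 90, size=60)})
     for server_id in (0, 1)],
    ignore_index=True)
# "Server id" spans the id dimension; "CPU usage"/"Mem usage" span the feature dimension.
```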
#### **4.2 Create a TSDataset**
Currently [`TSDataset`](../../PythonAPI/Chronos/tsdataset.html) only supports initializing from a pandas dataframe through [`TSDataset.from_pandas`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.from_pandas). A typical valid time series dataframe `df` is shown below.

You can initialize a [`TSDataset`](../../PythonAPI/Chronos/tsdataset.html) by simply:
```python
# Server id   Datetime         CPU usage   Mem usage
# 0           08:39 2021/7/9   93          24
# 0           08:40 2021/7/9   91          24
# 0           08:41 2021/7/9   93          25
# 0           ...              ...         ...
# 1           08:39 2021/7/9   73          79
# 1           08:40 2021/7/9   72          80
# 1           08:41 2021/7/9   79          80
# 1           ...              ...         ...
tsdata = TSDataset.from_pandas(df,
                               dt_col="Datetime",
                               id_col="Server id",
                               target_col=["CPU usage",
                                           "Mem usage"])
```
`target_col` is a list of all elements along the feature dimension, while `id_col` is the identifier that distinguishes the id dimension. `dt_col` is the datetime column. For `extra_feature_col` (not shown in this case), you should list those features that you are not interested in for your task (e.g. you will **not** perform the forecasting or anomaly detection task on this column).

If you are building a prototype for your forecasting/anomaly detection task and you need to split your dataset into train/valid/test sets, you can use the `with_split` parameter. [`TSDataset`](../../PythonAPI/Chronos/tsdataset.html) supports splitting by ratio through `val_ratio` and `test_ratio`.
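A minimal sketch of such a split is shown below. It only relies on the `with_split`, `val_ratio` and `test_ratio` arguments mentioned above; the assumption that `from_pandas` then returns three `TSDataset` objects (train/valid/test) should be verified against the API doc.

```python
# assumed return: one TSDataset each for train/valid/test when with_split=True
tsdata_train, tsdata_valid, tsdata_test = TSDataset.from_pandas(
    df,
    dt_col="Datetime",
    id_col="Server id",
    target_col=["CPU usage", "Mem usage"],
    with_split=True,
    val_ratio=0.1,   # 10% of each time series for validation
    test_ratio=0.1)  # 10% of each time series for test
```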
#### **4.3 Time series dataset preprocessing**
[`TSDataset`](../../PythonAPI/Chronos/tsdataset.html) now supports [`impute`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.impute), [`deduplicate`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.deduplicate) and [`resample`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.resample). You may fill missing points with [`impute`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.impute) in different modes. You may remove records that are completely identical with [`deduplicate`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.deduplicate). You may change the sampling frequency with [`resample`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.resample). A typical cascade call for preprocessing is:
```python
tsdata.deduplicate().resample(interval="2s").impute()
```
#### **4.4 Feature scaling**
Scaling all features to one distribution is important, especially when we want to train a machine learning/deep learning system. [`TSDataset`](../../PythonAPI/Chronos/tsdataset.html) supports all the scalers in sklearn through the [`scale`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.scale) and [`unscale`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.unscale) methods. Since a scaler should not be fit on the validation and test sets, a typical call for scaling operations is:
```python
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# scale
for tsdata in [tsdata_train, tsdata_valid, tsdata_test]:
    tsdata.scale(scaler, fit=tsdata is tsdata_train)
# unscale
for tsdata in [tsdata_train, tsdata_valid, tsdata_test]:
    tsdata.unscale()
```
[`unscale_numpy`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.unscale_numpy) is specially designed for forecasters. Users may unscale the output of a forecaster with this operation. A typical call is:
```python
x, y = tsdata_test.scale(scaler)\
                  .roll(lookback=..., horizon=...)\
                  .to_numpy()
yhat = forecaster.predict(x)
unscaled_yhat = tsdata_test.unscale_numpy(yhat)
unscaled_y = tsdata_test.unscale_numpy(y)
# calculate metrics with unscaled_yhat and unscaled_y
```
#### **4.5 Feature generation**
Other than the historical target data and the extra features provided by users, some additional features can be generated automatically by [`TSDataset`](../../PythonAPI/Chronos/tsdataset.html). [`gen_dt_feature`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.gen_dt_feature) helps users generate 10 datetime-related features (e.g. MONTH, WEEKDAY, ...). [`gen_global_feature`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.gen_global_feature) and [`gen_rolling_feature`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.gen_rolling_feature) are powered by tsfresh to generate aggregated features (e.g. min, max, ...) for each time series or rolling window respectively.
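A sketch of a typical feature-generation cascade is shown below; the exact arguments (e.g. `settings`, `window_size`) are illustrative and should be checked against the API doc linked above.

```python
# add datetime features (MONTH, WEEKDAY, ...) and tsfresh-powered rolling aggregations
tsdata.gen_dt_feature()\
      .gen_rolling_feature(settings="minimal", window_size=24)
```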
#### **4.6 Sampling and exporting**
A time series dataset needs to be sampled and exported as a numpy ndarray/dataloader to be used in machine learning and deep learning models (e.g. forecasters, anomaly detectors, auto models, etc.).
```eval_rst
.. warning::
    You don't need to call any sampling or exporting methods introduced in this section when using `AutoTSEstimator`.
```
##### **4.6.1 Roll sampling**
Roll sampling (or sliding window sampling) is useful when you want to train an RR-type supervised deep learning forecasting model. It works as the [diagram](#RR-forecast-image) shows. Please refer to the API doc [`roll`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.roll) for the detailed behavior. Users can simply export the sampling result as a numpy ndarray with [`to_numpy`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.to_numpy) or as a pytorch dataloader with [`to_torch_data_loader`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.to_torch_data_loader).

```eval_rst
.. note::
    **Difference between `roll` and `to_torch_data_loader`**:

    `.roll(...)` performs the rolling before RR forecasters/auto models training, while `.to_torch_data_loader(roll=True, ...)` performs the rolling during training.

    It is fine to use either of them when you have a relatively small dataset (less than 1G). `.to_torch_data_loader(roll=True, ...)` is recommended when you have a large dataset (larger than 1G) to save memory usage.
```

```eval_rst
.. note::
    **Roll sampling format**:

    As described in the RR style forecasting concept, the sampling result will have the following shape requirement.

    | x: (sample_num, lookback, input_feature_num)
    | y: (sample_num, horizon, output_feature_num)

    Please follow the same shape if you use a customized data creator.
```

A typical call of [`roll`](../../PythonAPI/Chronos/tsdataset.html#zoo.chronos.data.tsdataset.TSDataset.roll) is as follows:
```python
# forecaster
x, y = tsdata.roll(lookback=..., horizon=...).to_numpy()
forecaster.fit((x, y))
```
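For the dataloader route mentioned in the note above, a minimal sketch is given below; the `roll`, `lookback` and `horizon` arguments of `to_torch_data_loader` are assumed from the API doc, and the batch shapes follow the roll sampling format.

```python
# rolling is performed inside the dataloader, so the fully rolled arrays
# are never materialized in memory at once
train_loader = tsdata.to_torch_data_loader(roll=True, lookback=100, horizon=10)
x_batch, y_batch = next(iter(train_loader))
# x_batch: (batch_size, lookback, input_feature_num)
# y_batch: (batch_size, horizon, output_feature_num)
```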
|
||||||
|
|
||||||
|
##### **4.6.2 Pandas Exporting**
|
||||||
|
Now we support pandas dataframe exporting through `to_pandas()` for users to carry out their own transformation. Here is an example of using only one time series for anomaly detection.
|
||||||
|
```python
|
||||||
|
# anomaly detector on "target" col
|
||||||
|
x = tsdata.to_pandas()["target"].to_numpy()
|
||||||
|
anomaly_detector.fit(x)
|
||||||
|
```
|
||||||
|
View [TSDataset API Doc](../../PythonAPI/Chronos/tsdataset.html#) for more details.
|
||||||
|
|
||||||
|
---
|
||||||
|
### **5 Forecasting**
|
||||||
|
|
||||||
|
_Chronos_ provides both deep learning/machine learning models and traditional statistical models for forecasting.
|
||||||
|
|
||||||
|
There're three ways to do forecasting:
|
||||||
|
- Use highly integrated [**AutoTS pipeline**](#use-autots-pipeline) with auto feature generation, data pre/post-processing, hyperparameter optimization.
|
||||||
|
- Use [**auto forecasting models**](#use-auto-forecasting-model) with auto hyperparameter optimization.
|
||||||
|
- Use [**standalone forecasters**](#use-standalone-forecaster-pipeline).
|
||||||
|
|
||||||
|
<span id="supported_forecasting_model"></span>
|
||||||
|
|
||||||
|
| Model | Style | Multi-Variate | Multi-Step | Distributed\* | Auto Models | AutoTS | Backend |
|
||||||
|
| ----------------- | ----- | ------------- | ---------- | ----------- | ----------- | ----------- | ----------- |
|
||||||
|
| LSTM | RR | ✅ | ❌ | ✅ | ✅ | ✅ | pytorch |
|
||||||
|
| Seq2Seq | RR | ✅ | ✅ | ✅ | ✅ | ✅ | pytorch |
|
||||||
|
| TCN | RR | ✅ | ✅ | ✅ | ✅ | ✅ | pytorch |
|
||||||
|
| MTNet | RR | ✅ | ❌ | ✅ | ❌ | ✳️\*\*\* | tensorflow |
|
||||||
|
| TCMF | TS | ✅ | ✅ | ✳️\*\* | ❌ | ❌ | pytorch |
|
||||||
|
| Prophet | TS | ❌ | ✅ | ❌ | ✅ | ❌ | prophet |
|
||||||
|
| ARIMA | TS | ❌ | ✅ | ❌ | ✅ | ❌ | pmdarima |
|
||||||
|
|
||||||
|
\* Distributed training/inferencing is only supported by standalone forecasters.<br>
|
||||||
|
\*\* TCMF only partially supports distributed training.<br>
|
||||||
|
\*\*\* Auto tuning of MTNet is only supported in our deprecated AutoTS API.<br>
|
||||||
|
|
||||||
|
|
||||||
|
#### **5.1 Time Series Forecasting Concepts**
|
||||||
|
Time series forecasting is one of the most popular tasks on time series data. **In short, forecasting aims at predicting the future by using the knowledge you can learn from the history.**
|
||||||
|
|
||||||
|
##### **5.1.1 Traditional Statistical(TS) Style**
|
||||||
|
Traditionally, the time series forecasting problem was formulated with rich mathematical foundations and statistical models. Typically, one model can only handle one time series: it is fit on the whole series up to the last observed timestamp and predicts the next few steps. Training (fitting) is needed every time you change the last observed timestamp.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
##### **5.1.2 Regular Regression(RR) Style**
|
||||||
|
In recent years, common deep learning architectures (e.g. RNN, CNN, Transformer) have been successfully applied to the forecasting problem. In this style, forecasting is transformed into a supervised regression problem, and a single model can predict several time series. Typically, a sliding-window based sampling process is needed; some terminology is explained below:
|
||||||
|
|
||||||
|
- `lookback` / `past_seq_len`: the length of historical data along time. This number is tunable.
|
||||||
|
- `horizon` / `future_seq_len`: the length of predicted data along time. This number depends on the task definition. If this value is larger than 1, the forecasting task is *Multi-Step*.
|
||||||
|
- `input_feature_num`: The number of variables the model can observe. This number is tunable since we can select a subset of extra features to use.
|
||||||
|
- `output_feature_num`: The number of variables the model predicts. This number depends on the task definition. If this value is larger than 1, the forecasting task is *Multi-Variate*.
|
||||||
|
|
||||||
|
<span id="RR-forecast-image"></span>
|
||||||
|

|
||||||
|
|
||||||
|
#### **5.2 Use AutoTS Pipeline**
|
||||||
|
For the AutoTS Pipeline, we will leverage `AutoTSEstimator`, `TSPipeline` and preferably `TSDataset`. A typical usage of the AutoTS pipeline contains 3 steps.
|
||||||
|
1. Prepare a `TSDataset` or customized data creator.
|
||||||
|
2. Init an `AutoTSEstimator` and call `.fit()` on the data.
|
||||||
|
3. Use the returned `TSPipeline` for further development.
|
||||||
|
```eval_rst
|
||||||
|
.. warning::
|
||||||
|
`AutoTSTrainer` workflow has been deprecated, no feature updates or performance improvement will be carried out. Users of `AutoTSTrainer` may refer to `Chronos API doc <https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/autots.html>`_.
|
||||||
|
```
|
||||||
|
```eval_rst
|
||||||
|
.. note::
|
||||||
|
`AutoTSEstimator` currently only supports the pytorch backend.
|
||||||
|
```
|
||||||
|
View [Quick Start](https://analytics-zoo.readthedocs.io/en/latest/doc/Chronos/QuickStart/chronos-autotsest-quickstart.html) for a more detailed example.
|
||||||
|
|
||||||
|
##### **5.2.1 Prepare dataset**
|
||||||
|
`AutoTSEstimator` supports 2 types of data input.
|
||||||
|
|
||||||
|
You can easily prepare your data in a `TSDataset` (recommended). You may refer to [here](#TSDataset) for detailed information on preparing your `TSDataset` with proper data processing and feature generation. Here is a typical `TSDataset` preparation.
|
||||||
|
```python
|
||||||
|
from zoo.chronos.data import TSDataset
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
|
tsdata_train, tsdata_val, tsdata_test\
|
||||||
|
= TSDataset.from_pandas(df, dt_col="timestamp", target_col="value", with_split=True, val_ratio=0.1, test_ratio=0.1)
|
||||||
|
|
||||||
|
standard_scaler = StandardScaler()
|
||||||
|
for tsdata in [tsdata_train, tsdata_val, tsdata_test]:
|
||||||
|
tsdata.gen_dt_feature()\
|
||||||
|
.impute(mode="last")\
|
||||||
|
.scale(standard_scaler, fit=(tsdata is tsdata_train))
|
||||||
|
```
|
||||||
|
You can also create your own data creator. The data creator takes a dictionary config and returns a pytorch dataloader. Users may define their own customized keys and add them to the search space. "batch_size" is the only fixed key.
|
||||||
|
```python
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
def training_data_creator(config):
|
||||||
|
    return DataLoader(..., batch_size=config['batch_size'])
|
||||||
|
```
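Here is a slightly more complete sketch of a data creator, assuming preprocessed numpy arrays `x` and `y` (e.g. exported via `TSDataset.roll(...).to_numpy()`) are available in scope; `x`/`y` are assumptions for illustration.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

def training_data_creator(config):
    # x: (sample_num, lookback, input_feature_num), y: (sample_num, horizon, output_feature_num)
    dataset = TensorDataset(torch.from_numpy(x).float(),
                            torch.from_numpy(y).float())
    return DataLoader(dataset,
                      batch_size=config['batch_size'],  # "batch_size" is the only fixed key
                      shuffle=True)
```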
|
||||||
|
##### **5.2.2 Create an AutoTSEstimator**
|
||||||
|
`AutoTSEstimator` depends on the [Distributed Hyper-parameter Tuning](../../Orca/Overview/distribute-tuning.html) supported by Project Orca. It also provides time-series-specific functionalities and optimizations. Here is a typical initialization process.
|
||||||
|
```python
|
||||||
|
import zoo.orca.automl.hp as hp
|
||||||
|
from zoo.chronos.autots import AutoTSEstimator
|
||||||
|
auto_estimator = AutoTSEstimator(model='lstm',
|
||||||
|
search_space='normal',
|
||||||
|
past_seq_len=hp.randint(1, 10),
|
||||||
|
future_seq_len=1,
|
||||||
|
selected_features="auto")
|
||||||
|
```
|
||||||
|
We prebuild three default search spaces for each built-in model, which you can use by setting `search_space` to "minimal", "normal", or "large"; you can also define your own search space in a dictionary. The larger the search space, the better accuracy you will get, and the more time it will cost.
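For reference, a custom search space is just a dictionary mapping hyperparameter names to `hp` sampling functions. The keys below (`hidden_dim`, `layer_num`, `lr`, `dropout`) are illustrative LSTM-style hyperparameters; the valid keys depend on the chosen model, so check the model's API doc before using them.

```python
import zoo.orca.automl.hp as hp

# an illustrative custom search space; keys depend on the chosen model
custom_search_space = {
    "hidden_dim": hp.choice([32, 64, 128]),   # discrete choice
    "layer_num": hp.randint(1, 3),            # integer range
    "lr": hp.uniform(0.001, 0.01),            # continuous range
    "dropout": hp.uniform(0.1, 0.3),
}

auto_estimator = AutoTSEstimator(model='lstm',
                                 search_space=custom_search_space,
                                 past_seq_len=hp.randint(1, 10),
                                 future_seq_len=1)
```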
|
||||||
|
|
||||||
|
`past_seq_len` can be set as an hp sampling function; the proper range is highly related to your data. A range between 0.5 and 3 cycles is reasonable.
|
||||||
|
|
||||||
|
`selected_features` is set to "auto" by default, in which case the `AutoTSEstimator` will find the best subset of extra features to help the forecasting task.
|
||||||
|
##### **5.2.3 Fit on AutoTSEstimator**
|
||||||
|
Fitting on `AutoTSEstimator` is fairly easy. A `TSPipeline` will be returned once fitting is completed.
|
||||||
|
```python
|
||||||
|
ts_pipeline = auto_estimator.fit(data=tsdata_train,
|
||||||
|
validation_data=tsdata_val,
|
||||||
|
batch_size=hp.randint(32, 64),
|
||||||
|
epochs=5)
|
||||||
|
```
|
||||||
|
For detailed information and settings, please refer to the [AutoTSEstimator API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/autotsestimator.html#id1).
|
||||||
|
##### **5.2.4 Development on TSPipeline**
|
||||||
|
You may carry out prediction, evaluation, incremental training or save/load for further development.
|
||||||
|
```python
|
||||||
|
# predict with the best trial
|
||||||
|
y_pred = ts_pipeline.predict(tsdata_test)
|
||||||
|
|
||||||
|
# evaluate the result pipeline
|
||||||
|
mse, smape = ts_pipeline.evaluate(tsdata_test, metrics=["mse", "smape"])
|
||||||
|
print("Evaluate: the mean square error is", mse)
|
||||||
|
print("Evaluate: the smape value is", smape)
|
||||||
|
|
||||||
|
# save the pipeline
|
||||||
|
my_ppl_file_path = "/tmp/saved_pipeline"
|
||||||
|
ts_pipeline.save(my_ppl_file_path)
|
||||||
|
|
||||||
|
# restore the pipeline for further deployment
|
||||||
|
from zoo.chronos.autots import TSPipeline
|
||||||
|
loaded_ppl = TSPipeline.load(my_ppl_file_path)
|
||||||
|
```
|
||||||
|
For detailed information, please refer to the [TSPipeline API doc](../../PythonAPI/Chronos/autotsestimator.html#tspipeline).
|
||||||
|
|
||||||
|
```eval_rst
|
||||||
|
.. note::
|
||||||
|
`init_orca_context` is not needed if you just use the trained TSPipeline for inference, evaluation or incremental fitting.
|
||||||
|
```
|
||||||
|
```eval_rst
|
||||||
|
.. note::
|
||||||
|
Incremental fitting on TSPipeline just updates the model weights in the standard way, which does not involve AutoML.
|
||||||
|
```
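A minimal sketch of such an incremental fit is shown below. It assumes `TSPipeline.fit` accepts a `TSDataset` plus an `epochs` argument, and `tsdata_new` is a hypothetical dataset of newly collected data; see the TSPipeline API doc above for the exact parameters.

```python
# incrementally fit the loaded pipeline on newly collected data (no AutoML involved)
loaded_ppl.fit(tsdata_new, epochs=2)

# then predict/evaluate as usual
y_pred_new = loaded_ppl.predict(tsdata_test)
```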
|
||||||
|
|
||||||
|
#### **5.3 Use Standalone Forecaster Pipeline**
|
||||||
|
|
||||||
|
_Chronos_ provides a set of standalone time series forecasters without AutoML support, including deep learning models as well as traditional statistical models.
|
||||||
|
|
||||||
|
View some example notebooks for [Network Traffic Prediction](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/network_traffic/).
|
||||||
|
|
||||||
|
The common process of using a Forecaster looks like below.
|
||||||
|
```python
|
||||||
|
# set fixed hyperparameters, loss, metric...
|
||||||
|
f = Forecaster(...)
|
||||||
|
# input data, batch size, epoch...
|
||||||
|
f.fit(...)
|
||||||
|
# input test data x, batch size...
|
||||||
|
f.predict(...)
|
||||||
|
```
|
||||||
|
The input data can be easily obtained from `TSDataset`.
|
||||||
|
View [Quick Start](../QuickStart/chronos-tsdataset-forecaster-quickstart.md) for a more detailed example. Refer to [API docs](../../PythonAPI/Chronos/forecasters.html) of each Forecaster for detailed usage instructions and examples.
|
||||||
|
|
||||||
|
<span id="LSTMForecaster"></span>
|
||||||
|
###### **5.3.1 LSTMForecaster**
|
||||||
|
|
||||||
|
LSTMForecaster wraps a vanilla LSTM model, and is suitable for univariate time series forecasting.
|
||||||
|
|
||||||
|
View Network Traffic Prediction [notebook](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/network_traffic/network_traffic_model_forecasting.ipynb) and [LSTMForecaster API Doc](../../PythonAPI/Chronos/forecasters.html#lstmforecaster) for more details.
|
||||||
|
|
||||||
|
<span id="Seq2SeqForecaster"></span>
|
||||||
|
###### **5.3.2 Seq2SeqForecaster**
|
||||||
|
|
||||||
|
Seq2SeqForecaster wraps a sequence-to-sequence model based on LSTM, and is suitable for multivariate and multi-step time series forecasting.
|
||||||
|
|
||||||
|
View [Seq2SeqForecaster API Doc](../../PythonAPI/Chronos/forecasters.html#seq2seqforecaster) for more details.
|
||||||
|
|
||||||
|
<span id="TCNForecaster"></span>
|
||||||
|
###### **5.3.3 TCNForecaster**
|
||||||
|
|
||||||
|
Temporal Convolutional Network (TCN) is a neural network that uses a convolutional architecture rather than recurrent networks. It supports multi-step and multi-variate cases. Causal convolutions enable large-scale parallel computing, which gives TCN lower inference time than RNN-based models such as LSTM.
|
||||||
|
|
||||||
|
View Network Traffic multivariate multistep Prediction [notebook](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/network_traffic/network_traffic_multivariate_multistep_tcnforecaster.ipynb) and [TCNForecaster API Doc](../../PythonAPI/Chronos/forecasters.html#tcnforecaster) for more details.
|
||||||
|
|
||||||
|
<span id="MTNetForecaster"></span>
|
||||||
|
###### **5.3.4 MTNetForecaster**
|
||||||
|
|
||||||
|
MTNetForecaster wraps a MTNet model. The model architecture mostly follows the [MTNet paper](https://arxiv.org/abs/1809.02105) with slight modifications, and is suitable for multivariate time series forecasting.
|
||||||
|
|
||||||
|
View Network Traffic Prediction [notebook](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/network_traffic/network_traffic_model_forecasting.ipynb) and [MTNetForecaster API Doc](../../PythonAPI/Chronos/forecasters.html#mtnetforecaster) for more details.
|
||||||
|
|
||||||
|
<span id="TCMFForecaster"></span>
|
||||||
|
###### **5.3.5 TCMFForecaster**
|
||||||
|
|
||||||
|
TCMFForecaster wraps a model architecture that follows the implementation of the [DeepGLO paper](https://arxiv.org/abs/1905.03806) with slight modifications. It is especially suitable for extremely high-dimensional (up to millions of time series) multivariate time series forecasting.
|
||||||
|
|
||||||
|
View High-dimensional Electricity Data Forecasting [example](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/examples/tcmf/run_electricity.py) and [TCMFForecaster API Doc](../../PythonAPI/Chronos/forecasters.html#tcmfforecaster) for more details.
|
||||||
|
|
||||||
|
<span id="ARIMAForecaster"></span>
|
||||||
|
###### **5.3.6 ARIMAForecaster**
|
||||||
|
|
||||||
|
ARIMAForecaster wraps an ARIMA model and is suitable for univariate time series forecasting. It works best with data that show evidence of non-stationarity in the mean; an initial differencing step (corresponding to the "I", integrated, part of the model) can be applied one or more times to eliminate the non-stationarity of the mean function.
|
||||||
|
|
||||||
|
View [ARIMAForecaster API Doc](../../PythonAPI/Chronos/forecasters.html#arimaforecaster) for more details.
|
||||||
|
|
||||||
|
<span id="ProphetForecaster"></span>
|
||||||
|
###### **5.3.7 ProphetForecaster**
|
||||||
|
|
||||||
|
ProphetForecaster wraps the Prophet model ([site](https://github.com/facebook/prophet)), an additive model in which non-linear trends are fit with yearly, weekly, and daily seasonality plus holiday effects; it is suitable for univariate time series forecasting. It works best with time series that have strong seasonal effects and several seasons of historical data, is robust to missing data and shifts in the trend, and typically handles outliers well.
|
||||||
|
|
||||||
|
View Stock Prediction [notebook](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/fsi/stock_prediction_prophet.ipynb) and [ProphetForecaster API Doc](../../PythonAPI/Chronos/forecasters.html#prophetforecaster) for more details.
|
||||||
|
|
||||||
|
#### **5.4 Use Auto forecasting model**
|
||||||
|
Auto forecasting models are designed to be used in exactly the same way as Forecasters. The only difference is that you can set hp search functions for the hyperparameters, and the `.fit()` method will search for the best hyperparameter setting.
|
||||||
|
```python
|
||||||
|
# set hyperparameters in hp search function, loss, metric...
|
||||||
|
f = Forecaster(...)
|
||||||
|
# input data, batch size, epoch...
|
||||||
|
f.fit(...)
|
||||||
|
# input test data x, batch size...
|
||||||
|
f.predict(...)
|
||||||
|
```
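For example, a hyperparameter can be given an hp sampling function instead of a fixed value. The sketch below uses `AutoLSTM` with illustrative hyperparameter names (`hidden_dim`, `lr`); the import and exact constructor arguments differ per auto model, so check the API doc linked below before use.

```python
import zoo.orca.automl.hp as hp

# hyperparameters given as hp sampling functions are searched by .fit()
auto_lstm = AutoLSTM(...,
                     hidden_dim=hp.choice([32, 64]),  # illustrative hyperparameter name
                     lr=hp.uniform(0.001, 0.01))      # illustrative hyperparameter name
auto_lstm.fit(...)
auto_lstm.predict(...)
```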
|
||||||
|
The input data can be easily obtained from `TSDataset`. Users can refer to the detailed [API doc](../../PythonAPI/Chronos/automodels.html).
|
||||||
|
|
||||||
|
---
|
||||||
|
### **6 Anomaly Detection**
|
||||||
|
|
||||||
|
Anomaly Detection detects abnormal samples in a given time series. _Chronos_ provides a set of unsupervised anomaly detectors.
|
||||||
|
|
||||||
|
View some example notebooks for [Datacenter AIOps](https://github.com/intel-analytics/analytics-zoo/tree/master/pyzoo/zoo/chronos/use-case/AIOps).
|
||||||
|
|
||||||
|
#### **6.1 ThresholdDetector**
|
||||||
|
|
||||||
|
ThresholdDetector detects anomaly based on threshold. It can be used to detect anomaly on a given time series ([notebook](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/AIOps/AIOps_anomaly_detect_unsupervised.ipynb)), or used together with [Forecasters](#forecasting) to detect anomaly on new coming samples ([notebook](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/AIOps/AIOps_anomaly_detect_unsupervised_forecast_based.ipynb)).
|
||||||
|
|
||||||
|
View [ThresholdDetector API Doc](../../PythonAPI/Chronos/anomaly_detectors.html#chronos-model-anomaly-th-detector) for more details.
|
||||||
|
|
||||||
|
|
||||||
|
#### **6.2 AEDetector**
|
||||||
|
|
||||||
|
AEDetector detects anomaly based on the reconstruction error of an autoencoder network.
|
||||||
|
|
||||||
|
View anomaly detection [notebook](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/AIOps/AIOps_anomaly_detect_unsupervised.ipynb) and [AEDetector API Doc](../../PythonAPI/Chronos/anomaly_detectors.html#chronos-model-anomaly-ae-detector) for more details.
|
||||||
|
|
||||||
|
#### **6.3 DBScanDetector**
|
||||||
|
|
||||||
|
DBScanDetector uses the DBSCAN clustering algorithm for anomaly detection.
|
||||||
|
|
||||||
|
View anomaly detection [notebook](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/AIOps/AIOps_anomaly_detect_unsupervised.ipynb) and [DBScanDetector API Doc](../../PythonAPI/Chronos/anomaly_detectors.html#chronos-model-anomaly-dbscan-detector) for more details.
|
||||||
|
|
||||||
|
---
|
||||||
|
### **7 Generate Synthetic Data**
|
||||||
|
|
||||||
|
Chronos provides simulators to generate synthetic time series data for users who want to overcome limited data access in a deep learning/machine learning project, or who simply want some synthetic data to play with.
|
||||||
|
|
||||||
|
```eval_rst
|
||||||
|
.. note::
|
||||||
|
DPGANSimulator is the only simulator Chronos provides at the moment; more simulators are on their way.
|
||||||
|
```
|
||||||
|
|
||||||
|
#### **7.1 DPGANSimulator**
|
||||||
|
`DPGANSimulator` adopts DoppelGANger, proposed in [Using GANs for Sharing Networked Time Series Data: Challenges, Initial Promise, and Open Questions](http://arxiv.org/abs/1909.13403). The method is a data-driven, unsupervised method based on a deep learning model with a GAN (Generative Adversarial Network) structure. The model features a pair of separate attribute and feature generators and their corresponding discriminators. `DPGANSimulator` also supports a rich and comprehensive input data (training data) format and outperforms other algorithms in many evaluation metrics.
|
||||||
|
|
||||||
|
Users may refer to the detailed [API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/simulator.html#module-zoo.chronos.simulator.doppelganger_simulator).
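A minimal usage sketch is shown below. The import path follows the module named in the API doc above, but the method names `fit` and `generate` and their arguments are assumptions based on the simulator's description; please confirm them against the API doc.

```python
from zoo.chronos.simulator.doppelganger_simulator import DPGANSimulator  # module path per the API doc above

simulator = DPGANSimulator(...)              # model/size options elided; see the API doc
simulator.fit(...)                           # train on your real time series (assumed method name)
synthetic_samples = simulator.generate(...)  # draw synthetic series from the trained generator (assumed method name)
```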
|
||||||
|
|
||||||
|
---
|
||||||
|
### **8 Useful Functionalities**
|
||||||
|
|
||||||
|
<span id="Visualization"></span>
|
||||||
|
#### **8.1 AutoML Visualization**
|
||||||
|
|
||||||
|
AutoML visualization provides two kinds of visualization. You may use them while fitting on auto models or AutoTS pipeline.
|
||||||
|
* During the searching process, the visualizations of each trial are shown and updated every 30 seconds. (Monitor view)
|
||||||
|
* After the searching process, a leaderboard of each trial's configs and metrics is shown. (Leaderboard view)
|
||||||
|
|
||||||
|
**Note**: AutoML visualization is based on tensorboard and tensorboardx. They should be installed properly before the training starts.
|
||||||
|
|
||||||
|
<span id="monitor_view">**Monitor view**</span>
|
||||||
|
|
||||||
|
Before training, start the tensorboard server through
|
||||||
|
|
||||||
|
```bash
|
||||||
|
tensorboard --logdir=<logs_dir>/<name>
|
||||||
|
```
|
||||||
|
|
||||||
|
`logs_dir` is the log directory you set for your predictor (e.g. `AutoTSEstimator`, `AutoTCN`, etc.). `name` is the name parameter you set for your predictor.
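For example, assuming your predictor exposes `logs_dir` and `name` constructor arguments (as `AutoTSEstimator` does; check the predictor's API doc), the log location is determined like this:

```python
# logs are written under <logs_dir>/<name>; point tensorboard at that directory
auto_estimator = AutoTSEstimator(model='lstm',
                                 logs_dir="/tmp/my_automl_logs",  # <logs_dir>
                                 name="my_experiment")            # <name>
# then: tensorboard --logdir=/tmp/my_automl_logs/my_experiment
```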
|
||||||
|
|
||||||
|
The data in SCALARS tag will be updated every 30 seconds for users to see the training progress.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
After training, start the tensorboard server through
|
||||||
|
|
||||||
|
```bash
|
||||||
|
tensorboard --logdir=<logs_dir>/<name>_leaderboard/
|
||||||
|
```
|
||||||
|
|
||||||
|
where `logs_dir` and `name` are the same as stated in [Monitor view](#monitor_view).
|
||||||
|
|
||||||
|
A dashboard of each trial's configs and metrics is shown in the SCALARS tag.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
A leaderboard of each trial's configs and metrics is shown in the HPARAMS tag.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
**Use visualization in Jupyter Notebook**
|
||||||
|
|
||||||
|
You can enable a tensorboard view in jupyter notebook by the following code.
|
||||||
|
|
||||||
|
```python
|
||||||
|
%load_ext tensorboard
|
||||||
|
# for scalar view
|
||||||
|
%tensorboard --logdir <logs_dir>/<name>/
|
||||||
|
# for leaderboard view
|
||||||
|
%tensorboard --logdir <logs_dir>/<name>_leaderboard/
|
||||||
|
```
|
||||||
|
|
||||||
|
#### **8.2 ONNX/ONNX Runtime support**
|
||||||
|
Users may export their trained (with or without auto tuning) model to an ONNX file and deploy it on other services. Chronos also provides internal onnxruntime inference support for **users who pursue low latency and higher throughput during inference on a single node**.
|
||||||
|
|
||||||
|
LSTM, TCN and Seq2seq support onnx in their forecasters, auto models and AutoTS. When users use these built-in models, they may call `predict_with_onnx`/`evaluate_with_onnx` for prediction or evaluation. They may also call `export_onnx_file` to export the onnx model file and `build_onnx` to change onnxruntime's settings (optional).
|
||||||
|
|
||||||
|
```python
|
||||||
|
f = Forecaster(...)
|
||||||
|
f.fit(...)
|
||||||
|
f.predict_with_onnx(...)
|
||||||
|
```
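The export and tuning hooks mentioned above follow the same pattern; argument lists are elided here because they vary by forecaster, so refer to the corresponding API doc for the exact signatures.

```python
f.build_onnx(...)          # optional: adjust onnxruntime settings before inference
f.evaluate_with_onnx(...)  # evaluation through onnxruntime
f.export_onnx_file(...)    # export the onnx model file for deployment elsewhere
```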
|
||||||
|
#### **8.3 Distributed training**
|
||||||
|
LSTM, TCN and Seq2seq users can easily train their forecasters in a distributed fashion to **handle extra-large datasets and utilize a cluster**. The functionality is powered by Project Orca.
|
||||||
|
```python
|
||||||
|
f = Forecaster(..., distributed=True)
|
||||||
|
f.fit(...)
|
||||||
|
f.predict(...)
|
||||||
|
f.to_local() # collect the forecaster to single node
|
||||||
|
f.predict_with_onnx(...) # onnxruntime only supports single node
|
||||||
|
```
|
||||||
|
#### **8.4 XShardsTSDataset**
|
||||||
|
```eval_rst
|
||||||
|
.. warning::
|
||||||
|
`XShardsTSDataset` is still experimental.
|
||||||
|
```
|
||||||
|
`TSDataset` is a single-threaded library with reasonable speed on large datasets (~10G). When you handle an extra-large dataset or have limited memory on a single node, `XShardsTSDataset` can be used to provide the exact same functionality and usage as `TSDataset` in a distributed fashion.
|
||||||
|
|
||||||
|
```python
|
||||||
|
# a fully distributed forecaster pipeline
|
||||||
|
from zoo.orca.data.pandas import read_csv
|
||||||
|
from zoo.chronos.data.experimental import XShardsTSDataset
|
||||||
|
|
||||||
|
shards = read_csv("hdfs://...")
|
||||||
|
tsdata, _, test_tsdata = XShardsTSDataset.from_xshards(...)
|
||||||
|
tsdata_xshards = tsdata.roll(...).to_xshards()
|
||||||
|
test_tsdata_xshards = test_tsdata.roll(...).to_xshards()
|
||||||
|
|
||||||
|
f = Forecaster(..., distributed=True)
|
||||||
|
f.fit(tsdata_xshards, ...)
|
||||||
|
f.predict(test_tsdata_xshards, ...)
|
||||||
|
```
|
||||||
|
|
||||||
|
### **9 Examples and Demos**
|
||||||
|
- Quickstarts
|
||||||
|
- [Use AutoTSEstimator for Time-Series Forecasting](https://analytics-zoo.readthedocs.io/en/latest/doc/Chronos/QuickStart/chronos-autotsest-quickstart.html)
|
||||||
|
- [Use TSDataset and Forecaster for Time-Series Forecasting](https://analytics-zoo.readthedocs.io/en/latest/doc/Chronos/QuickStart/chronos-tsdataset-forecaster-quickstart.html)
|
||||||
|
- [Use Anomaly Detector for Unsupervised Anomaly Detection](https://analytics-zoo.readthedocs.io/en/latest/doc/Chronos/QuickStart/chronos-anomaly-detector.html)
|
||||||
|
- Examples
|
||||||
|
- [Use AutoLSTM on nyc taxi dataset](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/examples/auto_model/autolstm_nyc_taxi.py)
|
||||||
|
- [Use AutoProphet on nyc taxi dataset](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/examples/auto_model/autoprophet_nyc_taxi.py)
|
||||||
|
- [High dimension time series forecasting with Chronos TCMFForecaster](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/examples/tcmf/run_electricity.py)
|
||||||
|
- [Generate synthetic data with DPGANSimulator in a data-driven fashion](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/examples/simulator)
|
||||||
|
- Use cases
|
||||||
|
- [Unsupervised Anomaly Detection](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/AIOps/AIOps_anomaly_detect_unsupervised.ipynb)
|
||||||
|
- [Unsupervised Anomaly Detection based on Forecasts](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/AIOps/AIOps_anomaly_detect_unsupervised_forecast_based.ipynb)
|
||||||
|
- [Stock Price Prediction with LSTM](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/fsi/stock_prediction.ipynb)
|
||||||
|
- [Stock Price Prediction with ProphetForecaster and AutoProphet](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/fsi/stock_prediction_prophet.ipynb)
|
||||||
|
- [Network Traffic Forecasting with AutoTSEstimator](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/network_traffic/network_traffic_autots_forecasting_experimental.ipynb)
|
||||||
|
- [Network Traffic Forecasting (using multivariate time series data)](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/network_traffic/network_traffic_model_forecasting.ipynb)
|
||||||
|
- [Network Traffic Forecasting (using multistep time series data)](https://github.com/intel-analytics/analytics-zoo/blob/master/pyzoo/zoo/chronos/use-case/network_traffic/network_traffic_multivariate_multistep_tcnforecaster.ipynb)
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Use Anomaly Detector for Unsupervised Anomaly Detection
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_minn_traffic_anomaly_detector.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_minn_traffic_anomaly_detector.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will demonstrate how to use _Chronos Anomaly Detector_ for time series anomaly detection in 3 simple steps.**
|
||||||
|
|
||||||
|
### Step 0: Prepare Environment
|
||||||
|
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo[automl] # install either version 0.10 or latest nightly build
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 1: Prepare dataset
|
||||||
|
For demonstration, we use the publicly available real-time traffic data from the Twin Cities Metro area in Minnesota, collected by the Minnesota Department of Transportation. The detailed information can be found [here](https://github.com/numenta/NAB/blob/master/data/realTraffic/speed_7578.csv).
|
||||||
|
|
||||||
|
Now we need to do data cleaning and preprocessing on the raw data. Note that this part could vary for different datasets.
|
||||||
|
For this traffic data, the pre-processing contains 2 parts: <br>
|
||||||
|
1. Change the time interval from irregular to 5 minutes.<br>
|
||||||
|
2. Check missing values and handle missing data.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.chronos.data import TSDataset
|
||||||
|
|
||||||
|
tsdata = TSDataset.from_pandas(df, dt_col="timestamp", target_col="value")
|
||||||
|
df = tsdata.resample("5min")\
|
||||||
|
.impute(mode="linear")\
|
||||||
|
.to_pandas()
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 2: Use Chronos Anomaly Detector
|
||||||
|
Chronos provides many anomaly detectors for anomaly detection; here we use DBScanDetector as an example. More anomaly detectors can be found [here](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/anomaly_detectors.html).
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.chronos.detector.anomaly import DBScanDetector
|
||||||
|
|
||||||
|
ad = DBScanDetector(eps=0.3, min_samples=6)
|
||||||
|
ad.fit(df['value'].to_numpy())
|
||||||
|
anomaly_indexes = ad.anomaly_indexes()
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,116 @@
|
||||||
|
# Use AutoTSEstimator for Time-Series Forecasting
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_experimental_autots_nyc_taxi.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_experimental_autots_nyc_taxi.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will demonstrate how to use _Chronos AutoTSEstimator_ and _Chronos TSPipeline_ to auto tune a time series forecasting task and handle the whole model development process easily.**
|
||||||
|
|
||||||
|
### **Introduction**
|
||||||
|
|
||||||
|
Chronos provides `AutoTSEstimator` as a highly integrated solution for time series forecasting tasks with hyperparameter autotuning, auto feature selection and auto preprocessing. Users can prepare a `TSDataset` (recommended, used in this notebook) or their own data creator as input data. By constructing an `AutoTSEstimator` and calling `fit` on the data, a `TSPipeline` containing the best model and pre/post data processing will be returned for further development or deployment.
|
||||||
|
|
||||||
|
`AutoTSEstimator` is experimental and only supports LSTM, TCN, and Seq2seq models for now.
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7
|
||||||
|
conda activate zoo
|
||||||
|
pip install --pre --upgrade analytics-zoo[automl]
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Init Orca Context**
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context

if args.cluster_mode == "local":
|
||||||
|
init_orca_context(cluster_mode="local", cores=4) # run in local mode
|
||||||
|
elif args.cluster_mode == "k8s":
|
||||||
|
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2) # run on K8s cluster
|
||||||
|
elif args.cluster_mode == "yarn":
|
||||||
|
init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2) # run on Hadoop YARN cluster
|
||||||
|
```
|
||||||
|
This is the only place where you need to specify local or distributed mode. View [Orca Context](../../Orca/Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
### **Step 2: Prepare a TSDataset**
|
||||||
|
Prepare a `TSDataset` and call necessary operations on it.
|
||||||
|
```python
|
||||||
|
from zoo.chronos.data import TSDataset
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
|
tsdata_train, tsdata_val, tsdata_test\
|
||||||
|
= TSDataset.from_pandas(df, dt_col="timestamp", target_col="value", with_split=True, val_ratio=0.1, test_ratio=0.1)
|
||||||
|
|
||||||
|
standard_scaler = StandardScaler()
|
||||||
|
for tsdata in [tsdata_train, tsdata_val, tsdata_test]:
|
||||||
|
tsdata.gen_dt_feature()\
|
||||||
|
.impute(mode="last")\
|
||||||
|
.scale(standard_scaler, fit=(tsdata is tsdata_train))
|
||||||
|
```
|
||||||
|
There is no need to call `.roll()` or `.to_torch_data_loader()` in this step, which is the largest difference between the usage of `AutoTSEstimator` and _Chronos Forecaster_. `AutoTSEstimator` will do that automatically and tune the parameters as well.
|
||||||
|
|
||||||
|
Please call `.gen_dt_feature()` (recommended), `.gen_rolling_feature()`, and `.gen_global_feature()` to generate all candidate features to be selected by `AutoTSEstimator`, as well as your input extra features.
|
||||||
|
|
||||||
|
For detailed information, please refer to the [TSDataset API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/tsdataset.html#tsdataset) and [Time series data basic concepts](https://analytics-zoo.readthedocs.io/en/latest/doc/Chronos/Overview/chronos.html#data-processing-and-feature-engineering).
|
||||||
|
|
||||||
|
### **Step 3: Create an AutoTSEstimator**
|
||||||
|
|
||||||
|
```python
|
||||||
|
import zoo.orca.automl.hp as hp
|
||||||
|
from zoo.chronos.autots import AutoTSEstimator
|
||||||
|
auto_estimator = AutoTSEstimator(model='lstm', # the model name used for training
|
||||||
|
search_space='normal', # a default hyper parameter search space
|
||||||
|
past_seq_len=hp.randint(1, 10), # hp sampling function of past_seq_len for auto-tuning
|
||||||
|
)
|
||||||
|
```
|
||||||
|
We prebuild three default search spaces for each built-in model, which you can use by setting `search_space` to "minimal", "normal", or "large"; you can also define your own search space in a dictionary. The larger the search space, the better accuracy you will get, and the more time it will cost.
|
||||||
|
|
||||||
|
`past_seq_len` can be set as an hp sampling function; the proper range is highly related to your data. A range between 0.5 and 3 cycles is reasonable.
|
||||||
|
|
||||||
|
For detailed information, please refer to the [AutoTSEstimator API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/autotsestimator.html#id1) and some basic concepts [here](https://analytics-zoo.readthedocs.io/en/latest/doc/Orca/Overview/distributed-tuning.html#search-space-and-search-algorithms).
|
||||||
|
|
||||||
|
### **Step 4: Fit with AutoTSEstimator**
|
||||||
|
```python
|
||||||
|
# fit with AutoTSEstimator for a returned TSPipeline
|
||||||
|
ts_pipeline = auto_estimator.fit(data=tsdata_train, # train dataset
|
||||||
|
validation_data=tsdata_val, # validation dataset
|
||||||
|
epochs=5) # number of epochs to train in each trial
|
||||||
|
```
|
||||||
|
For detailed information, please refer to the [AutoTSEstimator API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/autotsestimator.html#id1).
|
||||||
|
### **Step 5: Further deployment with TSPipeline**
|
||||||
|
The `TSPipeline` will apply the same preprocessing and the corresponding postprocessing operations on the test data. You may carry out prediction, evaluation or save/load for further development.
|
||||||
|
```python
|
||||||
|
# predict with the best trial
|
||||||
|
y_pred = ts_pipeline.predict(tsdata_test)
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
# evaluate the result pipeline
|
||||||
|
mse, smape = ts_pipeline.evaluate(tsdata_test, metrics=["mse", "smape"])
|
||||||
|
print("Evaluate: the mean square error is", mse)
|
||||||
|
print("Evaluate: the smape value is", smape)
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
# save the pipeline
|
||||||
|
my_ppl_file_path = "/tmp/saved_pipeline"
|
||||||
|
ts_pipeline.save(my_ppl_file_path)
|
||||||
|
# restore the pipeline for further deployment
|
||||||
|
from zoo.chronos.autots import TSPipeline
|
||||||
|
loaded_ppl = TSPipeline.load(my_ppl_file_path)
|
||||||
|
```
|
||||||
|
For detailed information, please refer to the [TSPipeline API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/autotsestimator.html#tspipeline-experimental).
|
||||||
|
|
||||||
|
### **Optional: Examine the leaderboard visualization**
|
||||||
|
To view the evaluation results of the "not chosen" trials, find some insight, or even possibly improve your search space for a new autotuning task, we provide a leaderboard through tensorboard.
|
||||||
|
```python
|
||||||
|
# show a tensorboard view
|
||||||
|
%load_ext tensorboard
|
||||||
|
%tensorboard --logdir /tmp/autots_estimator/autots_estimator_leaderboard/
|
||||||
|
```
|
||||||
|
For detailed information, please refer to [Visualization](https://analytics-zoo.readthedocs.io/en/latest/doc/Chronos/Overview/chronos.html#Visualization).
|
||||||
|
|
@ -0,0 +1,89 @@
|
||||||
|
# Use TSDataset and Forecaster for Time-Series Forecasting
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_nyc_taxi_tsdataset_forecaster.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_nyc_taxi_tsdataset_forecaster.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will demonstrate how to use _Chronos TSDataset_ and _Chronos Forecaster_ for time series processing and forecasting in 4 simple steps.**
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo[automl] # install latest nightly build
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 1: Data transformation and feature engineering using Chronos TSDataset
|
||||||
|
|
||||||
|
[TSDataset](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/tsdataset.html) is our abstract of time series dataset for data transformation and feature engineering. Here we use it to preprocess the data.
|
||||||
|
|
||||||
|
Initialize train, valid and test tsdataset from raw pandas dataframe.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.chronos.data import TSDataset
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
|
tsdata_train, tsdata_valid, tsdata_test = TSDataset.from_pandas(df, dt_col="timestamp", target_col="value",
|
||||||
|
with_split=True, val_ratio=0.1, test_ratio=0.1)
|
||||||
|
```
|
||||||
|
Preprocess the datasets. Here we perform:
|
||||||
|
|
||||||
|
- deduplicate: remove those identical data records
|
||||||
|
- impute: fill the missing values
|
||||||
|
- gen_dt_feature: generate feature from datetime (e.g. month, day...)
|
||||||
|
- scale: scale each feature to standard distribution.
|
||||||
|
- roll: sample the data with sliding window.
|
||||||
|
- For the forecasting task, we will look back 3 hours' historical data (6 records) and predict the value of the next 30 minutes (1 record).
|
||||||
|
|
||||||
|
We perform the same transformation processes on train, valid and test set.
|
||||||
|
|
||||||
|
```python
|
||||||
|
lookback, horizon = 6, 1
|
||||||
|
|
||||||
|
scaler = StandardScaler()
|
||||||
|
for tsdata in [tsdata_train, tsdata_valid, tsdata_test]:
|
||||||
|
tsdata.deduplicate().impute().gen_dt_feature()\
|
||||||
|
.scale(scaler, fit=(tsdata is tsdata_train))\
|
||||||
|
.roll(lookback=lookback, horizon=horizon)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2: Time series forecasting using Chronos Forecaster
|
||||||
|
|
||||||
|
After preprocessing the datasets, we can use [Chronos Forecaster](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/Chronos/forecasters.html) to handle the forecasting tasks.
|
||||||
|
|
||||||
|
Transform the TSDataset to sampled numpy ndarrays and feed them to the forecaster.
|
||||||
|
|
||||||
|
```python
|
||||||
|
# note: the TCNForecaster import path below is an assumption; check your installed Chronos version
from zoo.chronos.forecaster.tcn_forecaster import TCNForecaster

x, y = tsdata_train.to_numpy()
|
||||||
|
x_val, y_val = tsdata_valid.to_numpy()
|
||||||
|
# x.shape = (num of sample, lookback, num of input feature)
|
||||||
|
# y.shape = (num of sample, horizon, num of output feature)
|
||||||
|
|
||||||
|
forecaster = TCNForecaster(past_seq_len=lookback, # number of steps to look back
|
||||||
|
future_seq_len=horizon, # number of steps to predict
|
||||||
|
input_feature_num=x.shape[-1], # number of feature to use
|
||||||
|
output_feature_num=y.shape[-1]) # number of feature to predict
|
||||||
|
res = forecaster.fit((x, y), validation_data=(x_val, y_val), epochs=3)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 3: Further deployment with fitted forecaster
|
||||||
|
|
||||||
|
Use the fitted forecaster to predict on the test data.
|
||||||
|
|
||||||
|
```python
|
||||||
|
x_test, y_test = tsdata_test.to_numpy()
|
||||||
|
pred = forecaster.predict(x_test)
|
||||||
|
pred_unscale, groundtruth_unscale = tsdata_test.unscale_numpy(pred), tsdata_test.unscale_numpy(y_test)
|
||||||
|
```
|
||||||
|
|
||||||
|
Save & restore the forecaster.
|
||||||
|
|
||||||
|
```python
|
||||||
|
forecaster.save("nyc_taxi.fxt")
|
||||||
|
forecaster.restore("nyc_taxi.fxt")
|
||||||
|
```
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
# Distributed Data-Parallel Processing
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Orca provides efficient support of distributed data-parallel processing pipeline, a critical component for large-scale AI applications.**
|
||||||
|
|
||||||
|
### **1. TensorFlow Dataset and PyTorch DataLoader**
|
||||||
|
|
||||||
|
Orca will seamlessly parallelize the standard `tf.data.Dataset` or `torch.utils.data.DataLoader` pipelines across a large cluster in a data-parallel fashion, which can be directly used for distributed deep learning training, as shown below:
|
||||||
|
|
||||||
|
TensorFlow Dataset:
|
||||||
|
```python
|
||||||
|
import tensorflow as tf
|
||||||
|
import tensorflow_datasets as tfds
|
||||||
|
from zoo.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
|
def preprocess(data):
|
||||||
|
data['image'] = tf.cast(data["image"], tf.float32) / 255.
|
||||||
|
return data['image'], data['label']
|
||||||
|
|
||||||
|
dataset = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
|
||||||
|
dataset = dataset.map(preprocess)
|
||||||
|
dataset = dataset.shuffle(1000)
|
||||||
|
|
||||||
|
est = Estimator.from_keras(keras_model=model)
|
||||||
|
est.fit(data=dataset)
|
||||||
|
```
|
||||||
|
|
||||||
|
Pytorch DataLoader:
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
from torchvision import datasets, transforms
|
||||||
|
from zoo.orca.learn.pytorch import Estimator
|
||||||
|
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST("/tmp/mnist", train=True, download=True,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=batch_size, shuffle=True)
|
||||||
|
|
||||||
|
est = Estimator.from_torch(model=torch_model, optimizer=torch_optim, loss=torch_criterion)
|
||||||
|
est.fit(data=train_loader)
|
||||||
|
```
|
||||||
|
|
||||||
|
Under the hood, Orca will automatically replicate the _TensorFlow Dataset_ or _PyTorch DataLoader_ pipeline on each node in the cluster, shard the input data, and execute the data pipelines using Apache Spark and/or Ray distributedly.
|
||||||
|
|
||||||
|
_**Note:** Known limitations include:_
|
||||||
|
1. _TensorFlow Dataset pipelines that contain transformations defined in native python functions, such as `tf.py_func`, `tf.py_function` and `tf.numpy_function`, are currently not supported._
|
||||||
|
2. _TensorFlow Dataset pipelines created from generators, such as `Dataset.from_generator`, are currently not supported._
|
||||||
|
3. _For TensorFlow Dataset and Pytorch DataLoader pipelines that read from files (including `tf.data.TFRecordDataset` and `tf.data.TextLineDataset`), one needs to ensure that the same file paths can be accessed on every node in the cluster._
|
||||||
|
|
||||||
|
#### **1.1. Data Creator Function**
|
||||||
|
Alternatively, the user may also pass a *Data Creator Function* as the input to the distributed training and inference. Inside the *Data Creator Function*, the user needs to create and return a `tf.data.Dataset` or `torch.utils.data.DataLoader` object, as shown below.
|
||||||
|
|
||||||
|
TensorFlow:
|
||||||
|
```python
|
||||||
|
import tensorflow as tf
|
||||||
|
import tensorflow_datasets as tfds
|
||||||
|
def preprocess(data):
|
||||||
|
data['image'] = tf.cast(data["image"], tf.float32) / 255.
|
||||||
|
return data['image'], data['label']
|
||||||
|
|
||||||
|
def train_data_creator(config, batch_size):
|
||||||
|
dataset = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
|
||||||
|
dataset = dataset.map(preprocess)
|
||||||
|
dataset = dataset.shuffle(1000)
|
||||||
|
dataset = dataset.batch(batch_size)
|
||||||
|
return dataset
|
||||||
|
```
|
||||||
|
|
||||||
|
Pytorch:
|
||||||
|
```python
|
||||||
|
def train_data_creator(config, batch_size):
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(config["dir"], train=True, download=True,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=batch_size, shuffle=True)
|
||||||
|
return train_loader
|
||||||
|
```
|
||||||
|
|
||||||
|
### **2. Spark Dataframes**
|
||||||
|
Orca supports Spark Dataframes as the input to distributed training, and as the input/output of distributed inference. Consequently, the user can easily process large-scale datasets using Apache Spark, and directly apply AI models on the distributed (and possibly in-memory) Dataframes without data conversion or serialization.
|
||||||
|
|
||||||
|
```python
|
||||||
|
df = spark.read.parquet("data.parquet")
|
||||||
|
est = Estimator.from_keras(keras_model=model) # the model accepts two inputs and one label
|
||||||
|
est.fit(data=df,
|
||||||
|
feature_cols=['user', 'item'], # specifies which column(s) to be used as inputs
|
||||||
|
label_cols=['label']) # specifies which column(s) to be used as labels
|
||||||
|
```
|
||||||
|
|
||||||
|
### **3. XShards (Distributed Data-Parallel Python Processing)**
|
||||||
|
|
||||||
|
`XShards` in Orca allows the user to process large-scale dataset using *existing* Python codes in a distributed and data-parallel fashion, as shown below.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import numpy as np
|
||||||
|
from zoo.orca.data import XShards
|
||||||
|
|
||||||
|
train_images = np.random.random((20, 3, 224, 224))
|
||||||
|
train_label_images = np.zeros(20)
|
||||||
|
train_shards = XShards.partition([train_images, train_label_images])
|
||||||
|
|
||||||
|
def transform_to_dict(train_data):
|
||||||
|
return {"x": train_data[0], "y": train_data[1]}
|
||||||
|
|
||||||
|
train_shards = train_shards.transform_shard(transform_to_dict)
|
||||||
|
```
|
||||||
|
|
||||||
|
In essence, an `XShards` contains an automatically sharded (or partitioned) Python object (e.g., Pandas Dataframe, Numpy NDArray, Python Dictionary or List, etc.). Each partition of the `XShards` stores a subset of the Python object and is distributed across different nodes in the cluster; and the user may run arbitrary Python codes on each partition in a data-parallel fashion using `XShards.transform_shard`.
|
||||||
|
|
||||||
|
View the related [Python API doc](./data) for more details.
|
||||||
|
|
||||||
|
#### **3.1 Data-Parallel Pandas**
|
||||||
|
The user may use `XShards` to efficiently process large-size Pandas Dataframes in a distributed and data-parallel fashion.
|
||||||
|
|
||||||
|
First, the user can read CSV, JSON or Parquet files (stored on local disk, HDFS, AWS S3, etc.) to obtain an `XShards` of Pandas Dataframe, as shown below:
|
||||||
|
```python
|
||||||
|
from zoo.orca.data.pandas import read_csv
|
||||||
|
csv_path = "/path/to/csv_file_or_folder"
|
||||||
|
shard = read_csv(csv_path)
|
||||||
|
```
|
||||||
|
|
||||||
|
Each partition of the returned `XShards` stores a Pandas Dataframe object (containing a subset of the entire dataset), and then the user can apply Pandas operations as well as other (e.g., sklearn) operations on each partition, as shown below:
|
||||||
|
```python
|
||||||
|
def negative(df, column_name):
|
||||||
|
df[column_name] = df[column_name] * (-1)
|
||||||
|
return df
|
||||||
|
|
||||||
|
train_shards = shard.transform_shard(negative, 'value')
|
||||||
|
```
|
||||||
|
|
||||||
|
In addition, some global operations (such as `partition_by`, `unique`, etc.) are also supported on the `XShards` of Pandas Dataframe, as shown below:
|
||||||
|
```python
|
||||||
|
shard.partition_by(cols="location", num_partitions=4)
|
||||||
|
location_list = shard["location"].unique()
|
||||||
|
```
|
||||||
|
|
@ -0,0 +1,252 @@
|
||||||
|
# Distributed Training and Inference
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Orca `Estimator` provides sklearn-style APIs for transparently distributed model training and inference**
|
||||||
|
|
||||||
|
### **1. Estimator**
|
||||||
|
|
||||||
|
To perform distributed training and inference, the user can first create an Orca `Estimator` from any standard (single-node) TensorFlow, Keras or PyTorch model, and then call `Estimator.fit` or `Estimator.predict` methods (using the [data-parallel processing pipeline](./data-parallel-processing.md) as input).
|
||||||
|
|
||||||
|
Under the hood, the Orca `Estimator` will replicate the model on each node in the cluster, feed the data partition (generated by the data-parallel processing pipeline) on each node to the local model replica, and synchronize model parameters using various *backend* technologies (such as *Horovod*, `tf.distribute.MirroredStrategy`, `torch.distributed`, or the parameter sync layer in [*BigDL*](https://github.com/intel-analytics/BigDL)).
|
||||||
|
|
||||||
|
### **2. TensorFlow/Keras Estimator**
|
||||||
|
|
||||||
|
#### **2.1 TensorFlow 1.15 and Keras 2.3**
|
||||||
|
|
||||||
|
There are two ways to create an Estimator for TensorFlow 1.15: either from a low-level computation graph or from a Keras model. Examples are as follows:
|
||||||
|
|
||||||
|
TensorFlow Computation Graph:
|
||||||
|
```python
|
||||||
|
# define inputs to the graph
|
||||||
|
images = tf.placeholder(dtype=tf.float32, shape=(None, 28, 28, 1))
|
||||||
|
labels = tf.placeholder(dtype=tf.int32, shape=(None,))
|
||||||
|
|
||||||
|
# define the network and loss
|
||||||
|
logits = lenet(images)
|
||||||
|
loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
|
||||||
|
|
||||||
|
# define a metric
|
||||||
|
acc = accuracy(logits, labels)
|
||||||
|
|
||||||
|
# create an estimator using endpoints of the graph
|
||||||
|
est = Estimator.from_graph(inputs=images,
|
||||||
|
outputs=logits,
|
||||||
|
labels=labels,
|
||||||
|
loss=loss,
|
||||||
|
optimizer=tf.train.AdamOptimizer(),
|
||||||
|
metrics={"acc": acc})
|
||||||
|
```
|
||||||
|
|
||||||
|
Keras Model:
|
||||||
|
```python
|
||||||
|
model = create_keras_lenet_model()
|
||||||
|
model.compile(optimizer=keras.optimizers.RMSprop(),
|
||||||
|
loss='sparse_categorical_crossentropy',
|
||||||
|
metrics=['accuracy'])
|
||||||
|
est = Estimator.from_keras(keras_model=model)
|
||||||
|
```
|
||||||
|
|
||||||
|
Then users can perform distributed model training and inference as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
dataset = tfds.load(name="mnist", split="train")
|
||||||
|
dataset = dataset.map(preprocess)
|
||||||
|
est.fit(data=dataset,
|
||||||
|
batch_size=320,
|
||||||
|
epochs=max_epoch)
|
||||||
|
predictions = est.predict(data=df,
|
||||||
|
feature_cols=['image'])
|
||||||
|
```
|
||||||
|
The `data` argument in the `fit` method can be a Spark DataFrame, an *XShards* or a `tf.data.Dataset`. The `data` argument in the `predict` method can be a Spark DataFrame or an *XShards*. See the *data-parallel processing pipeline* [page](./data-parallel-processing.md) for more details.
|
||||||
|
|
||||||
|
View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
|
#### **2.2 TensorFlow 2.x and Keras 2.4+**
|
||||||
|
|
||||||
|
Users can create an `Estimator` for TensorFlow 2.x from a Keras model (using a _Model Creator Function_). For example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def model_creator(config):
|
||||||
|
model = create_keras_lenet_model()
|
||||||
|
model.compile(optimizer=keras.optimizers.RMSprop(),
|
||||||
|
loss='sparse_categorical_crossentropy',
|
||||||
|
metrics=['accuracy'])
|
||||||
|
return model
|
||||||
|
est = Estimator.from_keras(model_creator=model_creator)
|
||||||
|
```
|
||||||
|
|
||||||
|
The `model_creator` argument should be a function that takes a `config` dictionary and returns a compiled Keras model.
|
||||||
|
|
||||||
|
Then users can perform distributed model training and inference as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def train_data_creator(config, batch_size):
|
||||||
|
dataset = tfds.load(name="mnist", split="train")
|
||||||
|
dataset = dataset.map(preprocess)
|
||||||
|
dataset = dataset.batch(batch_size)
|
||||||
|
return dataset
|
||||||
|
stats = est.fit(data=train_data_creator,
|
||||||
|
epochs=max_epoch,
|
||||||
|
steps_per_epoch=total_size // batch_size)
|
||||||
|
predictions = est.predict(data=df,
|
||||||
|
feature_cols=['image'])
|
||||||
|
```
|
||||||
|
|
||||||
|
The `data` argument in the `fit` method can be a Spark DataFrame, an *XShards* or a *Data Creator Function* (that returns a `tf.data.Dataset`). The `data` argument in the `predict` method can be a Spark DataFrame or an *XShards*. See the *data-parallel processing pipeline* [page](./data-parallel-processing.md) for more details.
|
||||||
|
|
||||||
|
View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
|
***For more details, view the distributed TensorFlow training/inference [page]().***
|
||||||
|
|
||||||
|
### **3. PyTorch Estimator**
|
||||||
|
|
||||||
|
**Using *BigDL* backend**
|
||||||
|
|
||||||
|
Users may create a PyTorch `Estimator` using the *BigDL* backend (currently the default for PyTorch) as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
model = LeNet() # a torch.nn.Module
|
||||||
|
model.train()
|
||||||
|
criterion = nn.NLLLoss()
|
||||||
|
|
||||||
|
adam = torch.optim.Adam(model.parameters(), args.lr)
|
||||||
|
est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion)
|
||||||
|
```
|
||||||
|
|
||||||
|
Then users can perform distributed model training and inference as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
est.fit(data=train_loader, epochs=args.epochs)
|
||||||
|
predictions = est.predict(xshards)
|
||||||
|
```
|
||||||
|
|
||||||
|
The input to the `fit` method can be a `torch.utils.data.DataLoader`, a Spark Dataframe, an *XShards*, or a *Data Creator Function* (that returns a `torch.utils.data.DataLoader`). The input to the `predict` method should be a Spark Dataframe or an *XShards*. See the *data-parallel processing pipeline* [page](./data-parallel-processing.md) for more details.
|
||||||
|
|
||||||
|
View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
|
**Using `torch.distributed` or *Horovod* backend**
|
||||||
|
|
||||||
|
Alternatively, users can create a PyTorch `Estimator` using the `torch.distributed` or *Horovod* backend by specifying the `backend` argument to be "torch_distributed" or "horovod". In this case, the `model` and `optimizer` should be wrapped in _Creator Functions_. For example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def model_creator(config):
|
||||||
|
model = LeNet() # a torch.nn.Module
|
||||||
|
model.train()
|
||||||
|
return model
|
||||||
|
|
||||||
|
def optimizer_creator(model, config):
|
||||||
|
return torch.optim.Adam(model.parameters(), config["lr"])
|
||||||
|
|
||||||
|
est = Estimator.from_torch(model=model_creator,
|
||||||
|
optimizer=optimizer_creator,
|
||||||
|
loss=nn.NLLLoss(),
|
||||||
|
config={"lr": 1e-2},
|
||||||
|
backend="torch_distributed") # or backend="horovod"
|
||||||
|
```
|
||||||
|
|
||||||
|
Then users can perform distributed model training and inference as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
est.fit(data=train_loader_func, epochs=args.epochs)
|
||||||
|
predictions = est.predict(data=df,
|
||||||
|
feature_cols=['image'])
|
||||||
|
```
|
||||||
|
|
||||||
|
The input to the `fit` method can be a Spark DataFrame, an *XShards*, or a *Data Creator Function* (that returns a `torch.utils.data.DataLoader`). The `data` argument in the `predict` method can be a Spark DataFrame or an *XShards*. See the *data-parallel processing pipeline* [page](./data-parallel-processing.md) for more details.
|
||||||
|
|
||||||
|
View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
|
***For more details, view the distributed PyTorch training/inference [page]()<TODO: link to be added>.***
|
||||||
|
|
||||||
|
### **4. MXNet Estimator**
|
||||||
|
|
||||||
|
The user may create an MXNet `Estimator` as follows:
|
||||||
|
```python
|
||||||
|
from mxnet import gluon
from zoo.orca.learn.mxnet import Estimator, create_config
|
||||||
|
|
||||||
|
def get_model(config):
|
||||||
|
net = LeNet() # a mxnet.gluon.Block
|
||||||
|
return net
|
||||||
|
|
||||||
|
def get_loss(config):
|
||||||
|
return gluon.loss.SoftmaxCrossEntropyLoss()
|
||||||
|
|
||||||
|
config = create_config(log_interval=2, optimizer="adam",
|
||||||
|
optimizer_params={'learning_rate': 0.02})
|
||||||
|
est = Estimator.from_mxnet(config=config,
|
||||||
|
model_creator=get_model,
|
||||||
|
loss_creator=get_loss,
|
||||||
|
num_workers=2)
|
||||||
|
```
|
||||||
|
|
||||||
|
Then the user can perform distributed model training as follows:
|
||||||
|
```python
|
||||||
|
import mxnet as mx
import numpy as np
|
||||||
|
|
||||||
|
def get_train_data_iter(config, kv):
|
||||||
|
train = mx.io.NDArrayIter(data_ndarray, label_ndarray,
|
||||||
|
batch_size=config["batch_size"], shuffle=True)
|
||||||
|
return train
|
||||||
|
|
||||||
|
est.fit(get_train_data_iter, epochs=2)
|
||||||
|
```
|
||||||
|
|
||||||
|
The input to the `fit` method can be an *XShards*, or a *Data Creator Function* (that returns an MXNet `DataIter`/`DataLoader`). See the *data-parallel processing pipeline* [page](./data-parallel-processing.html) for more details.
|
||||||
|
|
||||||
|
View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
|
### **5. BigDL Estimator**
|
||||||
|
|
||||||
|
The user may create a BigDL `Estimator` as follows:
|
||||||
|
```python
|
||||||
|
from bigdl.nn.criterion import *
|
||||||
|
from bigdl.nn.layer import *
|
||||||
|
from bigdl.optim.optimizer import *
|
||||||
|
from zoo.orca.learn.bigdl import Estimator
|
||||||
|
|
||||||
|
linear_model = Sequential().add(Linear(2, 2))
|
||||||
|
mse_criterion = MSECriterion()
|
||||||
|
est = Estimator.from_bigdl(model=linear_model, loss=mse_criterion, optimizer=Adam())
|
||||||
|
```
|
||||||
|
|
||||||
|
Then the user can perform distributed model training and inference as follows:
|
||||||
|
```python
|
||||||
|
# read spark Dataframe
|
||||||
|
df = spark.read.parquet("data.parquet")
|
||||||
|
|
||||||
|
# distributed model training
|
||||||
|
est.fit(df, 1, batch_size=4)
|
||||||
|
|
||||||
|
# distributed model inference
|
||||||
|
result_df = est.predict(df)
|
||||||
|
```
|
||||||
|
|
||||||
|
The input to the `fit` and `predict` methods can be a *Spark DataFrame* or an *XShards*. See the *data-parallel processing pipeline* [page](./data-parallel-processing.html) for more details.
|
||||||
|
|
||||||
|
View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
|
### **6. OpenVINO Estimator**
|
||||||
|
|
||||||
|
The user may create an OpenVINO `Estimator` as follows:
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.openvino import Estimator
|
||||||
|
|
||||||
|
model_path = "The/file_path/to/the/OpenVINO_IR_xml_file"
|
||||||
|
est = Estimator.from_openvino(model_path=model_path)
|
||||||
|
```
|
||||||
|
|
||||||
|
Then the user can perform distributed model inference as follows:
|
||||||
|
```python
|
||||||
|
import numpy as np

# ndarray
|
||||||
|
input_data = np.random.random([20, 4, 3, 224, 224])
|
||||||
|
result = est.predict(input_data)
|
||||||
|
|
||||||
|
# xshards
|
||||||
|
shards = XShards.partition({"x": input_data})
|
||||||
|
result_shards = est.predict(shards)
|
||||||
|
```
|
||||||
|
|
||||||
|
The input to the `predict` method can be an *XShards* or a *numpy array*. See the *data-parallel processing pipeline* [page](./data-parallel-processing.html) for more details.
|
||||||
|
|
||||||
|
View the related [Python API doc]() for more details.
|
||||||
211
docs/readthedocs/source/doc/Orca/Overview/distributed-tuning.md
Normal file
|
|
@ -0,0 +1,211 @@
|
||||||
|
# Distributed Hyper-parameter Tuning
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Orca `AutoEstimator` provides similar APIs as Orca `Estimator` for distributed hyper-parameter tuning.**
|
||||||
|
|
||||||
|
### **1. AutoEstimator**
|
||||||
|
|
||||||
|
To perform distributed hyper-parameter tuning, users can first create an Orca `AutoEstimator` from a standard TensorFlow Keras or PyTorch model, and then call `AutoEstimator.fit`.
|
||||||
|
|
||||||
|
Under the hood, the Orca `AutoEstimator` generates different trials and schedules them on each node in the cluster. Each trial runs a different combination of hyper-parameters, sampled from the user-defined hyper-parameter space.
|
||||||
|
HDFS is used to save the temporary results of each trial, and all the results are finally transferred to the driver for further analysis.
|
||||||
|
|
||||||
|
### **2. Pytorch AutoEstimator**
|
||||||
|
|
||||||
|
Users can pass *Creator Function*s, including the *Data Creator Function*, *Model Creator Function* and *Optimizer Creator Function*, to `AutoEstimator` for training.
|
||||||
|
|
||||||
|
The *Creator Function*s should take `config` as input and get the hyper-parameter values from `config` to enable hyper-parameter search.
|
||||||
|
|
||||||
|
#### **2.1 Data Creator Function**
|
||||||
|
You can define the train and validation datasets using a *Data Creator Function*. The *Data Creator Function* takes `config` as input and returns a `torch.utils.data.DataLoader` object, as shown below.
|
||||||
|
```python
|
||||||
|
# "batch_size" is the hyper-parameter to be tuned.
|
||||||
|
def train_loader_creator(config):
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(dir, train=True, download=True,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=config["batch_size"], shuffle=True)
|
||||||
|
return train_loader
|
||||||
|
```
|
||||||
|
The input data for a PyTorch `AutoEstimator` can be a *Data Creator Function* or a tuple of numpy ndarrays in the form of (x, y), where x is the training input data and y is the training target data.
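For example, a small in-memory dataset could be passed directly as a tuple of numpy ndarrays instead of a creator function; the shapes below are purely illustrative:

```python
# A minimal sketch: in-memory numpy arrays as AutoEstimator input (shapes are illustrative).
import numpy as np

x = np.random.rand(100, 1, 28, 28).astype(np.float32)  # illustrative MNIST-shaped inputs
y = np.random.randint(0, 10, size=(100,))               # illustrative integer labels
# auto_est.fit(data=(x, y), ...)  # same fit API as with a Data Creator Function
```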
|
||||||
|
|
||||||
|
#### **2.2 Model Creator Function**
|
||||||
|
The *Model Creator Function* also takes `config` as input and returns a `torch.nn.Module` object, as shown below.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch.nn as nn
import torch.nn.functional as F
|
||||||
|
class LeNet(nn.Module):
|
||||||
|
def __init__(self, fc1_hidden_size=500):
|
||||||
|
super(LeNet, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(1, 20, 5, 1)
|
||||||
|
self.conv2 = nn.Conv2d(20, 50, 5, 1)
|
||||||
|
self.fc1 = nn.Linear(4*4*50, fc1_hidden_size)
|
||||||
|
self.fc2 = nn.Linear(fc1_hidden_size, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
|
||||||
|
|
||||||
|
def model_creator(config):
|
||||||
|
# "fc1_hidden_size" is the hyper-parameter to be tuned.
|
||||||
|
model = LeNet(fc1_hidden_size=config["fc1_hidden_size"])
|
||||||
|
return model
|
||||||
|
```
|
||||||
|
|
||||||
|
#### **2.3 Optimizer Creator Function**
|
||||||
|
The *Optimizer Creator Function* takes `model` and `config` as input and returns a `torch.optim.Optimizer` object.
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
def optim_creator(model, config):
|
||||||
|
return torch.optim.Adam(model.parameters(), lr=config["lr"])
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the `optimizer` argument in the PyTorch `AutoEstimator` constructor can be either an *Optimizer Creator Function* or a string with the name of a PyTorch optimizer. The *Optimizer Creator Function* above is equivalent to passing "Adam".
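For instance, a sketch of the constructor call from the next section with the optimizer passed by name (all other arguments unchanged; this assumes the learning rate is then taken from the search space via `config`):

```python
# A sketch: pass the optimizer by name instead of an Optimizer Creator Function.
from zoo.orca.automl.auto_estimator import AutoEstimator

auto_est = AutoEstimator.from_torch(model_creator=model_creator,
                                    optimizer="Adam",   # instead of optim_creator
                                    loss=nn.NLLLoss(),
                                    logs_dir="/tmp/zoo_automl_logs",
                                    resources_per_trial={"cpu": 2},
                                    name="lenet_mnist")
```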
|
||||||
|
|
||||||
|
#### **2.4 Create and Fit Pytorch AutoEstimator**
|
||||||
|
Users can create a PyTorch `AutoEstimator` as follows.
|
||||||
|
```python
|
||||||
|
from zoo.orca.automl.auto_estimator import AutoEstimator
|
||||||
|
|
||||||
|
auto_est = AutoEstimator.from_torch(model_creator=model_creator,
|
||||||
|
optimizer=optim_creator,
|
||||||
|
loss=nn.NLLLoss(),
|
||||||
|
logs_dir="/tmp/zoo_automl_logs",
|
||||||
|
resources_per_trial={"cpu": 2},
|
||||||
|
name="lenet_mnist")
|
||||||
|
```
|
||||||
|
Then users can perform distributed hyper-parameter tuning as follows. For more details about the `search_space` argument, view the *search space and search algorithms* [page](#search-space-and-search-algorithms).
|
||||||
|
```python
|
||||||
|
auto_est.fit(data=train_loader_creator,
|
||||||
|
validation_data=test_loader_creator,
|
||||||
|
search_space=search_space,
|
||||||
|
n_sampling=2,
|
||||||
|
epochs=1,
|
||||||
|
metric="accuracy")
|
||||||
|
```
|
||||||
|
Finally, users can get the best learned model and the best hyper-parameters for further deployment.
|
||||||
|
```python
|
||||||
|
best_model = auto_est.get_best_model() # a `torch.nn.Module` object
|
||||||
|
best_config = auto_est.get_best_config() # a dictionary of hyper-parameter names and values.
|
||||||
|
```
|
||||||
|
View the related [Python API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/AutoML/automl.html#orca-automl-auto-estimator) for more details.
|
||||||
|
|
||||||
|
### **3. TensorFlow/Keras AutoEstimator**
|
||||||
|
Users can create an `AutoEstimator` for TensorFlow Keras from a `tf.keras` model (using a *Model Creator Function*). For example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def model_creator(config):
|
||||||
|
model = tf.keras.models.Sequential([tf.keras.layers.Dense(config["hidden_size"],
|
||||||
|
input_shape=(1,)),
|
||||||
|
tf.keras.layers.Dense(1)])
|
||||||
|
model.compile(loss="mse",
|
||||||
|
optimizer=tf.keras.optimizers.SGD(config["lr"]),
|
||||||
|
metrics=["mse"])
|
||||||
|
return model
|
||||||
|
|
||||||
|
auto_est = AutoEstimator.from_keras(model_creator=model_creator,
|
||||||
|
logs_dir="/tmp/zoo_automl_logs",
|
||||||
|
resources_per_trial={"cpu": 2},
|
||||||
|
name="auto_keras")
|
||||||
|
```
|
||||||
|
|
||||||
|
Then users can perform distributed hyper-parameter tuning as follows. For more details about `search_space`, view the *search space and search algorithms* [page](#search-space-and-search-algorithms).
|
||||||
|
```python
|
||||||
|
auto_est.fit(data=train_data,
|
||||||
|
validation_data=val_data,
|
||||||
|
search_space=search_space,
|
||||||
|
n_sampling=2,
|
||||||
|
epochs=1,
|
||||||
|
metric="accuracy")
|
||||||
|
```
|
||||||
|
The `data` and `validation_data` arguments in the `fit` method can only be a tuple of numpy ndarrays; *Data Creator Functions* are not supported yet. The numpy ndarrays should be in the form of (x, y), where x is the training input data and y is the training target data.
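For example, the `train_data` and `val_data` used above could be simple numpy tuples; the shapes below are illustrative and match the single-feature Keras model in this section:

```python
# A minimal sketch of numpy (x, y) tuples for the Keras AutoEstimator (shapes are illustrative).
import numpy as np

train_data = (np.random.rand(1000, 1), np.random.rand(1000, 1))
val_data = (np.random.rand(100, 1), np.random.rand(100, 1))
```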
|
||||||
|
|
||||||
|
Finally, users can get the best learned model and the best hyper-parameters for further deployment.
|
||||||
|
```python
|
||||||
|
best_model = auto_est.get_best_model() # a tf.keras model object
|
||||||
|
best_config = auto_est.get_best_config() # a dictionary of hyper-parameter names and values.
|
||||||
|
```
|
||||||
|
View the related [Python API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/AutoML/automl.html#orca-automl-auto-estimator) for more details.
|
||||||
|
|
||||||
|
### **4. Search Space and Search Algorithms**
|
||||||
|
For hyper-parameter optimization, users should define the search space of hyper-parameter values for neural network training, as well as how to search through the chosen hyper-parameter space.
|
||||||
|
|
||||||
|
#### **4.1 Basic Search Algorithms**
|
||||||
|
|
||||||
|
For basic search algorithms like **Grid Search** and **Random Search**, we provide several sampling functions with `automl.hp`. See [API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/AutoML/automl.html#orca-automl-hp) for more details.
|
||||||
|
|
||||||
|
`AutoEstimator` requires a dictionary for the `search_space` argument in `fit`.
|
||||||
|
In the dictionary, the keys are the hyper-parameter names, and the values specify how to sample the search spaces for the hyper-parameters.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.automl import hp
|
||||||
|
|
||||||
|
search_space = {
|
||||||
|
"fc1_hidden_size": hp.grid_search([500, 600]),
|
||||||
|
"lr": hp.loguniform(0.001, 0.1),
|
||||||
|
"batch_size": hp.choice([160, 320, 640]),
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### **4.2 Advanced Search Algorithms**
|
||||||
|
Besides grid search and random search, users can also choose to use advanced hyper-parameter optimization methods,
|
||||||
|
such as [Ax](https://ax.dev/), [Bayesian Optimization](https://github.com/fmfn/BayesianOptimization), [Scikit-Optimize](https://scikit-optimize.github.io), etc. We support all *Search Algorithms* in [Ray Tune](https://docs.ray.io/en/master/index.html). View the [Ray Tune Search Algorithms](https://docs.ray.io/en/master/tune/api_docs/suggestion.html) for more details.
|
||||||
|
Note that you should install the dependency for your search algorithm manually.
|
||||||
|
|
||||||
|
Take Bayesian Optimization as an example. You need to first install the dependency with
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install bayesian-optimization
|
||||||
|
```
|
||||||
|
|
||||||
|
Then pass the search algorithm name to `search_alg` in `AutoEstimator.fit`.
|
||||||
|
```python
|
||||||
|
from zoo.orca.automl import hp
|
||||||
|
|
||||||
|
search_space = {
|
||||||
|
"width": hp.uniform(0, 20),
|
||||||
|
"height": hp.uniform(-100, 100)
|
||||||
|
}
|
||||||
|
|
||||||
|
auto_estimator.fit(
|
||||||
|
data,
|
||||||
|
search_space=search_space,
|
||||||
|
metric="mean_loss",
|
||||||
|
mode="min",
|
||||||
|
search_alg="bayesopt",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
See [API Doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/AutoML/automl.html#orca-automl-auto-estimator) for more details.
|
||||||
|
|
||||||
|
### **5. Scheduler**
|
||||||
|
A *Scheduler* can stop/pause/tweak the hyper-parameters of running trials, making the hyper-parameter tuning process much more efficient.
|
||||||
|
|
||||||
|
We support all *Schedulers* in [Ray Tune](https://docs.ray.io/en/master/index.html). See [Ray Tune Schedulers](https://docs.ray.io/en/master/tune/api_docs/schedulers.html#schedulers-ref) for more details.
|
||||||
|
|
||||||
|
Users can pass the *Scheduler* name to `scheduler` in `AutoEstimator.fit`. The supported *Scheduler* names are "fifo", "hyperband", "async_hyperband", "median_stopping_rule", "hb_bohb", "pbt" and "pbt_replay".
|
||||||
|
The default `scheduler` is "fifo", which just runs trials in submission order.
|
||||||
|
|
||||||
|
See the example below on how to use a *Scheduler* in `AutoEstimator`.
|
||||||
|
```python
|
||||||
|
scheduler_params = dict(
|
||||||
|
max_t=50,
|
||||||
|
grace_period=1,
|
||||||
|
reduction_factor=3,
|
||||||
|
brackets=3,
|
||||||
|
)
|
||||||
|
|
||||||
|
auto_estimator.fit(
|
||||||
|
data,
|
||||||
|
search_space=search_space,
|
||||||
|
metric="mean_loss",
|
||||||
|
mode="min",
|
||||||
|
search_alg="skopt",
|
||||||
|
scheduler = "AsyncHyperBand",
|
||||||
|
scheduler_params=scheduler_params
|
||||||
|
)
|
||||||
|
```
|
||||||
|
Each *Scheduler* shares the same parameters as its Ray Tune counterpart.
|
||||||
|
`scheduler_params` holds the extra parameters for the `scheduler` other than `metric` and `mode`.
|
||||||
81
docs/readthedocs/source/doc/Orca/Overview/orca-context.md
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
# Orca Context
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
`OrcaContext` is the main entry for provisioning the Orca program on the underlying cluster (such as K8s or Hadoop cluster), or just on a single laptop.
|
||||||
|
|
||||||
|
---
|
||||||
|
### **1. Initialization**
|
||||||
|
|
||||||
|
An Orca program usually starts with the initialization of `OrcaContext` as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context
|
||||||
|
|
||||||
|
init_orca_context(...)
|
||||||
|
```
|
||||||
|
|
||||||
|
In `init_orca_context`, the user may specify necessary runtime configurations for the Orca program, including:
|
||||||
|
|
||||||
|
- *Cluster mode*: Users can specify the computing environment for the program (a local machine, K8s cluster, Hadoop/YARN cluster, etc.).
|
||||||
|
- *Physical resources*: Users can specify the amount of physical resources to be allocated for the program on the underlying cluster, including the number of nodes in the cluster, the cores and memory allocated for each node, etc.
|
||||||
|
|
||||||
|
The Orca program simply runs `init_orca_context` on the local machine, which will automatically provision the runtime Python environment and distributed execution engine on the underlying computing environment (such as a single laptop, a large K8s or Hadoop cluster, etc.).
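For example, a minimal sketch of requesting a YARN cluster with 2 nodes, 4 cores and 10 GB of memory per node (the argument values are illustrative):

```python
# A minimal sketch; cluster_mode can also be "local" or "k8s".
from zoo.orca import init_orca_context

sc = init_orca_context(cluster_mode="yarn", num_nodes=2, cores=4, memory="10g")
```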
|
||||||
|
|
||||||
|
View the related [Python API doc]() for more details.
|
||||||
|
|
||||||
|
---
|
||||||
|
### **2. Python Dependencies**
|
||||||
|
|
||||||
|
A key challenge for scaling out Python program across a distributed cluster is how to properly install the required Python environment (libraries and dependencies) on each node in the cluster (preferably in an automatic and dynamic fashion).
|
||||||
|
|
||||||
|
For a K8s cluster, the user may install the required Python packages in the container and specify the `container_image` argument when calling `init_orca_context`. For a Hadoop/YARN cluster, the user may use `conda` to create the Python virtual environment with the required dependencies on the local machine, and `init_orca_context` will automatically detect the active `conda` environment and provision it on each node in the cluster.
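As a sketch of the Hadoop/YARN workflow (the environment name and package set are illustrative), the conda environment is prepared locally before calling `init_orca_context`:

```bash
# A minimal sketch; "zoo" is an arbitrary conda environment name.
conda create -n zoo python=3.7
conda activate zoo
pip install analytics-zoo
# init_orca_context(cluster_mode="yarn-client", ...) will then detect and ship this environment.
```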
|
||||||
|
|
||||||
|
You can also add .py, .zip or .egg files to distribute with your application by specifying `extra_python_lib` in `init_orca_context`. If you depend on multiple Python files, we recommend packaging them into a .zip or .egg file. These files will be added to each node's Python search path.
|
||||||
|
|
||||||
|
```python
|
||||||
|
init_orca_context(..., extra_python_lib="func1.py,func2.py,lib3.zip")
|
||||||
|
```
|
||||||
|
|
||||||
|
View the user guide for [K8s](../../UserGuide/k8s.md) and [Hadoop/YARN](../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
---
|
||||||
|
### **3. Execution Engine**
|
||||||
|
|
||||||
|
Under the hood, `OrcaContext` will automatically provision Apache Spark and/or Ray as the underlying execution engine for the distributed data processing and model training/inference.
|
||||||
|
|
||||||
|
Users can easily retrieve `SparkContext` and `RayContext`, the main entry point for Spark and Ray respectively, via `OrcaContext`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import OrcaContext
|
||||||
|
|
||||||
|
sc = OrcaContext.get_spark_context()
|
||||||
|
ray_ctx = OrcaContext.get_ray_context()
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
### **4. Extra Configurations**
|
||||||
|
|
||||||
|
Users can make extra configurations when using the functionalities of Project Orca via `OrcaContext`, as listed below (a short example follows the list).
|
||||||
|
|
||||||
|
* `OrcaContext.log_output`: Defaults to False. Setting `OrcaContext.log_output = True` is recommended when running in a Jupyter notebook (this will display all the program output in the notebook). Make sure you set it before `init_orca_context`.
|
||||||
|
|
||||||
|
* `OrcaContext.serialize_data_creator`: Defaults to False. Setting `OrcaContext.serialize_data_creator = True` adds a file lock when initializing data for distributed training (this may be useful if you run multiple workers on a single node and they download data to the same destination).
|
||||||
|
|
||||||
|
* `OrcaContext.pandas_read_backend`: The backend used for reading data as a Pandas DataFrame. Defaults to "spark". See [here](./data-parallel-processing.html#data-parallel-pandas) for more details.
|
||||||
|
|
||||||
|
* `OrcaContext.train_data_store`: Defaults to "DRAM". Set `OrcaContext.train_data_store = "DISK_n"` (e.g., "DISK_2") if the training data cannot fit in memory (this will store the data on disk and cache only 1/n of the data in memory; after going through that 1/n, it will release the current cache and load another 1/n into memory). Currently it works for TensorFlow and Keras Estimators only.
|
||||||
|
|
||||||
|
* `OrcaContext.barrier_mode`: Whether to use Spark barrier execution mode to launch Ray. Defaults to True. You can set it to False if you are using Spark below 2.4 or you need to have dynamic allocation enabled.
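A minimal sketch of setting some of these options (the values are illustrative; `log_output` must be set before `init_orca_context`):

```python
# A minimal sketch of extra OrcaContext configurations (values are illustrative).
from zoo.orca import OrcaContext, init_orca_context

OrcaContext.log_output = True             # must be set before init_orca_context
OrcaContext.train_data_store = "DISK_2"   # cache only 1/2 of the training data in memory

init_orca_context(cluster_mode="local", cores=4)
```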
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### **5. Termination**
|
||||||
|
|
||||||
|
After the Orca program finishes, the user can call `stop_orca_context` to release resources and shut down the underlying Spark and/or Ray execution engine.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import stop_orca_context
|
||||||
|
|
||||||
|
stop_orca_context()
|
||||||
|
```
|
||||||
48
docs/readthedocs/source/doc/Orca/Overview/orca.md
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
# The Orca Library
|
||||||
|
|
||||||
|
Most AI projects start with a Python notebook running on a single laptop; however, one usually needs to go through a mountain of pain to scale it to handle larger data sets in a distributed fashion. The _**Orca**_ library seamlessly scales out your single-node Python notebook across large clusters (so as to process distributed Big Data).
|
||||||
|
|
||||||
|
First, initialize [Orca Context](orca-context.md):
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context
|
||||||
|
|
||||||
|
# cluster_mode can be "local", "k8s" or "yarn"
|
||||||
|
sc = init_orca_context(cluster_mode="yarn", cores=4, memory="10g", num_nodes=2)
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, perform [data-parallel processing in Orca](data-parallel-processing.md) (supporting standard Spark Dataframes, TensorFlow Dataset, PyTorch DataLoader, Pandas, etc.):
|
||||||
|
|
||||||
|
```python
|
||||||
|
from pyspark.sql.functions import array
|
||||||
|
|
||||||
|
df = spark.read.parquet(file_path)
|
||||||
|
df = df.withColumn('user', array('user')) \
|
||||||
|
.withColumn('item', array('item'))
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, use [sklearn-style Estimator APIs in Orca](distributed-training-inference.md) to perform distributed _TensorFlow_, _PyTorch_, _Keras_ and _BigDL_ training and inference:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from tensorflow import keras
|
||||||
|
from zoo.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
|
user = keras.layers.Input(shape=[1])
|
||||||
|
item = keras.layers.Input(shape=[1])
|
||||||
|
feat = keras.layers.concatenate([user, item], axis=1)
|
||||||
|
predictions = keras.layers.Dense(2, activation='softmax')(feat)
|
||||||
|
model = keras.models.Model(inputs=[user, item], outputs=predictions)
|
||||||
|
model.compile(optimizer='rmsprop',
|
||||||
|
loss='sparse_categorical_crossentropy',
|
||||||
|
metrics=['accuracy'])
|
||||||
|
|
||||||
|
est = Estimator.from_keras(keras_model=model)
|
||||||
|
est.fit(data=df,
|
||||||
|
batch_size=64,
|
||||||
|
epochs=4,
|
||||||
|
feature_cols=['user', 'item'],
|
||||||
|
label_cols=['label'])
|
||||||
|
```
|
||||||
|
|
||||||
|
See [TensorFlow](../QuickStart/orca-tf-quickstart.md) and [PyTorch](../QuickStart/orca-pytorch-quickstart.md) quickstart for more details.
|
||||||
|
|
||||||
|
|
@ -0,0 +1,161 @@
|
||||||
|
# Enable AutoML for PyTorch
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/autoestimator_pytorch_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/autoestimator_pytorch_lenet_mnist.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to enable automated hyper-parameter search for PyTorch using Orca `AutoEstimator`.**
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
[Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) is needed to prepare the Python environment for running this example. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # zoo is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo[ray]
|
||||||
|
pip install torch==1.7.1 torchvision==0.8.2
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Init Orca Context**
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
|
if cluster_mode == "local":
|
||||||
|
init_orca_context(cores=4, memory="2g", init_ray_on_spark=True) # run in local mode
|
||||||
|
elif cluster_mode == "k8s":
|
||||||
|
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=4, init_ray_on_spark=True) # run on K8s cluster
|
||||||
|
elif cluster_mode == "yarn":
|
||||||
|
init_orca_context(
|
||||||
|
cluster_mode="yarn-client", cores=4, num_nodes=2, memory="2g", init_ray_on_spark=True,
|
||||||
|
driver_memory="10g", driver_cores=1) # run on Hadoop YARN cluster
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
### **Step 2: Define the Model**
|
||||||
|
|
||||||
|
You may define your model, loss and optimizer in the same way as in any standard PyTorch program.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
class LeNet(nn.Module):
|
||||||
|
def __init__(self, fc1_hidden_size=500):
|
||||||
|
super(LeNet, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(1, 20, 5, 1)
|
||||||
|
self.conv2 = nn.Conv2d(20, 50, 5, 1)
|
||||||
|
self.fc1 = nn.Linear(4*4*50, fc1_hidden_size)
|
||||||
|
self.fc2 = nn.Linear(fc1_hidden_size, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
x = F.relu(self.conv1(x))
|
||||||
|
x = F.max_pool2d(x, 2, 2)
|
||||||
|
x = F.relu(self.conv2(x))
|
||||||
|
x = F.max_pool2d(x, 2, 2)
|
||||||
|
x = x.view(-1, 4*4*50)
|
||||||
|
x = F.relu(self.fc1(x))
|
||||||
|
x = self.fc2(x)
|
||||||
|
return F.log_softmax(x, dim=1)
|
||||||
|
|
||||||
|
criterion = nn.NLLLoss()
|
||||||
|
```
|
||||||
|
After defining your model, you need to define a *Model Creator Function* that returns an instance of your model, and an *Optimizer Creator Function* that returns a PyTorch optimizer. Note that both the *Model Creator Function* and the *Optimizer Creator Function* should take `config` as input and get the hyper-parameter values from `config`.
|
||||||
|
|
||||||
|
```python
|
||||||
|
def model_creator(config):
|
||||||
|
model = LeNet(fc1_hidden_size=config["fc1_hidden_size"])
|
||||||
|
return model
|
||||||
|
|
||||||
|
def optim_creator(model, config):
|
||||||
|
return torch.optim.Adam(model.parameters(), lr=config["lr"])
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 3: Define Dataset**
|
||||||
|
|
||||||
|
You can define the train and validation datasets using a *Data Creator Function* that takes `config` as input and returns a PyTorch `DataLoader`.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
from torchvision import datasets, transforms
|
||||||
|
|
||||||
|
torch.manual_seed(0)
|
||||||
|
dir = './dataset'
|
||||||
|
test_batch_size = 640
|
||||||
|
|
||||||
|
def train_loader_creator(config):
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(dir, train=True, download=True,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=config["batch_size"], shuffle=True)
|
||||||
|
return train_loader
|
||||||
|
|
||||||
|
def test_loader_creator(config):
|
||||||
|
test_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(dir, train=False, download=True,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=test_batch_size, shuffle=False)
|
||||||
|
return test_loader
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 4: Define Search Space**
|
||||||
|
You should define a dictionary as your hyper-parameter search space.
|
||||||
|
|
||||||
|
The keys are hyper-parameter names, which should be the same as those used in your creator functions, and you can specify how you want to sample each hyper-parameter in the values of the search space. See [automl.hp](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/AutoML/automl.html#orca-automl-hp) for more details.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.automl import hp
|
||||||
|
|
||||||
|
search_space = {
|
||||||
|
"fc1_hidden_size": hp.choice([500, 600]),
|
||||||
|
"lr": hp.choice([0.001, 0.003]),
|
||||||
|
"batch_size": hp.choice([160, 320, 640]),
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 5: Automatically Fit and Search with Orca AutoEstimator**
|
||||||
|
|
||||||
|
First, create an `AutoEstimator`. You can refer to [AutoEstimator API doc](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/AutoML/automl.html#orca-automl-auto-estimator) for more details.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.automl.auto_estimator import AutoEstimator
|
||||||
|
|
||||||
|
auto_est = AutoEstimator.from_torch(model_creator=model_creator,
|
||||||
|
optimizer=optim_creator,
|
||||||
|
loss=criterion,
|
||||||
|
logs_dir="/tmp/zoo_automl_logs",
|
||||||
|
resources_per_trial={"cpu": 2},
|
||||||
|
name="lenet_mnist")
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, use the `AutoEstimator` to fit and search for the best hyper-parameter set.
|
||||||
|
|
||||||
|
```python
|
||||||
|
auto_est.fit(data=train_loader_creator,
|
||||||
|
validation_data=test_loader_creator,
|
||||||
|
search_space=search_space,
|
||||||
|
n_sampling=2,
|
||||||
|
epochs=1,
|
||||||
|
metric="accuracy")
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, you can get the best learned model and the best hyper-parameters.
|
||||||
|
|
||||||
|
```python
|
||||||
|
best_model = auto_est.get_best_model()
|
||||||
|
best_config = auto_est.get_best_config()
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** You should call `stop_orca_context()` when your application finishes.
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
# Use AutoXGBoost to auto-tune XGBoost parameters
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/autoxgboost_regressor_sklearn_boston.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/autoxgboost_regressor_sklearn_boston.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to use Orca AutoXGBoost for automated XGBoost tuning.**
|
||||||
|
|
||||||
|
Orca AutoXGBoost enables distributed automated hyper-parameter tuning for XGBoost, and includes `AutoXGBRegressor` and `AutoXGBClassifier` for the sklearn `XGBRegressor` and `XGBClassifier` respectively. See more about the [xgboost scikit-learn API](https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn).
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
[Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) is needed to prepare the Python environment for running this example. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # zoo is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo[ray]
|
||||||
|
pip install torch==1.7.1 torchvision==0.8.2
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Init Orca Context**
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
|
if cluster_mode == "local":
|
||||||
|
init_orca_context(cores=6, memory="2g", init_ray_on_spark=True) # run in local mode
|
||||||
|
elif cluster_mode == "k8s":
|
||||||
|
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=4, init_ray_on_spark=True) # run on K8s cluster
|
||||||
|
elif cluster_mode == "yarn":
|
||||||
|
init_orca_context(
|
||||||
|
cluster_mode="yarn-client", cores=4, num_nodes=2, memory="2g", init_ray_on_spark=True,
|
||||||
|
driver_memory="10g", driver_cores=1) # run on Hadoop YARN cluster
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
### **Step 2: Define Search space**
|
||||||
|
|
||||||
|
You should define a dictionary as your hyper-parameter search space.
|
||||||
|
|
||||||
|
The keys are hyper-parameter names you want to search for `XGBRegressor`, and you can specify how you want to sample each hyper-parameter in the values of the search space. See [automl.hp](https://analytics-zoo.readthedocs.io/en/latest/doc/PythonAPI/AutoML/automl.html#orca-automl-hp) for more details.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.automl import hp
|
||||||
|
|
||||||
|
search_space = {
|
||||||
|
"n_estimators": hp.grid_search([50, 100, 200]),
|
||||||
|
"max_depth": hp.choice([2, 4, 6]),
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 3: Automatically fit and search with Orca AutoXGBoost**
|
||||||
|
|
||||||
|
First, create an `AutoXGBRegressor`.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.automl.xgboost import AutoXGBRegressor
|
||||||
|
|
||||||
|
auto_xgb_reg = AutoXGBRegressor(cpus_per_trial=2,
|
||||||
|
name="auto_xgb_classifier",
|
||||||
|
min_child_weight=3,
|
||||||
|
random_state=2)
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, use the `AutoXGBRegressor` to fit and search for the best hyper-parameter set.
|
||||||
|
|
||||||
|
```python
|
||||||
|
auto_xgb_reg.fit(data=(X_train, y_train),
|
||||||
|
validation_data=(X_test, y_test),
|
||||||
|
search_space=search_space,
|
||||||
|
n_sampling=2,
|
||||||
|
metric="rmse")
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 4: Get best model and hyper parameters**
|
||||||
|
|
||||||
|
You can get the best learned model and the best hyper-parameter set for further deployment. The best model is an sklearn `XGBRegressor` instance.
|
||||||
|
|
||||||
|
```python
|
||||||
|
best_model = auto_xgb_reg.get_best_model()
|
||||||
|
best_config = auto_xgb_reg.get_best_config()
|
||||||
|
```
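Since the best model is a plain sklearn `XGBRegressor`, it can be used directly for prediction; `X_test` here refers to the test features passed to `fit` above:

```python
# A minimal sketch: use the tuned model like any sklearn regressor.
y_pred = best_model.predict(X_test)
```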
|
||||||
|
|
||||||
|
**Note:** You should call `stop_orca_context()` when your application finishes.
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
# Keras 2.3 Quickstart
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to scale out _Keras 2.3_ programs using Orca in 4 simple steps.** (_[TensorFlow 1.15](./orca-tf-quickstart.md) and [TensorFlow 2](./orca-tf2keras-quickstart.md) guides are also available._)
|
||||||
|
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics_zoo-${VERSION} # install either version 0.9 or latest nightly build
|
||||||
|
pip install tensorflow==1.15.0
|
||||||
|
pip install tensorflow-datasets==2.1.0
|
||||||
|
pip install psutil
|
||||||
|
pip install pandas
|
||||||
|
pip install scikit-learn
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Init Orca Context**
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
|
if cluster_mode == "local": # For local machine
|
||||||
|
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
||||||
|
elif cluster_mode == "k8s": # For K8s cluster
|
||||||
|
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
elif cluster_mode == "yarn": # For Hadoop/YARN cluster
|
||||||
|
init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
### **Step 2: Define the Model**
|
||||||
|
|
||||||
|
You may define your model, loss and metrics in the same way as in any standard (single node) Keras program.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from tensorflow import keras
|
||||||
|
|
||||||
|
model = keras.Sequential(
|
||||||
|
[keras.layers.Conv2D(20, kernel_size=(5, 5), strides=(1, 1), activation='tanh',
|
||||||
|
input_shape=(28, 28, 1), padding='valid'),
|
||||||
|
keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
|
||||||
|
keras.layers.Conv2D(50, kernel_size=(5, 5), strides=(1, 1), activation='tanh',
|
||||||
|
padding='valid'),
|
||||||
|
keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
|
||||||
|
keras.layers.Flatten(),
|
||||||
|
keras.layers.Dense(500, activation='tanh'),
|
||||||
|
keras.layers.Dense(10, activation='softmax'),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
model.compile(optimizer=keras.optimizers.RMSprop(),
|
||||||
|
loss='sparse_categorical_crossentropy',
|
||||||
|
metrics=['accuracy'])
|
||||||
|
```
|
||||||
|
### **Step 3: Define Train Dataset**
|
||||||
|
|
||||||
|
You can define the dataset using standard [tf.data.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset). Orca also supports [Spark DataFrame](https://spark.apache.org/docs/latest/sql-programming-guide.html) and [Orca XShards](../Overview/data-parallel-processing.md).
|
||||||
|
|
||||||
|
```python
|
||||||
|
import tensorflow as tf
|
||||||
|
import tensorflow_datasets as tfds
|
||||||
|
|
||||||
|
def preprocess(data):
|
||||||
|
data['image'] = tf.cast(data["image"], tf.float32) / 255.
|
||||||
|
return data['image'], data['label']
|
||||||
|
|
||||||
|
# get DataSet
|
||||||
|
mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
|
||||||
|
mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)
|
||||||
|
|
||||||
|
mnist_train = mnist_train.map(preprocess)
|
||||||
|
mnist_test = mnist_test.map(preprocess)
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 4: Fit with Orca Estimator**
|
||||||
|
|
||||||
|
First, create an Estimator.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
|
est = Estimator.from_keras(keras_model=model)
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, fit and evaluate using the Estimator.
|
||||||
|
```python
|
||||||
|
est.fit(data=mnist_train,
|
||||||
|
batch_size=320,
|
||||||
|
epochs=5,
|
||||||
|
validation_data=mnist_test)
|
||||||
|
|
||||||
|
result = est.evaluate(mnist_test)
|
||||||
|
print(result)
|
||||||
|
```
|
||||||
|
|
||||||
|
That's it: the same code can run seamlessly on your local laptop and on a distributed K8s or Hadoop cluster.
|
||||||
|
|
||||||
|
**Note:** You should call `stop_orca_context()` when your program finishes.
|
||||||
|
|
@ -0,0 +1,133 @@
|
||||||
|
# Use `torch.distributed` in Orca
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to scale out _PyTorch_ programs using the `torch.distributed` package in Orca.**
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
[Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) is needed to prepare the Python environment for running this example. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # zoo is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo[ray]
|
||||||
|
pip install torch==1.7.1 torchvision==0.8.2
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Init Orca Context**
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
|
if cluster_mode == "local": # For local machine
|
||||||
|
init_orca_context(cores=4, memory="10g")
|
||||||
|
elif cluster_mode == "k8s": # For K8s cluster
|
||||||
|
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
elif cluster_mode == "yarn": # For Hadoop/YARN cluster
|
||||||
|
init_orca_context(cluster_mode="yarn", cores=2, num_nodes=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
### **Step 2: Define the Model**
|
||||||
|
|
||||||
|
You may define your model, loss and optimizer in the same way as in any standard (single node) PyTorch program.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
class LeNet(nn.Module):
|
||||||
|
def __init__(self):
|
||||||
|
super(LeNet, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(1, 20, 5, 1)
|
||||||
|
self.conv2 = nn.Conv2d(20, 50, 5, 1)
|
||||||
|
self.fc1 = nn.Linear(4*4*50, 500)
|
||||||
|
self.fc2 = nn.Linear(500, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
x = F.relu(self.conv1(x))
|
||||||
|
x = F.max_pool2d(x, 2, 2)
|
||||||
|
x = F.relu(self.conv2(x))
|
||||||
|
x = F.max_pool2d(x, 2, 2)
|
||||||
|
x = x.view(-1, 4*4*50)
|
||||||
|
x = F.relu(self.fc1(x))
|
||||||
|
x = self.fc2(x)
|
||||||
|
return F.log_softmax(x, dim=1)
|
||||||
|
|
||||||
|
criterion = nn.NLLLoss()
|
||||||
|
```
|
||||||
|
After defining your model, you need to define a *Model Creator Function* that returns an instance of your model, and an *Optimizer Creator Function* that returns a PyTorch optimizer.
|
||||||
|
|
||||||
|
```python
|
||||||
|
def model_creator(config):
|
||||||
|
model = LeNet()
|
||||||
|
return model
|
||||||
|
|
||||||
|
def optim_creator(model, config):
|
||||||
|
return torch.optim.Adam(model.parameters(), lr=0.001)
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 3: Define Train Dataset**
|
||||||
|
|
||||||
|
You can define the dataset using a *Data Creator Function* that returns a PyTorch `DataLoader`. Orca also supports [Orca SparkXShards](../Overview/data-parallel-processing).
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
from torchvision import datasets, transforms
|
||||||
|
|
||||||
|
torch.manual_seed(0)
|
||||||
|
batch_size = 320
|
||||||
|
test_batch_size = 320
|
||||||
|
dir = './dataset'
|
||||||
|
|
||||||
|
def train_loader_creator(config, batch_size):
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(dir, train=True, download=True,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=batch_size, shuffle=True)
|
||||||
|
return train_loader
|
||||||
|
|
||||||
|
def test_loader_creator(config, batch_size):
|
||||||
|
test_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(dir, train=False,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=batch_size, shuffle=False)
|
||||||
|
return test_loader
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 4: Fit with Orca Estimator**
|
||||||
|
|
||||||
|
First, create an Estimator.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.pytorch import Estimator
|
||||||
|
from zoo.orca.learn.metrics import Accuracy
|
||||||
|
|
||||||
|
est = Estimator.from_torch(model=model_creator, optimizer=optim_creator, loss=criterion, metrics=[Accuracy()],
|
||||||
|
backend="torch_distributed")
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, fit and evaluate using the Estimator.
|
||||||
|
|
||||||
|
```python
|
||||||
|
est.fit(data=train_loader_creator, epochs=1, batch_size=batch_size)
|
||||||
|
result = est.evaluate(data=test_loader_creator, batch_size=test_batch_size)
|
||||||
|
for r in result:
|
||||||
|
print(r, ":", result[r])
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** You should call `stop_orca_context()` when your application finishes.
|
||||||
|
|
@ -0,0 +1,134 @@
|
||||||
|
# PyTorch Quickstart
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to scale out _PyTorch_ programs using Orca in 4 simple steps.**
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
[Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) is needed to prepare the Python environment for running this example. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # zoo is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo # install either version 0.9 or latest nightly build
|
||||||
|
pip install torch==1.7.1 torchvision==0.8.2
|
||||||
|
pip install six cloudpickle
|
||||||
|
pip install jep==3.9.0
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Init Orca Context**
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
|
if cluster_mode == "local": # For local machine
|
||||||
|
init_orca_context(cores=4, memory="10g")
|
||||||
|
elif cluster_mode == "k8s": # For K8s cluster
|
||||||
|
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
elif cluster_mode == "yarn": # For Hadoop/YARN cluster
|
||||||
|
init_orca_context(
|
||||||
|
cluster_mode="yarn", cores=2, num_nodes=2, memory="10g",
|
||||||
|
driver_memory="10g", driver_cores=1,
|
||||||
|
conf={"spark.rpc.message.maxSize": "1024",
|
||||||
|
"spark.task.maxFailures": "1",
|
||||||
|
"spark.driver.extraJavaOptions": "-Dbigdl.failure.retryTimes=1"})
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
### **Step 2: Define the Model**
|
||||||
|
|
||||||
|
You may define your model, loss and optimizer in the same way as in any standard (single node) PyTorch program.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
class LeNet(nn.Module):
|
||||||
|
def __init__(self):
|
||||||
|
super(LeNet, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(1, 20, 5, 1)
|
||||||
|
self.conv2 = nn.Conv2d(20, 50, 5, 1)
|
||||||
|
self.fc1 = nn.Linear(4*4*50, 500)
|
||||||
|
self.fc2 = nn.Linear(500, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
x = F.relu(self.conv1(x))
|
||||||
|
x = F.max_pool2d(x, 2, 2)
|
||||||
|
x = F.relu(self.conv2(x))
|
||||||
|
x = F.max_pool2d(x, 2, 2)
|
||||||
|
x = x.view(-1, 4*4*50)
|
||||||
|
x = F.relu(self.fc1(x))
|
||||||
|
x = self.fc2(x)
|
||||||
|
return F.log_softmax(x, dim=1)
|
||||||
|
|
||||||
|
model = LeNet()
|
||||||
|
model.train()
|
||||||
|
criterion = nn.NLLLoss()
|
||||||
|
adam = torch.optim.Adam(model.parameters(), 0.001)
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 3: Define Train Dataset**
|
||||||
|
|
||||||
|
You can define the dataset using standard [Pytorch DataLoader](https://pytorch.org/docs/stable/data.html).
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
from torchvision import datasets, transforms
|
||||||
|
|
||||||
|
torch.manual_seed(0)
|
||||||
|
dir='./'
|
||||||
|
|
||||||
|
batch_size=64
|
||||||
|
test_batch_size=64
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(dir, train=True, download=True,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=batch_size, shuffle=True)
|
||||||
|
test_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(dir, train=False,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=test_batch_size, shuffle=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, we can also use a [Data Creator Function](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist_data_creator_func.ipynb) or [Orca XShards](../Overview/data-parallel-processing) as the input data (especially when the data size is very large).
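A minimal sketch of such a *Data Creator Function* is shown below; it reuses the MNIST transforms above, and the `(config, batch_size)` signature is an assumption borrowed from the `torch.distributed` quickstart (the linked notebook is the authoritative reference):

```python
# A minimal sketch of a Data Creator Function equivalent to train_loader above
# (the (config, batch_size) signature is an assumption; see the linked notebook).
def train_loader_creator(config, batch_size):
    return torch.utils.data.DataLoader(
        datasets.MNIST(dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))])),
        batch_size=batch_size, shuffle=True)
```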
|
||||||
|
|
||||||
|
### **Step 4: Fit with Orca Estimator**
|
||||||
|
|
||||||
|
First, create an Estimator.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.pytorch import Estimator
|
||||||
|
from zoo.orca.learn.metrics import Accuracy
|
||||||
|
|
||||||
|
est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion, metrics=[Accuracy()])
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, fit and evaluate using the Estimator.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.trigger import EveryEpoch
|
||||||
|
|
||||||
|
est.fit(data=train_loader, epochs=10, validation_data=test_loader,
|
||||||
|
checkpoint_trigger=EveryEpoch())
|
||||||
|
|
||||||
|
result = est.evaluate(data=test_loader)
|
||||||
|
for r in result:
|
||||||
|
print(r, ":", result[r])
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** You should call `stop_orca_context()` when your application finishes.
|
||||||
|
|
@ -0,0 +1,121 @@
|
||||||
|
# TensorFlow 1.15 Quickstart
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to scale out _TensorFlow 1.15_ programs using Orca in 4 simple steps.** (_[Keras 2.3](./orca-keras-quickstart.md) and [TensorFlow 2](./orca-tf2keras-quickstart.md) guides are also available._)
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo # install either version 0.9 or latest nightly build
|
||||||
|
pip install tensorflow==1.15.0
|
||||||
|
pip install tensorflow-datasets==2.0
|
||||||
|
pip install psutil
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Init Orca Context**
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
|
if cluster_mode == "local": # For local machine
|
||||||
|
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
||||||
|
elif cluster_mode == "k8s": # For K8s cluster
|
||||||
|
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
elif cluster_mode == "yarn": # For Hadoop/YARN cluster
|
||||||
|
init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
### **Step 2: Define the Model**
|
||||||
|
|
||||||
|
You may define your model, loss and metrics in the same way as in any standard (single node) TensorFlow program.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
def accuracy(logits, labels):
|
||||||
|
predictions = tf.argmax(logits, axis=1, output_type=labels.dtype)
|
||||||
|
is_correct = tf.cast(tf.equal(predictions, labels), dtype=tf.float32)
|
||||||
|
return tf.reduce_mean(is_correct)
|
||||||
|
|
||||||
|
def lenet(images):
|
||||||
|
with tf.variable_scope('LeNet', [images]):
|
||||||
|
net = tf.layers.conv2d(images, 32, (5, 5), activation=tf.nn.relu, name='conv1')
|
||||||
|
net = tf.layers.max_pooling2d(net, (2, 2), 2, name='pool1')
|
||||||
|
net = tf.layers.conv2d(net, 64, (5, 5), activation=tf.nn.relu, name='conv2')
|
||||||
|
net = tf.layers.max_pooling2d(net, (2, 2), 2, name='pool2')
|
||||||
|
net = tf.layers.flatten(net)
|
||||||
|
net = tf.layers.dense(net, 1024, activation=tf.nn.relu, name='fc3')
|
||||||
|
logits = tf.layers.dense(net, 10)
|
||||||
|
return logits
|
||||||
|
|
||||||
|
# tensorflow inputs
|
||||||
|
images = tf.placeholder(dtype=tf.float32, shape=(None, 28, 28, 1))
|
||||||
|
# tensorflow labels
|
||||||
|
labels = tf.placeholder(dtype=tf.int32, shape=(None,))
|
||||||
|
|
||||||
|
logits = lenet(images)
|
||||||
|
loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
|
||||||
|
acc = accuracy(logits, labels)
|
||||||
|
```
|
||||||
|
### **Step 3: Define Train Dataset**
|
||||||
|
|
||||||
|
You can define the dataset using standard [tf.data.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset). Orca also supports [Spark DataFrame](https://spark.apache.org/docs/latest/sql-programming-guide.html) and [Orca XShards](../Overview/data-parallel-processing.md).
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
import tensorflow_datasets as tfds
|
||||||
|
|
||||||
|
def preprocess(data):
|
||||||
|
data['image'] = tf.cast(data["image"], tf.float32) / 255.
|
||||||
|
return data['image'], data['label']
|
||||||
|
|
||||||
|
# get DataSet
|
||||||
|
mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
|
||||||
|
mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)
|
||||||
|
|
||||||
|
mnist_train = mnist_train.map(preprocess)
|
||||||
|
mnist_test = mnist_test.map(preprocess)
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 4: Fit with Orca Estimator**
|
||||||
|
|
||||||
|
First, create an Estimator.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
|
est = Estimator.from_graph(inputs=images,
|
||||||
|
outputs=logits,
|
||||||
|
labels=labels,
|
||||||
|
loss=loss,
|
||||||
|
optimizer=tf.train.AdamOptimizer(),
|
||||||
|
metrics={"acc": acc})
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, fit and evaluate using the Estimator.
|
||||||
|
```python
|
||||||
|
est.fit(data=mnist_train,
|
||||||
|
batch_size=320,
|
||||||
|
epochs=5,
|
||||||
|
validation_data=mnist_test)
|
||||||
|
|
||||||
|
result = est.evaluate(mnist_test)
|
||||||
|
print(result)
|
||||||
|
```
|
||||||
|
|
||||||
|
That's it: the same code can run seamlessly on your local laptop and on a distributed K8s or Hadoop cluster.
|
||||||
|
|
||||||
|
**Note:** You should call `stop_orca_context()` when your program finishes.
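For example, a minimal sketch of how the program might be structured (using only the `init_orca_context` and `stop_orca_context` functions imported in Step 1; the `try/finally` wrapper is just an illustration, not an Orca requirement):

```python
init_orca_context(cluster_mode="local", cores=4, memory="10g")
try:
    # ... define the model and dataset, then fit/evaluate with the Orca Estimator ...
    pass
finally:
    # Always release the underlying Spark (and Ray) resources when the program finishes.
    stop_orca_context()
```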
|
||||||
|
|
@ -0,0 +1,123 @@
|
||||||
|
# TensorFlow 2 Quickstart
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to scale out _TensorFlow 2_ programs using Orca in 4 simple steps.** (_[TensorFlow 1.15](./orca-tf-quickstart.md) and [Keras 2.3](./orca-keras-quickstart.md) guides are also available._)
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo[ray] # install either version 0.9 or latest nightly build
|
||||||
|
pip install tensorflow==2.3.0
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Init Orca Context**
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context, stop_orca_context
|
||||||
|
|
||||||
|
if cluster_mode == "local": # For local machine
|
||||||
|
init_orca_context(cluster_mode="local", cores=4, memory="10g")
|
||||||
|
elif cluster_mode == "k8s": # For K8s cluster
|
||||||
|
init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
elif cluster_mode == "yarn": # For Hadoop/YARN cluster
|
||||||
|
init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1)
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the only place where you need to specify local or distributed mode. View [Orca Context](./../Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
### **Step 2: Define the Model**
|
||||||
|
|
||||||
|
You can then define the Keras model in the _Creator Function_ using the standard TensorFlow 2 APIs.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
def model_creator(config):
|
||||||
|
model = tf.keras.Sequential(
|
||||||
|
[tf.keras.layers.Conv2D(20, kernel_size=(5, 5), strides=(1, 1), activation='tanh',
|
||||||
|
input_shape=(28, 28, 1), padding='valid'),
|
||||||
|
tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
|
||||||
|
tf.keras.layers.Conv2D(50, kernel_size=(5, 5), strides=(1, 1), activation='tanh',
|
||||||
|
padding='valid'),
|
||||||
|
tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
|
||||||
|
tf.keras.layers.Flatten(),
|
||||||
|
tf.keras.layers.Dense(500, activation='tanh'),
|
||||||
|
tf.keras.layers.Dense(10, activation='softmax'),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
model.compile(optimizer=tf.keras.optimizers.RMSprop(),
|
||||||
|
loss='sparse_categorical_crossentropy',
|
||||||
|
metrics=['accuracy'])
|
||||||
|
return model
|
||||||
|
```
|
||||||
|
### **Step 3: Define Train Dataset**
|
||||||
|
|
||||||
|
You can define the dataset in the _Creator Function_ using standard [tf.data.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) APIs. Orca also supports [Spark DataFrame](https://spark.apache.org/docs/latest/sql-programming-guide.html) and [Orca XShards](../Overview/data-parallel-processing.md).
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
def preprocess(x, y):
|
||||||
|
x = tf.cast(tf.reshape(x, (28, 28, 1)), dtype=tf.float32) / 255.0
|
||||||
|
return x, y
|
||||||
|
|
||||||
|
def train_data_creator(config, batch_size):
|
||||||
|
(train_feature, train_label), _ = tf.keras.datasets.mnist.load_data()
|
||||||
|
|
||||||
|
dataset = tf.data.Dataset.from_tensor_slices((train_feature, train_label))
|
||||||
|
dataset = dataset.repeat()
|
||||||
|
dataset = dataset.map(preprocess)
|
||||||
|
dataset = dataset.shuffle(1000)
|
||||||
|
dataset = dataset.batch(batch_size)
|
||||||
|
return dataset
|
||||||
|
|
||||||
|
def val_data_creator(config, batch_size):
|
||||||
|
_, (val_feature, val_label) = tf.keras.datasets.mnist.load_data()
|
||||||
|
|
||||||
|
dataset = tf.data.Dataset.from_tensor_slices((val_feature, val_label))
|
||||||
|
dataset = dataset.repeat()
|
||||||
|
dataset = dataset.map(preprocess)
|
||||||
|
dataset = dataset.batch(batch_size)
|
||||||
|
return dataset
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 4: Fit with Orca Estimator**
|
||||||
|
|
||||||
|
First, create an Estimator.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.tf2 import Estimator
|
||||||
|
|
||||||
|
est = Estimator.from_keras(model_creator=model_creator, workers_per_node=2)
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, fit and evaluate using the Estimator.
|
||||||
|
```python
|
||||||
|
batch_size = 320
|
||||||
|
stats = est.fit(train_data_creator,
|
||||||
|
epochs=5,
|
||||||
|
batch_size=batch_size,
|
||||||
|
steps_per_epoch=60000 // batch_size,
|
||||||
|
validation_data=val_data_creator,
|
||||||
|
validation_steps=10000 // batch_size)
|
||||||
|
|
||||||
|
est.save("/tmp/mnist_keras.ckpt")
|
||||||
|
|
||||||
|
stats = est.evaluate(val_data_creator, num_steps=10000 // batch_size)
|
||||||
|
est.shutdown()
|
||||||
|
print(stats)
|
||||||
|
```
|
||||||
|
|
||||||
|
That's it: the same code can run seamlessly on your local laptop and on a distributed K8s or Hadoop cluster.
|
||||||
|
|
||||||
|
**Note:** You should call `stop_orca_context()` when your program finishes.
|
||||||
792
docs/readthedocs/source/doc/PPML/Overview/ppml.md
Normal file
|
|
@ -0,0 +1,792 @@
|
||||||
|
# PPML User Guide
|
||||||
|
|
||||||
|
## 1. Privacy Preserving Machine Learning
|
||||||
|
Protecting privacy and confidentiality is critical for large-scale data analysis and machine learning. Analytics Zoo ***PPML*** combines various low-level hardware and software security technologies (e.g., Intel SGX, LibOS such as Graphene and Occlum, Federated Learning, etc.), so that users can continue to apply standard Big Data and AI technologies (such as Apache Spark, Apache Flink, TensorFlow, PyTorch, etc.) without sacrificing privacy.
|
||||||
|
|
||||||
|
## 1.1 PPML for Big Data AI
|
||||||
|
Analytics Zoo provides a distributed PPML platform for protecting the *end-to-end Big Data AI pipeline* (from data ingestion, data analysis, all the way to machine learning and deep learning). In particular, it extends the single-node [Trusted Execution Environment](https://en.wikipedia.org/wiki/Trusted_execution_environment) to provide a *Trusted Cluster Environment*, so as to run unmodified Big Data analysis and ML/DL programs in a secure fashion on (private or public) cloud:
|
||||||
|
|
||||||
|
* Compute and memory protected by SGX Enclaves
|
||||||
|
* Network communication protected by remote attestation and TLS
|
||||||
|
* Storage (e.g., data and model) protected by encryption
|
||||||
|
* Optional federated learning support
|
||||||
|
|
||||||
|
That is, even when the program runs in an untrusted cloud environment, all the data and models are protected (e.g., using encryption) on disk and network, and the compute and memory are also protected using SGX Enclaves, so as to preserve the confidentiality and privacy during data analysis and machine learning.
|
||||||
|
|
||||||
|
In the current release, two types of trusted Big Data AI applications are supported:
|
||||||
|
|
||||||
|
1. Big Data analytics and ML/DL (supporting [Apache Spark](https://spark.apache.org/) and [BigDL](https://github.com/intel-analytics/BigDL))
|
||||||
|
2. Realtime compute and ML/DL (supporting [Apache Flink](https://flink.apache.org/) and Analytics Zoo [Cluster Serving](https://www.usenix.org/conference/opml20/presentation/song))
|
||||||
|
|
||||||
|
## 2. Trusted Big Data Analytics and ML
|
||||||
|
With the trusted Big Data analytics and ML/DL support, users can run standard Spark data analysis (such as Spark SQL, Dataframe, MLlib, etc.) and distributed deep learning (using BigDL) in a secure and trusted fashion.
|
||||||
|
|
||||||
|
### 2.1 Prerequisite
|
||||||
|
|
||||||
|
Download the scripts and Dockerfiles from [this link](https://github.com/intel-analytics/analytics-zoo), and run the following commands:
|
||||||
|
```bash
|
||||||
|
cd analytics-zoo/ppml/
|
||||||
|
```
|
||||||
|
|
||||||
|
1. Install SGX Driver
|
||||||
|
|
||||||
|
Please check whether the current processor supports [SGX](https://www.intel.com/content/www/us/en/support/articles/000028173/processors/intel-core-processors.html). Then, enable the SGX feature in the BIOS. Note that after SGX is enabled, a portion of memory will be assigned to SGX (this memory cannot be seen or used by the OS and other applications).
|
||||||
|
|
||||||
|
Check the SGX driver with `ls /dev | grep sgx`. If the SGX driver is not installed, please install the [SGX DCAP driver](https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/driver/linux):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd scripts/
|
||||||
|
./install-graphene-driver.sh
|
||||||
|
cd ..
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Generate key for SGX enclave
|
||||||
|
|
||||||
|
Generate the enclave key using the command below, and keep it safe for future remote attestations and for starting SGX enclaves more securely. The command generates a file `enclave-key.pem` in the current working directory, which is the enclave key. To store the key elsewhere, modify the output file path.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd scripts/
|
||||||
|
openssl genrsa -3 -out enclave-key.pem 3072
|
||||||
|
cd ..
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Prepare keys for TLS with root permission (for testing only; you need to input a security password for the keys). Please also install JDK/OpenJDK and add the Java binary directory to your `PATH` so that `keytool` is available.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd scripts/
|
||||||
|
./generate-keys.sh
|
||||||
|
cd ..
|
||||||
|
```
|
||||||
|
When entering a pass phrase or password, you can choose one yourself; the same password can also be used in the next step of generating the `password` files. The password should be longer than 6 characters and contain both numbers and letters; one sample password is "3456abcd". These passwords will be used for future remote attestations and to start SGX enclaves more securely. This script will generate 6 files in the `./ppml/scripts/keys` dir (you can replace them with your own TLS keys).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
keystore.jks
|
||||||
|
keystore.pkcs12
|
||||||
|
server.crt
|
||||||
|
server.csr
|
||||||
|
server.key
|
||||||
|
server.pem
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Generate `password` to avoid transferring the plain-text security password (used for key generation in `generate-keys.sh`).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd scripts/
|
||||||
|
./generate-password.sh used_password_when_generate_keys
|
||||||
|
cd ..
|
||||||
|
```
|
||||||
|
This script will generate 2 files in the `./ppml/scripts/password` dir.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
key.txt
|
||||||
|
output.bin
|
||||||
|
```
|
||||||
|
### 2.2 Trusted Big Data Analytics and ML on JVM
|
||||||
|
|
||||||
|
#### 2.2.1 Prepare Docker Image
|
||||||
|
|
||||||
|
Pull docker image from Dockerhub
|
||||||
|
```bash
|
||||||
|
docker pull intelanalytics/analytics-zoo-ppml-trusted-big-data-ml-scala-graphene:0.12.0-SNAPSHOT
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, you can build docker image from Dockerfile (this will take some time):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd trusted-big-data-ml/scala/docker-graphene
|
||||||
|
./build-docker-image.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.2 Run Trusted Big Data and ML on Single Node
|
||||||
|
|
||||||
|
##### 2.2.2.1 Start PPML Container
|
||||||
|
|
||||||
|
Enter `analytics-zoo/ppml/trusted-big-data-ml/scala/docker-graphene` dir.
|
||||||
|
|
||||||
|
1. Copy `keys` and `password`
|
||||||
|
```bash
|
||||||
|
cd trusted-big-data-ml/scala/docker-graphene
|
||||||
|
# copy keys and password into current directory
|
||||||
|
cp -r ../.././../scripts/keys/ .
|
||||||
|
cp -r ../.././../scripts/password/ .
|
||||||
|
```
|
||||||
|
2. Prepare the data
|
||||||
|
To train a model with PPML in Analytics Zoo and BigDL, you need to prepare the data first. The Docker image takes LeNet and MNIST as an example. <br>
|
||||||
|
You can download the MNIST data from [here](http://yann.lecun.com/exdb/mnist/). Unzip all the files and put them in one folder (e.g., mnist). <br>
|
||||||
|
There are four files: **train-images-idx3-ubyte** contains the training images, **train-labels-idx1-ubyte** is the training label file, **t10k-images-idx3-ubyte** contains the validation images, and **t10k-labels-idx1-ubyte** contains the validation labels. For more details, please refer to the download page. <br>
|
||||||
|
After you decompress the gzip files, they may be renamed by some decompression tools, e.g., **train-images-idx3-ubyte** may become **train-images.idx3-ubyte**. Please change the names back before you run the example. <br>
|
||||||
|
|
||||||
|
3. To start the container, first modify the paths in deploy-local-spark-sgx.sh, and then run the following commands:
|
||||||
|
```bash
|
||||||
|
./deploy-local-spark-sgx.sh
|
||||||
|
sudo docker exec -it spark-local bash
|
||||||
|
cd /ppml/trusted-big-data-ml
|
||||||
|
./init.sh
|
||||||
|
```
|
||||||
|
**ENCLAVE_KEY_PATH** is the absolute path to `enclave-key.pem`; following the commands above, the path would be "analytics-zoo/ppml/scripts/enclave-key.pem". <br>
|
||||||
|
**DATA_PATH** is the absolute path to the data (e.g., mnist) that will be used later in the Spark program; following the commands above, the path would be "analytics-zoo/ppml/trusted-big-data-ml/scala/docker-graphene/mnist". <br>
|
||||||
|
**KEYS_PATH** is the absolute path to the keys you just created and copied; following the commands above, the path would be "analytics-zoo/ppml/trusted-big-data-ml/scala/docker-graphene/keys". <br>
|
||||||
|
**LOCAL_IP** means your local IP address. <br>
|
||||||
|
|
||||||
|
##### 2.2.2.2 Run Your Spark Program with Analytics Zoo PPML on SGX
|
||||||
|
|
||||||
|
To run your own PySpark program, first put it under the trusted directory in SGX, `/ppml/trusted-big-data-ml/work`. Then run it with `ppml-spark-submit.sh` using the command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./ppml-spark-submit.sh work/YOUR_PROGRAM.py | tee YOUR_PROGRAM-sgx.log
|
||||||
|
```
|
||||||
|
|
||||||
|
When the program finishes, check the results with the log `YOUR_PROGRAM-sgx.log`.
|
||||||
|
|
||||||
|
##### 2.2.2.3 Run Trusted Spark Examples with Analytics Zoo PPML SGX
|
||||||
|
|
||||||
|
##### 2.2.2.3.1 Run Trusted Spark Pi
|
||||||
|
|
||||||
|
This example runs a simple Spark PI program, which is an easy way to verify if the Trusted PPML environment is ready.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark Pi:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash start-spark-local-pi-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/spark.local.pi.sgx.log | egrep "###|INFO|Pi"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
> Pi is roughly 3.1422957114785572
|
||||||
|
|
||||||
|
##### 2.2.2.3.2 Run Trusted Spark SQL
|
||||||
|
|
||||||
|
This example shows how to run trusted Spark SQL (e.g., TPC-H queries).
|
||||||
|
|
||||||
|
First, download and install [SBT](https://www.scala-sbt.org/download.html) and deploy an [HDFS](https://hadoop.apache.org/docs/r2.7.7/hadoop-project-dist/hadoop-common/ClusterSetup.html) cluster for the TPC-H dataset and output, then build the source code with SBT and generate the TPC-H dataset according to the [TPC-H example](https://github.com/intel-analytics/zoo-tutorials/tree/master/tpch-spark). After that, check whether `spark-tpc-h-queries_2.11-1.0.jar` exists under `tpch-spark/target/scala-2.11`; if so, the project has been packaged successfully.
|
||||||
|
|
||||||
|
Copy the TPC-H package to container:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker cp tpch-spark/ spark-local:/ppml/trusted-big-data-ml/work
|
||||||
|
docker cp tpch-spark/start-spark-local-tpc-h-sgx.sh spark-local:/ppml/trusted-big-data-ml/
|
||||||
|
sudo docker exec -it spark-local bash
|
||||||
|
cd /ppml/trusted-big-data-ml/
|
||||||
|
```
|
||||||
|
Then run the script below:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash start-spark-local-tpc-h-sgx.sh [your_hdfs_tpch_data_dir] [your_hdfs_output_dir]
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/spark.local.tpc.h.sgx.log | egrep "###|INFO|finished"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look like:
|
||||||
|
|
||||||
|
> ----------------22 finished--------------------
|
||||||
|
|
||||||
|
##### 2.2.2.3.3 Run Trusted Deep Learning
|
||||||
|
|
||||||
|
This example shows how to run trusted deep learning (using a BigDL LeNet program).
|
||||||
|
|
||||||
|
First, download the MNIST Data from [here](http://yann.lecun.com/exdb/mnist/). Use `gzip -d` to unzip all the downloaded files (train-images-idx3-ubyte.gz, train-labels-idx1-ubyte.gz, t10k-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz) and put them into folder `/ppml/trusted-big-data-ml/work/data`.
|
||||||
|
|
||||||
|
Then run the following script:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash start-spark-local-train-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/spark.local.sgx.log | egrep "###|INFO"
|
||||||
|
```
|
||||||
|
or
|
||||||
|
```bash
|
||||||
|
sudo docker logs spark-local | egrep "###|INFO"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look like:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
############# train optimized[P1182:T2:java] ---- end time: 310534 ms return from shim_write(...) = 0x1d
|
||||||
|
############# ModuleLoader.saveToFile File.saveBytes end, used 827002 ms[P1182:T2:java] ---- end time: 1142754 ms return from shim_write(...) = 0x48
|
||||||
|
############# ModuleLoader.saveToFile saveWeightsToFile end, used 842543 ms[P1182:T2:java] ---- end time: 1985297 ms return from shim_write(...) = 0x4b
|
||||||
|
############# model saved[P1182:T2:java] ---- end time: 1985297 ms return from shim_write(...) = 0x19
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.2.3 Run Trusted Big Data and ML on Cluster
|
||||||
|
|
||||||
|
##### 2.2.3.1 Configure the Environment
|
||||||
|
|
||||||
|
Prerequisite: passwordless ssh login to all the nodes needs to be properly set up first.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nano environments.sh
|
||||||
|
```
|
||||||
|
##### 2.2.3.2 Start Distributed Big Data and ML Platform
|
||||||
|
|
||||||
|
First run the following command to start the service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-distributed-standalone-spark.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run the following command to start the training:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./start-distributed-spark-train-sgx.sh
|
||||||
|
```
|
||||||
|
##### 2.2.3.3 Stop Distributed Big Data and ML Platform
|
||||||
|
|
||||||
|
First, stop the training:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./stop-distributed-standalone-spark.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Then stop the service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./undeploy-distributed-standalone-spark.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.3 Trusted Big Data Analytics and ML with Python
|
||||||
|
|
||||||
|
#### 2.3.1 Prepare Docker Image
|
||||||
|
|
||||||
|
Pull docker image from Dockerhub
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker pull intelanalytics/analytics-zoo-ppml-trusted-big-data-ml-python-graphene:0.11-SNAPSHOT
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, you can build docker image from Dockerfile (this will take some time):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ppml/trusted-big-data-ml/python/docker-graphene
|
||||||
|
./build-docker-image.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2.3.2 Run Trusted Big Data and ML on Single Node
|
||||||
|
|
||||||
|
##### 2.3.2.1 Start PPML Container
|
||||||
|
|
||||||
|
Enter `analytics-zoo/ppml/trusted-big-data-ml/python/docker-graphene` directory.
|
||||||
|
|
||||||
|
1. Copy `keys` and `password` to current directory
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ppml/trusted-big-data-ml/scala/docker-graphene
|
||||||
|
# copy keys and password into current directory
|
||||||
|
cp -r ../keys .
|
||||||
|
cp -r ../password .
|
||||||
|
```
|
||||||
|
|
||||||
|
2. To start the container, first modify the paths in deploy-local-spark-sgx.sh, and then run the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-local-spark-sgx.sh
|
||||||
|
sudo docker exec -it spark-local bash
|
||||||
|
cd /ppml/trusted-big-data-ml
|
||||||
|
./init.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
##### 2.3.2.2 Run Your Pyspark Program with Analytics Zoo PPML on SGX
|
||||||
|
|
||||||
|
To run your own PySpark program, first put it under the trusted directory in SGX, `/ppml/trusted-big-data-ml/work`. Then run it with `ppml-spark-submit.sh` using the command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./ppml-spark-submit.sh work/YOUR_PROGRAM.py | tee YOUR_PROGRAM-sgx.log
|
||||||
|
```
|
||||||
|
|
||||||
|
When the program finishes, check the results with the log `YOUR_PROGRAM-sgx.log`.
|
||||||
|
|
||||||
|
##### 2.3.2.3 Run Python and Pyspark Examples with Analytics Zoo PPML on SGX
|
||||||
|
|
||||||
|
##### 2.3.2.3.1 Run Trusted Python Helloworld
|
||||||
|
|
||||||
|
This example runs a simple native python program, which is an easy way to verify if the Trusted PPML environment is correctly set up.
|
||||||
|
|
||||||
|
Run the script to run trusted Python Helloworld:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash work/start-scripts/start-python-helloworld-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-helloworld-sgx.log | egrep "Hello World"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
> Hello World
|
||||||
|
|
||||||
|
##### 2.3.2.3.2 Run Trusted Python Numpy
|
||||||
|
|
||||||
|
This example shows how to run trusted native python numpy.
|
||||||
|
|
||||||
|
Run the script to run trusted Python Numpy:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash work/start-scripts/start-python-numpy-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-numpy-sgx.log | egrep "numpy.dot"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
> numpy.dot: 0.034211914986371994 sec
|
||||||
|
|
||||||
|
##### 2.3.2.3.3 Run Trusted Spark Pi
|
||||||
|
|
||||||
|
This example runs a simple Spark PI program.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark Pi:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash work/start-scripts/start-spark-local-pi-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-pi-sgx.log | egrep "roughly"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
> Pi is roughly 3.146760
|
||||||
|
|
||||||
|
##### 2.3.2.3.4 Run Trusted Spark Wordcount
|
||||||
|
|
||||||
|
This example runs a simple Spark Wordcount program.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark Wordcount:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash work/start-scripts/start-spark-local-wordcount-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-wordcount-sgx.log | egrep "print"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
> print("Hello: 1
|
||||||
|
>
|
||||||
|
> print(sys.path);: 1
|
||||||
|
|
||||||
|
##### 2.3.2.3.5 Run Trusted Spark SQL
|
||||||
|
|
||||||
|
This example shows how to run trusted Spark SQL.
|
||||||
|
|
||||||
|
First, make sure that the resource paths in `/ppml/trusted-big-data-ml/work/spark-2.4.6/examples/src/main/python/sql/basic.py` match the actual paths of `people.json` and `people.txt`.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark SQL:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash work/start-scripts/start-spark-local-sql-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-sql-basic-sgx.log | egrep "Justin"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
>| 19| Justin|
|
||||||
|
>
|
||||||
|
>| Justin|
|
||||||
|
>
|
||||||
|
>| Justin| 20|
|
||||||
|
>
|
||||||
|
>| 19| Justin|
|
||||||
|
>
|
||||||
|
>| 19| Justin|
|
||||||
|
>
|
||||||
|
>| 19| Justin|
|
||||||
|
>
|
||||||
|
>Name: Justin
|
||||||
|
>
|
||||||
|
>| Justin|
|
||||||
|
|
||||||
|
##### 2.3.2.3.6 Run Trusted Spark BigDL
|
||||||
|
|
||||||
|
This example shows how to run trusted Spark BigDL.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark BigDL; it will take some time to show the final results:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash work/start-scripts/start-spark-local-bigdl-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-bigdl-lenet-sgx.log | egrep "Accuracy"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
> creating: createTop1Accuracy
|
||||||
|
>
|
||||||
|
> 2021-06-18 01:39:45 INFO DistriOptimizer$:180 - [Epoch 1 60032/60000][Iteration 469][Wall Clock 457.926565s] Top1Accuracy is Accuracy(correct: 9488, count: 10000, accuracy: 0.9488)
|
||||||
|
>
|
||||||
|
> 2021-06-18 01:46:20 INFO DistriOptimizer$:180 - [Epoch 2 60032/60000][Iteration 938][Wall Clock 845.747782s] Top1Accuracy is Accuracy(correct: 9696, count: 10000, accuracy: 0.9696)
|
||||||
|
|
||||||
|
##### 2.3.2.3.7 Run Trusted Spark XGBoost Regressor
|
||||||
|
|
||||||
|
This example shows how to run trusted Spark XGBoost Regressor.
|
||||||
|
|
||||||
|
First, make sure that `Boston_Housing.csv` is under the `work/data` directory or the path used in `start-spark-local-xgboost-regressor-sgx.sh`. Replace the value of `RABIT_TRACKER_IP` with your own IP address in the script.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark XGBoost Regressor; it will take some time to show the final results:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash work/start-scripts/start-spark-local-xgboost-regressor-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-zoo-xgboost-regressor-sgx.log | egrep "prediction" -A19
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
> | features|label| prediction|
|
||||||
|
>
|
||||||
|
> +--------------------+-----+------------------+
|
||||||
|
>
|
||||||
|
> |[41.5292,0.0,18.1...| 8.5| 8.51994514465332|
|
||||||
|
>
|
||||||
|
> |[67.9208,0.0,18.1...| 5.0| 5.720333099365234|
|
||||||
|
>
|
||||||
|
> |[20.7162,0.0,18.1...| 11.9|10.601168632507324|
|
||||||
|
>
|
||||||
|
> |[11.9511,0.0,18.1...| 27.9| 26.19390106201172|
|
||||||
|
>
|
||||||
|
> |[7.40389,0.0,18.1...| 17.2|16.112293243408203|
|
||||||
|
>
|
||||||
|
> |[14.4383,0.0,18.1...| 27.5|25.952226638793945|
|
||||||
|
>
|
||||||
|
> |[51.1358,0.0,18.1...| 15.0| 14.67484188079834|
|
||||||
|
>
|
||||||
|
> |[14.0507,0.0,18.1...| 17.2|16.112293243408203|
|
||||||
|
>
|
||||||
|
> |[18.811,0.0,18.1,...| 17.9| 17.42863655090332|
|
||||||
|
>
|
||||||
|
> |[28.6558,0.0,18.1...| 16.3| 16.0191593170166|
|
||||||
|
>
|
||||||
|
> |[45.7461,0.0,18.1...| 7.0| 5.300708770751953|
|
||||||
|
>
|
||||||
|
> |[18.0846,0.0,18.1...| 7.2| 6.346951007843018|
|
||||||
|
>
|
||||||
|
> |[10.8342,0.0,18.1...| 7.5| 6.571983814239502|
|
||||||
|
>
|
||||||
|
> |[25.9406,0.0,18.1...| 10.4|10.235769271850586|
|
||||||
|
>
|
||||||
|
> |[73.5341,0.0,18.1...| 8.8| 8.460335731506348|
|
||||||
|
>
|
||||||
|
> |[11.8123,0.0,18.1...| 8.4| 9.193297386169434|
|
||||||
|
>
|
||||||
|
> |[11.0874,0.0,18.1...| 16.7|16.174896240234375|
|
||||||
|
>
|
||||||
|
> |[7.02259,0.0,18.1...| 14.2| 13.38729190826416|
|
||||||
|
|
||||||
|
##### 2.3.2.3.8 Run Trusted Spark XGBoost Classifier
|
||||||
|
|
||||||
|
This example shows how to run trusted Spark XGBoost Classifier.
|
||||||
|
|
||||||
|
Before running the example, download the sample [pima-indians-diabetes](https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) dataset. After downloading it, make sure that `pima-indians-diabetes.data.csv` is under the `work/data` directory or the path used in `start-spark-local-xgboost-classifier-sgx.sh`. Replace `path_of_pima_indians_diabetes_csv` with your path to `pima-indians-diabetes.data.csv` and the value of `RABIT_TRACKER_IP` with your own IP address in the script.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark XGBoost Classifier; it will take some time to show the final results:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash start-spark-local-xgboost-classifier-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-xgboost-classifier-sgx.log | egrep "prediction" -A7
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should look something like:
|
||||||
|
|
||||||
|
> | f1| f2| f3| f4| f5| f6| f7| f8|label| rawPrediction| probability|prediction|
|
||||||
|
>
|
||||||
|
> +----+-----+----+----+-----+----+-----+----+-----+--------------------+--------------------+----------+
|
||||||
|
>
|
||||||
|
> |11.0|138.0|74.0|26.0|144.0|36.1|0.557|50.0| 1.0|[-0.8209581375122...|[0.17904186248779...| 1.0|
|
||||||
|
>
|
||||||
|
> | 3.0|106.0|72.0| 0.0| 0.0|25.8|0.207|27.0| 0.0|[-0.0427864193916...|[0.95721358060836...| 0.0|
|
||||||
|
>
|
||||||
|
> | 6.0|117.0|96.0| 0.0| 0.0|28.7|0.157|30.0| 0.0|[-0.2336160838603...|[0.76638391613960...| 0.0|
|
||||||
|
>
|
||||||
|
> | 2.0| 68.0|62.0|13.0| 15.0|20.1|0.257|23.0| 0.0|[-0.0315906107425...|[0.96840938925743...| 0.0|
|
||||||
|
>
|
||||||
|
> | 9.0|112.0|82.0|24.0| 0.0|28.2|1.282|50.0| 1.0|[-0.7087597250938...|[0.29124027490615...| 1.0|
|
||||||
|
>
|
||||||
|
> | 0.0|119.0| 0.0| 0.0| 0.0|32.4|0.141|24.0| 1.0|[-0.4473398327827...|[0.55266016721725...| 0.0|
|
||||||
|
|
||||||
|
##### 2.3.2.3.9 Run Trusted Spark Orca Data
|
||||||
|
|
||||||
|
This example shows how to run trusted Spark Orca Data.
|
||||||
|
|
||||||
|
Before running the example, download the [NYC Taxi](https://raw.githubusercontent.com/numenta/NAB/master/data/realKnownCause/nyc_taxi.csv) dataset from the Numenta Anomaly Benchmark (NAB) for the demo. After downloading it, make sure that `nyc_taxi.csv` is under the `work/data` directory or the path used in `start-spark-local-orca-data-sgx.sh`. Replace `path_of_nyc_taxi_csv` with your path to `nyc_taxi.csv` in the script.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark Orca Data; it will take some time to show the final results:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash start-spark-local-orca-data-sgx.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat /ppml/trusted-big-data-ml/test-orca-data-sgx.log | egrep -a "INFO data|Stopping" -A10
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should contain content like:
|
||||||
|
|
||||||
|
>INFO data collected: [ timestamp value
|
||||||
|
>
|
||||||
|
>0 2014-07-01 00:00:00 10844
|
||||||
|
>
|
||||||
|
>1 2014-07-01 00:30:00 8127
|
||||||
|
>
|
||||||
|
>2 2014-07-01 01:00:00 6210
|
||||||
|
>
|
||||||
|
>3 2014-07-01 01:30:00 4656
|
||||||
|
>
|
||||||
|
>4 2014-07-01 02:00:00 3820
|
||||||
|
>
|
||||||
|
>... ... ...
|
||||||
|
>
|
||||||
|
>10315 2015-01-31 21:30:00 24670
|
||||||
|
>
|
||||||
|
>10316 2015-01-31 22:00:00 25721
|
||||||
|
>
|
||||||
|
>10317 2015-01-31 22:30:00 27309
|
||||||
|
>
|
||||||
|
>10318 2015-01-31 23:00:00 26591
|
||||||
|
>
|
||||||
|
>\--
|
||||||
|
>
|
||||||
|
>INFO data2 collected: [ timestamp value datetime hours awake
|
||||||
|
>
|
||||||
|
>0 2014-07-01 00:00:00 10844 2014-07-01 00:00:00 0 1
|
||||||
|
>
|
||||||
|
>1 2014-07-01 00:30:00 8127 2014-07-01 00:30:00 0 1
|
||||||
|
>
|
||||||
|
>2 2014-07-01 03:00:00 2369 2014-07-01 03:00:00 3 0
|
||||||
|
>
|
||||||
|
>3 2014-07-01 04:30:00 2158 2014-07-01 04:30:00 4 0
|
||||||
|
>
|
||||||
|
>4 2014-07-01 05:00:00 2515 2014-07-01 05:00:00 5 0
|
||||||
|
>
|
||||||
|
>... ... ... ... ... ...
|
||||||
|
>
|
||||||
|
>5215 2015-01-31 17:30:00 23595 2015-01-31 17:30:00 17 1
|
||||||
|
>
|
||||||
|
>5216 2015-01-31 18:30:00 27286 2015-01-31 18:30:00 18 1
|
||||||
|
>
|
||||||
|
>5217 2015-01-31 19:00:00 28804 2015-01-31 19:00:00 19 1
|
||||||
|
>
|
||||||
|
>5218 2015-01-31 19:30:00 27773 2015-01-31 19:30:00 19 1
|
||||||
|
>
|
||||||
|
>\--
|
||||||
|
>
|
||||||
|
>Stopping orca context
|
||||||
|
|
||||||
|
##### 2.3.2.3.10 Run Trusted Spark Orca Learn Tensorflow Basic Text Classification
|
||||||
|
|
||||||
|
This example shows how to run trusted Spark Orca Learn TensorFlow basic text classification.
|
||||||
|
|
||||||
|
Run the script to run trusted Spark Orca Learn TensorFlow basic text classification; it will take some time to show the final results. To run this example in standalone mode, replace `-e SGX_MEM_SIZE=32G \` with `-e SGX_MEM_SIZE=64G \` in `start-distributed-spark-driver.sh`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash start-spark-local-orca-tf-text.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Open another terminal and check the log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo docker exec -it spark-local cat test-orca-tf-text.log | egrep "results"
|
||||||
|
```
|
||||||
|
|
||||||
|
The result should be similar to:
|
||||||
|
|
||||||
|
>INFO results: {'loss': 0.6932533979415894, 'acc Top1Accuracy': 0.7544000148773193}
|
||||||
|
|
||||||
|
#### 2.3.3 Run Trusted Big Data and ML on Cluster
|
||||||
|
|
||||||
|
##### 2.3.3.1 Configure the Environment
|
||||||
|
|
||||||
|
Prerequisite: passwordless ssh login to all the nodes needs to be properly set up first.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nano environments.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
##### 2.3.3.2 Start Distributed Big Data and ML Platform
|
||||||
|
|
||||||
|
First run the following command to start the service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-distributed-standalone-spark.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Then start the service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./start-distributed-spark-driver.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
After that, you can run the previous examples on the cluster by replacing `--master 'local[4]'` in the start scripts with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--master 'spark://your_master_url' \
|
||||||
|
--conf spark.authenticate=true \
|
||||||
|
--conf spark.authenticate.secret=your_secret_key \
|
||||||
|
```
|
||||||
|
|
||||||
|
##### 2.3.3.3 Stop Distributed Big Data and ML Platform
|
||||||
|
|
||||||
|
First, stop the training:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./stop-distributed-standalone-spark.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Then stop the service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./undeploy-distributed-standalone-spark.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Trusted Realtime Compute and ML
|
||||||
|
|
||||||
|
With the trusted realtime compute and ML/DL support, users can run standard Flink stream processing and distributed DL model inference (using [Cluster Serving](https://www.usenix.org/conference/opml20/presentation/song)) in a secure and trusted fashion. In this feature, both [Graphene](https://github.com/oscarlab/graphene) and [Occlum](https://github.com/occlum/occlum) are supported; users can choose either of them as the LibOS layer.
|
||||||
|
|
||||||
|
### 3.1 Prerequisite
|
||||||
|
|
||||||
|
Please refer to [Section 2.1 Prerequisite](#prerequisite). For Occlum backend, if your kernel version is below 5.11, please install [enable_rdfsbase](https://github.com/occlum/enable_rdfsbase).
|
||||||
|
|
||||||
|
### 3.2 Prepare Docker Image
|
||||||
|
|
||||||
|
Pull docker image from Dockerhub
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# For Graphene
|
||||||
|
docker pull intelanalytics/analytics-zoo-ppml-trusted-realtime-ml-scala-graphene:0.12.0-SNAPSHOT
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# For Occlum
|
||||||
|
docker pull intelanalytics/analytics-zoo-ppml-trusted-realtime-ml-scala-occlum:0.12.0-SNAPSHOT
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, you can build the docker image from the Dockerfile (this will take some time).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# For Graphene
|
||||||
|
cd ppml/trusted-realtime-ml/scala/docker-graphene
|
||||||
|
./build-docker-image.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# For Occlum
|
||||||
|
cd ppml/trusted-realtime-ml/scala/docker-occlum
|
||||||
|
./build-docker-image.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.3 Run Trusted Realtime Compute and ML
|
||||||
|
|
||||||
|
#### 3.3.1 Configure the Environment
|
||||||
|
|
||||||
|
Enter `analytics-zoo/ppml/trusted-realtime-ml/scala/docker-graphene` or `analytics-zoo/ppml/trusted-realtime-ml/scala/docker-occlum` dir.
|
||||||
|
|
||||||
|
Modify `environments.sh`: change the MASTER and WORKER IPs and the file paths (e.g., `keys` and `password`).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nano environments.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.3.2 Start the service
|
||||||
|
|
||||||
|
Start Flink service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-flink.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3.3.3 Run Trusted Flink Program
|
||||||
|
|
||||||
|
Submit Flink jobs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ${FLINK_HOME}
|
||||||
|
./bin/flink run ./examples/batch/WordCount.jar
|
||||||
|
```
|
||||||
|
|
||||||
|
If the JobManager is not running on the current node, please add `-m ${FLINK_JOB_MANAGER_IP}` to the command.
|
||||||
|
|
||||||
|
The result should look like:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
(a,5)
|
||||||
|
(action,1)
|
||||||
|
(after,1)
|
||||||
|
(against,1)
|
||||||
|
(all,2)
|
||||||
|
(and,12)
|
||||||
|
(arms,1)
|
||||||
|
(arrows,1)
|
||||||
|
(awry,1)
|
||||||
|
(ay,1)
|
||||||
|
(bare,1)
|
||||||
|
(be,4)
|
||||||
|
(bear,3)
|
||||||
|
(bodkin,1)
|
||||||
|
(bourn,1)
|
||||||
|
```
|
||||||
|
#### 3.3.4 Run Trusted Cluster Serving
|
||||||
|
|
||||||
|
Start Cluster Serving as follows:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./start-local-cluster-serving.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
After all services are ready, you can directly push inference requests into the queue with the [RESTful API](https://analytics-zoo.github.io/master/#ClusterServingGuide/ProgrammingGuide/#restful-api). Alternatively, you can push images/inputs into the queue with the Python API:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.serving.client import InputQueue
|
||||||
|
input_api = InputQueue()
|
||||||
|
input_api.enqueue('my-image1', user_define_key={"path": "path/to/image1"})
|
||||||
|
```
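The inference results can then be retrieved from the output queue. A minimal sketch, assuming the `OutputQueue` client described in the Cluster Serving programming guide is available in this image:

```python
from zoo.serving.client import OutputQueue

output_api = OutputQueue()
# Retrieve the prediction for a single input by its key,
# or dequeue all currently available results at once.
result = output_api.query('my-image1')
all_results = output_api.dequeue()
```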
|
||||||
|
|
||||||
|
The Cluster Serving service is a long-running service in the container; you can stop it as follows:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker stop trusted-cluster-serving-local
|
||||||
|
```
|
||||||
153
docs/readthedocs/source/doc/PPML/trusted-serving-on-k8s-guide.md
Normal file
|
|
@ -0,0 +1,153 @@
|
||||||
|
# Trusted Cluster Serving with Graphene on Kubernetes #
|
||||||
|
|
||||||
|
## Prerequisites ##
|
||||||
|
Prior to deploying PPML Cluster Serving, please make sure the following are set up:
|
||||||
|
- Hardware that supports SGX
|
||||||
|
- A fully configured Kubernetes cluster
|
||||||
|
- Intel SGX Device Plugin to use SGX in K8S cluster (install following instructions [here](https://github.com/intel-analytics/analytics-zoo/tree/master/ppml/trusted-realtime-ml/scala/docker-graphene/kubernetes#deploy-the-intel-sgx-device-plugin-for-kubenetes "here"))
|
||||||
|
- Java
|
||||||
|
|
||||||
|
## Deploy Trusted Realtime ML for Kubernetes ##
|
||||||
|
1. Pull docker image from dockerhub
|
||||||
|
```
|
||||||
|
$ docker pull intelanalytics/analytics-zoo-ppml-trusted-realtime-ml-scala-graphene:0.12.0-SNAPSHOT
|
||||||
|
```
|
||||||
|
2. Pull the source code of Analytics Zoo and enter PPML graphene k8s directory
|
||||||
|
```
|
||||||
|
$ git clone https://github.com/intel-analytics/analytics-zoo.git
|
||||||
|
$ cd analytics-zoo/ppml/trusted-realtime-ml/scala/docker-graphene/kubernetes
|
||||||
|
```
|
||||||
|
3. Generate secure keys and passwords, and deploy as secrets (Refer [here](https://github.com/intel-analytics/analytics-zoo/blob/master/ppml/trusted-realtime-ml/scala/docker-graphene/kubernetes/README.md#secure-keys-and-password) for details)
|
||||||
|
1. Generate keys and passwords
|
||||||
|
|
||||||
|
Note: Make sure to add `${JAVA_HOME}/bin` to `$PATH` to avoid `keytool: command not found` error.
|
||||||
|
```
|
||||||
|
$ sudo ../../../../scripts/generate-keys.sh
|
||||||
|
$ openssl genrsa -3 -out enclave-key.pem 3072
|
||||||
|
$ ../../../../scripts/generate-password.sh <used_password_when_generate_keys>
|
||||||
|
```
|
||||||
|
2. Deploy as secrets for Kubernetes
|
||||||
|
```
|
||||||
|
$ kubectl apply -f keys/keys.yaml
|
||||||
|
$ kubectl apply -f password/password.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
4. In `values.yaml`, configure the pulled image name, the path of `enclave-key.pem` generated in step 3, and the path of the `start-all-but-flink.sh` script.
|
||||||
|
5. If the kernel version is 5.11+ with built-in SGX support, create soft links for the SGX devices
|
||||||
|
```
|
||||||
|
$ sudo ln -s /dev/sgx_enclave /dev/sgx/enclave
|
||||||
|
$ sudo ln -s /dev/sgx_provision /dev/sgx/provision
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configure SGX mode ###
|
||||||
|
In `templates/flink-configuration-configmap.yaml`, configure `sgx.mode` to `sgx` or `nonsgx` to determine whether to run the workload with SGX.
|
||||||
|
|
||||||
|
### Configure Resource for Components ###
|
||||||
|
1. Configure jobmanager resource allocation in `templates/jobmanager-deployment.yaml`
|
||||||
|
```
|
||||||
|
...
|
||||||
|
env:
|
||||||
|
- name: SGX_MEM_SIZE
|
||||||
|
value: "16G"
|
||||||
|
...
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 2
|
||||||
|
memory: 16Gi
|
||||||
|
sgx.intel.com/enclave: "1"
|
||||||
|
sgx.intel.com/epc: 16Gi
|
||||||
|
limits:
|
||||||
|
cpu: 2
|
||||||
|
memory: 16Gi
|
||||||
|
sgx.intel.com/enclave: "1"
|
||||||
|
sgx.intel.com/epc: 16Gi
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Configure Taskmanager resource allocation
|
||||||
|
- Memory allocation in `templates/flink-configuration-configmap.yaml`
|
||||||
|
```
|
||||||
|
taskmanager.memory.managed.size: 4gb
|
||||||
|
taskmanager.memory.task.heap.size: 5gb
|
||||||
|
xmx.size: 5g
|
||||||
|
```
|
||||||
|
- Pod resource allocation
|
||||||
|
|
||||||
|
Use `taskmanager-deployment.yaml` instead of `taskmanager-statefulset.yaml` for functionality test
|
||||||
|
```
|
||||||
|
$ mv templates/taskmanager-statefulset.yaml ./
|
||||||
|
$ mv taskmanager-deployment.yaml.back templates/taskmanager-deployment.yaml
|
||||||
|
```
|
||||||
|
Configure resources in `templates/taskmanager-deployment.yaml` (this example allocates 16 cores; please configure according to your scenario)
|
||||||
|
```
|
||||||
|
...
|
||||||
|
env:
|
||||||
|
- name: CORE_NUM
|
||||||
|
value: "16"
|
||||||
|
- name: SGX_MEM_SIZE
|
||||||
|
value: "32G"
|
||||||
|
...
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 16
|
||||||
|
memory: 32Gi
|
||||||
|
sgx.intel.com/enclave: "1"
|
||||||
|
sgx.intel.com/epc: 32Gi
|
||||||
|
limits:
|
||||||
|
cpu: 16
|
||||||
|
memory: 32Gi
|
||||||
|
sgx.intel.com/enclave: "1"
|
||||||
|
sgx.intel.com/epc: 32Gi
|
||||||
|
...
|
||||||
|
```
|
||||||
|
3. Configure Redis and client resource allocation
|
||||||
|
- SGX memory allocation in `start-all-but-flink.sh`
|
||||||
|
```
|
||||||
|
...
|
||||||
|
cd /ppml/trusted-realtime-ml/java
|
||||||
|
export SGX_MEM_SIZE=16G
|
||||||
|
test "$SGX_MODE" = sgx && ./init.sh
|
||||||
|
echo "java initiated"
|
||||||
|
...
|
||||||
|
```
|
||||||
|
- Pod resource allocation in `templates/master-deployment.yaml`
|
||||||
|
```
|
||||||
|
...
|
||||||
|
env:
|
||||||
|
- name: CORE_NUM #batchsize per instance
|
||||||
|
value: "16"
|
||||||
|
...
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 12
|
||||||
|
memory: 32Gi
|
||||||
|
sgx.intel.com/enclave: "1"
|
||||||
|
sgx.intel.com/epc: 32Gi
|
||||||
|
limits:
|
||||||
|
cpu: 12
|
||||||
|
memory: 32Gi
|
||||||
|
sgx.intel.com/enclave: "1"
|
||||||
|
sgx.intel.com/epc: 32Gi
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deploy Cluster Serving ###
|
||||||
|
1. Deploy all components and start job
|
||||||
|
1. Download helm from [release page](https://github.com/helm/helm/releases) and install
|
||||||
|
2. Deploy cluster serving
|
||||||
|
```
|
||||||
|
$ helm install ppml ./
|
||||||
|
```
|
||||||
|
2. Port forwarding
|
||||||
|
|
||||||
|
Set up port forwarding of the JobManager REST port to access the Flink Web UI on the host
|
||||||
|
1. Run `kubectl port-forward <flink-jobmanager-pod> --address 0.0.0.0 8081:8081` to forward jobmanager’s web UI port to 8081 on host.
|
||||||
|
2. Navigate to `http://<host-IP>:8081` in web browser to check status of Flink cluster and job.
|
||||||
|
3. Performance benchmark
|
||||||
|
```
|
||||||
|
$ kubectl exec <master-deployment-pod> -it -- bash
|
||||||
|
$ cd /ppml/trusted-realtime-ml/java/work/benchmark/
|
||||||
|
$ bash init-benchmark.sh
|
||||||
|
$ python3 e2e_throughput.py -n <image_num> -i ../data/ILSVRC2012_val_00000001.JPEG
|
||||||
|
```
|
||||||
|
The `e2e_throughput.py` script pushes the test image `-n` times (default 1000 if not set manually), and times the process from pushing the images (enqueue) to retrieving all inference results (dequeue) in order to calculate the Cluster Serving end-to-end throughput. The output should look like `Served xxx images in xxx sec, e2e throughput is xxx images/sec`.
|
||||||
30
docs/readthedocs/source/doc/PythonAPI/AutoML/automl.rst
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
AutoML API
|
||||||
|
===========
|
||||||
|
|
||||||
|
orca.automl.auto_estimator
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
A general estimator that supports automatic model tuning. It allows users to fit a model and search for the best hyperparameters.
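A minimal usage sketch is shown below; the ``from_torch`` factory arguments and the ``fit``/``get_best_model`` parameter names are assumptions based on typical Orca AutoML usage, and ``model_creator``/``train_data`` are user-defined placeholders — please check the generated API reference below.

.. code-block:: python

    from zoo.orca.automl import hp
    from zoo.orca.automl.auto_estimator import AutoEstimator

    # model_creator builds the model from a config dict; train_data is the training data.
    auto_est = AutoEstimator.from_torch(model_creator=model_creator,
                                        optimizer="Adam",
                                        loss="BCELoss",
                                        logs_dir="/tmp/auto_estimator_logs",
                                        name="auto_example")
    auto_est.fit(data=train_data,
                 search_space={"lr": hp.uniform(0.001, 0.01)},
                 epochs=1)
    best_model = auto_est.get_best_model()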
|
||||||
|
|
||||||
|
.. automodule:: zoo.orca.automl.auto_estimator
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
orca.automl.hp
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Sampling specs to be used in search space configuration.
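For example, a search space dictionary could be configured as follows (a minimal sketch; the sampling functions shown are assumed to mirror the Ray-Tune-style helpers exposed by this module):

.. code-block:: python

    from zoo.orca.automl import hp

    search_space = {
        "lr": hp.uniform(0.001, 0.01),            # sample a float uniformly from the range
        "hidden_size": hp.choice([32, 64, 128]),  # pick one of the listed values
        "batch_size": hp.randint(32, 128),        # sample an integer from the range
    }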
|
||||||
|
|
||||||
|
.. automodule:: zoo.orca.automl.hp
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
automl.metrics
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
Evaluate unscaled metrics between the true values and the predicted values.
|
||||||
|
|
||||||
|
.. automodule:: zoo.orca.automl.metrics
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
Anomaly Detectors
|
||||||
|
=====================
|
||||||
|
|
||||||
|
chronos.anomaly.ae_detector
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
AEDetector is an unsupervised anomaly detector. It builds an autoencoder network, tries to fit the model to the input data, and calculates the reconstruction error. The samples with larger reconstruction errors are more likely to be anomalies.
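A minimal usage sketch (the ``roll_len`` argument and the ``fit``/``score``/``anomaly_indexes`` method names are assumptions based on typical detector usage; ``y`` is a placeholder 1-D numpy series — please check the generated API reference below):

.. code-block:: python

    import numpy as np
    from zoo.chronos.detector.anomaly.ae_detector import AEDetector

    y = np.sin(np.arange(2000) / 10)        # placeholder time series values
    ad = AEDetector(roll_len=24)            # window length used by the autoencoder
    ad.fit(y)                               # train the autoencoder on the series
    scores = ad.score()                     # reconstruction-error based anomaly scores
    anomaly_idx = ad.anomaly_indexes()      # indexes of the detected anomalies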
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.detector.anomaly.ae_detector
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
chronos.anomaly.dbscan_detector
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
DBScanDetector uses DBSCAN clustering for anomaly detection. The DBSCAN algorithm tries to cluster the points and label the points that do not belong to any clusters as -1. It thus detects outliers in the input time series.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.detector.anomaly.dbscan_detector
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
chronos.anomaly.th_detector
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
ThresholdDetector is a simple anomaly detector that detects anomalies based on a threshold. The target value for anomaly testing can be either 1) the sample value itself or 2) the difference between the forecasted value and the actual value, if the forecasted values are provided. The threshold can be set by the user or estimated from the training data according to the anomaly ratio and statistical distributions.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.detector.anomaly.th_detector
|
||||||
|
:members: ThresholdDetector
|
||||||
|
:show-inheritance:
|
||||||
64
docs/readthedocs/source/doc/PythonAPI/Chronos/automodels.rst
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
Auto Models
|
||||||
|
=====================
|
||||||
|
|
||||||
|
AutoTCN
|
||||||
|
-------------------------------------------
|
||||||
|
|
||||||
|
AutoTCN is a TCN forecasting model with Auto tuning.
|
||||||
|
Other APIs follow its base class (BasePytorchAutomodel).
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.autots.model.auto_tcn
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
AutoLSTM
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
AutoLSTM is an LSTM forecasting model with Auto tuning.
|
||||||
|
Other API follows its base class(BasePytorchAutomodel).
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.autots.model.auto_lstm
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
AutoSeq2Seq
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
AutoSeq2Seq is a Seq2Seq forecasting model with Auto tuning.
|
||||||
|
Other API follows its base class(BasePytorchAutomodel).
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.autots.model.auto_seq2seq
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
AutoARIMA
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
AutoARIMA is an ARIMA forecasting model with Auto tuning.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.autots.model.auto_arima
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
AutoProphet
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
AutoProphet is a Prophet forecasting model with Auto tuning.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.autots.model.auto_prophet
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
BasePytorchAutomodel
|
||||||
|
------------------------------------------------------------
|
||||||
|
AutoLSTM, AutoSeq2Seq and AutoTCN all follow the same API as stated below.
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.model.base_automodel.BasePytorchAutomodel
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
86
docs/readthedocs/source/doc/PythonAPI/Chronos/autots.rst
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
AutoTS (deprecated)
|
||||||
|
=====================
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
The API in this page will be deprecated soon. Please refer to our new AutoTS API.
|
||||||
|
|
||||||
|
AutoTSTrainer
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
AutoTSTrainer trains a time series pipeline (including data processing, feature engineering, and model) with AutoML.
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.forecast.AutoTSTrainer
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
TSPipeline
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
A pipeline for time series forecasting.
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.forecast.TSPipeline
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
Recipe
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Recipe is used for search configuration for AutoTSTrainer.
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.SmokeRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.MTNetSmokeRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.TCNSmokeRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.PastSeqParamHandler
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.GridRandomRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.LSTMSeq2SeqRandomRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.LSTMGridRandomRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.Seq2SeqRandomRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.MTNetGridRandomRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.TCNGridRandomRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.RandomRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.BayesRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.XgbRegressorGridRandomRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.autots.deprecated.config.recipe.XgbRegressorSkOptRecipe
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
AutoTS
|
||||||
|
=====================
|
||||||
|
|
||||||
|
AutoTSEstimator
|
||||||
|
-------------------------------------------
|
||||||
|
|
||||||
|
Automated TimeSeries Estimator for the time series forecasting task.
|
||||||
|
AutoTSEstimator will replace AutoTSTrainer in a later version.
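A minimal usage sketch is given below. The argument names, the search space and the TSDataset inputs are illustrative assumptions; the generated API documentation below is authoritative.

.. code-block:: python

    # Hedged sketch: tsdata_train / tsdata_val are assumed to be zoo.chronos.data.TSDataset objects.
    import zoo.orca.automl.hp as hp
    from zoo.chronos.autots.autotsestimator import AutoTSEstimator

    auto_estimator = AutoTSEstimator(model="lstm",   # assumed built-in model name
                                     search_space={"hidden_dim": hp.grid_search([32, 64])},
                                     past_seq_len=24,
                                     future_seq_len=1)
    ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                     validation_data=tsdata_val,
                                     epochs=5)
    y_pred = ts_pipeline.predict(tsdata_val)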
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.autots.autotsestimator
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
TSPipeline
|
||||||
|
-------------------------------------------
|
||||||
|
|
||||||
|
TSPipeline is an end-to-end (E2E) solution for the time series forecasting task.
|
||||||
|
This TSPipeline (returned by AutoTSEstimator) will replace the original TSPipeline returned by AutoTSTrainer in a later version.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.autots.tspipeline
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
118
docs/readthedocs/source/doc/PythonAPI/Chronos/forecasters.rst
Normal file
|
|
@ -0,0 +1,118 @@
|
||||||
|
Forecasters
|
||||||
|
=====================
|
||||||
|
|
||||||
|
LSTMForecaster
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Please refer to BasePytorchForecaster for methods other than initialization.
|
||||||
|
|
||||||
|
Long Short-Term Memory (LSTM) is a special type of recurrent neural network (RNN). We implement a basic version of LSTM, called VanillaLSTM, for this forecaster for the time series forecasting task. It has two LSTM layers, two dropout layers and a dense layer.
|
||||||
|
|
||||||
|
For the detailed algorithm description, please refer to `here <https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/Chronos/Algorithm/LSTMAlgorithm.md>`__.
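A minimal sketch of the typical fit/predict flow is shown below. The constructor arguments, numpy shapes and variable names are illustrative assumptions; the generated API documentation below is authoritative.

.. code-block:: python

    # Hedged sketch: x_train has shape (num_samples, past_seq_len, input_feature_num)
    # and y_train has shape (num_samples, 1, output_feature_num).
    from zoo.chronos.forecaster.lstm_forecaster import LSTMForecaster

    forecaster = LSTMForecaster(past_seq_len=24,
                                input_feature_num=2,
                                output_feature_num=2)
    forecaster.fit((x_train, y_train), epochs=3)
    y_pred = forecaster.predict(x_test)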
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.forecaster.lstm_forecaster
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
Seq2SeqForecaster
|
||||||
|
-------------------------------------------
|
||||||
|
|
||||||
|
Please refer to BasePytorchForecaster for methods other than initialization.
|
||||||
|
|
||||||
|
Seq2SeqForecaster wraps a sequence-to-sequence model based on LSTM, and is suitable for multivariate and multi-step time series forecasting.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.forecaster.seq2seq_forecaster
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
TCNForecaster
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Please refer to BasePytorchForecaster for methods other than initialization.
|
||||||
|
|
||||||
|
A Temporal Convolutional Network (TCN) is a neural network that uses a convolutional architecture rather than recurrent networks. It supports multi-step and multivariate cases. Causal convolutions enable large-scale parallel computing, which gives TCN a lower inference time than RNN-based models such as LSTM.
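A minimal sketch of a multi-step, multivariate setup is shown below. The constructor arguments and variable names are illustrative assumptions; the generated API documentation below is authoritative.

.. code-block:: python

    # Hedged sketch: forecast 5 future steps from 48 history steps for 3 variables.
    from zoo.chronos.forecaster.tcn_forecaster import TCNForecaster

    forecaster = TCNForecaster(past_seq_len=48,
                               future_seq_len=5,
                               input_feature_num=3,
                               output_feature_num=3)
    forecaster.fit((x_train, y_train), epochs=3)
    y_pred = forecaster.predict(x_test)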
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.forecaster.tcn_forecaster
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
TCMFForecaster
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Analytics Zoo Chronos TCMFForecaster provides an efficient way to forecast high-dimensional time series.
|
||||||
|
|
||||||
|
TCMFForecaster is based on the DeepGLO algorithm, a deep forecasting model which thinks globally and acts locally.
|
||||||
|
You can refer to `the DeepGLO paper <https://arxiv.org/abs/1905.03806>`__ for more details.
|
||||||
|
|
||||||
|
TCMFForecaster supports distributed training and inference. It is based on the Orca PyTorch Estimator, which runs PyTorch training/evaluation/prediction on Spark in a distributed fashion. You can choose whether or not to enable distributed training and inference.
|
||||||
|
|
||||||
|
**Remarks**:
|
||||||
|
|
||||||
|
* You can refer to `TCMFForecaster installation <https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/Chronos/tutorials/TCMFForecaster.md/#step-0-prepare-environment>`__ to install required packages.
|
||||||
|
* Your operating system (OS) is required to be one of the following 64-bit systems: **Ubuntu 16.04 or later** or **macOS 10.12.6 or later**.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.forecaster.tcmf_forecaster
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
MTNetForecaster
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
MTNet is a memory-network based solution for multivariate time-series forecasting. In a specific task of multivariate time-series forecasting, we have several variables observed in a time series and we want to forecast the values of some or all of the variables at a future time stamp.
|
||||||
|
|
||||||
|
MTNet is proposed in the paper `A Memory-Network Based Solution for Multivariate Time-Series Forecasting <https://arxiv.org/abs/1809.02105>`__. MTNetForecaster is derived from tfpark.KerasModel, and can use all methods of KerasModel. Refer to `tfpark.KerasModel API Doc <https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/APIGuide/TFPark/model.md>`__ for details.
|
||||||
|
|
||||||
|
For the detailed algorithm description, please refer to `here <https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/Chronos/Algorithm/MTNetAlgorithm.md>`__.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.forecaster.mtnet_forecaster
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
ARIMAForecaster
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
AutoRegressive Integrated Moving Average (ARIMA) is a class of statistical models for analyzing and forecasting time series data. It consists of 3 components: AR (AutoRegressive), I (Integrated) and MA (Moving Average). In ARIMAForecaster we use the SARIMA model (Seasonal ARIMA), which is an extension of ARIMA that additionally supports the direct modeling of the seasonal component of the time series.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.forecaster.arima_forecaster
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
ProphetForecaster
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Prophet is a procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. It works best with time series that have strong seasonal effects and several seasons of historical data. Prophet is robust to missing data and shifts in the trend, and typically handles outliers well.
|
||||||
|
|
||||||
|
For the detailed algorithm description, please refer to `here <https://github.com/facebook/prophet>`__.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.forecaster.prophet_forecaster
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
chronos.forecaster.tfpark_forecaster
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.forecaster.tfpark_forecaster
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
chronos.forecaster.base_forecaster.BasePytorchForecaster
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
.. autoclass:: zoo.chronos.forecaster.base_forecaster.BasePytorchForecaster
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
17
docs/readthedocs/source/doc/PythonAPI/Chronos/index.rst
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
Chronos API
|
||||||
|
==================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
autotsestimator.rst
|
||||||
|
automodels.rst
|
||||||
|
forecasters.rst
|
||||||
|
anomaly_detectors.rst
|
||||||
|
tsdataset.rst
|
||||||
|
simulator.rst
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 1
|
||||||
|
|
||||||
|
autots.rst
|
||||||
10
docs/readthedocs/source/doc/PythonAPI/Chronos/simulator.rst
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
Simulator
|
||||||
|
====================================
|
||||||
|
|
||||||
|
DPGANSimulator
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.simulator.doppelganger_simulator
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
24
docs/readthedocs/source/doc/PythonAPI/Chronos/tsdataset.rst
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
TSDataset
|
||||||
|
===========
|
||||||
|
|
||||||
|
chronos.data.tsdataset
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Time series data is a special data formulation with specific operations. TSDataset is an abstraction of a time series dataset, which provides various data processing operations (e.g. impute, deduplicate, resample, scale/unscale, roll) and feature engineering methods (e.g. datetime features, aggregation features). Cascaded (chained) calls are supported for most of the methods.
|
||||||
|
TSDataset can be initialized from a pandas DataFrame and converted to a pandas DataFrame or a numpy ndarray.
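A short sketch of a typical cascaded preprocessing flow is given below. The column names, method arguments and the scikit-learn scaler are illustrative assumptions; see the generated API documentation below for the full signatures.

.. code-block:: python

    # Hedged sketch: df is assumed to be a pandas DataFrame with datetime, id and value columns.
    from sklearn.preprocessing import StandardScaler
    from zoo.chronos.data.tsdataset import TSDataset

    tsdata = TSDataset.from_pandas(df, dt_col="datetime", target_col="value", id_col="id")
    scaler = StandardScaler()
    tsdata.impute()\
          .scale(scaler)\
          .roll(lookback=24, horizon=1)
    x, y = tsdata.to_numpy()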
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.data.tsdataset
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
chronos.data.experimental.xshards_tsdataset
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
|
Time series data is a special data formulation with specific operations. XShardsTSDataset is an abstraction of a time series dataset, which provides various data processing operations (e.g. impute, deduplicate, resample, scale/unscale, roll) and feature engineering methods (e.g. datetime features, aggregation features). Cascaded (chained) calls are supported for most of the methods.
|
||||||
|
XShardsTSDataset can be initialized from XShards of pandas DataFrames and converted to XShards of numpy ndarrays in a distributed and parallelized fashion.
|
||||||
|
|
||||||
|
.. automodule:: zoo.chronos.data.experimental.xshards_tsdataset
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
11
docs/readthedocs/source/doc/PythonAPI/Friesian/feature.rst
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
Friesian Feature API
|
||||||
|
=====================
|
||||||
|
|
||||||
|
friesian.feature.table
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
.. automodule:: zoo.friesian.feature.table
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
45
docs/readthedocs/source/doc/PythonAPI/Orca/orca.rst
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
Orca API
|
||||||
|
=========
|
||||||
|
|
||||||
|
orca.learn.bigdl.estimator
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
.. automodule:: zoo.orca.learn.bigdl.estimator
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
orca.learn.tf.estimator
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
.. automodule:: zoo.orca.learn.tf.estimator
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
orca.learn.tf2.estimator
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
.. automodule:: zoo.orca.learn.tf2.estimator
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
orca.learn.pytorch.estimator
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
.. automodule:: zoo.orca.learn.pytorch.estimator
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
orca.learn.openvino.estimator
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
.. automodule:: zoo.orca.learn.openvino.estimator
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
100
docs/readthedocs/source/doc/Ray/Overview/ray.md
Normal file
|
|
@ -0,0 +1,100 @@
|
||||||
|
# RayOnSpark User Guide
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Ray](https://github.com/ray-project/ray) is an open source distributed framework for emerging AI applications. With the _**RayOnSpark**_ support in Analytics Zoo, users can seamlessly integrate Ray applications into the big data processing pipeline on the underlying Big Data cluster (such as [Hadoop/YARN](../../UserGuide/hadoop.md) or [K8s](../../UserGuide/k8s.md)).
|
||||||
|
|
||||||
|
_**Note:** Analytics Zoo has been tested on Ray 1.2.0, and it is highly recommended that you use this tested version._
|
||||||
|
|
||||||
|
|
||||||
|
### **1. Install**
|
||||||
|
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the Python environment.
|
||||||
|
When installing analytics-zoo with pip, you can specify the extras key `[ray]` to install the additional dependencies essential for running Ray (i.e. `ray==1.2.0`, `psutil`, `aiohttp`, `setproctitle`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
|
||||||
|
pip install analytics-zoo[ray]
|
||||||
|
```
|
||||||
|
|
||||||
|
View [here](./python.html#install) for more installation instructions.
|
||||||
|
|
||||||
|
---
|
||||||
|
### **2. Initialize**
|
||||||
|
|
||||||
|
We recommend using `init_orca_context` to initialize and run Analytics Zoo on the underlying cluster. The Ray cluster will also be launched when `init_ray_on_spark=True` is specified. For example, to launch Spark and Ray on standard Hadoop/YARN clusters in [YARN client mode](https://spark.apache.org/docs/latest/running-on-yarn.html#launching-spark-on-yarn):
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context
|
||||||
|
|
||||||
|
sc = init_orca_context(cluster_mode="yarn-client", cores=4, memory="10g", num_nodes=2, init_ray_on_spark=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
By default, the Ray cluster is launched using Spark barrier execution mode; you can turn this off via the configuration of `OrcaContext`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import OrcaContext
|
||||||
|
|
||||||
|
OrcaContext.barrier_mode = False
|
||||||
|
```
|
||||||
|
|
||||||
|
View [Orca Context](../../Orca/Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
---
|
||||||
|
### **3. Run**
|
||||||
|
|
||||||
|
- After the initialization, you can directly run Ray applications on the underlying cluster. [Ray tasks](https://docs.ray.io/en/master/walkthrough.html#remote-functions-tasks) or [actors](https://docs.ray.io/en/master/actors.html) will be launched across the cluster. The following code shows a simple example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import ray
|
||||||
|
|
||||||
|
@ray.remote
|
||||||
|
class Counter(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.n = 0
|
||||||
|
|
||||||
|
def increment(self):
|
||||||
|
self.n += 1
|
||||||
|
return self.n
|
||||||
|
|
||||||
|
|
||||||
|
counters = [Counter.remote() for i in range(5)]
|
||||||
|
print(ray.get([c.increment.remote() for c in counters]))
|
||||||
|
```
|
||||||
|
|
||||||
|
- You can retrieve the information of the Ray cluster via [`OrcaContext`](../../Orca/Overview/orca-context.md):
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import OrcaContext
|
||||||
|
|
||||||
|
ray_ctx = OrcaContext.get_ray_context()
|
||||||
|
address_info = ray_ctx.address_info # The dictionary information of the ray cluster, including node_ip_address, object_store_address, webui_url, etc.
|
||||||
|
redis_address = ray_ctx.redis_address # The redis address of the ray cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
- You should call `stop_orca_context()` when your program finishes:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import stop_orca_context
|
||||||
|
|
||||||
|
stop_orca_context()
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
### **4. Known Issue**
|
||||||
|
If you encounter the following error when launching Ray on the underlying cluster, especially when you are using a [Spark standalone](https://spark.apache.org/docs/latest/spark-standalone.html) cluster:
|
||||||
|
|
||||||
|
```
|
||||||
|
This system supports the C.UTF-8 locale which is recommended. You might be able to resolve your issue by exporting the following environment variables:
|
||||||
|
|
||||||
|
export LC_ALL=C.UTF-8
|
||||||
|
export LANG=C.UTF-8
|
||||||
|
```
|
||||||
|
|
||||||
|
Adding these environment variables when calling `init_orca_context` would resolve the issue:
|
||||||
|
|
||||||
|
```python
|
||||||
|
sc = init_orca_context(cluster_mode, init_ray_on_spark=True, env={"LANG": "C.UTF-8", "LC_ALL": "C.UTF-8"})
|
||||||
|
```
|
||||||
131
docs/readthedocs/source/doc/Ray/QuickStart/ray-quickstart.md
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
# RayOnSpark Quickstart
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/ray/quickstart/ray_parameter_server.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/ray/quickstart/ray_parameter_server.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide, we will describe how to use RayOnSpark to directly run Ray programs on Big Data clusters in 2 simple steps.**
|
||||||
|
|
||||||
|
### **Step 0: Prepare Environment**
|
||||||
|
|
||||||
|
We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the environment. Please refer to the [install guide](../../UserGuide/python.md) for more details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda create -n zoo python=3.7 # "zoo" is conda environment name, you can use any name you like.
|
||||||
|
conda activate zoo
|
||||||
|
pip install analytics-zoo[ray]
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 1: Initialize**
|
||||||
|
|
||||||
|
We recommend using `init_orca_context` to initialize and run Analytics Zoo on the underlying cluster. The Ray cluster will be launched automatically when `init_ray_on_spark=True` is specified.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import init_orca_context
|
||||||
|
|
||||||
|
if cluster_mode == "local": # For local machine
|
||||||
|
sc = init_orca_context(cluster_mode="local", cores=4, memory="10g", init_ray_on_spark=True)
|
||||||
|
elif cluster_mode == "k8s": # For K8s cluster
|
||||||
|
sc = init_orca_context(cluster_mode="k8s", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1, init_ray_on_spark=True)
|
||||||
|
elif cluster_mode == "yarn": # For Hadoop/YARN cluster
|
||||||
|
sc = init_orca_context(cluster_mode="yarn", num_nodes=2, cores=2, memory="10g", driver_memory="10g", driver_cores=1, init_ray_on_spark=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the only place where you need to specify local or distributed mode.
|
||||||
|
|
||||||
|
By default, the Ray cluster is launched using Spark barrier execution mode; you can turn this off via the configuration of `OrcaContext`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import OrcaContext
|
||||||
|
|
||||||
|
OrcaContext.barrier_mode = False
|
||||||
|
```
|
||||||
|
|
||||||
|
View [Orca Context](./../../Orca/Overview/orca-context.md) for more details.
|
||||||
|
|
||||||
|
**Note:** You should `export HADOOP_CONF_DIR=/path/to/hadoop/conf/dir` when running on Hadoop YARN cluster. View [Hadoop User Guide](./../../UserGuide/hadoop.md) for more details.
|
||||||
|
|
||||||
|
You can retrieve the information of the Ray cluster via `OrcaContext`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import OrcaContext
|
||||||
|
|
||||||
|
ray_ctx = OrcaContext.get_ray_context()
|
||||||
|
address_info = ray_ctx.address_info # The dictionary information of the ray cluster, including node_ip_address, object_store_address, webui_url, etc.
|
||||||
|
redis_address = ray_ctx.redis_address # The redis address of the ray cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Step 2: Run Ray Applications**
|
||||||
|
|
||||||
|
After the initialization, you can directly write Ray code inline with your Spark code, and run Ray programs on the underlying existing Big Data clusters. Ray [tasks](https://docs.ray.io/en/master/walkthrough.html#remote-functions-tasks) and [actors](https://docs.ray.io/en/master/actors.html) would be launched across the cluster.
|
||||||
|
|
||||||
|
The following example uses actor handles to implement a parameter server for distributed asynchronous stochastic gradient descent. This is a simple Ray example for demonstration purposes. Similarly, you can write other Ray applications as you wish.
|
||||||
|
|
||||||
|
A parameter server is simply an object that stores the parameters (or "weights") of a machine learning model (this could be a neural network, a linear model, or something else). It exposes two methods: one for getting the parameters and one for updating the parameters.
|
||||||
|
|
||||||
|
By adding the `@ray.remote` decorator, the `ParameterServer` class becomes a Ray actor.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import ray
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
dim = 10
|
||||||
|
@ray.remote
|
||||||
|
class ParameterServer(object):
|
||||||
|
def __init__(self, dim):
|
||||||
|
self.parameters = np.zeros(dim)
|
||||||
|
|
||||||
|
def get_parameters(self):
|
||||||
|
return self.parameters
|
||||||
|
|
||||||
|
def update_parameters(self, update):
|
||||||
|
self.parameters += update
|
||||||
|
|
||||||
|
ps = ParameterServer.remote(dim)
|
||||||
|
```
|
||||||
|
|
||||||
|
In a typical machine learning training application, worker processes will run in an infinite loop that does the following:
|
||||||
|
|
||||||
|
1. Get the latest parameters from the parameter server.
|
||||||
|
2. Compute an update to the parameters (using the current parameters and some data).
|
||||||
|
3. Send the update to the parameter server.
|
||||||
|
|
||||||
|
By adding the `@ray.remote` decorator, the `worker` function becomes a Ray remote function.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import time
|
||||||
|
|
||||||
|
@ray.remote
|
||||||
|
def worker(ps, dim, num_iters):
|
||||||
|
for _ in range(num_iters):
|
||||||
|
# Get the latest parameters.
|
||||||
|
parameters = ray.get(ps.get_parameters.remote())
|
||||||
|
# Compute an update.
|
||||||
|
update = 1e-3 * parameters + np.ones(dim)
|
||||||
|
# Update the parameters.
|
||||||
|
ps.update_parameters.remote(update)
|
||||||
|
# Sleep a little to simulate a real workload.
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
# Test that worker is implemented correctly. You do not need to change this line.
|
||||||
|
ray.get(worker.remote(ps, dim, 1))
|
||||||
|
|
||||||
|
# Start two workers.
|
||||||
|
worker_results = [worker.remote(ps, dim, 100) for _ in range(2)]
|
||||||
|
```
|
||||||
|
|
||||||
|
As the worker tasks are executing, you can query the parameter server from the driver and see the parameters changing in the background.
|
||||||
|
|
||||||
|
```python
|
||||||
|
print(ray.get(ps.get_parameters.remote()))
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** You should call `stop_orca_context()` when your program finishes:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import stop_orca_context
|
||||||
|
|
||||||
|
stop_orca_context()
|
||||||
|
```
|
||||||
2833
docs/readthedocs/source/doc/UseCase/keras-api.md
Normal file
428
docs/readthedocs/source/doc/UseCase/nnframes.md
Normal file
|
|
@ -0,0 +1,428 @@
|
||||||
|
# Use Spark ML Pipeline for BigDL
|
||||||
|
|
||||||
|
## 1. NNFrames Overview
|
||||||
|
|
||||||
|
`NNFrames` in Analytics Zoo provides Spark DataFrame and ML Pipeline support for [BigDL](https://github.com/intel-analytics/bigdl). It provides both Python and Scala interfaces, and is compatible with both Spark 2.x and Spark 3.x.
|
||||||
|
|
||||||
|
|
||||||
|
**Highlights**
|
||||||
|
|
||||||
|
- Easy-to-use DataFrame (DataSet)-based API for training, prediction and evaluation with deep learning models.
|
||||||
|
|
||||||
|
- Effortless integration with Spark ML pipeline and compatibility with other feature transformers and algorithms in Spark ML.
|
||||||
|
|
||||||
|
- In a few lines, run large-scale inference or transfer learning from pre-trained models of Keras, TensorFlow, PyTorch or BigDL.
|
||||||
|
|
||||||
|
- Rich toolset for feature extraction and processing, including image, audio and text.
|
||||||
|
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
The examples are included in the Analytics Zoo source code.
|
||||||
|
|
||||||
|
- image classification: model inference using pre-trained Inception v1 model. (See [Scala version](https://github.com/intel-analytics/analytics-zoo/tree/master/zoo/src/main/scala/com/intel/analytics/zoo/examples/nnframes/imageInference) and [Python version](https://github.com/intel-analytics/analytics-zoo/tree/master/pyzoo/zoo/examples/nnframes/imageInference))
|
||||||
|
- image classification: transfer learning from pre-trained Inception v1 model. (See [Scala version](https://github.com/intel-analytics/analytics-zoo/tree/master/zoo/src/main/scala/com/intel/analytics/zoo/examples/nnframes/imageTransferLearning) and [Python version](https://github.com/intel-analytics/analytics-zoo/tree/master/pyzoo/zoo/examples/nnframes/imageTransferLearning))
|
||||||
|
|
||||||
|
## 2. Primary APIs
|
||||||
|
|
||||||
|
- **NNEstimator and NNModel**
|
||||||
|
|
||||||
|
Analytics Zoo provides `NNEstimator` for model training with Spark DataFrame, which offers a high-level API for training a BigDL Model with the Apache Spark [Estimator](https://spark.apache.org/docs/2.1.1/ml-pipeline.html#estimators) and [Transformer](https://spark.apache.org/docs/2.1.1/ml-pipeline.html#transformers) pattern, so that users can conveniently fit Analytics Zoo into an ML pipeline. The fit result of `NNEstimator` is an `NNModel`, which is a Spark ML Transformer.
|
||||||
|
|
||||||
|
- **NNClassifier and NNClassifierModel**
|
||||||
|
|
||||||
|
`NNClassifier` and `NNClassifierModel` extend `NNEstimator` and `NNModel` and focus on classification tasks, where both the label column and the prediction column are of Double type.
|
||||||
|
|
||||||
|
- **NNImageReader**
|
||||||
|
|
||||||
|
NNImageReader loads images into a Spark DataFrame.
|
||||||
|
|
||||||
|
---
|
||||||
|
### 2.1 NNEstimator
|
||||||
|
|
||||||
|
**Scala:**
|
||||||
|
|
||||||
|
```scala
|
||||||
|
val estimator = NNEstimator(model, criterion)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
estimator = NNEstimator(model, criterion)
|
||||||
|
```
|
||||||
|
|
||||||
|
`NNEstimator` extends `org.apache.spark.ml.Estimator` and supports training a BigDL model with Spark DataFrame data. It can be integrated into a standard Spark ML Pipeline
|
||||||
|
to allow users to combine the components of BigDL and Spark MLlib.
|
||||||
|
|
||||||
|
`NNEstimator` supports different feature and label data types through `Preprocessing`. During fit (training), NNEstimator will extract feature and label data from the input DataFrame and use the `Preprocessing` to convert the data for the model, typically converting the feature and label to Tensors, or converting the (feature, Option[Label]) tuple to a BigDL `Sample`.
|
||||||
|
|
||||||
|
Each `Preprocessing` conducts a data conversion step in the preprocessing phase; multiple `Preprocessing` instances can be combined into a `ChainedPreprocessing`. Some pre-defined
|
||||||
|
`Preprocessing` for popular data types like Image, Array or Vector are provided in the package `com.intel.analytics.zoo.feature`, while users can also develop customized `Preprocessing`.
|
||||||
|
|
||||||
|
NNEstimator and NNClassifier also support setting the caching level for the training data. Options are "DRAM", "PMEM" or "DISK_AND_DRAM". If DISK_AND_DRAM(numSlice) is used, only 1/numSlice of the data will be loaded into memory during training. By default, DRAM mode is used and all data are cached in memory.
|
||||||
|
|
||||||
|
By default, `SeqToTensor` is used to convert an array or Vector to a 1-dimension Tensor. Using the `Preprocessing` allows `NNEstimator` to cache only the raw data and decrease the memory consumption during feature conversion and training; it also enables the model to digest extra data types that DataFrame does not currently support.
|
||||||
|
|
||||||
|
More concrete examples are available in the package `com.intel.analytics.zoo.examples.nnframes`.
|
||||||
|
|
||||||
|
`NNEstimator` can be created with various parameters for different scenarios.
|
||||||
|
|
||||||
|
- `NNEstimator(model, criterion)`
|
||||||
|
|
||||||
|
Takes only model and criterion and uses `SeqToTensor` as the feature and label `Preprocessing`. `NNEstimator` will extract the data from the feature and label columns (only Scalar, Array[_] or Vector data types are supported) and convert each feature/label to a 1-dimension Tensor. The tensors will be combined into a BigDL `Sample` and sent to the model for training.
|
||||||
|
|
||||||
|
- `NNEstimator(model, criterion, featureSize: Array[Int], labelSize: Array[Int])`
|
||||||
|
|
||||||
|
Takes model, criterion, featureSize (Array of Int) and labelSize (Array of Int). `NNEstimator` will extract the data from the feature and label columns (only Scalar, Array[_] or Vector data types are supported) and convert each feature/label to a Tensor according to the specified Tensor size.
|
||||||
|
|
||||||
|
- `NNEstimator(model, criterion, featureSize: Array[Array[Int]], labelSize: Array[Int])`
|
||||||
|
|
||||||
|
This is the interface for multi-input models. It takes model, criterion, featureSize (Array of Int Array) and labelSize (Array of Int). `NNEstimator` will extract the data from the feature and label columns (only Scalar, Array[_] or Vector data types are supported) and convert each feature/label to a Tensor according to the specified Tensor size.
|
||||||
|
|
||||||
|
- `NNEstimator(model, criterion, featurePreprocessing: Preprocessing[F, Tensor[T]],
|
||||||
|
labelPreprocessing: Preprocessing[F, Tensor[T]])`
|
||||||
|
|
||||||
|
Takes model, criterion, featurePreprocessing and labelPreprocessing. `NNEstimator` will extract the data from feature and label columns and convert each feature/label to Tensor with the featurePreprocessing and labelPreprocessing. This constructor provides more flexibility in supporting extra data types.
|
||||||
|
|
||||||
|
Meanwhile, for advanced use cases (e.g. model with multiple input tensor), `NNEstimator` supports: `setSamplePreprocessing(value: Preprocessing[(Any, Option[Any]), Sample[T]])` to directly compose Sample according to user-specified Preprocessing.
|
||||||
|
|
||||||
|
|
||||||
|
**Scala Example:**
|
||||||
|
```scala
|
||||||
|
import com.intel.analytics.bigdl.nn._
|
||||||
|
import com.intel.analytics.zoo.pipeline.nnframes.NNEstimator
|
||||||
|
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric.NumericFloat
|
||||||
|
|
||||||
|
val model = Sequential().add(Linear(2, 2))
|
||||||
|
val criterion = MSECriterion()
|
||||||
|
val estimator = NNEstimator(model, criterion)
|
||||||
|
.setLearningRate(0.2)
|
||||||
|
.setMaxEpoch(40)
|
||||||
|
val data = sc.parallelize(Seq(
|
||||||
|
(Array(2.0, 1.0), Array(1.0, 2.0)),
|
||||||
|
(Array(1.0, 2.0), Array(2.0, 1.0)),
|
||||||
|
(Array(2.0, 1.0), Array(1.0, 2.0)),
|
||||||
|
(Array(1.0, 2.0), Array(2.0, 1.0))))
|
||||||
|
val df = sqlContext.createDataFrame(data).toDF("features", "label")
|
||||||
|
val nnModel = estimator.fit(df)
|
||||||
|
nnModel.transform(df).show(false)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python Example:**
|
||||||
|
```python
|
||||||
|
from bigdl.nn.layer import *
|
||||||
|
from bigdl.nn.criterion import *
|
||||||
|
from bigdl.util.common import *
|
||||||
|
from zoo.pipeline.nnframes.nn_classifier import *
|
||||||
|
from zoo.feature.common import *
from pyspark.sql.types import *
|
||||||
|
|
||||||
|
data = sc.parallelize([
|
||||||
|
((2.0, 1.0), (1.0, 2.0)),
|
||||||
|
((1.0, 2.0), (2.0, 1.0)),
|
||||||
|
((2.0, 1.0), (1.0, 2.0)),
|
||||||
|
((1.0, 2.0), (2.0, 1.0))])
|
||||||
|
|
||||||
|
schema = StructType([
|
||||||
|
StructField("features", ArrayType(DoubleType(), False), False),
|
||||||
|
StructField("label", ArrayType(DoubleType(), False), False)])
|
||||||
|
df = sqlContext.createDataFrame(data, schema)
|
||||||
|
model = Sequential().add(Linear(2, 2))
|
||||||
|
criterion = MSECriterion()
|
||||||
|
estimator = NNEstimator(model, criterion, SeqToTensor([2]), ArrayToTensor([2]))\
|
||||||
|
.setBatchSize(4).setLearningRate(0.2).setMaxEpoch(40)
|
||||||
|
nnModel = estimator.fit(df)
|
||||||
|
res = nnModel.transform(df)
|
||||||
|
```
|
||||||
|
|
||||||
|
***Example with multi-inputs Model.***
|
||||||
|
This example trains a model with 3 inputs. Users can use VectorAssembler from Spark MLlib to combine different fields. With the specified sizes for each model input, NNEstimator and NNClassifier will split the input features data and send tensors to the corresponding inputs.
|
||||||
|
|
||||||
|
```python
|
||||||
|
sparkConf = init_spark_conf().setAppName("testNNClassifer").setMaster('local[1]')
|
||||||
|
sc = init_nncontext(sparkConf)
|
||||||
|
spark = SparkSession\
|
||||||
|
.builder\
|
||||||
|
.getOrCreate()
|
||||||
|
|
||||||
|
df = spark.createDataFrame(
|
||||||
|
[(1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
|
||||||
|
(2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
|
||||||
|
(3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0)],
|
||||||
|
["user", "age", "income", "history", "label"])
|
||||||
|
|
||||||
|
assembler = VectorAssembler(
|
||||||
|
inputCols=["user", "age", "income", "history"],
|
||||||
|
outputCol="features")
|
||||||
|
|
||||||
|
df = assembler.transform(df)
|
||||||
|
|
||||||
|
x1 = ZLayer.Input(shape=(1,))
|
||||||
|
x2 = ZLayer.Input(shape=(2,))
|
||||||
|
x3 = ZLayer.Input(shape=(2, 2,))
|
||||||
|
|
||||||
|
user_embedding = ZLayer.Embedding(5, 10)(x1)
|
||||||
|
flatten = ZLayer.Flatten()(user_embedding)
|
||||||
|
dense1 = ZLayer.Dense(2)(x2)
|
||||||
|
gru = ZLayer.LSTM(4, input_shape=(2, 2))(x3)
|
||||||
|
|
||||||
|
merged = ZLayer.merge([flatten, dense1, gru], mode="concat")
|
||||||
|
zy = ZLayer.Dense(2)(merged)
|
||||||
|
|
||||||
|
zmodel = ZModel([x1, x2, x3], zy)
|
||||||
|
criterion = ZooClassNLLCriterion()
|
||||||
|
classifier = NNClassifier(zmodel, criterion, [[1], [2], [2, 2]]) \
|
||||||
|
.setOptimMethod(Adam()) \
|
||||||
|
.setLearningRate(0.1)\
|
||||||
|
.setBatchSize(2) \
|
||||||
|
.setMaxEpoch(10)
|
||||||
|
|
||||||
|
nnClassifierModel = classifier.fit(df)
|
||||||
|
print(nnClassifierModel.getBatchSize())
|
||||||
|
res = nnClassifierModel.transform(df).collect()
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2.2 NNModel
|
||||||
|
**Scala:**
|
||||||
|
```scala
|
||||||
|
val nnModel = NNModel(bigDLModel)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python:**
|
||||||
|
```python
|
||||||
|
nn_model = NNModel(bigDLModel)
|
||||||
|
```
|
||||||
|
|
||||||
|
`NNModel` extends Spark's ML
|
||||||
|
[Transformer](https://spark.apache.org/docs/2.1.1/ml-pipeline.html#transformers). Users can invoke `fit` in `NNEstimator` to get an `NNModel`, or directly compose an `NNModel` from a BigDL Model. It enables users to wrap a pre-trained BigDL Model into an `NNModel`, and use it as a transformer in a Spark ML pipeline to predict the results for a `DataFrame (DataSet)`.
|
||||||
|
|
||||||
|
`NNModel` can be created with various parameters for different scenarios.
|
||||||
|
|
||||||
|
- `NNModel(model)`
|
||||||
|
|
||||||
|
Takes only model and uses `SeqToTensor` as the feature Preprocessing. `NNModel` will extract the data from the feature column (only Scalar, Array[_] or Vector data types are supported) and convert each feature to a 1-dimension Tensor. The tensors will be sent to the model for inference.
|
||||||
|
|
||||||
|
- `NNModel(model, featureSize: Array[Int])`
|
||||||
|
|
||||||
|
Takes model and featureSize (Array of Int). `NNModel` will extract the data from the feature column (only Scalar, Array[_] or Vector data types are supported) and convert each feature to a Tensor according to the specified Tensor size. Users can also set featureSize as Array[Array[Int]] for multi-input models.
|
||||||
|
|
||||||
|
- `NNModel(model, featurePreprocessing: Preprocessing[F, Tensor[T]])`
|
||||||
|
|
||||||
|
Takes model and featurePreprocessing. `NNModel` will extract the data from feature column and convert each feature to Tensor with the featurePreprocessing. This constructor provides more flexibility in supporting extra data types.
|
||||||
|
|
||||||
|
Meanwhile, for advanced use cases (e.g. model with multiple input tensor), `NNModel` supports: `setSamplePreprocessing(value: Preprocessing[Any, Sample[T]])`to directly compose Sample according to user-specified Preprocessing.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2.3 NNClassifier
|
||||||
|
**Scala:**
|
||||||
|
```scala
|
||||||
|
val classifier = NNClassifier(model, criterion)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python:**
|
||||||
|
```python
|
||||||
|
classifier = NNClassifier(model, criterion)
|
||||||
|
```
|
||||||
|
|
||||||
|
`NNClassifier` is a specialized `NNEstimator` that simplifies the data format for classification tasks where the label space is discrete. It only supports label column of
|
||||||
|
DoubleType, and the fitted `NNClassifierModel` will have the prediction column of DoubleType.
|
||||||
|
|
||||||
|
* `model` BigDL module to be optimized in the fit() method
|
||||||
|
* `criterion` the criterion used to compute the loss and the gradient
|
||||||
|
|
||||||
|
`NNClassifier` can be created with various parameters for different scenarios.
|
||||||
|
|
||||||
|
- `NNClassifier(model, criterion)`
|
||||||
|
|
||||||
|
Takes only model and criterion and uses `SeqToTensor` as the feature and label Preprocessing. `NNClassifier` will extract the data from the feature and label columns (only Scalar, Array[_] or Vector data types are supported) and convert each feature/label to a 1-dimension Tensor. The tensors will be combined into BigDL samples and sent to the model for training.
|
||||||
|
|
||||||
|
- `NNClassifier(model, criterion, featureSize: Array[Int])`
|
||||||
|
|
||||||
|
Takes model, criterion and featureSize (Array of Int). `NNClassifier` will extract the data from the feature and label columns and convert each feature to a Tensor according to the specified Tensor size. `ScalarToTensor` is used to convert the label column. Users can also set featureSize as Array[Array[Int]] for multi-input models.
|
||||||
|
|
||||||
|
- `NNClassifier(model, criterion, featurePreprocessing: Preprocessing[F, Tensor[T]])`
|
||||||
|
|
||||||
|
Takes model, criterion and featurePreprocessing. `NNClassifier` will extract the data from feature and label columns and convert each feature to Tensor with the featurePreprocessing. This constructor provides more flexibility in supporting extra data types.
|
||||||
|
|
||||||
|
Meanwhile, for advanced use cases (e.g. model with multiple input tensor), `NNClassifier` supports `setSamplePreprocessing(value: Preprocessing[(Any, Option[Any]), Sample[T]])` to directly compose Sample with user-specified Preprocessing.
|
||||||
|
|
||||||
|
**Scala example:**
|
||||||
|
```scala
|
||||||
|
import com.intel.analytics.bigdl.nn._
|
||||||
|
import com.intel.analytics.zoo.pipeline.nnframes.NNClassifier
|
||||||
|
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric.NumericFloat
|
||||||
|
|
||||||
|
val model = Sequential().add(Linear(2, 2))
|
||||||
|
val criterion = MSECriterion()
|
||||||
|
val estimator = NNClassifier(model, criterion)
|
||||||
|
.setLearningRate(0.2)
|
||||||
|
.setMaxEpoch(40)
|
||||||
|
val data = sc.parallelize(Seq(
|
||||||
|
(Array(0.0, 1.0), 1.0),
|
||||||
|
(Array(1.0, 0.0), 2.0),
|
||||||
|
(Array(0.0, 1.0), 1.0),
|
||||||
|
(Array(1.0, 0.0), 2.0)))
|
||||||
|
val df = sqlContext.createDataFrame(data).toDF("features", "label")
|
||||||
|
val dlModel = estimator.fit(df)
|
||||||
|
dlModel.transform(df).show(false)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python Example:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
from bigdl.nn.layer import *
|
||||||
|
from bigdl.nn.criterion import *
|
||||||
|
from bigdl.util.common import *
|
||||||
|
from zoo.pipeline.nnframes.nn_classifier import *
|
||||||
|
from pyspark.sql.types import *
|
||||||
|
|
||||||
|
# Logistic Regression with BigDL layers and Analytics Zoo NNClassifier
|
||||||
|
model = Sequential().add(Linear(2, 2)).add(LogSoftMax())
|
||||||
|
criterion = ZooClassNLLCriterion()
|
||||||
|
estimator = NNClassifier(model, criterion, [2]).setBatchSize(4).setMaxEpoch(10)
|
||||||
|
data = sc.parallelize([
|
||||||
|
((0.0, 1.0), [1.0]),
|
||||||
|
((1.0, 0.0), [2.0]),
|
||||||
|
((0.0, 1.0), [1.0]),
|
||||||
|
((1.0, 0.0), [2.0])])
|
||||||
|
|
||||||
|
schema = StructType([
|
||||||
|
StructField("features", ArrayType(DoubleType(), False), False),
|
||||||
|
StructField("label", ArrayType(DoubleType(), False), False)])
|
||||||
|
df = sqlContext.createDataFrame(data, schema)
|
||||||
|
dlModel = estimator.fit(df)
|
||||||
|
dlModel.transform(df).show(truncate=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.4 NNClassifierModel
|
||||||
|
|
||||||
|
**Scala:**
|
||||||
|
```scala
|
||||||
|
val nnClassifierModel = NNClassifierModel(model, featureSize)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python:**
|
||||||
|
```python
|
||||||
|
nn_classifier_model = NNClassifierModel(model)
|
||||||
|
```
|
||||||
|
|
||||||
|
NNClassifierModel is a specialized `NNModel` for classification tasks. Both the label and prediction columns will have the datatype of Double.
|
||||||
|
|
||||||
|
`NNClassifierModel` can be created with various parameters for different scenarios.
|
||||||
|
|
||||||
|
- `NNClassifierModel(model)`
|
||||||
|
|
||||||
|
Takes only model and uses `SeqToTensor` as the feature Preprocessing. `NNClassifierModel` will extract the data from the feature column (only Scalar, Array[_] or Vector data types are supported) and convert each feature to a 1-dimension Tensor. The tensors will be sent to the model for inference.
|
||||||
|
|
||||||
|
- `NNClassifierModel(model, featureSize: Array[Int])`
|
||||||
|
|
||||||
|
Takes model and featureSize (Array of Int). `NNClassifierModel` will extract the data from the feature column (only Scalar, Array[_] or Vector data types are supported) and convert each feature to a Tensor according to the specified Tensor size. Users can also set featureSize as Array[Array[Int]] for multi-input models.
|
||||||
|
|
||||||
|
- `NNClassifierModel(model, featurePreprocessing: Preprocessing[F, Tensor[T]])`
|
||||||
|
|
||||||
|
Takes model and featurePreprocessing. `NNClassifierModel` will extract the data from feature column and convert each feature to Tensor with the featurePreprocessing. This constructor provides more flexibility in supporting extra data types.
|
||||||
|
|
||||||
|
Meanwhile, for advanced use cases (e.g. model with multiple input tensor), `NNClassifierModel` supports: `setSamplePreprocessing(value: Preprocessing[Any, Sample[T]])`to directly compose Sample according to user-specified Preprocessing.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2.5 Hyperparameter Setting
|
||||||
|
|
||||||
|
Before training starts, you can set the optimization algorithm, batch size, number of epochs and learning rate to meet your goal; otherwise `NNEstimator`/`NNClassifier` will use the default values.
|
||||||
|
|
||||||
|
Continuing the code above, NNEstimator and NNClassifier can be configured in the same way.
|
||||||
|
|
||||||
|
**Scala:**
|
||||||
|
|
||||||
|
```scala
|
||||||
|
// for estimator
|
||||||
|
estimator.setBatchSize(4).setMaxEpoch(10).setLearningRate(0.01).setOptimMethod(new Adam())
|
||||||
|
// for classifier
|
||||||
|
classifier.setBatchSize(4).setMaxEpoch(10).setLearningRate(0.01).setOptimMethod(new Adam())
|
||||||
|
```
|
||||||
|
**Python:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
# for estimator
|
||||||
|
estimator.setBatchSize(4).setMaxEpoch(10).setLearningRate(0.01).setOptimMethod(Adam())
|
||||||
|
# for classifier
|
||||||
|
classifier.setBatchSize(4).setMaxEpoch(10).setLearningRate(0.01).setOptimMethod(Adam())
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.6 Training
|
||||||
|
|
||||||
|
NNEstimator/NNClassifier supports training with Spark's [DataFrame/DataSet](https://spark.apache.org/docs/latest/sql-programming-guide.html#datasets-and-dataframes).
|
||||||
|
|
||||||
|
Suppose `df` is the training data; simply call the `fit` method and let Analytics Zoo train the model for you.
|
||||||
|
|
||||||
|
**Scala:**
|
||||||
|
|
||||||
|
```scala
|
||||||
|
//get a NNClassifierModel
|
||||||
|
val nnClassifierModel = classifier.fit(df)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
# get a NNClassifierModel
|
||||||
|
nnClassifierModel = classifier.fit(df)
|
||||||
|
```
|
||||||
|
Users may also set a validation DataFrame and validation frequency through the `setValidation` method. A train summary and a validation summary can also be configured to log the training process for visualization in TensorBoard.
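The Python sketch below shows one way to wire these up. It assumes BigDL's `EveryEpoch` trigger, `Top1Accuracy` metric and summary classes fit your model; `val_df`, the log directory and the app name are placeholders.

```python
from bigdl.optim.optimizer import EveryEpoch, Top1Accuracy, TrainSummary, ValidationSummary

# validate on val_df at the end of every epoch with a batch size of 4
estimator.setValidation(EveryEpoch(), val_df, [Top1Accuracy()], 4)

# log training and validation metrics for visualization in TensorBoard
estimator.setTrainSummary(TrainSummary(log_dir="/tmp/zoo_summaries", app_name="nnframes_demo"))
estimator.setValidationSummary(ValidationSummary(log_dir="/tmp/zoo_summaries", app_name="nnframes_demo"))
```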
|
||||||
|
|
||||||
|
|
||||||
|
### 2.7 Prediction
|
||||||
|
|
||||||
|
Since `NNModel`/`NNClassifierModel` inherits from Spark's `Transformer` abstract class, simply call the `transform` method on `NNModel`/`NNClassifierModel` to make predictions.
|
||||||
|
|
||||||
|
**Scala:**
|
||||||
|
|
||||||
|
```scala
|
||||||
|
nnModel.transform(df).show(false)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
nnModel.transform(df).show(truncate=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
For the complete examples of NNFrames, please refer to:
|
||||||
|
[Scala examples](https://github.com/intel-analytics/analytics-zoo/tree/master/zoo/src/main/scala/com/intel/analytics/zoo/examples/nnframes)
|
||||||
|
[Python examples](https://github.com/intel-analytics/analytics-zoo/tree/master/pyzoo/zoo/examples/nnframes)
|
||||||
|
|
||||||
|
|
||||||
|
### 2.8 NNImageReader
|
||||||
|
|
||||||
|
`NNImageReader` is the primary DataFrame-based image loading interface, defining API to read images into DataFrame.
|
||||||
|
|
||||||
|
Scala:
|
||||||
|
```scala
|
||||||
|
val imageDF = NNImageReader.readImages(imageDirectory, sc)
|
||||||
|
```
|
||||||
|
|
||||||
|
Python:
|
||||||
|
```python
|
||||||
|
image_frame = NNImageReader.readImages(image_path, sc)
|
||||||
|
```
|
||||||
|
|
||||||
|
The output DataFrame contains a single column named "image". The schema of the "image" column can be accessed from `com.intel.analytics.zoo.pipeline.nnframes.DLImageSchema.byteSchema`. Each record in the "image" column represents one image, in the format of Row(origin, height, width, number of channels, mode, data), where `origin` contains the URI of the image file, and `data` holds the image bytes in OpenCV-compatible order (row-wise BGR in most cases). `mode` represents the OpenCV-compatible type: CV_8UC3 or CV_8UC1 in most cases.
|
||||||
|
|
||||||
|
```scala
|
||||||
|
val byteSchema = StructType(
|
||||||
|
StructField("origin", StringType, true) ::
|
||||||
|
StructField("height", IntegerType, false) ::
|
||||||
|
StructField("width", IntegerType, false) ::
|
||||||
|
StructField("nChannels", IntegerType, false) ::
|
||||||
|
// OpenCV-compatible type: CV_8UC3, CV_32FC3 in most cases
|
||||||
|
StructField("mode", IntegerType, false) ::
|
||||||
|
// Bytes in OpenCV-compatible order: row-wise BGR in most cases
|
||||||
|
StructField("data", BinaryType, false) :: Nil)
|
||||||
|
```
|
||||||
|
|
||||||
|
After loading the images, users can compose the preprocessing steps with the `Preprocessing` defined in `com.intel.analytics.zoo.feature.image` (in Scala) or `zoo.feature.image` (in Python).
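For example, the Python sketch below chains several image `Preprocessing` steps into a `ChainedPreprocessing` and uses it as the feature preprocessing of an `NNModel`. The crop size and channel means are typical ImageNet-style values used for illustration; `pretrained_model` and `image_frame` are placeholders.

```python
from zoo.feature.common import ChainedPreprocessing
from zoo.feature.image import *
from zoo.pipeline.nnframes import NNModel

# convert the image row to an ImageFeature, resize/crop/normalize it and turn it into a Tensor
transformer = ChainedPreprocessing([
    RowToImageFeature(),
    ImageResize(256, 256),
    ImageCenterCrop(224, 224),
    ImageChannelNormalize(123.0, 117.0, 104.0),
    ImageMatToTensor(),
    ImageFeatureToTensor()])

nn_model = NNModel(pretrained_model, transformer)
predictions = nn_model.transform(image_frame)
```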
|
||||||
111
docs/readthedocs/source/doc/UseCase/spark-dataframe.md
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
# Use Spark Dataframe for Deep Learning
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_dataframe.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_dataframe.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to use Apache Spark Dataframes to scale out data processing for distributed deep learning.**
|
||||||
|
|
||||||
|
The dataset used in this guide is [movielens-1M](https://grouplens.org/datasets/movielens/1m/), which contains 1 million ratings of 5 levels from 6000 users on 4000 movies. We will read the data into a Spark Dataframe and directly use it as the input to the distributed training.
|
||||||
|
|
||||||
|
### **1. Read input data into Spark DataFrame**
|
||||||
|
|
||||||
|
First, read the input data into Spark Dataframes.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca import OrcaContext
|
||||||
|
|
||||||
|
spark = OrcaContext.get_spark_session()
|
||||||
|
# read csv with specifying column names
|
||||||
|
df = spark.read.csv(new_rating_files, sep=':', inferSchema=True).toDF(
|
||||||
|
"user", "item", "label", "timestamp")
|
||||||
|
```
|
||||||
|
|
||||||
|
### **2. Process data using Spark Dataframe**
|
||||||
|
|
||||||
|
Next, process the data using Spark Dataframe operations.
|
||||||
|
|
||||||
|
```python
|
||||||
|
# update label starting from 0. That's because ratings go from 1 to 5, while the matrix column index goes from 0 to 4
|
||||||
|
df = df.withColumn('label', df.label-1)
|
||||||
|
|
||||||
|
# split to train/test dataset
|
||||||
|
train_data, test_data = df.randomSplit([0.8, 0.2], 100)
|
||||||
|
```
|
||||||
|
|
||||||
|
### **3. Define NCF model**
|
||||||
|
|
||||||
|
This example defines the NCF model in the _Creator Function_ using TensorFlow 2 APIs as follows.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from tensorflow import keras
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
def model_creator(config):
|
||||||
|
embedding_size=16
|
||||||
|
user = keras.layers.Input(dtype=tf.int32, shape=(None,))
|
||||||
|
item = keras.layers.Input(dtype=tf.int32, shape=(None,))
|
||||||
|
label = keras.layers.Input(dtype=tf.int32, shape=(None,))
|
||||||
|
|
||||||
|
with tf.name_scope("GMF"):
|
||||||
|
user_embed_GMF = keras.layers.Embedding(max_user_id + 1, embedding_size)(user)
|
||||||
|
item_embed_GMF = keras.layers.Embedding(max_item_id + 1, embedding_size)(item)
|
||||||
|
GMF = keras.layers.Multiply()([user_embed_GMF, item_embed_GMF])
|
||||||
|
|
||||||
|
with tf.name_scope("MLP"):
|
||||||
|
user_embed_MLP = keras.layers.Embedding(max_user_id + 1, embedding_size)(user)
|
||||||
|
item_embed_MLP = keras.layers.Embedding(max_item_id + 1, embedding_size)(item)
|
||||||
|
interaction = tf.concat([user_embed_MLP, item_embed_MLP], axis=-1)
|
||||||
|
layer1_MLP = keras.layers.Dense(units=embedding_size * 2, activation='relu')(interaction)
|
||||||
|
layer1_MLP = keras.layers.Dropout(rate=0.2)(layer1_MLP)
|
||||||
|
layer2_MLP = keras.layers.Dense(units=embedding_size, activation='relu')(layer1_MLP)
|
||||||
|
layer2_MLP = keras.layers.Dropout(rate=0.2)(layer2_MLP)
|
||||||
|
layer3_MLP = keras.layers.Dense(units=embedding_size // 2, activation='relu')(layer2_MLP)
|
||||||
|
layer3_MLP = keras.layers.Dropout(rate=0.2)(layer3_MLP)
|
||||||
|
|
||||||
|
# Concatenate the two parts together
|
||||||
|
with tf.name_scope("concatenation"):
|
||||||
|
concatenation = tf.concat([GMF, layer3_MLP], axis=-1)
|
||||||
|
outputs = keras.layers.Dense(units=5, activation='softmax')(concatenation)
|
||||||
|
|
||||||
|
model = keras.Model(inputs=[user, item], outputs=outputs)
|
||||||
|
model.compile(optimizer="adam",
|
||||||
|
loss="sparse_categorical_crossentropy",
|
||||||
|
metrics=['accuracy'])
|
||||||
|
return model
|
||||||
|
```
|
||||||
|
|
||||||
|
### **4. Fit with Orca Estimator**
|
||||||
|
|
||||||
|
Finally, run distributed model training/inference on the Spark Dataframes directly.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.tf2 import Estimator
|
||||||
|
|
||||||
|
# create an Estimator
|
||||||
|
est = Estimator.from_keras(model_creator=model_creator) # the model accepts two inputs and one label
|
||||||
|
|
||||||
|
# fit with Estimator
|
||||||
|
stats = est.fit(train_data,
|
||||||
|
epochs=epochs,
|
||||||
|
batch_size=batch_size,
|
||||||
|
feature_cols=['user', 'item'], # specifies which column(s) to be used as inputs
|
||||||
|
label_cols=['label'], # specifies which column(s) to be used as labels
|
||||||
|
steps_per_epoch=800000 // batch_size,
|
||||||
|
validation_data=test_data,
|
||||||
|
validation_steps=200000 // batch_size)
|
||||||
|
|
||||||
|
checkpoint_path = os.path.join(model_dir, "NCF.ckpt")
|
||||||
|
est.save(checkpoint_path)
|
||||||
|
|
||||||
|
# evaluate with Estimator
|
||||||
|
stats = est.evaluate(test_data,
|
||||||
|
feature_cols=['user', 'item'], # specifies which column(s) to be used as inputs
|
||||||
|
label_cols=['label'], # specifies which column(s) to be used as labels
|
||||||
|
num_steps=100000 // batch_size)
|
||||||
|
est.shutdown()
|
||||||
|
print(stats)
|
||||||
|
```
|
||||||
|
|
||||||
0
docs/readthedocs/source/doc/UseCase/tensorboard.md
Normal file
121
docs/readthedocs/source/doc/UseCase/xshards-pandas.md
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
# Use Distributed Pandas for Deep Learning
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_xshards_pandas.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_xshards_pandas.ipynb)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**In this guide we will describe how to use [XShards](../Orca/Overview/data-parallel-processing.md) to scale out Pandas data processing for distributed deep learning.**
|
||||||
|
|
||||||
|
### **1. Read input data into XShards of Pandas DataFrame**
|
||||||
|
|
||||||
|
First, read CSV, JSON or Parquet files into an `XShards` of Pandas Dataframe (i.e., a distributed and sharded dataset where each partition contains a Pandas Dataframe), as shown below:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.data.pandas import read_csv
|
||||||
|
full_data = read_csv(new_rating_files, sep=':', header=None,
|
||||||
|
names=['user', 'item', 'label'], usecols=[0, 1, 2],
|
||||||
|
dtype={0: np.int32, 1: np.int32, 2: np.int32})
|
||||||
|
```
|
||||||
|
|
||||||
|
### **2. Process Pandas Dataframes using XShards**
|
||||||
|
|
||||||
|
Next, use XShards to efficiently process large-size Pandas Dataframes in a distributed and data-parallel fashion. You may run standard Python code on each partition in a data-parallel fashion using `XShards.transform_shard`, as shown below:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# update label starting from 0. That's because ratings go from 1 to 5, while the matrix columns go from 0 to 4
|
||||||
|
def update_label(df):
|
||||||
|
df['label'] = df['label'] - 1
|
||||||
|
return df
|
||||||
|
|
||||||
|
full_data = full_data.transform_shard(update_label)
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
# split to train/test dataset
|
||||||
|
def split_train_test(data):
|
||||||
|
train, test = train_test_split(data, test_size=0.2, random_state=100)
|
||||||
|
return train, test
|
||||||
|
|
||||||
|
train_data, test_data = full_data.transform_shard(split_train_test).split()
|
||||||
|
```
|
||||||
|
|
||||||
|
### **3. Define NCF model**
|
||||||
|
|
||||||
|
Define the NCF model using TensorFlow 1.15 APIs:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
class NCF(object):
|
||||||
|
def __init__(self, embed_size, user_size, item_size):
|
||||||
|
self.user = tf.placeholder(dtype=tf.int32, shape=(None,))
|
||||||
|
self.item = tf.placeholder(dtype=tf.int32, shape=(None,))
|
||||||
|
self.label = tf.placeholder(dtype=tf.int32, shape=(None,))
|
||||||
|
|
||||||
|
with tf.name_scope("GMF"):
|
||||||
|
user_embed_GMF = tf.contrib.layers.embed_sequence(self.user, vocab_size=user_size + 1,
|
||||||
|
embed_dim=embed_size)
|
||||||
|
item_embed_GMF = tf.contrib.layers.embed_sequence(self.item, vocab_size=item_size + 1,
|
||||||
|
embed_dim=embed_size)
|
||||||
|
GMF = tf.multiply(user_embed_GMF, item_embed_GMF)
|
||||||
|
|
||||||
|
with tf.name_scope("MLP"):
|
||||||
|
user_embed_MLP = tf.contrib.layers.embed_sequence(self.user, vocab_size=user_size + 1,
|
||||||
|
embed_dim=embed_size)
|
||||||
|
item_embed_MLP = tf.contrib.layers.embed_sequence(self.item, vocab_size=item_size + 1,
|
||||||
|
embed_dim=embed_size)
|
||||||
|
interaction = tf.concat([user_embed_MLP, item_embed_MLP], axis=-1)
|
||||||
|
layer1_MLP = tf.layers.dense(inputs=interaction, units=embed_size * 2)
|
||||||
|
layer1_MLP = tf.layers.dropout(layer1_MLP, rate=0.2)
|
||||||
|
layer2_MLP = tf.layers.dense(inputs=layer1_MLP, units=embed_size)
|
||||||
|
layer2_MLP = tf.layers.dropout(layer2_MLP, rate=0.2)
|
||||||
|
layer3_MLP = tf.layers.dense(inputs=layer2_MLP, units=embed_size // 2)
|
||||||
|
layer3_MLP = tf.layers.dropout(layer3_MLP, rate=0.2)
|
||||||
|
|
||||||
|
# Concate the two parts together
|
||||||
|
with tf.name_scope("concatenation"):
|
||||||
|
concatenation = tf.concat([GMF, layer3_MLP], axis=-1)
|
||||||
|
self.logits = tf.layers.dense(inputs=concatenation, units=5)
|
||||||
|
self.logits_softmax = tf.nn.softmax(self.logits)
|
||||||
|
self.class_number = tf.argmax(self.logits_softmax, 1)
|
||||||
|
|
||||||
|
with tf.name_scope("loss"):
|
||||||
|
self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
|
||||||
|
labels=self.label, logits=self.logits, name='loss'))
|
||||||
|
|
||||||
|
with tf.name_scope("optimzation"):
|
||||||
|
self.optim = tf.train.AdamOptimizer(1e-3, name='Adam')
|
||||||
|
self.optimizer = self.optim.minimize(self.loss)
|
||||||
|
|
||||||
|
embedding_size=16
|
||||||
|
model = NCF(embedding_size, max_user_id, max_item_id)
|
||||||
|
```
|
||||||
|
### **4. Fit with Orca Estimator**
|
||||||
|
|
||||||
|
Finally, directly run distributed model training/inference on the XShards of Pandas DataFrames.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from zoo.orca.learn.tf.estimator import Estimator
|
||||||
|
|
||||||
|
# create an Estimator.
|
||||||
|
estimator = Estimator.from_graph(
|
||||||
|
inputs=[model.user, model.item], # the model accept two inputs and one label
|
||||||
|
outputs=[model.class_number],
|
||||||
|
labels=[model.label],
|
||||||
|
loss=model.loss,
|
||||||
|
optimizer=model.optim,
|
||||||
|
model_dir=model_dir,
|
||||||
|
metrics={"loss": model.loss})
|
||||||
|
|
||||||
|
# fit the Estimator
|
||||||
|
estimator.fit(data=train_data,
|
||||||
|
batch_size=1280,
|
||||||
|
epochs=1,
|
||||||
|
feature_cols=['user', 'item'], # specifies which column(s) to be used as inputs
|
||||||
|
label_cols=['label'], # specifies which column(s) to be used as labels
|
||||||
|
validation_data=test_data)
|
||||||
|
```
|
||||||
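
You may then evaluate the trained model on the test `XShards` with the same Estimator. The snippet below is a sketch only; it mirrors the `est.evaluate(...)` call pattern used earlier in this documentation set and assumes the Estimator accepts the test XShards the same way as in `fit`:

```python
# evaluate with Estimator on the test XShards of Pandas DataFrames
stats = estimator.evaluate(test_data,
                           feature_cols=['user', 'item'],
                           label_cols=['label'])
estimator.shutdown()
print(stats)
```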

61
docs/readthedocs/source/doc/UserGuide/colab.md
Normal file
@ -0,0 +1,61 @@
# Colab User Guide

---

You can use Analytics Zoo without any installation by using [Google Colab](https://colab.research.google.com/).

### **1. Open a Colab Notebook**

Analytics Zoo includes a collection of [notebooks](./notebooks.md) that can be directly opened and run in Colab. Click 'Run in Google Colab' to open a notebook directly in Colab, and click the "run" triangle on the left of each cell to run that notebook cell. When you run the first cell, you may see a pop-up saying 'Warning: This notebook was not authored by Google'; click 'Run Anyway' to dismiss the warning.

### **2. Notebook Setup**

The first few cells of the notebook contain the code necessary to set up Analytics Zoo and other libraries.

**Install Java 8**

Run the following commands on Google Colab to install JDK 1.8:

```bash
# Install jdk8
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
# Set jdk environment path which enables you to run Pyspark in your Colab environment.
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
!update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
```

**Install Conda**

Run the code below to install [conda](https://docs.conda.io/en/latest/) on Colab.

```bash
# Install Miniconda
!wget https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh
!chmod +x Miniconda3-4.5.4-Linux-x86_64.sh
!./Miniconda3-4.5.4-Linux-x86_64.sh -b -f -p /usr/local

# Update Conda
!conda install --channel defaults conda python=3.6 --yes
!conda update --channel defaults --all --yes

# Append to the sys.path
import sys
_ = (sys.path
     .append("/usr/local/lib/python3.6/site-packages"))

os.environ['PYTHONHOME']="/usr/local"
```

**Install Analytics Zoo**

Install the latest pre-release version using `pip install --pre --upgrade analytics-zoo`.
```bash
# Install latest pre-release version of Analytics Zoo
# Installing Analytics Zoo from pip will automatically install pyspark, bigdl, and their dependencies.
!pip install --pre --upgrade analytics-zoo
```

**Install Python Dependencies**

Since the Colab Python environment provides a number of built-in Python libraries, you should check whether their versions are compatible with your application. You may refer to the [compatibility section](./python.md) for the Python library versions that Analytics Zoo supports.
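
For example, you can pin a library to a supported version directly in the notebook (a sketch only; the package and version below are placeholders, so adjust them to your application):

```bash
# Example only: replace the package and version with what your application actually needs.
!pip install tensorflow==1.15.0
```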

59
docs/readthedocs/source/doc/UserGuide/databricks.md
Normal file
@ -0,0 +1,59 @@
# Databricks User Guide

---

You can run Analytics Zoo programs on [Databricks](https://databricks.com/) clusters as follows.

### **1. Create a Databricks Cluster**

- Create either an [AWS Databricks](https://docs.databricks.com/getting-started/try-databricks.html) workspace or an [Azure Databricks](https://docs.microsoft.com/en-us/azure/azure-databricks/) workspace.
- Create a Databricks [cluster](https://docs.databricks.com/clusters/create.html) using the UI and choose a Databricks runtime version. This guide is tested on Runtime 7.5 (includes Apache Spark 3.0.1, Scala 2.12).

### **2. Installing Analytics Zoo libraries**

In the left pane, click **Clusters** and select your cluster.



Install the Analytics Zoo Python environment using the prebuilt release wheel package. Click **Libraries > Install New > Upload > Python Whl**. Download the prebuilt Analytics Zoo wheel [here](https://sourceforge.net/projects/analytics-zoo/files/zoo-py), choosing a timestamped wheel built for the same Spark version and platform as your Databricks runtime, and drop it on Databricks.



Install the Analytics Zoo prebuilt jar package. Click **Libraries > Install New > Upload > Jar**. Download the Analytics Zoo prebuilt package from the [Release Page](../release.md); please note that you should choose a package built for the same Spark version as your Databricks runtime. Find the jar named "analytics-zoo-bigdl_*-spark_*-jar-with-dependencies.jar" in the lib directory and drop it on Databricks.



Make sure the jar file and the analytics-zoo wheel are installed on all clusters. In the **Libraries** tab of your cluster, check the installed libraries and click the "Install automatically on all clusters" option in **Admin Settings**.



### **3. Setting Spark configuration**

On the cluster configuration page, click the **Advanced Options** toggle and then the **Spark** tab. You can provide custom [Spark configuration properties](https://spark.apache.org/docs/latest/configuration.html) in a cluster configuration; please set them according to your cluster resources and program needs.



See below for an example of the Spark config settings needed by Analytics Zoo. Here it sets 2 cores per executor. Note that "spark.cores.max" needs to be properly set as well.

```
spark.shuffle.reduceLocality.enabled false
spark.serializer org.apache.spark.serializer.JavaSerializer
spark.shuffle.blockTransferService nio
spark.databricks.delta.preview.enabled true
spark.executor.cores 2
spark.speculation false
spark.scheduler.minRegisteredResourcesRatio 1.0
spark.cores.max 4
```

### **4. Running Analytics Zoo on Databricks**

Open a new notebook, and call `init_orca_context` at the beginning of your code (with `cluster_mode` set to "spark-submit").

```python
from zoo.orca import init_orca_context, stop_orca_context

init_orca_context(cluster_mode="spark-submit")
```

Output on Databricks:


111
docs/readthedocs/source/doc/UserGuide/develop.md
Normal file
@ -0,0 +1,111 @@
# Developer Guide

---

Analytics Zoo source code is available at [GitHub](https://github.com/intel-analytics/analytics-zoo):

```bash
git clone https://github.com/intel-analytics/analytics-zoo.git
```

By default, `git clone` will download the development version of Analytics Zoo. If you want a release version, you can use `git checkout` to switch to the corresponding version.

### **1. Python**

#### **1.1 Build**

To generate a new [whl](https://pythonwheels.com/) package for pip install, you can run the following script:

```bash
bash analytics-zoo/pyzoo/dev/build.sh linux default false
```

**Arguments:**

- The first argument is the __platform__ to build for. Either 'linux' or 'mac'.
- The second argument is the analytics-zoo __version__ to build for. 'default' means the default version for the current branch. You can also specify a different version if you wish, e.g., '0.6.0.dev1'.
- You can also add other profiles to build the package, especially for the Spark and BigDL versions.
  For example, if `pyspark==2.4.3` is a dependency, you need to add the profiles `-Dspark.version=2.4.3 -Dbigdl.artifactId=bigdl-SPARK_2.4 -P spark_2.4+` to build Analytics Zoo for Spark 2.4.3 (see the example command after this list).
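
A full build command with these profiles might then look as follows (a sketch only, assuming `build.sh` forwards the extra Maven options; adjust the versions to your target Spark and BigDL):

```bash
bash analytics-zoo/pyzoo/dev/build.sh linux default false -Dspark.version=2.4.3 -Dbigdl.artifactId=bigdl-SPARK_2.4 -P spark_2.4+
```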

After running the above command, you will find a `whl` file under the folder `analytics-zoo/pyzoo/dist/`. You can then directly pip install it to your local Python environment:
```bash
pip install analytics-zoo/pyzoo/dist/analytics_zoo-VERSION-py2.py3-none-PLATFORM_x86_64.whl
```

See [here](./python.md) for more instructions to run analytics-zoo after pip install.

#### **1.2 IDE Setup**
Any IDE that supports Python should be able to run Analytics Zoo. PyCharm works fine for us.

You need to do the following preparations before starting the IDE to successfully run an Analytics Zoo Python program in the IDE:

- Build Analytics Zoo; see [here](#21-build) for more instructions.
- Prepare the Spark environment by either setting `SPARK_HOME` as an environment variable or pip installing `pyspark`. Note that the Spark version should match the one you build Analytics Zoo on (see the example below).
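
A minimal sketch (the Spark installation path and the `pyspark` version below are examples; use the ones that match your Analytics Zoo build):

```bash
# Option 1: point SPARK_HOME at an existing Spark installation (path is an example)
export SPARK_HOME=/path/to/spark-2.4.3-bin-hadoop2.7
# Option 2: install pyspark into your conda environment (version is an example)
pip install pyspark==2.4.6
```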

- Set BIGDL_CLASSPATH:
```bash
export BIGDL_CLASSPATH=analytics-zoo/dist/lib/analytics-zoo-*-jar-with-dependencies.jar
```

- Prepare the BigDL Python environment by either downloading the BigDL source code from [GitHub](https://github.com/intel-analytics/BigDL) or pip installing `bigdl`. Note that the BigDL version should match the one you build Analytics Zoo on.
- Add `pyzoo` and `spark-analytics-zoo.conf` to `PYTHONPATH`:
```bash
export PYTHONPATH=analytics-zoo/pyzoo:analytics-zoo/dist/conf/spark-analytics-zoo.conf:$PYTHONPATH
```
If you download BigDL from [GitHub](https://github.com/intel-analytics/BigDL), you also need to add `BigDL/pyspark` to `PYTHONPATH`:
```bash
export PYTHONPATH=BigDL/pyspark:$PYTHONPATH
```

The above environment variables should be available when running or debugging code in the IDE.
* In PyCharm, go to Run -> Edit Configurations. In the "Run/Debug Configurations" panel, you can update the above environment variables in your configuration.

### **2. Scala**

#### **2.1 Build**

Maven 3 is needed to build Analytics Zoo; you can download it from the [Maven website](https://maven.apache.org/download.cgi).

After installing Maven 3, please set the environment variable MAVEN_OPTS as follows:
```bash
$ export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m"
```

**Build using `make-dist.sh`**

It is highly recommended that you build Analytics Zoo using the [make-dist.sh script](https://github.com/intel-analytics/analytics-zoo/blob/master/make-dist.sh) with **Java 8**.

You can build Analytics Zoo with the following commands:
```bash
$ bash make-dist.sh
```
After that, you can find a `dist` folder, which contains all the needed files to run an Analytics Zoo program. The files in `dist` include:

* **dist/lib/analytics-zoo-VERSION-jar-with-dependencies.jar**: This jar package contains all dependencies except Spark classes.
* **dist/lib/analytics-zoo-VERSION-python-api.zip**: This zip package contains all Python files of Analytics Zoo.

The instructions above will build Analytics Zoo with Spark 2.4.3. To build with other Spark versions, for example Spark 2.2.0, you can use `bash make-dist.sh -Dspark.version=2.2.0 -Dbigdl.artifactId=bigdl_SPARK_2.2`.

**Build with JDK 11**

Spark supports JDK 11 and Scala 2.12 starting from Spark 3.0. You can use `-P spark_3.x` to specify Spark 3 and Scala 2.12. Additionally, `make-dist.sh` uses Java 8 by default; to compile with Java 11, you need to specify the build options `-Djava.version=11 -Djavac.version=11`.

It's recommended to download [Oracle JDK 11](https://www.oracle.com/java/technologies/javase-jdk11-downloads.html), which will avoid possible incompatibilities with Maven plugins. You should update `PATH` and make sure your `JAVA_HOME` environment variable is set to Java 11 if you're running from the command line. If you're running from an IDE, you need to make sure it is set to run Maven with your current JDK.

Build with `make-dist.sh`:

```bash
$ bash make-dist.sh -P spark_3.x -Djava.version=11 -Djavac.version=11
```

#### **2.2 IDE Setup**

Analytics Zoo uses Maven to organize the project. You should choose an IDE that supports Maven projects and the Scala language. IntelliJ IDEA works fine for us.

In IntelliJ, you can open the Analytics Zoo project root directly, and the IDE will import the project automatically.

We set the scopes of Spark-related libraries to `provided` in the Maven pom.xml, which, however, will cause a problem in the IDE (a `NoClassDefFoundError` is thrown when you run applications). You can easily change the scopes using the `all-in-one` profile.

* In IntelliJ, go to View -> Tool Windows -> Maven Projects. Then in the Maven Projects panel, under Profiles, click "all-in-one".
143
docs/readthedocs/source/doc/UserGuide/docker.md
Normal file
@ -0,0 +1,143 @@
# Docker User Guide

---

### **1. Pull Docker Image**

You may pull a Docker image from the [Analytics Zoo Docker Hub](https://hub.docker.com/r/intelanalytics/analytics-zoo).

To pull the nightly build version, use
```bash
sudo docker pull intelanalytics/analytics-zoo:latest
```

To pull other versions, please refer to the [Analytics Zoo Docker Hub Tags](https://hub.docker.com/r/intelanalytics/analytics-zoo/tags?page=1&ordering=last_updated), select a tag and use
```bash
sudo docker pull intelanalytics/analytics-zoo:tag_name
```

**Configuring resources**

For Docker Desktop users, the default resources (2 CPUs and 2GB memory) are relatively small, and you may want to change them to larger values (8GB memory and 4 CPUs should be a good estimate for most examples; the exact memory requirements vary for different applications). For more information, view the Docker documentation for [MacOS](https://docs.docker.com/docker-for-mac/#resources) and [Windows](https://docs.docker.com/docker-for-windows/#resources).

**Speed up pulling image by adding mirrors**

To speed up pulling the image from DockerHub, you may add the registry-mirrors key and value by editing `daemon.json` (located in the `/etc/docker/` folder on Linux):
```
{
  "registry-mirrors": ["https://<my-docker-mirror-host>"]
}
```
For instance, users in China may add the USTC mirror as follows:
```
{
  "registry-mirrors": ["https://docker.mirrors.ustc.edu.cn"]
}
```

After that, flush the changes and restart docker:

```
sudo systemctl daemon-reload
sudo systemctl restart docker
```

### **2. Launch Docker Container**

After pulling the Analytics Zoo Docker image, you can launch an Analytics Zoo Docker container:
```
sudo docker run -it --rm --net=host \
    -e NOTEBOOK_PORT=12345 \
    -e NOTEBOOK_TOKEN="your-token" \
    -e http_proxy=http://your-proxy-host:your-proxy-port \
    -e https_proxy=https://your-proxy-host:your-proxy-port \
    intelanalytics/analytics-zoo:latest bash
```

* The value 12345 is a user-specified port number.
* The value "your-token" is a user-specified string.
* If you need to use an http/https proxy, please use `-e http_proxy`/`-e https_proxy`.

Once the container is successfully launched, you will automatically log in to the container and see this as the output:
```
root@[hostname]:/opt/work#
```

The /opt/work directory contains:

* download-analytics-zoo.sh is used for downloading Analytics Zoo distributions.
* start-notebook.sh is used for starting the Jupyter notebook. You can specify the environment settings and Spark settings to start a specified Jupyter notebook.
* analytics-zoo-${ANALYTICS_ZOO_VERSION} is the home of the Analytics Zoo distribution.
* analytics-zoo-SPARK_x.x-x.x.x-dist.zip is the zip file of the Analytics Zoo distribution.
* spark-${SPARK_VERSION} is the Spark home.
* analytics-zoo is cloned from https://github.com/intel-analytics/analytics-zoo and contains apps and examples using analytics-zoo.

### **3. Run Jupyter Notebook Examples in the Container**

After the Docker container is launched and you have logged in to the container, you can start the Jupyter Notebook service inside the container.

#### **3.1 Start the Jupyter Notebook services**

In the `/opt/work` directory, run this command line to start the Jupyter Notebook service:
```
./start-notebook.sh
```

You will see an output message like the one below, which means the Jupyter Notebook service has started successfully within the container:
```
[I 01:04:45.625 NotebookApp] Serving notebooks from local directory: /opt/work/analytics-zoo-0.5.0-SNAPSHOT/apps
[I 01:04:45.625 NotebookApp] The Jupyter Notebook is running at:
[I 01:04:45.625 NotebookApp] http://(the-host-name or 127.0.0.1):12345/?token=...
[I 01:04:45.625 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
```

#### **3.2 Connect to Jupyter Notebook service from a browser**

After the Jupyter Notebook service is successfully started, you can connect to it from a browser.

1. Get the IP address of the container (see the example command after this list).
2. Launch a browser, and connect to the Jupyter Notebook service with the URL: http://container-ip-address:port-number/?token=your-token
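
One way to look up the IP address of a running container from the host is shown below (a sketch only; `<containerID>` is a placeholder for the ID printed by `sudo docker ps`):

```bash
# Print the container's IP address; an empty result means the container shares the
# host network (e.g. it was started with --net=host), so use the host's IP instead.
sudo docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' <containerID>
```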

As a result, you will see the Jupyter Notebook like this:



#### **3.3 Run Analytics Zoo Jupyter Notebooks**

After connecting to the Jupyter Notebook in the browser, you can run multiple Analytics Zoo Jupyter Notebook examples. The example shown below is "dogs-vs-cats".

* Click into the "dogs-vs-cats" folder:



* Open the notebook file:



* Start to run the "dogs-vs-cats" notebook:



* Run through the example and check the prediction:



### **4. Shut Down Docker Container**

You should shut down the Analytics Zoo Docker container after using it.

1. You can list all the active Docker containers with the command:
```
sudo docker ps
```

2. You will see your docker containers:
```
CONTAINER ID        IMAGE                                 COMMAND                  CREATED             STATUS              PORTS               NAMES
40de2cdad025        intelanalytics/analytics-zoo:latest   "/opt/work/start-n..."   3 hours ago         Up 3 hours                              upbeat_al
```

3. Shut down the corresponding docker container by its ID:
```
$ sudo docker rm -f 40de2cdad025
```
131
docs/readthedocs/source/doc/UserGuide/hadoop.md
Normal file
@ -0,0 +1,131 @@
# Hadoop/YARN User Guide

Hadoop version: Hadoop >= 2.7 or [CDH](https://www.cloudera.com/products/open-source/apache-hadoop/key-cdh-components.html) 5.X. Hadoop 3.X and CDH 6.X have not been tested and are thus currently not supported.

---

You can run Analytics Zoo programs on standard Hadoop/YARN clusters without any changes to the cluster (i.e., no need to pre-install Analytics Zoo or any Python libraries in the cluster).

### **1. Prepare Environment**

- You need to first use [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the Python environment _**on the local client machine**_. Create a conda environment and install all the needed Python libraries in the created conda environment:

```bash
conda create -n zoo python=3.7  # "zoo" is the conda environment name, you can use any name you like.
conda activate zoo

# Use conda or pip to install all the needed Python dependencies in the created conda environment.
```

- You need to download and install JDK in the environment, and properly set the environment variable `JAVA_HOME`, which is required by Spark. __JDK8__ is highly recommended.

You may take the following commands as a reference for installing [OpenJDK](https://openjdk.java.net/install/):

```bash
# For Ubuntu
sudo apt-get install openjdk-8-jre
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/

# For CentOS
su -c "yum install java-1.8.0-openjdk"
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre

export PATH=$PATH:$JAVA_HOME/bin
java -version  # Verify the version of JDK.
```

- Check the Hadoop setup and configurations of your cluster. Make sure you properly set the environment variable `HADOOP_CONF_DIR`, which is needed to initialize Spark on YARN:

```bash
export HADOOP_CONF_DIR=the directory of the hadoop and yarn configurations
```

- **For CDH users**

If your CDH cluster already has Spark installed, CDH's Spark will conflict with the `pyspark` installed by pip that analytics-zoo requires in the next section.

Thus, before running analytics-zoo applications, you should unset all Spark-related environment variables (see the example after this list). You can use `env | grep SPARK` to find all the existing Spark environment variables.

Also, a CDH cluster's `HADOOP_CONF_DIR` should by default be `/etc/hadoop/conf`.
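
A minimal sketch of what unsetting those variables may look like (the exact variable names depend on what `env | grep SPARK` reports on your cluster):

```bash
env | grep SPARK    # list the Spark-related variables set by CDH
unset SPARK_HOME    # unset each variable reported above, e.g. SPARK_HOME
```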

---
### **2. YARN Client Mode**

- Install Analytics Zoo in the created conda environment via pip:

```bash
pip install analytics-zoo
```

View the [Python User Guide](./python.md) for more details.

- We recommend using `init_orca_context` at the very beginning of your code to initiate and run Analytics Zoo on standard Hadoop/YARN clusters in [YARN client mode](https://spark.apache.org/docs/latest/running-on-yarn.html#launching-spark-on-yarn):

```python
from zoo.orca import init_orca_context

sc = init_orca_context(cluster_mode="yarn-client", cores=4, memory="10g", num_nodes=2)
```

By specifying cluster_mode to be "yarn-client", `init_orca_context` would automatically prepare the runtime Python environment, detect the current Hadoop configurations from `HADOOP_CONF_DIR` and initiate the distributed execution engine on the underlying YARN cluster. View [Orca Context](../Orca/Overview/orca-context.md) for more details.

- You can then simply run your Analytics Zoo program in a Jupyter notebook:

```bash
jupyter notebook --notebook-dir=./ --ip=* --no-browser
```

or as a normal Python script (e.g. script.py):

```bash
python script.py
```

---
### **3. YARN Cluster Mode**

Follow the steps below if you need to run Analytics Zoo in [YARN cluster mode](https://spark.apache.org/docs/latest/running-on-yarn.html#launching-spark-on-yarn).

- Download and extract [Spark](https://spark.apache.org/downloads.html). We recommend using [Spark 2.4.3](https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz). Set the environment variable `SPARK_HOME`:

```bash
export SPARK_HOME=the root directory where you extract the downloaded Spark package
```

- Download and extract [Analytics Zoo](../release.md). Make sure the Analytics Zoo package you download is built against a Spark version compatible with yours. Set the environment variable `ANALYTICS_ZOO_HOME`:

```bash
export ANALYTICS_ZOO_HOME=the root directory where you extract the downloaded Analytics Zoo package
```

- Pack the current conda environment to `environment.tar.gz` (you can use any name you like):

```bash
conda pack -o environment.tar.gz
```

- _You need to write your Analytics Zoo program as a Python script._ In the script, you can call `init_orca_context` and specify cluster_mode to be "spark-submit":

```python
from zoo.orca import init_orca_context

sc = init_orca_context(cluster_mode="spark-submit")
```

- Use `spark-submit` to submit your Analytics Zoo program (e.g. script.py):

```bash
PYSPARK_PYTHON=./environment/bin/python ${ANALYTICS_ZOO_HOME}/bin/spark-submit-python-with-zoo.sh \
    --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./environment/bin/python \
    --master yarn-cluster \
    --executor-memory 10g \
    --driver-memory 10g \
    --executor-cores 8 \
    --num-executors 2 \
    --archives environment.tar.gz#environment \
    script.py
```

You can adjust the configurations according to your cluster settings.
BIN
docs/readthedocs/source/doc/UserGuide/images/Databricks1.PNG
Normal file
After Width: | Height: | Size: 52 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/Databricks2.PNG
Normal file
After Width: | Height: | Size: 64 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/Databricks3.PNG
Normal file
After Width: | Height: | Size: 63 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/Databricks4.PNG
Normal file
After Width: | Height: | Size: 40 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/Databricks5.PNG
Normal file
After Width: | Height: | Size: 38 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/Databricks6.PNG
Normal file
After Width: | Height: | Size: 36 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/notebook1.jpg
Normal file
After Width: | Height: | Size: 95 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/notebook2.jpg
Normal file
After Width: | Height: | Size: 50 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/notebook3.jpg
Normal file
After Width: | Height: | Size: 103 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/notebook4.jpg
Normal file
After Width: | Height: | Size: 112 KiB |
BIN
docs/readthedocs/source/doc/UserGuide/images/notebook5.jpg
Normal file
After Width: | Height: | Size: 86 KiB |
307
docs/readthedocs/source/doc/UserGuide/k8s.md
Normal file
@ -0,0 +1,307 @@
# K8s User Guide

---

### **1. Pull `hyper-zoo` Docker Image**

You may pull the prebuilt Analytics Zoo `hyper-zoo` image from [Docker Hub](https://hub.docker.com/r/intelanalytics/hyper-zoo/tags) as follows:

```bash
sudo docker pull intelanalytics/hyper-zoo:latest
```

**Speed up pulling image by adding mirrors**

To speed up pulling the image from DockerHub, you may add the registry-mirrors key and value by editing `daemon.json` (located in the `/etc/docker/` folder on Linux):
```
{
  "registry-mirrors": ["https://<my-docker-mirror-host>"]
}
```
For instance, users in China may add the USTC mirror as follows:
```
{
  "registry-mirrors": ["https://docker.mirrors.ustc.edu.cn"]
}
```

After that, flush the changes and restart docker:

```
sudo systemctl daemon-reload
sudo systemctl restart docker
```

### **2. Launch a Client Container**

You can submit Analytics Zoo applications from a client container that provides the required environment.

```bash
sudo docker run -itd --net=host \
    -v /etc/kubernetes:/etc/kubernetes \
    -v /root/.kube:/root/.kube \
    intelanalytics/hyper-zoo:latest bash
```

**Note:** to create the client container, `-v /etc/kubernetes:/etc/kubernetes` and `-v /root/.kube:/root/.kube` are required to specify the path of the kube config and installation.

You can specify more arguments:

```bash
sudo docker run -itd --net=host \
    -v /etc/kubernetes:/etc/kubernetes \
    -v /root/.kube:/root/.kube \
    -e NOTEBOOK_PORT=12345 \
    -e NOTEBOOK_TOKEN="your-token" \
    -e http_proxy=http://your-proxy-host:your-proxy-port \
    -e https_proxy=https://your-proxy-host:your-proxy-port \
    -e RUNTIME_SPARK_MASTER=k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port> \
    -e RUNTIME_K8S_SERVICE_ACCOUNT=account \
    -e RUNTIME_K8S_SPARK_IMAGE=intelanalytics/hyper-zoo:latest \
    -e RUNTIME_PERSISTENT_VOLUME_CLAIM=myvolumeclaim \
    -e RUNTIME_DRIVER_HOST=x.x.x.x \
    -e RUNTIME_DRIVER_PORT=54321 \
    -e RUNTIME_EXECUTOR_INSTANCES=1 \
    -e RUNTIME_EXECUTOR_CORES=4 \
    -e RUNTIME_EXECUTOR_MEMORY=20g \
    -e RUNTIME_TOTAL_EXECUTOR_CORES=4 \
    -e RUNTIME_DRIVER_CORES=4 \
    -e RUNTIME_DRIVER_MEMORY=10g \
    intelanalytics/hyper-zoo:latest bash
```

- NOTEBOOK_PORT value 12345 is a user-specified port number.
- NOTEBOOK_TOKEN value "your-token" is a user-specified string.
- http_proxy/https_proxy specify the HTTP/HTTPS proxy.
- RUNTIME_SPARK_MASTER specifies the Spark master, which should be `k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port>` or `spark://<spark-master-host>:<spark-master-port>`.
- RUNTIME_K8S_SERVICE_ACCOUNT is the service account for the driver pod. Please refer to k8s [RBAC](https://spark.apache.org/docs/latest/running-on-kubernetes.html#rbac).
- RUNTIME_K8S_SPARK_IMAGE is the k8s image.
- RUNTIME_PERSISTENT_VOLUME_CLAIM specifies the [Kubernetes volume](https://spark.apache.org/docs/latest/running-on-kubernetes.html#volume-mounts) mount. You are supposed to use a volume mount to store or receive data.
- RUNTIME_DRIVER_HOST/RUNTIME_DRIVER_PORT specify the driver host and port number (only required when submitting jobs in Kubernetes client mode).
- Other environment variables are for Spark configuration settings. The default values in this image are listed above. Replace the values as you need.

Once the container is created, execute the container:

```bash
sudo docker exec -it <containerID> bash
```

You will log in to the container and see this as the output:

```
root@[hostname]:/opt/spark/work-dir#
```

`/opt/spark/work-dir` is the Spark work path.

The `/opt` directory contains:

- download-analytics-zoo.sh is used for downloading Analytics Zoo distributions.
- start-notebook-spark.sh is used for starting the Jupyter notebook on a standard Spark cluster.
- start-notebook-k8s.sh is used for starting the Jupyter notebook on a k8s cluster.
- analytics-zoo-x.x-SNAPSHOT is `ANALYTICS_ZOO_HOME`, the home of the Analytics Zoo distribution.
- the analytics-zoo-examples directory contains the downloaded Python example code.
- jdk is the JDK home.
- spark is the Spark home.
- redis is the Redis home.

### **3. Run Analytics Zoo Examples on k8s**

_**Note**: Please make sure `kubectl` has appropriate permissions to create, list and delete pods._

_**Note**: Please refer to section 4 for some known issues._

#### **3.1 K8s client mode**

We recommend using `init_orca_context` at the very beginning of your code (e.g. in script.py) to initiate and run Analytics Zoo on standard K8s clusters in [client mode](http://spark.apache.org/docs/latest/running-on-kubernetes.html#client-mode).

```python
from zoo.orca import init_orca_context

init_orca_context(cluster_mode="k8s", master="k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port>",
                  container_image="intelanalytics/hyper-zoo:latest",
                  num_nodes=2, cores=2,
                  conf={"spark.driver.host": "x.x.x.x",
                        "spark.driver.port": "x"})
```

Execute `python script.py` to run your program on the k8s cluster directly.

#### **3.2 K8s cluster mode**

For k8s [cluster mode](https://spark.apache.org/docs/2.4.5/running-on-kubernetes.html#cluster-mode), you can call `init_orca_context` and specify cluster_mode to be "spark-submit" in your Python script (e.g. in script.py):

```python
from zoo.orca import init_orca_context

init_orca_context(cluster_mode="spark-submit")
```

Use spark-submit to submit your Analytics Zoo program:

```bash
${ANALYTICS_ZOO_HOME}/bin/spark-submit-python-with-zoo.sh \
  --master k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port> \
  --deploy-mode cluster \
  --name analytics-zoo \
  --conf spark.kubernetes.container.image="intelanalytics/hyper-zoo:latest" \
  --conf spark.executor.instances=1 \
  --executor-memory 10g \
  --driver-memory 10g \
  --executor-cores 8 \
  --num-executors 2 \
  file:///path/script.py
```

#### **3.3 Run Jupyter Notebooks**

After the Docker container is launched and you have logged in to the container, you can start the Jupyter Notebook service inside the container.

In the `/opt` directory, run this command line to start the Jupyter Notebook service:
```
./start-notebook-k8s.sh
```

You will see an output message like the one below, which means the Jupyter Notebook service has started successfully within the container:
```
[I 23:51:08.456 NotebookApp] Serving notebooks from local directory: /opt/analytics-zoo-0.11.0-SNAPSHOT/apps
[I 23:51:08.456 NotebookApp] Jupyter Notebook 6.2.0 is running at:
[I 23:51:08.456 NotebookApp] http://xxxx:12345/?token=...
[I 23:51:08.457 NotebookApp] or http://127.0.0.1:12345/?token=...
[I 23:51:08.457 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
```

Then, refer to the [docker guide](./docker.md) to open the Jupyter Notebook service from a browser and run notebooks.

#### **3.4 Run Scala programs**

Use spark-submit to submit your Analytics Zoo program, e.g., run the [anomalydetection](https://github.com/intel-analytics/analytics-zoo/tree/master/zoo/src/main/scala/com/intel/analytics/zoo/examples/anomalydetection) example (in either client mode or cluster mode) as follows:

```bash
${SPARK_HOME}/bin/spark-submit \
  --master ${RUNTIME_SPARK_MASTER} \
  --deploy-mode client \
  --conf spark.driver.host=${RUNTIME_DRIVER_HOST} \
  --conf spark.driver.port=${RUNTIME_DRIVER_PORT} \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=${RUNTIME_K8S_SERVICE_ACCOUNT} \
  --name analytics-zoo \
  --conf spark.kubernetes.container.image=${RUNTIME_K8S_SPARK_IMAGE} \
  --conf spark.executor.instances=${RUNTIME_EXECUTOR_INSTANCES} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/path \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/path \
  --conf spark.kubernetes.driver.label.<your-label>=true \
  --conf spark.kubernetes.executor.label.<your-label>=true \
  --executor-cores ${RUNTIME_EXECUTOR_CORES} \
  --executor-memory ${RUNTIME_EXECUTOR_MEMORY} \
  --total-executor-cores ${RUNTIME_TOTAL_EXECUTOR_CORES} \
  --driver-cores ${RUNTIME_DRIVER_CORES} \
  --driver-memory ${RUNTIME_DRIVER_MEMORY} \
  --properties-file ${ANALYTICS_ZOO_HOME}/conf/spark-analytics-zoo.conf \
  --py-files ${ANALYTICS_ZOO_HOME}/lib/analytics-zoo-bigdl_${BIGDL_VERSION}-spark_${SPARK_VERSION}-${ANALYTICS_ZOO_VERSION}-python-api.zip \
  --conf spark.driver.extraJavaOptions=-Dderby.stream.error.file=/tmp \
  --conf spark.sql.catalogImplementation='in-memory' \
  --conf spark.driver.extraClassPath=${ANALYTICS_ZOO_HOME}/lib/analytics-zoo-bigdl_${BIGDL_VERSION}-spark_${SPARK_VERSION}-${ANALYTICS_ZOO_VERSION}-jar-with-dependencies.jar \
  --conf spark.executor.extraClassPath=${ANALYTICS_ZOO_HOME}/lib/analytics-zoo-bigdl_${BIGDL_VERSION}-spark_${SPARK_VERSION}-${ANALYTICS_ZOO_VERSION}-jar-with-dependencies.jar \
  --class com.intel.analytics.zoo.examples.anomalydetection.AnomalyDetection \
  ${ANALYTICS_ZOO_HOME}/lib/analytics-zoo-bigdl_${BIGDL_VERSION}-spark_${SPARK_VERSION}-${ANALYTICS_ZOO_VERSION}-python-api.zip \
  --inputDir /path
```

Options:

- --master: the Spark master, which must be a URL of the format `k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port>`.
- --deploy-mode: submit the application in client/cluster mode.
- --name: the Spark application name.
- --conf: specifies the k8s service account, the container image to use for the Spark application, the driver volume name and path, pod labels, Spark driver and executor configuration, etc. You can refer to [spark configuration](https://spark.apache.org/docs/latest/configuration.html) and [spark on k8s configuration](https://spark.apache.org/docs/latest/running-on-kubernetes.html#configuration) for more details.
- --properties-file: the customized conf properties.
- --py-files: the extra Python packages that are needed.
- --class: the Scala example class name.
- --inputDir: the input data path of the anomaly detection example. The data path is on the mounted filesystem of the host. Refer to [Kubernetes Volumes](https://spark.apache.org/docs/latest/running-on-kubernetes.html#using-kubernetes-volumes) for more details.

### **4. Known issues**

This section shows some common topics for both client mode and cluster mode.

#### **4.1 How to retain executor logs for debugging?**

K8s would delete the pod once the executor fails, in both client mode and cluster mode. If you want to keep the executor logs, you could set "temp-dir" to a mounted network file system (NFS) storage so that the logs are written there instead of the default directory. In this case, you may meet `JSONDecodeError` because multiple executors would write logs to the same physical folder and cause conflicts. The solutions are in the next section.

```python
init_orca_context(..., extra_params={"temp-dir": "/zoo/"})
```

#### **4.2 How to deal with "JSONDecodeError"?**

If you set `temp-dir` to a mounted NFS storage and use multiple executors, you may meet `JSONDecodeError` since multiple executors would write to the same physical folder and cause conflicts. Not mounting `temp-dir` on shared storage is one option to avoid conflicts. But if you debug Ray on k8s, you need to output logs to a shared storage; in this case, you could set num-nodes to 1. After testing, you can remove the `temp-dir` setting and run multiple executors.

#### **4.3 How to use NFS?**

If you want to save some files beyond the pod's lifecycle, such as logging callbacks or TensorBoard callbacks, you need to set the output directory to a mounted persistent volume directory. Take NFS as a simple example.

Use NFS in client mode:

```python
init_orca_context(cluster_mode="k8s", ...,
                  conf={...,
                        "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfsvolumeclaim.options.claimName": "nfsvolumeclaim",
                        "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfsvolumeclaim.mount.path": "/zoo"
                        })
```

Use NFS in cluster mode:

```bash
${ANALYTICS_ZOO_HOME}/bin/spark-submit-python-with-zoo.sh \
  --... ... \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.nfsvolumeclaim.options.claimName="nfsvolumeclaim" \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.nfsvolumeclaim.mount.path="/zoo" \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.nfsvolumeclaim.options.claimName="nfsvolumeclaim" \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.nfsvolumeclaim.mount.path="/zoo" \
  file:///path/script.py
```

#### **4.4 How to deal with "RayActorError"?**

"RayActorError" may be caused by running out of Ray memory. If you meet this error, try to increase the memory for Ray.

```python
init_orca_context(..., extra_executor_memory_for_ray="100g")
```

#### **4.5 How to set proper "steps_per_epoch" and "validation_steps"?**

The `steps_per_epoch` and `validation_steps` should equal the number of samples in the dataset divided by the batch size if you want to train on the whole dataset in each epoch. They do not depend on `num_nodes` when the total dataset size and batch size are fixed. For example, suppose you set `num_nodes` to 1 and `steps_per_epoch` to 6; if you change `num_nodes` to 3, `steps_per_epoch` should still be 6.
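
A minimal sketch of the arithmetic (the dataset sizes and batch size below are placeholders):

```python
# Steps depend only on dataset size and batch size, not on num_nodes.
train_size, test_size, batch_size = 60000, 10000, 320
steps_per_epoch = train_size // batch_size    # 187
validation_steps = test_size // batch_size    # 31
```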

#### **4.6 Others**

`spark.kubernetes.container.image.pullPolicy` needs to be specified as `Always` if you need to update your Spark executor image for k8s.
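
A sketch of passing this through the Spark conf when creating the Orca context (the master URL and image below are placeholders, following the client-mode example above):

```python
from zoo.orca import init_orca_context

# "Always" forces k8s to re-pull the executor image whenever a new pod starts.
init_orca_context(cluster_mode="k8s",
                  master="k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port>",
                  container_image="intelanalytics/hyper-zoo:latest",
                  num_nodes=2, cores=2,
                  conf={"spark.kubernetes.container.image.pullPolicy": "Always"})
```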

### **5. Access logs and clear pods**

When the application is running, it's possible to stream logs on the driver pod:

```bash
$ kubectl logs <spark-driver-pod>
```

To check the pod status or get some basic information about the pod, use:

```bash
$ kubectl describe pod <spark-driver-pod>
```

You can also check other pods in a similar way.

After the application finishes running, delete the driver pod:

```bash
$ kubectl delete pod <spark-driver-pod>
```

Or clean up the entire Spark application by pod label:

```bash
$ kubectl delete pod -l <pod label>
```
72
docs/readthedocs/source/doc/UserGuide/notebooks.md
Normal file
@ -0,0 +1,72 @@
# Colab notebooks

---

## Quick Start

- **TensorFlow 1.15 Quickstart**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf_lenet_mnist.ipynb)

- **Keras 2.3 Quickstart**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/keras_lenet_mnist.ipynb)

- **TensorFlow 2 Quickstart**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/tf2_keras_lenet_mnist.ipynb)

- **PyTorch Quickstart**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_lenet_mnist.ipynb)


## Common Use Case

- **Use `torch.distributed` in Orca**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/pytorch_distributed_lenet_mnist.ipynb)

- **Use Spark Dataframe for Deep Learning**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_dataframe.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_dataframe.ipynb)

- **Use Distributed Pandas for Deep Learning**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_xshards_pandas.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/ncf_xshards_pandas.ipynb)

- **Use AutoML for Time-Series Forecasting**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_autots_nyc_taxi.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_autots_nyc_taxi.ipynb)

- **Use TSDataset and Forecaster for Time-Series Forecasting**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_nyc_taxi_tsdataset_forecaster.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_nyc_taxi_tsdataset_forecaster.ipynb)

- **Use Anomaly Detector for Unsupervised Anomaly Detection**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_minn_traffic_anomaly_detector.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/chronos/chronos_minn_traffic_anomaly_detector.ipynb)

- **Enable AutoML for PyTorch**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/autoestimator_pytorch_lenet_mnist.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/autoestimator_pytorch_lenet_mnist.ipynb)

- **Use AutoXGBoost to auto-tune XGBoost parameters**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/autoxgboost_regressor_sklearn_boston.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/quickstart/autoxgboost_regressor_sklearn_boston.ipynb)


## AI Application Case

- **Use Pytorch for Fashion MNIST Image Classification**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/examples/fashion_mnist_bigdl.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/examples/fashion_mnist_bigdl.ipynb)

- **Use Keras for Text Classification**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/examples/basic_text_classification.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/examples/basic_text_classification.ipynb)

- **Use Pytorch for Image Super Resolution**

[Run in Google Colab](https://colab.research.google.com/github/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/examples/super_resolution.ipynb) [View source on GitHub](https://github.com/intel-analytics/analytics-zoo/blob/master/docs/docs/colab-notebook/orca/examples/super_resolution.ipynb)
150
docs/readthedocs/source/doc/UserGuide/python.md
Normal file
|
|
@ -0,0 +1,150 @@
|
||||||
|
# Python User Guide

---

### **1. Install**

- We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to prepare the Python environment as follows:

  ```bash
  conda create -n zoo python=3.7  # "zoo" is the conda environment name; you can use any name you like.
  conda activate zoo
  ```

- You need to install JDK in the environment and properly set the environment variable `JAVA_HOME`. __JDK8__ is highly recommended.

  You may take the following commands as a reference for installing [OpenJDK](https://openjdk.java.net/install/):

  ```bash
  # For Ubuntu
  sudo apt-get install openjdk-8-jre
  export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/

  # For CentOS
  su -c "yum install java-1.8.0-openjdk"
  export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre

  export PATH=$PATH:$JAVA_HOME/bin
  java -version  # Verify the version of JDK.
  ```

#### **1.1 Official Release**

You can install the latest release version of Analytics Zoo as follows:
```bash
pip install analytics-zoo
```
_**Note:** Installing Analytics Zoo will automatically install `bigdl==0.13.0`, `pyspark==2.4.6`, `conda-pack==0.3.1` and their dependencies if they haven't been detected in your conda environment._

#### **1.2 Nightly Build**

You can install the latest nightly build of Analytics Zoo as follows:

```bash
pip install --pre --upgrade analytics-zoo
```

Alternatively, you can find the list of the nightly build versions [here](https://pypi.org/project/analytics-zoo/#history), and install a specific version as follows:

```bash
pip install analytics-zoo==<version>
```

_**Note:** If you are using a custom URL of Python Package Index, you may need to check whether the latest packages have been synced with pypi. Or you can add the option `-i https://pypi.python.org/simple` when running `pip install` to use pypi as the index-url._

---
### **2. Run**

_**Note:** Installing Analytics Zoo from pip will automatically install `pyspark`. To avoid possible conflicts, you are highly recommended to **unset the environment variable `SPARK_HOME`** if it exists in your environment._

#### **2.1 Interactive Shell**

You may test if the installation is successful using the interactive Python shell as follows:

* Type `python` in the command line to start a REPL.
* Try to run the example code below to verify the installation:

  ```python
  import zoo
  from zoo.orca import init_orca_context

  print(zoo.__version__)  # Verify the version of analytics-zoo.
  sc = init_orca_context()  # Initialize analytics-zoo on the underlying cluster.
  ```

#### **2.2 Jupyter Notebook**

You can start the Jupyter notebook as you normally do using the following command and run Analytics Zoo programs directly in a Jupyter notebook:

```bash
jupyter notebook --notebook-dir=./ --ip=* --no-browser
```
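
If Jupyter is not already installed in the conda environment created in Section 1, you can add it first (a minimal sketch; installing via pip into the active environment is an assumption about how you manage packages):

```bash
# Install Jupyter into the currently active conda environment.
pip install jupyter
```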
#### **2.3 Python Script**

You can directly write Analytics Zoo programs in a Python file (e.g. `script.py`) and run it in the command line as a normal Python program:

```bash
python script.py
```
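
For reference, a minimal `script.py` could look like the following sketch. It only initializes and stops the Orca context; your own data processing and training code would go in between (the `cluster_mode="local"` and `cores=2` arguments are just one possible setting):

```python
# script.py: a minimal Analytics Zoo program (sketch)
from zoo.orca import init_orca_context, stop_orca_context

# Start analytics-zoo on the underlying cluster (local mode here).
sc = init_orca_context(cluster_mode="local", cores=2)

# ... your Analytics Zoo / Orca code goes here ...

# Release the resources when the program finishes.
stop_orca_context()
```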
---

### **3. Python Dependencies**

We recommend using [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to manage your Python dependencies. Libraries installed in the current conda environment will be automatically distributed to the cluster when calling `init_orca_context`. You can also add extra dependencies as `.py`, `.zip` and `.egg` files by specifying the `extra_python_lib` argument of `init_orca_context`, as sketched below.
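
A minimal sketch of passing extra dependencies (the file names and cluster settings here are hypothetical placeholders):

```python
from zoo.orca import init_orca_context

# Distribute a local module and a zipped package to the cluster
# along with the conda environment (paths are illustrative only).
sc = init_orca_context(cluster_mode="yarn-client",
                       cores=4,
                       extra_python_lib="my_utils.py,my_package.zip")
```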
For more details, please refer to [Orca Context](../Orca/Overview/orca-context.md).

---

### **4. Compatibility**

Analytics Zoo has been tested on __Python 3.6 and 3.7__ with the following library versions:

```bash
pyspark==2.4.6
ray==1.2.0
tensorflow==1.15.0 or >2.0
pytorch>=1.5.0
torchvision>=0.6.0
horovod==0.19.2
mxnet>=1.6.0
bayesian-optimization==1.1.0
dask==2.14.0
h5py==2.10.0
numpy==1.18.1
opencv-python==4.2.0.34
pandas==1.0.3
Pillow==7.1.1
protobuf==3.12.0
psutil==5.7.0
py4j==0.10.7
redis==3.4.1
scikit-learn==0.22.2.post1
scipy==1.4.1
tensorboard==1.15.0
tensorboardX>=2.1
tensorflow-datasets==3.2.0
tensorflow-estimator==1.15.1
tensorflow-gan==2.0.0
tensorflow-hub==0.8.0
tensorflow-metadata==0.21.1
tensorflow-probability==0.7.0
Theano==1.0.4
```
---

### **5. Known Issues**

- If you meet the following error when running `pip install analytics-zoo`:
  ```
  ERROR: Could not find a version that satisfies the requirement pypandoc (from versions: none)
  ERROR: No matching distribution found for pypandoc
  Could not import pypandoc - required to package PySpark
  Traceback (most recent call last):
    File "/root/anaconda3/lib/python3.8/site-packages/setuptools/installer.py", line 126, in fetch_build_egg
      subprocess.check_call(cmd)
    File "/root/anaconda3/lib/python3.8/subprocess.py", line 364, in check_call
      raise CalledProcessError(retcode, cmd)
  subprocess.CalledProcessError: Command '['/root/anaconda3/bin/python', '-m', 'pip', '--disable-pip-version-check', 'wheel', '--no-deps', '-w', '/tmp/tmprefr87ue', '--quiet', 'pypandoc']' returned non-zero exit status 1.
  ```
  This is actually caused by `pip install pyspark` in your Python environment. You can fix it by running `pip install pypandoc` first and then `pip install analytics-zoo`, as shown below.
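
  The workaround in shell form (a sketch of the two commands mentioned above):

  ```bash
  # Install pypandoc first so that packaging PySpark succeeds,
  # then retry the Analytics Zoo installation.
  pip install pypandoc
  pip install analytics-zoo
  ```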
163
docs/readthedocs/source/doc/UserGuide/scala.md
Normal file
@ -0,0 +1,163 @@
# Scala User Guide

---

### **1. Try Analytics Zoo Examples**
This section will show you how to download the Analytics Zoo prebuilt packages and run the built-in examples.

#### **1.1 Download and config**
You can download the Analytics Zoo official releases and nightly builds from the [Release Page](../release.md). After extracting the prebuilt package, you need to set the environment variables **ANALYTICS_ZOO_HOME** and **SPARK_HOME** as follows:

```bash
export SPARK_HOME=<folder path where you extracted the Spark package>
export ANALYTICS_ZOO_HOME=<folder path where you extracted the Analytics Zoo package>
```
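
For example, if both packages were extracted under `/opt` (hypothetical paths; adjust them to your own layout), the settings might look like:

```bash
# Illustrative paths only; point these at your actual extraction directories.
export SPARK_HOME=/opt/spark-2.4.3-bin-hadoop2.7
export ANALYTICS_ZOO_HOME=/opt/analytics-zoo-bigdl_0.12.1-spark_2.4.3-0.9.0
```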
#### **1.2 Use Spark interactive shell**
You can try Analytics Zoo using the Spark interactive shell as follows:

```bash
${ANALYTICS_ZOO_HOME}/bin/spark-shell-with-zoo.sh --master local[2]
```

You will then see a welcome message like below:

```
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.4.3
      /_/

Using Scala version 2.11.12 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_112)
Type in expressions to have them evaluated.
Type :help for more information.
```

Before you try Analytics Zoo APIs, you should use `initNNContext` to verify your environment:

```scala
scala> import com.intel.analytics.zoo.common.NNContext
import com.intel.analytics.zoo.common.NNContext

scala> val sc = NNContext.initNNContext("Run Example")
2021-01-26 10:19:52 WARN  SparkContext:66 - Using an existing SparkContext; some configuration may not take effect.
2021-01-26 10:19:53 WARN  SparkContext:66 - Using an existing SparkContext; some configuration may not take effect.
sc: org.apache.spark.SparkContext = org.apache.spark.SparkContext@487f025
```
#### **1.3 Run Analytics Zoo examples**

You can run an Analytics Zoo example, e.g., the [Wide & Deep Recommendation](https://github.com/intel-analytics/analytics-zoo/tree/master/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation), as a standard Spark program (running in either local mode or cluster mode) as follows:

1. Download the Census Income Dataset to `./data/census` from [here](https://archive.ics.uci.edu/ml/datasets/Census+Income).

2. Run the following command (change the jar file name below if your download is not spark_2.4.3-0.9.0):
```bash
# Spark local mode
${ANALYTICS_ZOO_HOME}/bin/spark-submit-scala-with-zoo.sh \
    --master local[2] \
    --class com.intel.analytics.zoo.examples.recommendation.WideAndDeepExample \
    dist/lib/analytics-zoo-bigdl_0.12.1-spark_2.4.3-0.9.0-jar-with-dependencies.jar \
    --inputDir ./data/census \
    --batchSize 320 \
    --maxEpoch 20 \
    --dataset census

# Spark standalone mode (set --master to your Spark master address)
${ANALYTICS_ZOO_HOME}/bin/spark-submit-scala-with-zoo.sh \
    --master spark://... \
    --executor-cores cores_per_executor \
    --total-executor-cores total_cores_for_the_job \
    --class com.intel.analytics.zoo.examples.recommendation.WideAndDeepExample \
    dist/lib/analytics-zoo-bigdl_0.12.1-spark_2.4.3-0.9.0-jar-with-dependencies.jar \
    --inputDir ./data/census \
    --batchSize 320 \
    --maxEpoch 20 \
    --dataset census

# Spark yarn client mode, please make sure the right HADOOP_CONF_DIR is set
${ANALYTICS_ZOO_HOME}/bin/spark-submit-scala-with-zoo.sh \
    --master yarn \
    --deploy-mode client \
    --executor-cores cores_per_executor \
    --num-executors executors_number \
    --class com.intel.analytics.zoo.examples.recommendation.WideAndDeepExample \
    dist/lib/analytics-zoo-bigdl_0.12.1-spark_2.4.3-0.9.0-jar-with-dependencies.jar \
    --inputDir ./data/census \
    --batchSize 320 \
    --maxEpoch 20 \
    --dataset census

# Spark yarn cluster mode, please make sure the right HADOOP_CONF_DIR is set
${ANALYTICS_ZOO_HOME}/bin/spark-submit-scala-with-zoo.sh \
    --master yarn \
    --deploy-mode cluster \
    --executor-cores cores_per_executor \
    --num-executors executors_number \
    --class com.intel.analytics.zoo.examples.recommendation.WideAndDeepExample \
    dist/lib/analytics-zoo-bigdl_0.12.1-spark_2.4.3-0.9.0-jar-with-dependencies.jar \
    --inputDir ./data/census \
    --batchSize 320 \
    --maxEpoch 20 \
    --dataset census
```

---
### **2. Build Analytics Zoo Applications**

This section will show you how to build your own deep learning project with Analytics Zoo.

#### **2.1 Add Analytics Zoo dependency**
##### **2.1.1 Official Release**
Currently, Analytics Zoo releases are hosted on Maven Central; below is an example of adding the Analytics Zoo dependency to your own project:

```xml
<dependency>
    <groupId>com.intel.analytics.zoo</groupId>
    <artifactId>analytics-zoo-bigdl_0.12.1-spark_2.4.3</artifactId>
    <version>0.9.0</version>
</dependency>
```

You can find the other Spark versions [here](https://search.maven.org/search?q=analytics-zoo-bigdl), such as `spark_2.1.1`, `spark_2.2.1`, `spark_2.3.1`, `spark_3.0.0`.

SBT developers can use
```sbt
libraryDependencies += "com.intel.analytics.zoo" % "analytics-zoo-bigdl_0.12.1-spark_2.4.3" % "0.9.0"
```

##### **2.1.2 Nightly Build**

Currently, the Analytics Zoo nightly build is hosted on [Sonatype](https://oss.sonatype.org/content/groups/public/com/intel/analytics/zoo/).

To link your application with the latest Analytics Zoo nightly build, you should add the same dependencies as for the [official releases](#11-official-release), but change `0.9.0` to the snapshot version (such as `0.10.0-SNAPSHOT`, as sketched after this subsection), and add the repository below to your pom.xml:

```xml
<repository>
    <id>sonatype</id>
    <name>sonatype repository</name>
    <url>https://oss.sonatype.org/content/groups/public/</url>
    <releases>
        <enabled>true</enabled>
    </releases>
    <snapshots>
        <enabled>true</enabled>
    </snapshots>
</repository>
```

SBT developers can use
```sbt
resolvers += "ossrh repository" at "https://oss.sonatype.org/content/repositories/snapshots/"
```
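
For instance, a nightly-build Maven dependency would look like the following sketch (the exact snapshot version, here `0.10.0-SNAPSHOT`, is illustrative; check Sonatype for the versions actually published):

```xml
<!-- Same artifact as the official release, but with a SNAPSHOT version -->
<dependency>
    <groupId>com.intel.analytics.zoo</groupId>
    <artifactId>analytics-zoo-bigdl_0.12.1-spark_2.4.3</artifactId>
    <version>0.10.0-SNAPSHOT</version>
</dependency>
```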
#### **2.2 Build a Scala project**
To enable Analytics Zoo in your project, you should add Analytics Zoo to your project's dependencies using Maven or SBT as described above. Here is a [simple MLP example](https://github.com/intel-analytics/zoo-tutorials/tree/master/scala/SimpleMlp) to show you how to use Analytics Zoo to build your own deep learning project using Maven or SBT, and how to run the simple example in IDEA and with spark-submit.
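
As a minimal sketch (not the MLP tutorial itself), the entry point of such a project typically just creates an `NNContext`, in the same way as the interactive-shell example above:

```scala
// A minimal Analytics Zoo application skeleton (illustrative only).
import com.intel.analytics.zoo.common.NNContext

object MyZooApp {
  def main(args: Array[String]): Unit = {
    // Initialize Analytics Zoo on top of Spark.
    val sc = NNContext.initNNContext("MyZooApp")

    // ... build and train your model here ...

    sc.stop() // Release Spark resources when done.
  }
}
```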
179
docs/readthedocs/source/doc/release.md
Normal file
@ -0,0 +1,179 @@
# Release Download

- **Release 0.12.0 nightly build**

<table border="1" cellpadding="10">
  <tr><td></td><td>BigDL 0.13.0</td></tr>
  <tr><td>Spark 2.1.1</td><td><a href="https://oss.sonatype.org/content/repositories/snapshots/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_2.1.1/0.12.0-SNAPSHOT/">download</a></td></tr>
  <tr><td>Spark 2.2.1</td><td><a href="https://oss.sonatype.org/content/repositories/snapshots/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_2.2.1/0.12.0-SNAPSHOT/">download</a></td></tr>
  <tr><td>Spark 2.3.1</td><td><a href="https://oss.sonatype.org/content/repositories/snapshots/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_2.3.1/0.12.0-SNAPSHOT/">download</a></td></tr>
  <tr><td>Spark 2.4.6</td><td><a href="https://oss.sonatype.org/content/repositories/snapshots/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_2.4.6/0.12.0-SNAPSHOT/">download</a></td></tr>
  <tr><td>Spark 3.0.0</td><td><a href="https://oss.sonatype.org/content/repositories/snapshots/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_3.0.0/0.12.0-SNAPSHOT/">download</a></td></tr>
</table>
<br>
- **Release 0.11.0**

<table border="1" cellpadding="10">
  <tr><td></td><td>BigDL 0.13.0</td></tr>
  <tr><td>Spark 2.1.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_2.1.1/0.11.0/analytics-zoo-bigdl_0.13.0-spark_2.1.1-0.11.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.2.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_2.2.1/0.11.0/analytics-zoo-bigdl_0.13.0-spark_2.2.1-0.11.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.3.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_2.3.1/0.11.0/analytics-zoo-bigdl_0.13.0-spark_2.3.1-0.11.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.4.6</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_2.4.6/0.11.0/analytics-zoo-bigdl_0.13.0-spark_2.4.6-0.11.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 3.0.0</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.13.0-spark_3.0.0/0.11.0/analytics-zoo-bigdl_0.13.0-spark_3.0.0-0.11.0-dist-all.zip">download</a></td></tr>
</table>
<br>
- **Release 0.10.0**

<table border="1" cellpadding="10">
  <tr><td></td><td>BigDL 0.12.2</td></tr>
  <tr><td>Spark 2.1.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.2-spark_2.1.1/0.10.0/analytics-zoo-bigdl_0.12.2-spark_2.1.1-0.10.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.2.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.2-spark_2.2.1/0.10.0/analytics-zoo-bigdl_0.12.2-spark_2.2.1-0.10.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.3.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.2-spark_2.3.1/0.10.0/analytics-zoo-bigdl_0.12.2-spark_2.3.1-0.10.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.4.3</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.2-spark_2.4.3/0.10.0/analytics-zoo-bigdl_0.12.2-spark_2.4.3-0.10.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 3.0.0</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.2-spark_3.0.0/0.10.0/analytics-zoo-bigdl_0.12.2-spark_3.0.0-0.10.0-dist-all.zip">download</a></td></tr>
</table>
<br>
- **Release 0.9.0**

<table border="1" cellpadding="10">
  <tr><td></td><td>BigDL 0.12.1</td></tr>
  <tr><td>Spark 2.1.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.1-spark_2.1.1/0.9.0/analytics-zoo-bigdl_0.12.1-spark_2.1.1-0.9.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.2.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.1-spark_2.2.1/0.9.0/analytics-zoo-bigdl_0.12.1-spark_2.2.1-0.9.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.3.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.1-spark_2.3.1/0.9.0/analytics-zoo-bigdl_0.12.1-spark_2.3.1-0.9.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.4.3</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.1-spark_2.4.3/0.9.0/analytics-zoo-bigdl_0.12.1-spark_2.4.3-0.9.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 3.0.0</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.12.1-spark_3.0.0/0.9.0/analytics-zoo-bigdl_0.12.1-spark_3.0.0-0.9.0-dist-all.zip">download</a></td></tr>
</table>
<br>
- **Release 0.8.1**

<table border="1" cellpadding="10">
  <tr><td></td><td>BigDL 0.10.0</td></tr>
  <tr><td>Spark 2.1.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.10.0-spark_2.1.1/0.8.1/analytics-zoo-bigdl_0.10.0-spark_2.1.1-0.8.1-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.2.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.10.0-spark_2.2.1/0.8.1/analytics-zoo-bigdl_0.10.0-spark_2.2.1-0.8.1-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.3.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.10.0-spark_2.3.1/0.8.1/analytics-zoo-bigdl_0.10.0-spark_2.3.1-0.8.1-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.4.3</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.10.0-spark_2.4.3/0.8.1/analytics-zoo-bigdl_0.10.0-spark_2.4.3-0.8.1-dist-all.zip">download</a></td></tr>
</table>
<br>
- **Release 0.7.0**

<table border="1" cellpadding="10">
  <tr><td></td><td>BigDL 0.10.0</td></tr>
  <tr><td>Spark 2.1.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.10.0-spark_2.1.1/0.7.0/analytics-zoo-bigdl_0.10.0-spark_2.1.1-0.7.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.2.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.10.0-spark_2.2.1/0.7.0/analytics-zoo-bigdl_0.10.0-spark_2.2.1-0.7.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.3.1</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.10.0-spark_2.3.1/0.7.0/analytics-zoo-bigdl_0.10.0-spark_2.3.1-0.7.0-dist-all.zip">download</a></td></tr>
  <tr><td>Spark 2.4.3</td><td><a href="https://repo1.maven.org/maven2/com/intel/analytics/zoo/analytics-zoo-bigdl_0.10.0-spark_2.4.3/0.7.0/analytics-zoo-bigdl_0.10.0-spark_2.4.3-0.7.0-dist-all.zip">download</a></td></tr>
</table>
<br>
86
docs/readthedocs/source/index.rst
Normal file
@ -0,0 +1,86 @@
Analytics Zoo Documentation
===========================

------

`Analytics Zoo <https://github.com/intel-analytics/analytics-zoo>`_ is an open source Big Data AI platform, and includes the following features for scaling end-to-end AI to distributed Big Data:

* `Orca <doc/Orca/Overview/orca.html>`_: seamlessly scale out TensorFlow and PyTorch for Big Data (using Spark & Ray)
* `RayOnSpark <doc/Ray/Overview/ray.html>`_: run Ray programs directly on Big Data clusters
* **BigDL Extensions**: high-level `Spark ML pipeline <doc/UseCase/nnframes.html>`_ and `Keras-like <doc/UseCase/keras-api.html>`_ APIs for BigDL
* `Chronos <doc/Chronos/Overview/chronos.html>`_: scalable time series analysis using AutoML
* `PPML <doc/PPML/Overview/ppml.html>`_: privacy preserving big data analysis and machine learning (*experimental*)

-------

.. meta::
   :google-site-verification: hG9ocvSRSRTY5z8g6RLn97_tdJvYRx_tVGhNdtZZavM

.. toctree::
   :maxdepth: 1
   :caption: Quick Start

   doc/Orca/QuickStart/orca-tf-quickstart.md
   doc/Orca/QuickStart/orca-keras-quickstart.md
   doc/Orca/QuickStart/orca-tf2keras-quickstart.md
   doc/Orca/QuickStart/orca-pytorch-quickstart.md
   doc/Ray/QuickStart/ray-quickstart.md

.. toctree::
   :maxdepth: 1
   :caption: User Guide

   doc/UserGuide/python.md
   doc/UserGuide/scala.md
   doc/UserGuide/colab.md
   doc/UserGuide/docker.md
   doc/UserGuide/hadoop.md
   doc/UserGuide/k8s.md
   doc/UserGuide/databricks.md
   doc/Ray/Overview/ray.md
   doc/Chronos/Overview/chronos.md
   doc/PPML/Overview/ppml.md
   doc/UserGuide/develop.md

.. toctree::
   :maxdepth: 1
   :caption: Common Use Case

   doc/Orca/QuickStart/orca-pytorch-distributed-quickstart.md
   doc/UseCase/spark-dataframe.md
   doc/UseCase/xshards-pandas.md
   doc/Chronos/QuickStart/chronos-autotsest-quickstart.md
   doc/Chronos/QuickStart/chronos-tsdataset-forecaster-quickstart.md
   doc/Chronos/QuickStart/chronos-anomaly-detector.md
   doc/UseCase/keras-api.md
   doc/UseCase/nnframes.md
   doc/Orca/QuickStart/orca-autoestimator-pytorch-quickstart.md
   doc/Orca/QuickStart/orca-autoxgboost-quickstart.md

.. toctree::
   :maxdepth: 1
   :caption: Orca Overview

   doc/Orca/Overview/orca.md
   doc/Orca/Overview/orca-context.md
   doc/Orca/Overview/data-parallel-processing.md
   doc/Orca/Overview/distributed-training-inference.md
   doc/Orca/Overview/distributed-tuning.md

.. toctree::
   :maxdepth: 1
   :caption: Python API

   doc/PythonAPI/Orca/orca.rst
   doc/PythonAPI/AutoML/automl.rst
   doc/PythonAPI/Friesian/feature.rst
   doc/PythonAPI/Chronos/index.rst

.. toctree::
   :maxdepth: 1
   :caption: Real-World Application

   doc/Application/presentations.md
   doc/Application/powered-by.md