diff --git a/docs/readthedocs/source/_templates/sidebar_quicklinks.html b/docs/readthedocs/source/_templates/sidebar_quicklinks.html
index ef1ea3eb..673011fa 100644
--- a/docs/readthedocs/source/_templates/sidebar_quicklinks.html
+++ b/docs/readthedocs/source/_templates/sidebar_quicklinks.html
@@ -49,6 +49,9 @@
+
+
+By following these steps, you can serve your models through the web UI with IPEX-LLM as the backend. You can now open your browser and chat with a model.
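+
+For reference, the sketch below shows one way to launch the Gradio web UI (assuming the FastChat controller and an IPEX-LLM worker from the previous steps are already running; the host and port values are illustrative):
+
+```bash
+# Launch the Gradio web UI; host and port here are illustrative
+python3 -m fastchat.serve.gradio_web_server --host 0.0.0.0 --port 7860
+```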
+
+### Launch RESTful API server
+
+To start an OpenAI-compatible API server with the IPEX-LLM backend, launch the `openai_api_server` module and follow this [doc](https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md) for usage details.
+
+Once the controller and the worker are running, you can start the RESTful API server as follows:
+
+```bash
+python3 -m fastchat.serve.openai_api_server --host localhost --port 8000
+```
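+
+The command above binds the server to `localhost` only. If the API server needs to be reachable from other machines, you can bind it to all interfaces instead (same flags, different host value):
+
+```bash
+# Expose the API server on all network interfaces
+python3 -m fastchat.serve.openai_api_server --host 0.0.0.0 --port 8000
+```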
+
+You can use `curl` to observe the output of each API, and pipe the response through `jq` to pretty-print the JSON.
+
+#### List Models
+
+```bash
+curl http://localhost:8000/v1/models | jq
+```
+
+Example output:
+
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "Llama-2-7b-chat-hf",
+      "object": "model",
+      "created": 1712919071,
+      "owned_by": "fastchat",
+      "root": "Llama-2-7b-chat-hf",
+      "parent": null,
+      "permission": [
+        {
+          "id": "modelperm-XpFyEE7Sewx4XYbEcdbCVz",
+          "object": "model_permission",
+          "created": 1712919071,
+          "allow_create_engine": false,
+          "allow_sampling": true,
+          "allow_logprobs": true,
+          "allow_search_indices": true,
+          "allow_view": true,
+          "allow_fine_tuning": false,
+          "organization": "*",
+          "group": null,
+          "is_blocking": false
+        }
+      ]
+    }
+  ]
+}
+```
+
+#### Chat Completions
+
+```bash
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Llama-2-7b-chat-hf",
+    "messages": [{"role": "user", "content": "Hello! What is your name?"}]
+  }' | jq
+```
+
+Example output:
+
+```json
+{
+  "id": "chatcmpl-jJ9vKSGkcDMTxKfLxK7q2x",
+  "object": "chat.completion",
+  "created": 1712919092,
+  "model": "Llama-2-7b-chat-hf",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": " Hello! My name is LLaMA, I'm a large language model trained by a team of researcher at Meta AI. Unterscheidung. 😊"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 15,
+    "total_tokens": 53,
+    "completion_tokens": 38
+  }
+}
+```
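+
+The chat completions endpoint also accepts the standard OpenAI `stream` parameter. A minimal sketch, assuming the same server and model as above:
+
+```bash
+# Stream the response as server-sent events instead of a single JSON object
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Llama-2-7b-chat-hf",
+    "messages": [{"role": "user", "content": "Hello! What is your name?"}],
+    "stream": true
+  }'
+```
+
+With `stream` set to `true`, the server returns incremental `data: {...}` chunks, so the output is not piped through `jq` here.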
+
+#### Text Completions
+
+```bash
+curl http://localhost:8000/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Llama-2-7b-chat-hf",
+    "prompt": "Once upon a time",
+    "max_tokens": 41,
+    "temperature": 0.5
+  }' | jq
+```
+
+Example output:
+
+```json
+{
+  "id": "cmpl-PsAkpTWMmBLzWCTtM4r97Y",
+  "object": "text_completion",
+  "created": 1712919307,
+  "model": "Llama-2-7b-chat-hf",
+  "choices": [
+    {
+      "index": 0,
+      "text": ", in a far-off land, there was a magical kingdom called \"Happily Ever Laughter.\" It was a place where laughter was the key to happiness, and everyone who ",
+      "logprobs": null,
+      "finish_reason": "length"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 5,
+    "total_tokens": 45,
+    "completion_tokens": 40
+  }
+}
+```
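+
+#### Embeddings
+
+FastChat's OpenAI-compatible server also exposes an embeddings endpoint (covered in the FastChat doc linked above). A hedged sketch, assuming the loaded model can produce embeddings:
+
+```bash
+# Request an embedding vector for a piece of text
+curl http://localhost:8000/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Llama-2-7b-chat-hf",
+    "input": "Once upon a time"
+  }' | jq
+```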
diff --git a/docs/readthedocs/source/doc/LLM/Quickstart/index.rst b/docs/readthedocs/source/doc/LLM/Quickstart/index.rst
index ea9df495..adaa6fb8 100644
--- a/docs/readthedocs/source/doc/LLM/Quickstart/index.rst
+++ b/docs/readthedocs/source/doc/LLM/Quickstart/index.rst
@@ -19,6 +19,7 @@ This section includes efficient guide to show you how to:
 * `Run Coding Copilot (Continue) in VSCode with Intel GPU <./continue_quickstart.html>`_
 * `Run llama.cpp with IPEX-LLM on Intel GPU <./llama_cpp_quickstart.html>`_
 * `Run Ollama with IPEX-LLM on Intel GPU <./ollama_quickstart.html>`_
+* `Run IPEX-LLM Serving with FastChat <./fastchat_quickstart.html>`_
 
 .. |bigdl_llm_migration_guide| replace:: ``bigdl-llm`` Migration Guide
 .. _bigdl_llm_migration_guide: bigdl_llm_migration.html