From a7c12020b445264d5ee959081cf4c620b3b6c61b Mon Sep 17 00:00:00 2001
From: ZehuaCao <47251317+Romanticoseu@users.noreply.github.com>
Date: Tue, 16 Apr 2024 14:02:38 +0800
Subject: [PATCH] Add fastchat quickstart (#10688)

* add fastchat quickstart

* update

* update

* update
---
 .../source/_templates/sidebar_quicklinks.html |   3 +
 docs/readthedocs/source/_toc.yml              |   1 +
 .../doc/LLM/Quickstart/fastchat_quickstart.md | 239 ++++++++++++++++++
 .../source/doc/LLM/Quickstart/index.rst       |   1 +
 4 files changed, 244 insertions(+)
 create mode 100644 docs/readthedocs/source/doc/LLM/Quickstart/fastchat_quickstart.md

diff --git a/docs/readthedocs/source/_templates/sidebar_quicklinks.html b/docs/readthedocs/source/_templates/sidebar_quicklinks.html
index ef1ea3eb..673011fa 100644
--- a/docs/readthedocs/source/_templates/sidebar_quicklinks.html
+++ b/docs/readthedocs/source/_templates/sidebar_quicklinks.html
@@ -49,6 +49,9 @@
+
+
+By following these steps, you can serve your models through the web UI with IPEX-LLM as the backend. Open your browser and start chatting with a model.
+
+### Launch RESTful API server
+
+To start an OpenAI-compatible API server with the IPEX-LLM backend, launch `openai_api_server` and follow this [doc](https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md) to use it.
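+
+If the FastChat controller and the IPEX-LLM model worker from the earlier steps of this quickstart are not running yet, bring them up first. The commands below are only a sketch; the worker module name, model path, and flags are assumptions here, so follow the worker section earlier in this quickstart for the exact options:
+
+```bash
+# Terminal 1: start the FastChat controller
+python3 -m fastchat.serve.controller
+
+# Terminal 2: start the IPEX-LLM model worker (module name and flags assumed; see the worker section above)
+python3 -m ipex_llm.serving.fastchat.ipex_llm_worker \
+  --model-path /path/to/Llama-2-7b-chat-hf \
+  --low-bit sym_int4 \
+  --device xpu
+```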
+
+Once the controller and the worker are running, you can start the RESTful API server as follows:
+
+```bash
+python3 -m fastchat.serve.openai_api_server --host localhost --port 8000
+```
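+
+Any OpenAI-compatible client can then be pointed at this server. For example, you can set the base URL and a placeholder key through environment variables; the exact variable names depend on your client/SDK version, so treat this as a sketch:
+
+```bash
+export OPENAI_API_BASE=http://localhost:8000/v1   # newer openai SDKs read OPENAI_BASE_URL instead
+export OPENAI_API_KEY=EMPTY                       # FastChat does not require a real key by default
+```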
+
+You can use `curl` to inspect the output of the API and pipe the response through `jq` to pretty-print the JSON.
+
+#### List Models
+
+```bash
+curl http://localhost:8000/v1/models | jq
+```
+
+Example output:
+
+```json
+{
+ "object": "list",
+ "data": [
+ {
+ "id": "Llama-2-7b-chat-hf",
+ "object": "model",
+ "created": 1712919071,
+ "owned_by": "fastchat",
+ "root": "Llama-2-7b-chat-hf",
+ "parent": null,
+ "permission": [
+ {
+ "id": "modelperm-XpFyEE7Sewx4XYbEcdbCVz",
+ "object": "model_permission",
+ "created": 1712919071,
+ "allow_create_engine": false,
+ "allow_sampling": true,
+ "allow_logprobs": true,
+ "allow_search_indices": true,
+ "allow_view": true,
+ "allow_fine_tuning": false,
+ "organization": "*",
+ "group": null,
+ "is_blocking": false
+ }
+ ]
+ }
+ ]
+}
+```
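+
+To list only the model IDs, you can filter the response with `jq`, for example:
+
+```bash
+curl -s http://localhost:8000/v1/models | jq -r '.data[].id'
+```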
+
+#### Chat Completions
+
+```bash
+curl http://localhost:8000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "Llama-2-7b-chat-hf",
+ "messages": [{"role": "user", "content": "Hello! What is your name?"}]
+ }' | jq
+```
+
+Example output:
+
+```json
+{
+ "id": "chatcmpl-jJ9vKSGkcDMTxKfLxK7q2x",
+ "object": "chat.completion",
+ "created": 1712919092,
+ "model": "Llama-2-7b-chat-hf",
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": " Hello! My name is LLaMA, I'm a large language model trained by a team of researcher at Meta AI. Unterscheidung. 😊"
+ },
+ "finish_reason": "stop"
+ }
+ ],
+ "usage": {
+ "prompt_tokens": 15,
+ "total_tokens": 53,
+ "completion_tokens": 38
+ }
+}
+```
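+
+The chat completions endpoint also supports streaming in the usual OpenAI style. A sketch, assuming the server is started as above (`"stream": true` is supported by stock FastChat):
+
+```bash
+curl -N http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Llama-2-7b-chat-hf",
+    "messages": [{"role": "user", "content": "Tell me a short joke."}],
+    "stream": true
+  }'
+```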
+
+#### Text Completions
+
+```bash
+curl http://localhost:8000/v1/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "Llama-2-7b-chat-hf",
+ "prompt": "Once upon a time",
+ "max_tokens": 41,
+ "temperature": 0.5
+ }' | jq
+```
+
+Example output:
+
+```json
+{
+ "id": "cmpl-PsAkpTWMmBLzWCTtM4r97Y",
+ "object": "text_completion",
+ "created": 1712919307,
+ "model": "Llama-2-7b-chat-hf",
+ "choices": [
+ {
+ "index": 0,
+ "text": ", in a far-off land, there was a magical kingdom called \"Happily Ever Laughter.\" It was a place where laughter was the key to happiness, and everyone who ",
+ "logprobs": null,
+ "finish_reason": "length"
+ }
+ ],
+ "usage": {
+ "prompt_tokens": 5,
+ "total_tokens": 45,
+ "completion_tokens": 40
+ }
+}
+```
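+
+As with the other endpoints, `jq` can extract just the fields you need, for example only the generated text:
+
+```bash
+curl -s http://localhost:8000/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "Llama-2-7b-chat-hf", "prompt": "Once upon a time", "max_tokens": 41}' \
+  | jq -r '.choices[0].text'
+```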
diff --git a/docs/readthedocs/source/doc/LLM/Quickstart/index.rst b/docs/readthedocs/source/doc/LLM/Quickstart/index.rst
index ea9df495..adaa6fb8 100644
--- a/docs/readthedocs/source/doc/LLM/Quickstart/index.rst
+++ b/docs/readthedocs/source/doc/LLM/Quickstart/index.rst
@@ -19,6 +19,7 @@ This section includes efficient guide to show you how to:
* `Run Coding Copilot (Continue) in VSCode with Intel GPU <./continue_quickstart.html>`_
* `Run llama.cpp with IPEX-LLM on Intel GPU <./llama_cpp_quickstart.html>`_
* `Run Ollama with IPEX-LLM on Intel GPU <./ollama_quickstart.html>`_
+* `Run IPEX-LLM Serving with FastChat <./fastchat_quickstart.html>`_
.. |bigdl_llm_migration_guide| replace:: ``bigdl-llm`` Migration Guide
.. _bigdl_llm_migration_guide: bigdl_llm_migration.html