From b3b341173ff677829fb20fc3ecc3e96e4fcbaac0 Mon Sep 17 00:00:00 2001
From: Yongteng Lei
Date: Wed, 26 Feb 2025 15:52:26 +0800
Subject: [PATCH] DOCS: add OpenAI-compatible http and python api reference
 (#5374)

### What problem does this PR solve?

Add an OpenAI-compatible HTTP and Python API reference.

### Type of change

- [x] Documentation Update

---------

Co-authored-by: Kevin Hu
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
---
 api/apps/sdk/session.py                 |  37 +++---
 docs/references/http_api_reference.md   | 148 ++++++++++++++++++++++++
 docs/references/python_api_reference.md |  53 +++++++++
 3 files changed, 223 insertions(+), 15 deletions(-)

diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index 486dd5af..1ee2673e 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -217,7 +217,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": "Who you are?"},
+            {"role": "user", "content": "Who are you?"},
             {"role": "assistant", "content": "I am an AI assistant named..."},
             {"role": "user", "content": "Can you tell me how to install neovim"},
         ],
@@ -236,14 +236,20 @@ def chat_completion_openai_like(tenant_id, chat_id):
     messages = req.get("messages", [])
     # To prevent empty [] input
     if len(messages) < 1:
-        return get_error_data_result("You have to provide messages")
+        return get_error_data_result("You have to provide messages.")
+    if messages[-1]["role"] != "user":
+        return get_error_data_result("The last content of this conversation is not from user.")
+
+    prompt = messages[-1]["content"]
+    # Treat context tokens as reasoning tokens (character counts serve as a rough token proxy)
+    context_token_used = sum(len(message["content"]) for message in messages)
 
     dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
     if not dia:
         return get_error_data_result(f"You don't own the chat {chat_id}")
     dia = dia[0]
 
-    # Filter system and assistant messages
+    # Filter out system messages and any leading assistant messages
     msg = None
     msg = [m for m in messages if m["role"] != "system" and (m["role"] != "assistant" or msg)]
 
@@ -251,7 +257,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
     # The value for the usage field on all chunks except for the last one will be null.
    # The usage field on the last chunk contains token usage statistics for the entire request.
    # The choices field on the last chunk will always be an empty array [].
-    def streamed_respose_generator(chat_id, dia, msg):
+    def streamed_response_generator(chat_id, dia, msg):
         token_used = 0
         response = {
             "id": f"chatcmpl-{chat_id}",
@@ -286,17 +292,17 @@ def chat_completion_openai_like(tenant_id, chat_id):
             response["choices"][0]["delta"]["content"] = "**ERROR**: " + str(e)
             yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
 
-        # The last chunck
+        # The last chunk
         response["choices"][0]["delta"]["content"] = None
         response["choices"][0]["finish_reason"] = "stop"
         response["usage"] = {
-            "prompt_tokens": len(msg),
+            "prompt_tokens": len(prompt),
             "completion_tokens": token_used,
-            "total_tokens": len(msg) + token_used
+            "total_tokens": len(prompt) + token_used
         }
         yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
 
-    resp = Response(streamed_respose_generator(chat_id, dia, msg), mimetype="text/event-stream")
+    resp = Response(streamed_response_generator(chat_id, dia, msg), mimetype="text/event-stream")
     resp.headers.add_header("Cache-control", "no-cache")
     resp.headers.add_header("Connection", "keep-alive")
     resp.headers.add_header("X-Accel-Buffering", "no")
@@ -308,6 +314,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
         # focus answer content only
         answer = ans
         break
+    content = answer["answer"]
 
     response = {
         "id": f"chatcmpl-{chat_id}",
@@ -315,20 +322,20 @@ def chat_completion_openai_like(tenant_id, chat_id):
         "created": int(time.time()),
         "model": req.get("model", ""),
         "usage": {
-            "prompt_tokens": len(messages),
-            "completion_tokens": len(answer),
-            "total_tokens": len(messages) + len(answer),
+            "prompt_tokens": len(prompt),
+            "completion_tokens": len(content),
+            "total_tokens": len(prompt) + len(content),
             "completion_tokens_details": {
-                "reasoning_tokens": len(answer),
-                "accepted_prediction_tokens": len(answer),
-                "rejected_prediction_tokens": len(answer)
+                "reasoning_tokens": context_token_used,
+                "accepted_prediction_tokens": len(content),
+                "rejected_prediction_tokens": 0  # 0 for simplicity
             }
         },
         "choices": [
             {
                 "message": {
                     "role": "assistant",
-                    "content": answer["answer"]
+                    "content": content
                 },
                 "logprobs": None,
                 "finish_reason": "stop",
diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md
index 480a980e..920ff08c 100644
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -9,6 +9,154 @@ A complete reference for RAGFlow's RESTful API. Before proceeding, please ensure
 
 ---
 
+## OpenAI-Compatible API
+
+---
+
+### Create chat completion
+
+**POST** `/api/v1/chats_openai/{chat_id}/chat/completions`
+
+Creates a model response for a given chat conversation.
+
+This API follows the same request and response format as [OpenAI's API](https://platform.openai.com/docs/api-reference/chat/create), allowing you to interact with the model just as you would with OpenAI.
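+
+For example, you can call this endpoint with the official OpenAI Python client. The snippet below is a minimal sketch; `{address}`, `{chat_id}`, and `<YOUR_API_KEY>` are placeholders for your own values:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="<YOUR_API_KEY>",
+    base_url="http://{address}/api/v1/chats_openai/{chat_id}",
+)
+
+completion = client.chat.completions.create(
+    model="model",  # parsed by the server, so any value works for now
+    messages=[{"role": "user", "content": "Say this is a test!"}],
+    stream=False,
+)
+print(completion.choices[0].message.content)
+```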
+
+#### Request
+
+- Method: POST
+- URL: `/api/v1/chats_openai/{chat_id}/chat/completions`
+- Headers:
+  - `'Content-Type: application/json'`
+  - `'Authorization: Bearer <YOUR_API_KEY>'`
+- Body:
+  - `"model"`: `string`
+  - `"messages"`: `object list`
+  - `"stream"`: `boolean`
+
+##### Request example
+
+```bash
+curl --request POST \
+     --url http://{address}/api/v1/chats_openai/{chat_id}/chat/completions \
+     --header 'Content-Type: application/json' \
+     --header 'Authorization: Bearer <YOUR_API_KEY>' \
+     --data '{
+        "model": "model",
+        "messages": [{"role": "user", "content": "Say this is a test!"}],
+        "stream": true
+     }'
+```
+
+##### Request parameters
+
+- `model` (*Body parameter*) `string`, *Required*
+  The model used to generate the response. The server will parse this automatically, so you can set it to any value for now.
+
+- `messages` (*Body parameter*) `list[object]`, *Required*
+  A list of historical chat messages used to generate the response. This must contain at least one message with the `user` role.
+
+- `stream` (*Body parameter*) `boolean`
+  Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.
+
+#### Response
+
+Stream:
+
+```json
+{
+    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
+    "choices": [
+        {
+            "delta": {
+                "content": "This is a test. If you have any specific questions or need information, feel",
+                "role": "assistant",
+                "function_call": null,
+                "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+        }
+    ],
+    "created": 1740543996,
+    "model": "model",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "",
+    "usage": null
+}
+// subsequent chunks omit the fields that repeat the first chunk
+{"choices":[{"delta":{"content":" free to ask, and I will do my best to provide an answer based on","role":"assistant"}}]}
+{"choices":[{"delta":{"content":" the knowledge I have. If your question is unrelated to the provided knowledge base,","role":"assistant"}}]}
+{"choices":[{"delta":{"content":" I will let you know.","role":"assistant"}}]}
+// the last chunk
+{
+    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
+    "choices": [
+        {
+            "delta": {
+                "content": null,
+                "role": "assistant",
+                "function_call": null,
+                "tool_calls": null
+            },
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null
+        }
+    ],
+    "created": 1740543996,
+    "model": "model",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "",
+    "usage": {
+        "prompt_tokens": 18,
+        "completion_tokens": 225,
+        "total_tokens": 243
+    }
+}
+```
+
+Non-stream:
+
+```json
+{
+    "choices":[
+        {
+            "finish_reason":"stop",
+            "index":0,
+            "logprobs":null,
+            "message":{
+                "content":"This is a test. If you have any specific questions or need information, feel free to ask, and I will do my best to provide an answer based on the knowledge I have. If your question is unrelated to the provided knowledge base, I will let you know.",
+                "role":"assistant"
+            }
+        }
+    ],
+    "created":1740543499,
+    "id":"chatcmpl-3a9c3572f29311efa69751e139332ced",
+    "model":"model",
+    "object":"chat.completion",
+    "usage":{
+        "completion_tokens":246,
+        "completion_tokens_details":{
+            "accepted_prediction_tokens":246,
+            "reasoning_tokens":18,
+            "rejected_prediction_tokens":0
+        },
+        "prompt_tokens":18,
+        "total_tokens":264
+    }
+}
+```
+
+Failure:
+
+```json
+{
+    "code": 102,
+    "message": "The last content of this conversation is not from user."
+}
+```
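+
+Note that the failure payload is returned as a regular JSON body rather than as an OpenAI-style error object. The sketch below shows one way a raw HTTP caller might detect it; the check on the `code` field is an assumption based on the error envelope above, and the placeholders are the same as in the request example:
+
+```python
+import requests
+
+resp = requests.post(
+    "http://{address}/api/v1/chats_openai/{chat_id}/chat/completions",
+    headers={"Authorization": "Bearer <YOUR_API_KEY>"},
+    json={
+        "model": "model",
+        # The last message is not from the user, which triggers the failure above.
+        "messages": [{"role": "assistant", "content": "Hello!"}],
+        "stream": False,
+    },
+)
+body = resp.json()
+if "code" in body:
+    # Error envelope, e.g. {"code": 102, "message": "..."}
+    raise RuntimeError(f"RAGFlow error {body['code']}: {body['message']}")
+print(body["choices"][0]["message"]["content"])
+```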
+
 ## DATASET MANAGEMENT
 
 ---
 
diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md
index b95c644e..6d4f7944 100644
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -13,10 +13,63 @@ Run the following command to download the Python SDK:
 ```bash
 pip install ragflow-sdk
 ```
+
 :::
 
 ---
 
+## OpenAI-Compatible API
+
+---
+
+### Create chat completion
+
+Creates a model response for the given historical chat conversation through RAGFlow's OpenAI-compatible API.
+
+#### Parameters
+
+##### model: `str`, *Required*
+
+The model used to generate the response. The server will parse this automatically, so you can set it to any value for now.
+
+##### messages: `list[object]`, *Required*
+
+A list of historical chat messages used to generate the response. This must contain at least one message with the `user` role.
+
+##### stream: `boolean`
+
+Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.
+
+#### Returns
+
+- Success: a response [message](https://platform.openai.com/docs/api-reference/chat/create) in OpenAI's format
+- Failure: `Exception`
+
+#### Examples
+
+```python
+from openai import OpenAI
+
+model = "model"
+client = OpenAI(api_key="ragflow-api-key", base_url="http://{address}/api/v1/chats_openai/{chat_id}")  # replace {address} and {chat_id}
+
+stream = True
+completion = client.chat.completions.create(
+    model=model,
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Who are you?"},
+    ],
+    stream=stream
+)
+
+if stream:
+    for chunk in completion:
+        print(chunk)
+else:
+    print(completion.choices[0].message.content)
+```
+
 ## DATASET MANAGEMENT
 
 ---