From b3b341173ff677829fb20fc3ecc3e96e4fcbaac0 Mon Sep 17 00:00:00 2001
From: Yongteng Lei
Date: Wed, 26 Feb 2025 15:52:26 +0800
Subject: [PATCH] DOCS: add OpenAI-compatible http and python api reference
 (#5374)

### What problem does this PR solve?

Add an OpenAI-compatible HTTP and Python API reference.

### Type of change

- [x] Documentation Update

---------

Co-authored-by: Kevin Hu
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
---
 api/apps/sdk/session.py                 |  37 +++---
 docs/references/http_api_reference.md   | 148 ++++++++++++++++++++++++
 docs/references/python_api_reference.md |  53 +++++++++
 3 files changed, 223 insertions(+), 15 deletions(-)

diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index 486dd5af..1ee2673e 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -217,7 +217,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": "Who you are?"},
+            {"role": "user", "content": "Who are you?"},
             {"role": "assistant", "content": "I am an AI assistant named..."},
             {"role": "user", "content": "Can you tell me how to install neovim"},
         ],
@@ -236,14 +236,20 @@ def chat_completion_openai_like(tenant_id, chat_id):
     messages = req.get("messages", [])
     # To prevent empty [] input
     if len(messages) < 1:
-        return get_error_data_result("You have to provide messages")
+        return get_error_data_result("You have to provide messages.")
+    if messages[-1]["role"] != "user":
+        return get_error_data_result("The last content of this conversation is not from user.")
+
+    prompt = messages[-1]["content"]
+    # Treat context tokens as reasoning tokens (character counts serve as a rough token proxy)
+    context_token_used = sum(len(message["content"]) for message in messages)
 
     dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
     if not dia:
         return get_error_data_result(f"You don't own the chat {chat_id}")
     dia = dia[0]
 
-    # Filter system and assistant messages
+    # Filter out system messages and any leading assistant messages
     msg = None
     msg = [m for m in messages if m["role"] != "system" and (m["role"] != "assistant" or msg)]
 
@@ -251,7 +257,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
     # The value for the usage field on all chunks except for the last one will be null.
    # The usage field on the last chunk contains token usage statistics for the entire request.
    # The choices field on the last chunk will always be an empty array [].
-    def streamed_respose_generator(chat_id, dia, msg):
+    def streamed_response_generator(chat_id, dia, msg):
         token_used = 0
         response = {
             "id": f"chatcmpl-{chat_id}",
@@ -286,17 +292,17 @@ def chat_completion_openai_like(tenant_id, chat_id):
             response["choices"][0]["delta"]["content"] = "**ERROR**: " + str(e)
             yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
 
-        # The last chunck
+        # The last chunk
         response["choices"][0]["delta"]["content"] = None
         response["choices"][0]["finish_reason"] = "stop"
         response["usage"] = {
-            "prompt_tokens": len(msg),
+            "prompt_tokens": len(prompt),
             "completion_tokens": token_used,
-            "total_tokens": len(msg) + token_used
+            "total_tokens": len(prompt) + token_used
         }
         yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
 
-    resp = Response(streamed_respose_generator(chat_id, dia, msg), mimetype="text/event-stream")
+    resp = Response(streamed_response_generator(chat_id, dia, msg), mimetype="text/event-stream")
     resp.headers.add_header("Cache-control", "no-cache")
     resp.headers.add_header("Connection", "keep-alive")
     resp.headers.add_header("X-Accel-Buffering", "no")
@@ -308,6 +314,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
         # focus answer content only
         answer = ans
         break
+    content = answer["answer"]
 
     response = {
         "id": f"chatcmpl-{chat_id}",
@@ -315,20 +322,20 @@ def chat_completion_openai_like(tenant_id, chat_id):
         "created": int(time.time()),
         "model": req.get("model", ""),
         "usage": {
-            "prompt_tokens": len(messages),
-            "completion_tokens": len(answer),
-            "total_tokens": len(messages) + len(answer),
+            "prompt_tokens": len(prompt),
+            "completion_tokens": len(content),
+            "total_tokens": len(prompt) + len(content),
             "completion_tokens_details": {
-                "reasoning_tokens": len(answer),
-                "accepted_prediction_tokens": len(answer),
-                "rejected_prediction_tokens": len(answer)
+                "reasoning_tokens": context_token_used,
+                "accepted_prediction_tokens": len(content),
+                "rejected_prediction_tokens": 0  # 0 for simplicity
             }
         },
         "choices": [
             {
                 "message": {
                     "role": "assistant",
-                    "content": answer["answer"]
+                    "content": content
                 },
                 "logprobs": None,
                 "finish_reason": "stop",
diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md
index 480a980e..920ff08c 100644
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -9,6 +9,154 @@ A complete reference for RAGFlow's RESTful API. Before proceeding, please ensure
 
 ---
 
+## OpenAI-Compatible API
+
+---
+
+### Create chat completion
+
+**POST** `/api/v1/chats_openai/{chat_id}/chat/completions`
+
+Creates a model response for a given chat conversation.
+
+This API follows the same request and response format as [OpenAI's API](https://platform.openai.com/docs/api-reference/chat/create), allowing you to interact with the model just as you would with OpenAI.
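+
+For example, you can call this endpoint with the official OpenAI Python client. The snippet below is a minimal sketch; `{address}`, `{chat_id}`, and `<YOUR_API_KEY>` are placeholders for your own values:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="<YOUR_API_KEY>",
+    base_url="http://{address}/api/v1/chats_openai/{chat_id}",
+)
+
+completion = client.chat.completions.create(
+    model="model",  # parsed by the server, so any value works for now
+    messages=[{"role": "user", "content": "Say this is a test!"}],
+    stream=False,
+)
+print(completion.choices[0].message.content)
+```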
+
+#### Request
+
+- Method: POST
+- URL: `/api/v1/chats_openai/{chat_id}/chat/completions`
+- Headers:
+  - `'Content-Type: application/json'`
+  - `'Authorization: Bearer <YOUR_API_KEY>'`
+- Body:
+  - `"model"`: `string`
+  - `"messages"`: `object list`
+  - `"stream"`: `boolean`
+
+##### Request example
+
+```bash
+curl --request POST \
+     --url http://{address}/api/v1/chats_openai/{chat_id}/chat/completions \
+     --header 'Content-Type: application/json' \
+     --header 'Authorization: Bearer <YOUR_API_KEY>' \
+     --data '{
+        "model": "model",
+        "messages": [{"role": "user", "content": "Say this is a test!"}],
+        "stream": true
+     }'
+```
+
+##### Request parameters
+
+- `model` (*Body parameter*) `string`, *Required*
+  The model used to generate the response. The server will parse this automatically, so you can set it to any value for now.
+
+- `messages` (*Body parameter*) `list[object]`, *Required*
+  A list of historical chat messages used to generate the response. This must contain at least one message with the `user` role.
+
+- `stream` (*Body parameter*) `boolean`
+  Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.
+
+#### Response
+
+Stream:
+
+```json
+{
+    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
+    "choices": [
+        {
+            "delta": {
+                "content": "This is a test. If you have any specific questions or need information, feel",
+                "role": "assistant",
+                "function_call": null,
+                "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+        }
+    ],
+    "created": 1740543996,
+    "model": "model",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "",
+    "usage": null
+}
+// subsequent chunks omit the fields that repeat the first chunk
+{"choices":[{"delta":{"content":" free to ask, and I will do my best to provide an answer based on","role":"assistant"}}]}
+{"choices":[{"delta":{"content":" the knowledge I have. If your question is unrelated to the provided knowledge base,","role":"assistant"}}]}
+{"choices":[{"delta":{"content":" I will let you know.","role":"assistant"}}]}
+// the last chunk
+{
+    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
+    "choices": [
+        {
+            "delta": {
+                "content": null,
+                "role": "assistant",
+                "function_call": null,
+                "tool_calls": null
+            },
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null
+        }
+    ],
+    "created": 1740543996,
+    "model": "model",
+    "object": "chat.completion.chunk",
+    "system_fingerprint": "",
+    "usage": {
+        "prompt_tokens": 18,
+        "completion_tokens": 225,
+        "total_tokens": 243
+    }
+}
+```
+
+Non-stream:
+
+```json
+{
+    "choices":[
+        {
+            "finish_reason":"stop",
+            "index":0,
+            "logprobs":null,
+            "message":{
+                "content":"This is a test. If you have any specific questions or need information, feel free to ask, and I will do my best to provide an answer based on the knowledge I have. If your question is unrelated to the provided knowledge base, I will let you know.",
+                "role":"assistant"
+            }
+        }
+    ],
+    "created":1740543499,
+    "id":"chatcmpl-3a9c3572f29311efa69751e139332ced",
+    "model":"model",
+    "object":"chat.completion",
+    "usage":{
+        "completion_tokens":246,
+        "completion_tokens_details":{
+            "accepted_prediction_tokens":246,
+            "reasoning_tokens":18,
+            "rejected_prediction_tokens":0
+        },
+        "prompt_tokens":18,
+        "total_tokens":264
+    }
+}
+```
+
+Failure:
+
+```json
+{
+    "code": 102,
+    "message": "The last content of this conversation is not from user."
+}
+```
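+
+Note that the failure payload is returned as a regular JSON body rather than as an OpenAI-style error object. The sketch below shows one way a raw HTTP caller might detect it; the check on the `code` field is an assumption based on the error envelope above, and the placeholders are the same as in the request example:
+
+```python
+import requests
+
+resp = requests.post(
+    "http://{address}/api/v1/chats_openai/{chat_id}/chat/completions",
+    headers={"Authorization": "Bearer <YOUR_API_KEY>"},
+    json={
+        "model": "model",
+        # The last message is not from the user, which triggers the failure above.
+        "messages": [{"role": "assistant", "content": "Hello!"}],
+        "stream": False,
+    },
+)
+body = resp.json()
+if "code" in body:
+    # Error envelope, e.g. {"code": 102, "message": "..."}
+    raise RuntimeError(f"RAGFlow error {body['code']}: {body['message']}")
+print(body["choices"][0]["message"]["content"])
+```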
+
 ## DATASET MANAGEMENT
 
 ---
 
diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md
index b95c644e..6d4f7944 100644
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -13,10 +13,63 @@ Run the following command to download the Python SDK:
 ```bash
 pip install ragflow-sdk
 ```
+
 :::
 
 ---
 
+## OpenAI-Compatible API
+
+---
+
+### Create chat completion
+
+Creates a model response for the given historical chat conversation through RAGFlow's OpenAI-compatible API.
+
+#### Parameters
+
+##### model: `str`, *Required*
+
+The model used to generate the response. The server will parse this automatically, so you can set it to any value for now.
+
+##### messages: `list[object]`, *Required*
+
+A list of historical chat messages used to generate the response. This must contain at least one message with the `user` role.
+
+##### stream: `boolean`
+
+Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.
+
+#### Returns
+
+- Success: a response [message](https://platform.openai.com/docs/api-reference/chat/create) in OpenAI's format
+- Failure: `Exception`
+
+#### Examples
+
+```python
+from openai import OpenAI
+
+model = "model"
+client = OpenAI(api_key="ragflow-api-key", base_url="http://{address}/api/v1/chats_openai/{chat_id}")  # replace {address} and {chat_id}
+
+stream = True
+completion = client.chat.completions.create(
+    model=model,
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Who are you?"},
+    ],
+    stream=stream
+)
+
+if stream:
+    for chunk in completion:
+        print(chunk)
+else:
+    print(completion.choices[0].message.content)
+```
+
 ## DATASET MANAGEMENT
 
 ---