diff --git a/sdk/python/test/test_http_api/common.py b/sdk/python/test/test_http_api/common.py
index 2b87e4d9..fc775bab 100644
--- a/sdk/python/test/test_http_api/common.py
+++ b/sdk/python/test/test_http_api/common.py
@@ -88,18 +88,25 @@ def upload_documnets(auth, dataset_id, files_path=None):
         files_path = []
 
     fields = []
-    for i, fp in enumerate(files_path):
-        p = Path(fp)
-        fields.append(("file", (p.name, p.open("rb"))))
-    m = MultipartEncoder(fields=fields)
+    file_objects = []
+    try:
+        for fp in files_path:
+            p = Path(fp)
+            f = p.open("rb")
+            fields.append(("file", (p.name, f)))
+            file_objects.append(f)
+        m = MultipartEncoder(fields=fields)
 
-    res = requests.post(
-        url=url,
-        headers={"Content-Type": m.content_type},
-        auth=auth,
-        data=m,
-    )
-    return res.json()
+        res = requests.post(
+            url=url,
+            headers={"Content-Type": m.content_type},
+            auth=auth,
+            data=m,
+        )
+        return res.json()
+    finally:
+        for f in file_objects:
+            f.close()
 
 
 def batch_upload_documents(auth, dataset_id, num, tmp_path):
diff --git a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_download_document.py b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_download_document.py
index da7d6d19..cd8ffd1e 100644
--- a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_download_document.py
+++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_download_document.py
@@ -95,9 +95,8 @@ class TestDownloadDocument:
     )
 
     @pytest.mark.parametrize(
-        "docment_id, expected_code, expected_message",
+        "document_id, expected_code, expected_message",
         [
-            pytest.param("", 0, "", marks=pytest.mark.xfail(reason="issue#6031")),
             (
                 "invalid_document_id",
                 102,
@@ -105,14 +104,14 @@ class TestDownloadDocument:
             ),
         ],
     )
-    def test_invalid_docment_id(
-        self, get_http_api_auth, tmp_path, docment_id, expected_code, expected_message
+    def test_invalid_document_id(
+        self, get_http_api_auth, tmp_path, document_id, expected_code, expected_message
     ):
         ids = create_datasets(get_http_api_auth, 1)
         res = download_document(
             get_http_api_auth,
             ids[0],
-            docment_id,
+            document_id,
             tmp_path / "ragflow_test_download_1.txt",
         )
         assert res.status_code == codes.ok
diff --git a/sdk/python/test/test_http_api/test_file_management_within_dataset/test_update_document.py b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_update_document.py
new file mode 100644
index 00000000..3d14988a
--- /dev/null
+++ b/sdk/python/test/test_http_api/test_file_management_within_dataset/test_update_document.py
@@ -0,0 +1,576 @@
+#
+# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import pytest
+from common import (
+    DOCUMENT_NAME_LIMIT,
+    INVALID_API_TOKEN,
+    batch_upload_documents,
+    create_datasets,
+    list_documnet,
+    update_documnet,
+)
+from libs.auth import RAGFlowHttpApiAuth
+
+
+class TestAuthorization:
+    @pytest.mark.parametrize(
+        "auth, expected_code, expected_message",
+        [
+            (None, 0, "`Authorization` can't be empty"),
+            (
+                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
+                109,
+                "Authentication error: API key is invalid!",
+            ),
+        ],
+    )
+    def test_invalid_auth(
+        self, get_http_api_auth, tmp_path, auth, expected_code, expected_message
+    ):
+        ids = create_datasets(get_http_api_auth, 1)
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
+        res = update_documnet(auth, ids[0], document_ids[0], {"name": "auth_test.txt"})
+        assert res["code"] == expected_code
+        assert res["message"] == expected_message
+
+
+class TestUpdatedDocument:
+    @pytest.mark.parametrize(
+        "name, expected_code, expected_message",
+        [
+            ("new_name.txt", 0, ""),
+            (
+                f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt",
+                101,
+                "The name should be less than 128 bytes.",
+            ),
+            (
+                0,
+                100,
+                """AttributeError("\'int\' object has no attribute \'encode\'")""",
+            ),
+            (
+                None,
+                100,
+                """AttributeError("\'NoneType\' object has no attribute \'encode\'")""",
+            ),
+            (
+                "",
+                101,
+                "The extension of file can't be changed",
+            ),
+            (
+                "ragflow_test_upload_0",
+                101,
+                "The extension of file can't be changed",
+            ),
+            (
+                "ragflow_test_upload_1.txt",
+                102,
+                "Duplicated document name in the same dataset.",
+            ),
+            (
+                "RAGFLOW_TEST_UPLOAD_1.TXT",
+                0,
+                "",
+            ),
+        ],
+    )
+    def test_name(
+        self, get_http_api_auth, tmp_path, name, expected_code, expected_message
+    ):
+        ids = create_datasets(get_http_api_auth, 1)
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 2, tmp_path)
+        res = update_documnet(
+            get_http_api_auth, ids[0], document_ids[0], {"name": name}
+        )
+        assert res["code"] == expected_code
+        if expected_code == 0:
+            res = list_documnet(get_http_api_auth, ids[0], {"id": document_ids[0]})
+            assert res["data"]["docs"][0]["name"] == name
+        else:
+            assert res["message"] == expected_message
+
+    @pytest.mark.parametrize(
+        "document_id, expected_code, expected_message",
+        [
+            ("", 100, ""),
+            (
+                "invalid_document_id",
+                102,
+                "The dataset doesn't own the document.",
+            ),
+        ],
+    )
+    def test_invalid_document_id(
+        self, get_http_api_auth, document_id, expected_code, expected_message
+    ):
+        ids = create_datasets(get_http_api_auth, 1)
+        res = update_documnet(
+            get_http_api_auth, ids[0], document_id, {"name": "new_name.txt"}
+        )
+        assert res["code"] == expected_code
+        assert res["message"] == expected_message
+
+    @pytest.mark.parametrize(
+        "dataset_id, expected_code, expected_message",
+        [
+            ("", 100, ""),
+            (
+                "invalid_dataset_id",
+                102,
+                "You don't own the dataset.",
+            ),
+        ],
+    )
+    def test_invalid_dataset_id(
+        self, get_http_api_auth, tmp_path, dataset_id, expected_code, expected_message
+    ):
+        ids = create_datasets(get_http_api_auth, 1)
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
+        res = update_documnet(
+            get_http_api_auth, dataset_id, document_ids[0], {"name": "new_name.txt"}
+        )
+        assert res["code"] == expected_code
+        assert res["message"] == expected_message
+
+    @pytest.mark.parametrize(
+        "meta_fields, expected_code, expected_message",
+        [({"test": "test"}, 0, ""), ("test", 102, "meta_fields must be a dictionary")],
+    )
+    def test_meta_fields(
+        self, get_http_api_auth, tmp_path, meta_fields, expected_code, expected_message
+    ):
+        ids = create_datasets(get_http_api_auth, 1)
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
+        res = update_documnet(
+            get_http_api_auth, ids[0], document_ids[0], {"meta_fields": meta_fields}
+        )
+        if expected_code == 0:
+            res = list_documnet(get_http_api_auth, ids[0], {"id": document_ids[0]})
+            assert res["data"]["docs"][0]["meta_fields"] == meta_fields
+        else:
+            assert res["message"] == expected_message
+
+    @pytest.mark.parametrize(
+        "chunk_method, expected_code, expected_message",
+        [
+            ("naive", 0, ""),
+            ("manual", 0, ""),
+            ("qa", 0, ""),
+            pytest.param("table", 0, "", marks=pytest.mark.xfail(reason="issues/6081")),
+            ("paper", 0, ""),
+            ("book", 0, ""),
+            ("laws", 0, ""),
+            ("presentation", 0, ""),
+            pytest.param(
+                "picture", 0, "", marks=pytest.mark.xfail(reason="issues/6081")
+            ),
+            pytest.param("one", 0, "", marks=pytest.mark.xfail(reason="issues/6081")),
+            ("knowledge_graph", 0, ""),
+            pytest.param("email", 0, "", marks=pytest.mark.xfail(reason="issues/6081")),
+            pytest.param("tag", 0, "", marks=pytest.mark.xfail(reason="issues/6081")),
+            ("", 102, "`chunk_method` doesn't exist"),
+            (
+                "other_chunk_method",
+                102,
+                "`chunk_method` other_chunk_method doesn't exist",
+            ),
+        ],
+    )
+    def test_chunk_method(
+        self, get_http_api_auth, tmp_path, chunk_method, expected_code, expected_message
+    ):
+        ids = create_datasets(get_http_api_auth, 1)
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
+        res = update_documnet(
+            get_http_api_auth, ids[0], document_ids[0], {"chunk_method": chunk_method}
+        )
+        assert res["code"] == expected_code
+        if expected_code == 0:
+            res = list_documnet(get_http_api_auth, ids[0], {"id": document_ids[0]})
+            if chunk_method != "":
+                assert res["data"]["docs"][0]["chunk_method"] == chunk_method
+            else:
+                assert res["data"]["docs"][0]["chunk_method"] == "naive"
+        else:
+            assert res["message"] == expected_message
+
+    @pytest.mark.parametrize(
+        "chunk_method, parser_config, expected_code, expected_message",
+        [
+            (
+                "naive",
+                {
+                    "chunk_token_num": 128,
+                    "layout_recognize": "DeepDOC",
+                    "html4excel": False,
+                    "delimiter": "\n!?。;!?",
+                    "task_page_size": 12,
+                    "raptor": {"use_raptor": False},
+                },
+                0,
+                "",
+            ),
+            ("naive", {}, 0, ""),
+            pytest.param(
+                "naive",
+                {"chunk_token_num": -1},
+                100,
+                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"chunk_token_num": 0},
+                100,
+                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"chunk_token_num": 100000000},
+                100,
+                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"chunk_token_num": 3.14},
+                102,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"chunk_token_num": "1024"},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            (
+                "naive",
+                {"layout_recognize": "DeepDOC"},
+                0,
+                "",
+            ),
+            (
+                "naive",
+                {"layout_recognize": "Naive"},
+                0,
+                "",
+            ),
+            ("naive", {"html4excel": True}, 0, ""),
+            ("naive", {"html4excel": False}, 0, ""),
+            pytest.param(
+                "naive",
+                {"html4excel": 1},
+                100,
+                "AssertionError('html4excel should be True or False')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            ("naive", {"delimiter": ""}, 0, ""),
+            ("naive", {"delimiter": "`##`"}, 0, ""),
+            pytest.param(
+                "naive",
+                {"delimiter": 1},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"task_page_size": -1},
+                100,
+                "AssertionError('task_page_size should be in range from 1 to 100000000')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"task_page_size": 0},
+                100,
+                "AssertionError('task_page_size should be in range from 1 to 100000000')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"task_page_size": 100000000},
+                100,
+                "AssertionError('task_page_size should be in range from 1 to 100000000')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"task_page_size": 3.14},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"task_page_size": "1024"},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            ("naive", {"raptor": {"use_raptor": True}}, 0, ""),
+            ("naive", {"raptor": {"use_raptor": False}}, 0, ""),
+            pytest.param(
+                "naive",
+                {"invalid_key": "invalid_value"},
+                100,
+                """AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"auto_keywords": -1},
+                100,
+                "AssertionError('auto_keywords should be in range from 0 to 32')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"auto_keywords": 32},
+                100,
+                "AssertionError('auto_keywords should be in range from 0 to 32')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"auto_keywords": 3.14},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"auto_keywords": "1024"},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"auto_questions": -1},
+                100,
+                "AssertionError('auto_questions should be in range from 0 to 10')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"auto_questions": 10},
+                100,
+                "AssertionError('auto_questions should be in range from 0 to 10')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"auto_questions": 3.14},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"auto_questions": "1024"},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"topn_tags": -1},
+                100,
+                "AssertionError('topn_tags should be in range from 0 to 10')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"topn_tags": 10},
+                100,
+                "AssertionError('topn_tags should be in range from 0 to 10')",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"topn_tags": 3.14},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+            pytest.param(
+                "naive",
+                {"topn_tags": "1024"},
+                100,
+                "",
+                marks=pytest.mark.xfail(reason="issues/6098"),
+            ),
+        ],
+    )
+    def test_parser_config(
+        self,
+        get_http_api_auth,
+        tmp_path,
+        chunk_method,
+        parser_config,
+        expected_code,
+        expected_message,
+    ):
+        ids = create_datasets(get_http_api_auth, 1)
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
+        res = update_documnet(
+            get_http_api_auth,
+            ids[0],
+            document_ids[0],
+            {"chunk_method": chunk_method, "parser_config": parser_config},
+        )
+        assert res["code"] == expected_code
+        if expected_code == 0:
+            res = list_documnet(get_http_api_auth, ids[0], {"id": document_ids[0]})
+            if parser_config != {}:
+                for k, v in parser_config.items():
+                    assert res["data"]["docs"][0]["parser_config"][k] == v
+            else:
+                assert res["data"]["docs"][0]["parser_config"] == {
+                    "chunk_token_num": 128,
+                    "delimiter": "\\n!?;。;!?",
+                    "html4excel": False,
+                    "layout_recognize": "DeepDOC",
+                    "raptor": {"use_raptor": False},
+                }
+        if expected_code != 0 or expected_message:
+            assert res["message"] == expected_message
+
+    @pytest.mark.parametrize(
+        "payload, expected_code, expected_message",
+        [
+            ({"chunk_count": 1}, 102, "Can't change `chunk_count`."),
+            pytest.param(
+                {"create_date": "Fri, 14 Mar 2025 16:53:42 GMT"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"create_time": 1},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"created_by": "ragflow_test"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"dataset_id": "ragflow_test"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"id": "ragflow_test"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"location": "ragflow_test.txt"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"process_begin_at": 1},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"process_duation": 1.0},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param({"progress": 1.0}, 102, "Can't change `progress`."),
+            pytest.param(
+                {"progress_msg": "ragflow_test"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"run": "ragflow_test"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"size": 1},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"source_type": "ragflow_test"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"thumbnail": "ragflow_test"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            ({"token_count": 1}, 102, "Can't change `token_count`."),
+            pytest.param(
+                {"type": "ragflow_test"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"update_date": "Fri, 14 Mar 2025 16:33:17 GMT"},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+            pytest.param(
+                {"update_time": 1},
+                102,
+                "The input parameters are invalid.",
+                marks=pytest.mark.xfail(reason="issues/6104"),
+            ),
+        ],
+    )
+    def test_invalid_field(
+        self,
+        get_http_api_auth,
+        tmp_path,
+        payload,
+        expected_code,
+        expected_message,
+    ):
+        ids = create_datasets(get_http_api_auth, 1)
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], 2, tmp_path)
+        res = update_documnet(get_http_api_auth, ids[0], document_ids[0], payload)
+        assert res["code"] == expected_code
+        assert res["message"] == expected_message