diff --git a/api/core/tools/provider/builtin/aliyuque/tools/base.py b/api/core/tools/provider/builtin/aliyuque/tools/base.py index 0046931abd..edfb9fea8e 100644 --- a/api/core/tools/provider/builtin/aliyuque/tools/base.py +++ b/api/core/tools/provider/builtin/aliyuque/tools/base.py @@ -10,10 +10,8 @@ class AliYuqueTool: @staticmethod def auth(token): session = requests.Session() - session.headers.update( - {"Accept": "application/json", "X-Auth-Token": token}) - login = session.request( - "GET", AliYuqueTool.server_url + "/api/v2/user") + session.headers.update({"Accept": "application/json", "X-Auth-Token": token}) + login = session.request("GET", AliYuqueTool.server_url + "/api/v2/user") login.raise_for_status() resp = login.json() return resp @@ -22,12 +20,10 @@ class AliYuqueTool: if not token: raise Exception("token is required") session = requests.Session() - session.headers.update( - {"accept": "application/json", "X-Auth-Token": token}) + session.headers.update({"accept": "application/json", "X-Auth-Token": token}) new_params = {**tool_parameters} - replacements = {k: v for k, v in new_params.items() - if f"{{{k}}}" in path} + replacements = {k: v for k, v in new_params.items() if f"{{{k}}}" in path} for key, value in replacements.items(): path = path.replace(f"{{{key}}}", str(value)) @@ -39,10 +35,8 @@ class AliYuqueTool: "Content-Type": "application/json", } ) - response = session.request( - method.upper(), self.server_url + path, json=new_params) + response = session.request(method.upper(), self.server_url + path, json=new_params) else: - response = session.request( - method, self.server_url + path, params=new_params) + response = session.request(method, self.server_url + path, params=new_params) response.raise_for_status() return response.text diff --git a/api/core/tools/provider/builtin/aliyuque/tools/delete_document.py b/api/core/tools/provider/builtin/aliyuque/tools/delete_document.py index ddbfa43114..84237cec30 100644 --- a/api/core/tools/provider/builtin/aliyuque/tools/delete_document.py +++ b/api/core/tools/provider/builtin/aliyuque/tools/delete_document.py @@ -13,6 +13,5 @@ class AliYuqueDeleteDocumentTool(AliYuqueTool, BuiltinTool): if not token: raise Exception("token is required") return self.create_text_message( - self.request("DELETE", token, tool_parameters, - "/api/v2/repos/{book_id}/docs/{id}") + self.request("DELETE", token, tool_parameters, "/api/v2/repos/{book_id}/docs/{id}") ) diff --git a/api/core/tools/provider/builtin/aliyuque/tools/describe_book_index_page.py b/api/core/tools/provider/builtin/aliyuque/tools/describe_book_index_page.py index ec834dd640..c23d30059a 100644 --- a/api/core/tools/provider/builtin/aliyuque/tools/describe_book_index_page.py +++ b/api/core/tools/provider/builtin/aliyuque/tools/describe_book_index_page.py @@ -13,6 +13,5 @@ class AliYuqueDescribeBookIndexPageTool(AliYuqueTool, BuiltinTool): if not token: raise Exception("token is required") return self.create_text_message( - self.request("GET", token, tool_parameters, - "/api/v2/repos/{group_login}/{book_slug}/index_page") + self.request("GET", token, tool_parameters, "/api/v2/repos/{group_login}/{book_slug}/index_page") ) diff --git a/api/core/tools/provider/builtin/aliyuque/tools/describe_document_content.py b/api/core/tools/provider/builtin/aliyuque/tools/describe_document_content.py index 50a9b37cf6..4b793cd61f 100644 --- a/api/core/tools/provider/builtin/aliyuque/tools/describe_document_content.py +++ b/api/core/tools/provider/builtin/aliyuque/tools/describe_document_content.py @@ -33,16 +33,14 @@ class AliYuqueDescribeDocumentContentTool(AliYuqueTool, BuiltinTool): new_params["group_login"] = group_id new_params["book_slug"] = book_slug index_page = json.loads( - self.request("GET", token, new_params, - "/api/v2/repos/{group_login}/{book_slug}/index_page") + self.request("GET", token, new_params, "/api/v2/repos/{group_login}/{book_slug}/index_page") ) book_id = index_page.get("data", {}).get("book", {}).get("id") if not book_id: raise Exception(f"can not parse book_id from {index_page}") new_params["book_id"] = book_id new_params["id"] = doc_id - data = self.request("GET", token, new_params, - "/api/v2/repos/{book_id}/docs/{id}") + data = self.request("GET", token, new_params, "/api/v2/repos/{book_id}/docs/{id}") data = json.loads(data) body_only = tool_parameters.get("body_only") or "" if body_only.lower() == "true": diff --git a/api/core/tools/provider/builtin/aliyuque/tools/describe_documents.py b/api/core/tools/provider/builtin/aliyuque/tools/describe_documents.py index 75436e5b1b..7a45684bed 100644 --- a/api/core/tools/provider/builtin/aliyuque/tools/describe_documents.py +++ b/api/core/tools/provider/builtin/aliyuque/tools/describe_documents.py @@ -13,6 +13,5 @@ class AliYuqueDescribeDocumentsTool(AliYuqueTool, BuiltinTool): if not token: raise Exception("token is required") return self.create_text_message( - self.request("GET", token, tool_parameters, - "/api/v2/repos/{book_id}/docs/{id}") + self.request("GET", token, tool_parameters, "/api/v2/repos/{book_id}/docs/{id}") ) diff --git a/api/core/tools/provider/builtin/aliyuque/tools/update_document.py b/api/core/tools/provider/builtin/aliyuque/tools/update_document.py index a6bcb1fcc2..d7eba46ad9 100644 --- a/api/core/tools/provider/builtin/aliyuque/tools/update_document.py +++ b/api/core/tools/provider/builtin/aliyuque/tools/update_document.py @@ -13,6 +13,5 @@ class AliYuqueUpdateDocumentTool(AliYuqueTool, BuiltinTool): if not token: raise Exception("token is required") return self.create_text_message( - self.request("PUT", token, tool_parameters, - "/api/v2/repos/{book_id}/docs/{id}") + self.request("PUT", token, tool_parameters, "/api/v2/repos/{book_id}/docs/{id}") ) diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index 2520caf713..9e09b6d29a 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -35,8 +35,7 @@ class DocumentExtractorNode(BaseNode[DocumentExtractorNodeData]): def _run(self): variable_selector = self.node_data.variable_selector - variable = self.graph_runtime_state.variable_pool.get( - variable_selector) + variable = self.graph_runtime_state.variable_pool.get(variable_selector) if variable is None: error_message = f"File variable not found for selector: {variable_selector}" @@ -47,8 +46,7 @@ class DocumentExtractorNode(BaseNode[DocumentExtractorNodeData]): value = variable.value inputs = {"variable_selector": variable_selector} - process_data = {"documents": value if isinstance(value, list) else [ - value]} + process_data = {"documents": value if isinstance(value, list) else [value]} try: if isinstance(value, list): @@ -68,8 +66,7 @@ class DocumentExtractorNode(BaseNode[DocumentExtractorNodeData]): outputs={"text": extracted_text}, ) else: - raise DocumentExtractorError( - f"Unsupported variable type: {type(value)}") + raise DocumentExtractorError(f"Unsupported variable type: {type(value)}") except DocumentExtractorError as e: return NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, @@ -105,8 +102,7 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str: case "application/json": return _extract_text_from_json(file_content) case _: - raise UnsupportedFileTypeError( - f"Unsupported MIME type: {mime_type}") + raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}") def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str: @@ -135,8 +131,7 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) case ".msg": return _extract_text_from_msg(file_content) case _: - raise UnsupportedFileTypeError( - f"Unsupported Extension Type: {file_extension}") + raise UnsupportedFileTypeError(f"Unsupported Extension Type: {file_extension}") def _extract_text_from_plain_text(file_content: bytes) -> str: @@ -151,8 +146,7 @@ def _extract_text_from_json(file_content: bytes) -> str: json_data = json.loads(file_content.decode("utf-8")) return json.dumps(json_data, indent=2, ensure_ascii=False) except (UnicodeDecodeError, json.JSONDecodeError) as e: - raise TextExtractionError( - f"Failed to decode or parse JSON file: {e}") from e + raise TextExtractionError(f"Failed to decode or parse JSON file: {e}") from e def _extract_text_from_pdf(file_content: bytes) -> str: @@ -167,8 +161,7 @@ def _extract_text_from_pdf(file_content: bytes) -> str: page.close() return text except Exception as e: - raise TextExtractionError( - f"Failed to extract text from PDF: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from PDF: {str(e)}") from e def _extract_text_from_doc(file_content: bytes) -> str: @@ -177,8 +170,7 @@ def _extract_text_from_doc(file_content: bytes) -> str: doc = docx.Document(doc_file) return "\n".join([paragraph.text for paragraph in doc.paragraphs]) except Exception as e: - raise TextExtractionError( - f"Failed to extract text from DOC/DOCX: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from DOC/DOCX: {str(e)}") from e def _download_file_content(file: File) -> bytes: @@ -193,8 +185,7 @@ def _download_file_content(file: File) -> bytes: elif file.transfer_method == FileTransferMethod.LOCAL_FILE: return file_manager.download(file) else: - raise ValueError( - f"Unsupported transfer method: {file.transfer_method}") + raise ValueError(f"Unsupported transfer method: {file.transfer_method}") except Exception as e: raise FileDownloadError(f"Error downloading file: {str(e)}") from e @@ -202,14 +193,11 @@ def _download_file_content(file: File) -> bytes: def _extract_text_from_file(file: File): file_content = _download_file_content(file) if file.extension: - extracted_text = _extract_text_by_file_extension( - file_content=file_content, file_extension=file.extension) + extracted_text = _extract_text_by_file_extension(file_content=file_content, file_extension=file.extension) elif file.mime_type: - extracted_text = _extract_text_by_mime_type( - file_content=file_content, mime_type=file.mime_type) + extracted_text = _extract_text_by_mime_type(file_content=file_content, mime_type=file.mime_type) else: - raise UnsupportedFileTypeError( - "Unable to determine file type: MIME type or file extension is missing") + raise UnsupportedFileTypeError("Unable to determine file type: MIME type or file extension is missing") return extracted_text @@ -230,8 +218,7 @@ def _extract_text_from_csv(file_content: bytes) -> str: return markdown_table.strip() except Exception as e: - raise TextExtractionError( - f"Failed to extract text from CSV: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from CSV: {str(e)}") from e def _extract_text_from_excel(file_content: bytes) -> str: @@ -247,8 +234,7 @@ def _extract_text_from_excel(file_content: bytes) -> str: markdown_table = df.to_markdown(index=False) return markdown_table except Exception as e: - raise TextExtractionError( - f"Failed to extract text from Excel file: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from Excel file: {str(e)}") from e def _extract_text_from_ppt(file_content: bytes) -> str: @@ -257,8 +243,7 @@ def _extract_text_from_ppt(file_content: bytes) -> str: elements = partition_ppt(file=file) return "\n".join([getattr(element, "text", "") for element in elements]) except Exception as e: - raise TextExtractionError( - f"Failed to extract text from PPT: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from PPT: {str(e)}") from e def _extract_text_from_pptx(file_content: bytes) -> str: @@ -267,8 +252,7 @@ def _extract_text_from_pptx(file_content: bytes) -> str: elements = partition_pptx(file=file) return "\n".join([getattr(element, "text", "") for element in elements]) except Exception as e: - raise TextExtractionError( - f"Failed to extract text from PPTX: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e def _extract_text_from_epub(file_content: bytes) -> str: @@ -277,8 +261,7 @@ def _extract_text_from_epub(file_content: bytes) -> str: elements = partition_epub(file=file) return "\n".join([str(element) for element in elements]) except Exception as e: - raise TextExtractionError( - f"Failed to extract text from EPUB: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from EPUB: {str(e)}") from e def _extract_text_from_eml(file_content: bytes) -> str: @@ -287,8 +270,7 @@ def _extract_text_from_eml(file_content: bytes) -> str: elements = partition_email(file=file) return "\n".join([str(element) for element in elements]) except Exception as e: - raise TextExtractionError( - f"Failed to extract text from EML: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from EML: {str(e)}") from e def _extract_text_from_msg(file_content: bytes) -> str: @@ -297,5 +279,4 @@ def _extract_text_from_msg(file_content: bytes) -> str: elements = partition_msg(file=file) return "\n".join([str(element) for element in elements]) except Exception as e: - raise TextExtractionError( - f"Failed to extract text from MSG: {str(e)}") from e + raise TextExtractionError(f"Failed to extract text from MSG: {str(e)}") from e diff --git a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py index 625ff560fc..4f1f8f05c8 100644 --- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py @@ -65,8 +65,7 @@ def test_run_invalid_variable_type(document_extractor_node, mock_graph_runtime_s @pytest.mark.parametrize( ("mime_type", "file_content", "expected_text", "transfer_method", "extension"), [ - ("text/plain", b"Hello, world!", - ["Hello, world!"], FileTransferMethod.LOCAL_FILE, ".txt"), + ("text/plain", b"Hello, world!", ["Hello, world!"], FileTransferMethod.LOCAL_FILE, ".txt"), ( "application/pdf", b"%PDF-1.5\n%Test PDF content", @@ -81,8 +80,7 @@ def test_run_invalid_variable_type(document_extractor_node, mock_graph_runtime_s FileTransferMethod.REMOTE_URL, "", ), - ("text/plain", b"Remote content", - ["Remote content"], FileTransferMethod.REMOTE_URL, None), + ("text/plain", b"Remote content", ["Remote content"], FileTransferMethod.REMOTE_URL, None), ], ) def test_run_extract_text( @@ -119,12 +117,10 @@ def test_run_extract_text( if mime_type == "application/pdf": mock_pdf_extract = Mock(return_value=expected_text[0]) - monkeypatch.setattr( - "core.workflow.nodes.document_extractor.node._extract_text_from_pdf", mock_pdf_extract) + monkeypatch.setattr("core.workflow.nodes.document_extractor.node._extract_text_from_pdf", mock_pdf_extract) elif mime_type.startswith("application/vnd.openxmlformats"): mock_docx_extract = Mock(return_value=expected_text[0]) - monkeypatch.setattr( - "core.workflow.nodes.document_extractor.node._extract_text_from_doc", mock_docx_extract) + monkeypatch.setattr("core.workflow.nodes.document_extractor.node._extract_text_from_doc", mock_docx_extract) result = document_extractor_node._run() @@ -134,8 +130,7 @@ def test_run_extract_text( assert result.outputs["text"] == expected_text if transfer_method == FileTransferMethod.REMOTE_URL: - mock_ssrf_proxy_get.assert_called_once_with( - "https://example.com/file.txt") + mock_ssrf_proxy_get.assert_called_once_with("https://example.com/file.txt") elif transfer_method == FileTransferMethod.LOCAL_FILE: mock_download.assert_called_once_with(mock_file) diff --git a/api/tests/unit_tests/oss/__mock/volcengine_tos.py b/api/tests/unit_tests/oss/__mock/volcengine_tos.py index c2dfff0de3..1194a03258 100644 --- a/api/tests/unit_tests/oss/__mock/volcengine_tos.py +++ b/api/tests/unit_tests/oss/__mock/volcengine_tos.py @@ -77,18 +77,12 @@ MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true" @pytest.fixture def setup_volcengine_tos_mock(monkeypatch: MonkeyPatch): if MOCK: - monkeypatch.setattr(TosClientV2, "__init__", - MockVolcengineTosClass.__init__) - monkeypatch.setattr(TosClientV2, "put_object", - MockVolcengineTosClass.put_object) - monkeypatch.setattr(TosClientV2, "get_object", - MockVolcengineTosClass.get_object) - monkeypatch.setattr(TosClientV2, "get_object_to_file", - MockVolcengineTosClass.get_object_to_file) - monkeypatch.setattr(TosClientV2, "head_object", - MockVolcengineTosClass.head_object) - monkeypatch.setattr(TosClientV2, "delete_object", - MockVolcengineTosClass.delete_object) + monkeypatch.setattr(TosClientV2, "__init__", MockVolcengineTosClass.__init__) + monkeypatch.setattr(TosClientV2, "put_object", MockVolcengineTosClass.put_object) + monkeypatch.setattr(TosClientV2, "get_object", MockVolcengineTosClass.get_object) + monkeypatch.setattr(TosClientV2, "get_object_to_file", MockVolcengineTosClass.get_object_to_file) + monkeypatch.setattr(TosClientV2, "head_object", MockVolcengineTosClass.head_object) + monkeypatch.setattr(TosClientV2, "delete_object", MockVolcengineTosClass.delete_object) yield