From 5f995fac32da4fd9c53a75a2c53e9862bde3e602 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Thu, 20 Feb 2025 17:13:44 +0800 Subject: [PATCH] metadata update --- .../console/datasets/datasets_segments.py | 4 ++-- .../nodes/knowledge_retrieval/entities.py | 11 ++++++++++ .../knowledge_retrieval_node.py | 8 +++---- api/models/dataset.py | 13 +++++++++++ api/services/dataset_service.py | 22 ++++++++++++++++--- .../knowledge_entities/knowledge_entities.py | 7 ++++++ 6 files changed, 56 insertions(+), 9 deletions(-) diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index d2c94045ad..19255c618a 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -88,9 +88,9 @@ class DatasetDocumentSegmentListApi(Resource): if args["enabled"].lower() != "all": if args["enabled"].lower() == "true": - query = query.filter(DocumentSegment.enabled == True) + query = query.filter(DocumentSegment.enabled == True) # noqa: E712 elif args["enabled"].lower() == "false": - query = query.filter(DocumentSegment.enabled == False) + query = query.filter(DocumentSegment.enabled == False) # noqa: E712 segments = query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) diff --git a/api/core/workflow/nodes/knowledge_retrieval/entities.py b/api/core/workflow/nodes/knowledge_retrieval/entities.py index e8972d1381..133af9c838 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/entities.py +++ b/api/core/workflow/nodes/knowledge_retrieval/entities.py @@ -73,6 +73,15 @@ class SingleRetrievalConfig(BaseModel): model: ModelConfig +class MetadataFilteringCondition(BaseModel): + """ + Metadata Filtering Condition. 
+ """ + + logical_operator: Optional[Literal["and", "or"]] = "and" + conditions: Optional[list[Condition]] = Field(default=None, deprecated=True) + + class KnowledgeRetrievalNodeData(BaseNodeData): """ Knowledge retrieval Node Data. @@ -84,3 +93,5 @@ class KnowledgeRetrievalNodeData(BaseNodeData): retrieval_mode: Literal["single", "multiple"] multiple_retrieval_config: Optional[MultipleRetrievalConfig] = None single_retrieval_config: Optional[SingleRetrievalConfig] = None + metadata_filtering_mode: Optional[Literal["disabled", "automatic", "manual"]] = "disabled" + metadata_filtering_conditions: Optional[dict[str, Any]] = None \ No newline at end of file diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 0f239af51a..5153abf0b0 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -95,8 +95,8 @@ class KnowledgeRetrievalNode(BaseNode[KnowledgeRetrievalNodeData]): db.session.query(Document.dataset_id, func.count(Document.id).label("available_document_count")) .filter( Document.indexing_status == "completed", - Document.enabled == True, - Document.archived == False, + Document.enabled == True, # noqa: E712 + Document.archived == False, # noqa: E712 Document.dataset_id.in_(dataset_ids), ) .group_by(Document.dataset_id) @@ -221,8 +221,8 @@ class KnowledgeRetrievalNode(BaseNode[KnowledgeRetrievalNodeData]): dataset = Dataset.query.filter_by(id=segment.dataset_id).first() document = Document.query.filter( Document.id == segment.document_id, - Document.enabled == True, - Document.archived == False, + Document.enabled == True, # noqa: E712 + Document.archived == False, # noqa: E712 ).first() if dataset and document: source = { diff --git a/api/models/dataset.py b/api/models/dataset.py index 6109d4a605..7e712f5da8 100644 --- a/api/models/dataset.py +++ 
b/api/models/dataset.py @@ -398,6 +398,19 @@ class Document(db.Model): # type: ignore[name-defined] ) @property + def uploader(self): + user = db.session.query(Account).filter(Account.id == self.created_by).first() + return user.name if user else None + + @property + def upload_date(self): + return self.created_at + + @property + def last_update_date(self): + return self.updated_at + + @property def process_rule_dict(self): if self.dataset_process_rule_id: return self.dataset_process_rule.to_dict() diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 66fa62cd83..2c38937594 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -15,6 +15,7 @@ from configs import dify_config from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType +from core.rag.index_processor.constant.built_in_field import BuiltInField from core.rag.index_processor.constant.index_type import IndexType from core.rag.retrieval.retrieval_methods import RetrievalMethod from events.dataset_event import dataset_was_deleted @@ -687,8 +688,12 @@ class DocumentService: if document.tenant_id != current_user.current_tenant_id: raise ValueError("No permission.") - - document.name = name + + if dataset.built_in_field_enabled: + if document.doc_metadata: + document.doc_metadata[BuiltInField.document_name] = name + else: + document.name = name db.session.add(document) db.session.commit() @@ -1086,9 +1091,20 @@ class DocumentService: doc_form=document_form, doc_language=document_language, ) + doc_metadata = {} + if dataset.built_in_field_enabled: + doc_metadata = { + BuiltInField.document_name: name, + BuiltInField.uploader: account.name, + BuiltInField.upload_date: datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + BuiltInField.last_update_date: datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d 
%H:%M:%S"), + BuiltInField.source: data_source_type, + } if metadata is not None: - document.doc_metadata = metadata.doc_metadata + doc_metadata.update(metadata.doc_metadata) document.doc_type = metadata.doc_type + if doc_metadata: + document.doc_metadata = doc_metadata return document @staticmethod diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py index f8f6b6c132..f23f578a15 100644 --- a/api/services/entities/knowledge_entities/knowledge_entities.py +++ b/api/services/entities/knowledge_entities/knowledge_entities.py @@ -129,3 +129,10 @@ class ChildChunkUpdateArgs(BaseModel): class MetadataArgs(BaseModel): type: Literal["string", "number", "time"] name: str + +class MetadataValue(BaseModel): + name: str + value: str + +class MetadataValueUpdateArgs(BaseModel): + fields: list[MetadataValue] \ No newline at end of file