metadata update

This commit is contained in:
jyong 2025-02-20 17:13:44 +08:00
parent f88f9d6970
commit 5f995fac32
6 changed files with 56 additions and 9 deletions

View File

@ -88,9 +88,9 @@ class DatasetDocumentSegmentListApi(Resource):
if args["enabled"].lower() != "all": if args["enabled"].lower() != "all":
if args["enabled"].lower() == "true": if args["enabled"].lower() == "true":
query = query.filter(DocumentSegment.enabled == True) query = query.filter(DocumentSegment.enabled == True) # noqa: E712
elif args["enabled"].lower() == "false": elif args["enabled"].lower() == "false":
query = query.filter(DocumentSegment.enabled == False) query = query.filter(DocumentSegment.enabled == False) # noqa: E712
segments = query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) segments = query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False)

View File

@ -73,6 +73,15 @@ class SingleRetrievalConfig(BaseModel):
model: ModelConfig model: ModelConfig
class MetadataFilteringCondition(BaseModel):
    """
    Metadata Filtering Condition.

    Pairs an optional list of ``Condition`` entries with the logical
    operator ("and" / "or") declared next to them.
    """

    # Restricted to "and" or "or" (or None); defaults to "and".
    # Presumably selects how the entries in `conditions` are combined --
    # TODO confirm against the code that evaluates the filter.
    logical_operator: Optional[Literal["and", "or"]] = "and"
    # Optional list of Condition entries; defaults to None.
    # The field is declared with deprecated=True.
    conditions: Optional[list[Condition]] = Field(default=None, deprecated=True)
class KnowledgeRetrievalNodeData(BaseNodeData): class KnowledgeRetrievalNodeData(BaseNodeData):
""" """
Knowledge retrieval Node Data. Knowledge retrieval Node Data.
@ -84,3 +93,5 @@ class KnowledgeRetrievalNodeData(BaseNodeData):
retrieval_mode: Literal["single", "multiple"] retrieval_mode: Literal["single", "multiple"]
multiple_retrieval_config: Optional[MultipleRetrievalConfig] = None multiple_retrieval_config: Optional[MultipleRetrievalConfig] = None
single_retrieval_config: Optional[SingleRetrievalConfig] = None single_retrieval_config: Optional[SingleRetrievalConfig] = None
metadata_filtering_mode: Optional[Literal["disabled", "automatic", "manual"]] = "disabled"
metadata_filtering_conditions: Optional[dict[str, Any]] = None

View File

@ -95,8 +95,8 @@ class KnowledgeRetrievalNode(BaseNode[KnowledgeRetrievalNodeData]):
db.session.query(Document.dataset_id, func.count(Document.id).label("available_document_count")) db.session.query(Document.dataset_id, func.count(Document.id).label("available_document_count"))
.filter( .filter(
Document.indexing_status == "completed", Document.indexing_status == "completed",
Document.enabled == True, Document.enabled == True, # noqa: E712
Document.archived == False, Document.archived == False, # noqa: E712
Document.dataset_id.in_(dataset_ids), Document.dataset_id.in_(dataset_ids),
) )
.group_by(Document.dataset_id) .group_by(Document.dataset_id)
@ -221,8 +221,8 @@ class KnowledgeRetrievalNode(BaseNode[KnowledgeRetrievalNodeData]):
dataset = Dataset.query.filter_by(id=segment.dataset_id).first() dataset = Dataset.query.filter_by(id=segment.dataset_id).first()
document = Document.query.filter( document = Document.query.filter(
Document.id == segment.document_id, Document.id == segment.document_id,
Document.enabled == True, Document.enabled == True, # noqa: E712
Document.archived == False, Document.archived == False, # noqa: E712
).first() ).first()
if dataset and document: if dataset and document:
source = { source = {

View File

@ -398,6 +398,19 @@ class Document(db.Model): # type: ignore[name-defined]
) )
@property @property
def uploader(self):
    """Return the name of the Account whose id equals ``created_by``.

    Queries the database session on every access and returns None when no
    matching account row is found.
    """
    account = (
        db.session.query(Account)
        .filter(Account.id == self.created_by)
        .first()
    )
    if not account:
        return None
    return account.name
@property
def upload_date(self):
    """Return this document's ``created_at`` value under the name ``upload_date``."""
    return self.created_at
@property
def last_update_date(self):
    """Return this document's ``updated_at`` value under the name ``last_update_date``."""
    return self.updated_at
def process_rule_dict(self): def process_rule_dict(self):
if self.dataset_process_rule_id: if self.dataset_process_rule_id:
return self.dataset_process_rule.to_dict() return self.dataset_process_rule.to_dict()

View File

@ -15,6 +15,7 @@ from configs import dify_config
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.model_manager import ModelManager from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.entities.model_entities import ModelType
from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.constant.index_type import IndexType
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
from events.dataset_event import dataset_was_deleted from events.dataset_event import dataset_was_deleted
@ -687,8 +688,12 @@ class DocumentService:
if document.tenant_id != current_user.current_tenant_id: if document.tenant_id != current_user.current_tenant_id:
raise ValueError("No permission.") raise ValueError("No permission.")
document.name = name if dataset.built_in_field_enabled:
if document.doc_metadata:
document.doc_metadata[BuiltInField.document_name] = name
else:
document.name = name
db.session.add(document) db.session.add(document)
db.session.commit() db.session.commit()
@ -1086,9 +1091,20 @@ class DocumentService:
doc_form=document_form, doc_form=document_form,
doc_language=document_language, doc_language=document_language,
) )
doc_metadata = {}
if dataset.built_in_field_enabled:
doc_metadata = {
BuiltInField.document_name: name,
BuiltInField.uploader: account.name,
BuiltInField.upload_date: datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
BuiltInField.last_update_date: datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
BuiltInField.source: data_source_type,
}
if metadata is not None: if metadata is not None:
document.doc_metadata = metadata.doc_metadata doc_metadata.update(metadata.doc_metadata)
document.doc_type = metadata.doc_type document.doc_type = metadata.doc_type
if doc_metadata:
document.doc_metadata = doc_metadata
return document return document
@staticmethod @staticmethod

View File

@ -129,3 +129,10 @@ class ChildChunkUpdateArgs(BaseModel):
class MetadataArgs(BaseModel): class MetadataArgs(BaseModel):
type: Literal["string", "number", "time"] type: Literal["string", "number", "time"]
name: str name: str
class MetadataValue(BaseModel):
    """A single metadata entry: a field name paired with its value."""

    # Name of the metadata field.
    name: str
    # Value for that field; declared as str.
    value: str
class MetadataValueUpdateArgs(BaseModel):
    """Argument model carrying a list of ``MetadataValue`` entries.

    Judging by the class name, this is the payload for updating metadata
    values -- confirm against the caller.
    """

    # Required list of name/value entries.
    fields: list[MetadataValue]