knowledge metadata

This commit is contained in:
jyong 2025-02-17 18:17:26 +08:00
parent 566e548713
commit d2cc502c71
5 changed files with 75 additions and 0 deletions

View File

@ -393,6 +393,28 @@ class QdrantVector(BaseVector):
return documents return documents
def update_metadata(self, document_id: str, metadata: dict) -> None:
    """Set the metadata payload on every point that matches a document.

    Points are selected with a filter that requires both the ``group_id``
    payload field to equal ``self._group_id`` and the ``metadata.doc_id``
    payload field to equal ``document_id``.

    Args:
        document_id: Value matched against the ``metadata.doc_id`` payload field.
        metadata: Mapping written under the ``Field.METADATA_KEY`` payload key.
    """
    from qdrant_client.http import models

    # NOTE(review): the parameter is called ``document_id`` but is matched
    # against ``metadata.doc_id`` -- confirm this is the intended payload key.
    points_selector = models.Filter(
        must=[
            models.FieldCondition(
                key="group_id",
                match=models.MatchValue(value=self._group_id),
            ),
            models.FieldCondition(
                key="metadata.doc_id",
                match=models.MatchValue(value=document_id),
            ),
        ]
    )

    # NOTE(review): this writes ``metadata`` as the value of the metadata key;
    # presumably any existing nested keys not present in ``metadata`` are
    # dropped -- verify callers pass the complete metadata mapping.
    self._client.set_payload(
        collection_name=self._collection_name,
        payload={
            Field.METADATA_KEY.value: metadata,
        },
        # The client selects target points (ids or a Filter) through the
        # required ``points`` parameter; it has no ``filter`` keyword.
        points=points_selector,
    )
def _reload_if_needed(self): def _reload_if_needed(self):
if isinstance(self._client, QdrantLocal): if isinstance(self._client, QdrantLocal):
self._client = cast(QdrantLocal, self._client) self._client = cast(QdrantLocal, self._client)

View File

@ -49,6 +49,10 @@ class BaseVector(ABC):
def delete(self) -> None: def delete(self) -> None:
raise NotImplementedError raise NotImplementedError
@abstractmethod
def update_metadata(self, document_id: str, metadata: dict) -> None:
    """Abstract hook for updating the metadata associated with a document.

    Args:
        document_id: Identifier of the document whose metadata is updated.
        metadata: New metadata mapping.

    Raises:
        NotImplementedError: Always; this base declaration has no behaviour.
    """
    # NOTE(review): being abstract, this must be implemented by every
    # concrete subclass before that subclass can be instantiated -- confirm
    # all vector store backends provide it.
    raise NotImplementedError()
def _filter_duplicate_texts(self, texts: list[Document]) -> list[Document]: def _filter_duplicate_texts(self, texts: list[Document]) -> list[Document]:
for text in texts.copy(): for text in texts.copy():
if text.metadata and "doc_id" in text.metadata: if text.metadata and "doc_id" in text.metadata:

View File

@ -87,3 +87,9 @@ dataset_query_detail_fields = {
"created_by": fields.String, "created_by": fields.String,
"created_at": TimestampField, "created_at": TimestampField,
} }
# Output field mapping for a dataset metadata entry: each of "id", "type"
# and "name" is mapped to ``fields.String``.
dataset_metadata_fields = {field_name: fields.String for field_name in ("id", "type", "name")}

View File

@ -926,3 +926,41 @@ class DatasetAutoDisableLog(db.Model): # type: ignore[name-defined]
document_id = db.Column(StringUUID, nullable=False) document_id = db.Column(StringUUID, nullable=False)
notified = db.Column(db.Boolean, nullable=False, server_default=db.text("false")) notified = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)")) created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))
class DatasetMetadata(db.Model):  # type: ignore[name-defined]
    """ORM model for the ``dataset_metadatas`` table.

    Each row carries a ``type`` and a ``name`` scoped to a tenant and a
    dataset, plus creation/update audit columns.
    """

    __tablename__ = "dataset_metadatas"
    __table_args__ = (
        # Primary key on ``id``; secondary indexes on tenant and dataset.
        db.PrimaryKeyConstraint("id", name="dataset_metadata_pkey"),
        db.Index("dataset_metadata_tenant_idx", "tenant_id"),
        db.Index("dataset_metadata_dataset_idx", "dataset_id"),
    )

    # Identity and ownership; ``id`` is generated by the database.
    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
    tenant_id = db.Column(StringUUID, nullable=False)
    dataset_id = db.Column(StringUUID, nullable=False)

    # Descriptive columns.
    type = db.Column(db.String(255), nullable=False)
    name = db.Column(db.String(255), nullable=False)

    # Audit columns; both timestamps default to the server's current time.
    # NOTE(review): no ``onupdate`` is declared for ``updated_at`` here --
    # presumably the application sets it explicitly on update; confirm.
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))
    created_by = db.Column(StringUUID, nullable=False)
    updated_by = db.Column(StringUUID, nullable=True)
class DatasetMetadataBinding(db.Model):  # type: ignore[name-defined]
    """ORM model for the ``dataset_metadata_bindings`` table.

    Each row references a tenant, a dataset, a metadata entry and a
    document by id.
    """

    __tablename__ = "dataset_metadata_bindings"
    __table_args__ = (
        # Primary key on ``id``; one secondary index per reference column.
        db.PrimaryKeyConstraint("id", name="dataset_metadata_binding_pkey"),
        db.Index("dataset_metadata_binding_tenant_idx", "tenant_id"),
        db.Index("dataset_metadata_binding_dataset_idx", "dataset_id"),
        db.Index("dataset_metadata_binding_metadata_idx", "metadata_id"),
        db.Index("dataset_metadata_binding_document_idx", "document_id"),
    )

    # Identity; ``id`` is generated by the database.
    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))

    # Reference columns (plain UUID columns; no foreign keys are declared).
    tenant_id = db.Column(StringUUID, nullable=False)
    dataset_id = db.Column(StringUUID, nullable=False)
    metadata_id = db.Column(StringUUID, nullable=False)
    document_id = db.Column(StringUUID, nullable=False)

    # Audit columns.
    # NOTE(review): this default uses ``func.current_timestamp()`` while
    # neighbouring models use ``db.text("CURRENT_TIMESTAMP(0)")`` -- confirm
    # which form the migration expects.
    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
    created_by = db.Column(StringUUID, nullable=False)

View File

@ -124,3 +124,8 @@ class SegmentUpdateArgs(BaseModel):
class ChildChunkUpdateArgs(BaseModel): class ChildChunkUpdateArgs(BaseModel):
id: Optional[str] = None id: Optional[str] = None
content: str content: str
class MetadataArgs(BaseModel):
    """Arguments describing a metadata entry: its value type and its name."""

    # Restricted to one of the three literal values below.
    type: Literal["string", "number", "time"]
    # Required; no default is provided.
    name: str