knowledge retrieval with metadata

This commit is contained in:
jyong 2025-03-13 17:18:30 +08:00
parent a8c03bfb5b
commit 88733f8591
3 changed files with 31 additions and 24 deletions

View File

@ -862,7 +862,7 @@ class DatasetRetrieval:
document_query = document_query.filter(and_(*filters)) document_query = document_query.filter(and_(*filters))
documents = document_query.all() documents = document_query.all()
# group by dataset_id # group by dataset_id
metadata_filter_document_ids = defaultdict(list) metadata_filter_document_ids = defaultdict(list) if documents else None
for document in documents: for document in documents:
metadata_filter_document_ids[document.dataset_id].append(document.id) metadata_filter_document_ids[document.dataset_id].append(document.id)
return metadata_filter_document_ids, metadata_condition return metadata_filter_document_ids, metadata_condition

View File

@ -4,7 +4,8 @@ from collections import defaultdict
from collections.abc import Mapping, Sequence from collections.abc import Mapping, Sequence
from typing import Any, Optional, cast from typing import Any, Optional, cast
from sqlalchemy import and_, func, or_, text from sqlalchemy import Integer, and_, func, or_, text
from sqlalchemy import cast as sqlalchemy_cast
from core.app.app_config.entities import DatasetRetrieveConfigEntity from core.app.app_config.entities import DatasetRetrieveConfigEntity
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
@ -313,23 +314,25 @@ class KnowledgeRetrievalNode(LLMNode):
) )
) )
metadata_condition = MetadataCondition( metadata_condition = MetadataCondition(
logical_operator="or", logical_operator=node_data.metadata_filtering_conditions.logical_operator,
conditions=conditions, conditions=conditions,
) )
elif node_data.metadata_filtering_mode == "manual": elif node_data.metadata_filtering_mode == "manual":
if node_data.metadata_filtering_conditions: if node_data.metadata_filtering_conditions:
for condition in node_data.metadata_filtering_conditions.conditions: metadata_condition = MetadataCondition(**node_data.metadata_filtering_conditions.model_dump())
metadata_name = condition.name if node_data.metadata_filtering_conditions:
expected_value = condition.value for condition in node_data.metadata_filtering_conditions.conditions:
if expected_value or condition.comparison_operator in ("empty", "not empty"): metadata_name = condition.name
if isinstance(expected_value, str): expected_value = condition.value
expected_value = self.graph_runtime_state.variable_pool.convert_template( if expected_value or condition.comparison_operator in ("empty", "not empty"):
expected_value if isinstance(expected_value, str):
).text expected_value = self.graph_runtime_state.variable_pool.convert_template(
expected_value
).text
filters = self._process_metadata_filter_func( filters = self._process_metadata_filter_func(
condition.comparison_operator, metadata_name, expected_value, filters condition.comparison_operator, metadata_name, expected_value, filters
) )
else: else:
raise ValueError("Invalid metadata filtering mode") raise ValueError("Invalid metadata filtering mode")
if filters: if filters:
@ -337,10 +340,10 @@ class KnowledgeRetrievalNode(LLMNode):
document_query = document_query.filter(and_(*filters)) document_query = document_query.filter(and_(*filters))
else: else:
document_query = document_query.filter(or_(*filters)) document_query = document_query.filter(or_(*filters))
documnents = document_query.all() documents = document_query.all()
# group by dataset_id # group by dataset_id
metadata_filter_document_ids = defaultdict(list) metadata_filter_document_ids = defaultdict(list) if documents else None
for document in documnents: for document in documents:
metadata_filter_document_ids[document.dataset_id].append(document.id) metadata_filter_document_ids[document.dataset_id].append(document.id)
return metadata_filter_document_ids, metadata_condition return metadata_filter_document_ids, metadata_condition
@ -431,24 +434,28 @@ class KnowledgeRetrievalNode(LLMNode):
if isinstance(value, str): if isinstance(value, str):
filters.append(Document.doc_metadata[metadata_name] == f'"{value}"') filters.append(Document.doc_metadata[metadata_name] == f'"{value}"')
else: else:
filters.append(Document.doc_metadata[metadata_name] == value) filters.append(
sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Integer) == value
)
case "is not" | "≠": case "is not" | "≠":
if isinstance(value, str): if isinstance(value, str):
filters.append(Document.doc_metadata[metadata_name] != f'"{value}"') filters.append(Document.doc_metadata[metadata_name] != f'"{value}"')
else: else:
filters.append(Document.doc_metadata[metadata_name] != value) filters.append(
sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Integer) != value
)
case "empty": case "empty":
filters.append(Document.doc_metadata[metadata_name].is_(None)) filters.append(Document.doc_metadata[metadata_name].is_(None))
case "not empty": case "not empty":
filters.append(Document.doc_metadata[metadata_name].isnot(None)) filters.append(Document.doc_metadata[metadata_name].isnot(None))
case "before" | "<": case "before" | "<":
filters.append(Document.doc_metadata[metadata_name] < value) filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Integer) < value)
case "after" | ">": case "after" | ">":
filters.append(Document.doc_metadata[metadata_name] > value) filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Integer) > value)
case "≤" | ">=": case "≤" | ">=":
filters.append(Document.doc_metadata[metadata_name] <= value) filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Integer) <= value)
case "≥" | ">=": case "≥" | ">=":
filters.append(Document.doc_metadata[metadata_name] >= value) filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Integer) >= value)
case _: case _:
pass pass
return filters return filters

View File

@ -20,7 +20,7 @@ class TagService:
) )
if keyword: if keyword:
query = query.filter(db.and_(Tag.name.ilike(f"%{keyword}%"))) query = query.filter(db.and_(Tag.name.ilike(f"%{keyword}%")))
query = query.group_by(Tag.id) query = query.group_by(Tag.id, Tag.type, Tag.name)
results: list = query.order_by(Tag.created_at.desc()).all() results: list = query.order_by(Tag.created_at.desc()).all()
return results return results