fix metadata

This commit is contained in:
jyong 2025-03-10 16:25:38 +08:00
parent 07e3805da7
commit 778c246c68
5 changed files with 637 additions and 259 deletions

View File

@ -140,12 +140,12 @@ SupportedComparisonOperator = Literal[
# for string or array # for string or array
"contains", "contains",
"not contains", "not contains",
"starts with", "start with",
"ends with", "end with",
"is", "is",
"is not", "is not",
"empty", "empty",
"is not empty", "not empty",
# for number # for number
"=", "=",
"≠", "≠",

View File

@ -796,7 +796,7 @@ class DatasetRetrieval:
) )
if automatic_metadata_filters: if automatic_metadata_filters:
for filter in automatic_metadata_filters: for filter in automatic_metadata_filters:
self._process_metadata_filter_func( document_query = self._process_metadata_filter_func(
filter.get("condition"), filter.get("metadata_name"), filter.get("value"), document_query filter.get("condition"), filter.get("metadata_name"), filter.get("value"), document_query
) )
elif metadata_filtering_mode == "manual": elif metadata_filtering_mode == "manual":
@ -805,7 +805,7 @@ class DatasetRetrieval:
expected_value = condition.value expected_value = condition.value
if isinstance(expected_value, str): if isinstance(expected_value, str):
expected_value = self._replace_metadata_filter_value(expected_value, inputs) expected_value = self._replace_metadata_filter_value(expected_value, inputs)
self._process_metadata_filter_func( document_query = self._process_metadata_filter_func(
condition.comparison_operator, metadata_name, expected_value, document_query condition.comparison_operator, metadata_name, expected_value, document_query
) )
else: else:
@ -883,31 +883,38 @@ class DatasetRetrieval:
def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: str, query): def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: str, query):
match condition: match condition:
case "contains": case "contains":
query = query.filter(Document.doc_metadata[metadata_name].like(f"%{value}%")) query = query.filter(DatasetDocument.doc_metadata[metadata_name].like(f'"%{value}%"'))
case "not contains": case "not contains":
query = query.filter(Document.doc_metadata[metadata_name].notlike(f"%{value}%")) query = query.filter(DatasetDocument.doc_metadata[metadata_name].notlike(f'"%{value}%"'))
case "start with": case "start with":
query = query.filter(Document.doc_metadata[metadata_name].like(f"{value}%")) query = query.filter(DatasetDocument.doc_metadata[metadata_name].like(f'"{value}%"'))
case "end with": case "end with":
query = query.filter(Document.doc_metadata[metadata_name].like(f"%{value}")) query = query.filter(DatasetDocument.doc_metadata[metadata_name].like(f'"%{value}"'))
case "is", "=": case "is" | "=":
query = query.filter(Document.doc_metadata[metadata_name] == value) if isinstance(value, str):
case "is not", "≠": query = query.filter(DatasetDocument.doc_metadata[metadata_name] == f'"{value}"')
query = query.filter(Document.doc_metadata[metadata_name] != value) else:
query = query.filter(DatasetDocument.doc_metadata[metadata_name] == value)
case "is not" | "≠":
if isinstance(value, str):
query = query.filter(DatasetDocument.doc_metadata[metadata_name] != f'"{value}"')
else:
query = query.filter(DatasetDocument.doc_metadata[metadata_name] != value)
case "is empty": case "is empty":
query = query.filter(Document.doc_metadata[metadata_name].is_(None)) query = query.filter(DatasetDocument.doc_metadata[metadata_name].is_(None))
case "is not empty": case "is not empty":
query = query.filter(Document.doc_metadata[metadata_name].isnot(None)) query = query.filter(DatasetDocument.doc_metadata[metadata_name].isnot(None))
case "before", "<": case "before" | "<":
query = query.filter(Document.doc_metadata[metadata_name] < value) query = query.filter(DatasetDocument.doc_metadata[metadata_name] < value)
case "after", ">": case "after" | ">":
query = query.filter(Document.doc_metadata[metadata_name] > value) query = query.filter(DatasetDocument.doc_metadata[metadata_name] > value)
case "≤", ">=": case "≤" | ">=":
query = query.filter(Document.doc_metadata[metadata_name] <= value) query = query.filter(DatasetDocument.doc_metadata[metadata_name] <= value)
case "≥", ">=": case "≥" | ">=":
query = query.filter(Document.doc_metadata[metadata_name] >= value) query = query.filter(DatasetDocument.doc_metadata[metadata_name] >= value)
case _: case _:
pass pass
return query
def _fetch_model_config( def _fetch_model_config(
self, tenant_id: str, model: ModelConfig self, tenant_id: str, model: ModelConfig

View File

@ -79,12 +79,12 @@ SupportedComparisonOperator = Literal[
# for string or array # for string or array
"contains", "contains",
"not contains", "not contains",
"starts with", "start with",
"ends with", "end with",
"is", "is",
"is not", "is not",
"empty", "empty",
"is not empty", "not empty",
# for number # for number
"=", "=",
"≠", "≠",

View File

@ -296,7 +296,7 @@ class KnowledgeRetrievalNode(LLMNode):
automatic_metadata_filters = self._automatic_metadata_filter_func(dataset_ids, query, node_data) automatic_metadata_filters = self._automatic_metadata_filter_func(dataset_ids, query, node_data)
if automatic_metadata_filters: if automatic_metadata_filters:
for filter in automatic_metadata_filters: for filter in automatic_metadata_filters:
self._process_metadata_filter_func( document_query = self._process_metadata_filter_func(
filter.get("condition"), filter.get("metadata_name"), filter.get("value"), document_query filter.get("condition"), filter.get("metadata_name"), filter.get("value"), document_query
) )
elif node_data.metadata_filtering_mode == "manual": elif node_data.metadata_filtering_mode == "manual":
@ -305,7 +305,7 @@ class KnowledgeRetrievalNode(LLMNode):
expected_value = condition.value expected_value = condition.value
if isinstance(expected_value, str): if isinstance(expected_value, str):
expected_value = self.graph_runtime_state.variable_pool.convert_template(expected_value).text expected_value = self.graph_runtime_state.variable_pool.convert_template(expected_value).text
self._process_metadata_filter_func( document_query = self._process_metadata_filter_func(
condition.comparison_operator, metadata_name, expected_value, document_query condition.comparison_operator, metadata_name, expected_value, document_query
) )
else: else:
@ -383,17 +383,23 @@ class KnowledgeRetrievalNode(LLMNode):
def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: str, query): def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: str, query):
match condition: match condition:
case "contains": case "contains":
query = query.filter(Document.doc_metadata[metadata_name].like(f"%{value}%")) query = query.filter(Document.doc_metadata[metadata_name].like(f'"%{value}%"'))
case "not contains": case "not contains":
query = query.filter(Document.doc_metadata[metadata_name].notlike(f"%{value}%")) query = query.filter(Document.doc_metadata[metadata_name].notlike(f'"%{value}%"'))
case "start with": case "start with":
query = query.filter(Document.doc_metadata[metadata_name].like(f"{value}%")) query = query.filter(Document.doc_metadata[metadata_name].like(f'"{value}%"'))
case "end with": case "end with":
query = query.filter(Document.doc_metadata[metadata_name].like(f"%{value}")) query = query.filter(Document.doc_metadata[metadata_name].like(f'"%{value}"'))
case "=" | "is": case "=" | "is":
query = query.filter(Document.doc_metadata[metadata_name] == value) if isinstance(value, str):
query = query.filter(Document.doc_metadata[metadata_name] == f'"{value}"')
else:
query = query.filter(Document.doc_metadata[metadata_name] == value)
case "is not" | "≠": case "is not" | "≠":
query = query.filter(Document.doc_metadata[metadata_name] != value) if isinstance(value, str):
query = query.filter(Document.doc_metadata[metadata_name] != f'"{value}"')
else:
query = query.filter(Document.doc_metadata[metadata_name] != value)
case "is empty": case "is empty":
query = query.filter(Document.doc_metadata[metadata_name].is_(None)) query = query.filter(Document.doc_metadata[metadata_name].is_(None))
case "is not empty": case "is not empty":
@ -408,7 +414,7 @@ class KnowledgeRetrievalNode(LLMNode):
query = query.filter(Document.doc_metadata[metadata_name] >= value) query = query.filter(Document.doc_metadata[metadata_name] >= value)
case _: case _:
pass pass
return query
@classmethod @classmethod
def _extract_variable_selector_to_variable_mapping( def _extract_variable_selector_to_variable_mapping(
cls, cls,

813
api/poetry.lock generated

File diff suppressed because it is too large Load Diff