fix text split

This commit is contained in:
jyong 2025-03-11 16:53:32 +08:00
parent e2e1a7add0
commit e90642d624
3 changed files with 41 additions and 21 deletions

View File

@@ -331,7 +331,7 @@ class TidbOnQdrantVector(BaseVector):
should_conditions = [] should_conditions = []
for document_id_filter in document_ids_filter: for document_id_filter in document_ids_filter:
should_conditions.append( should_conditions.append(
models.FieldCondition( models.FieldCondition(
key="metadata.document_id", key="metadata.document_id",
match=models.MatchValue(value=document_id_filter), match=models.MatchValue(value=document_id_filter),
) )

View File

@@ -7,7 +7,7 @@ from collections.abc import Generator, Mapping
from typing import Any, Optional, Union, cast from typing import Any, Optional, Union, cast
from flask import Flask, current_app from flask import Flask, current_app
from sqlalchemy import Integer, and_, or_ from sqlalchemy import Integer, and_, or_, text
from sqlalchemy import cast as sqlalchemy_cast from sqlalchemy import cast as sqlalchemy_cast
from core.app.app_config.entities import ( from core.app.app_config.entities import (
@@ -847,7 +847,7 @@ class DatasetRetrieval:
for condition in metadata_filtering_conditions.conditions: for condition in metadata_filtering_conditions.conditions:
metadata_name = condition.name metadata_name = condition.name
expected_value = condition.value expected_value = condition.value
if expected_value: if expected_value or condition.comparison_operator in ("empty", "not empty"):
if isinstance(expected_value, str): if isinstance(expected_value, str):
expected_value = self._replace_metadata_filter_value(expected_value, inputs) expected_value = self._replace_metadata_filter_value(expected_value, inputs)
filters = self._process_metadata_filter_func( filters = self._process_metadata_filter_func(
@@ -930,17 +930,27 @@ class DatasetRetrieval:
return None return None
return automatic_metadata_filters return automatic_metadata_filters
def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: str, filters: list): def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: Optional[str], filters: list):
match condition: match condition:
case "contains": case "contains":
filters.append(DatasetDocument.doc_metadata[metadata_name].like(f'"%{value}%"')) filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"%{value}%")
)
case "not contains": case "not contains":
filters.append(DatasetDocument.doc_metadata[metadata_name].notlike(f'"%{value}%"')) filters.append(
(text("documents.doc_metadata ->> :key NOT LIKE :value")).params(
key=metadata_name, value=f"%{value}%"
)
)
case "start with": case "start with":
filters.append(DatasetDocument.doc_metadata[metadata_name].like(f'"{value}%"')) filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"{value}%")
)
case "end with": case "end with":
filters.append(DatasetDocument.doc_metadata[metadata_name].like(f'"%{value}"')) filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"%{value}")
)
case "is" | "=": case "is" | "=":
if isinstance(value, str): if isinstance(value, str):
filters.append(DatasetDocument.doc_metadata[metadata_name] == f'"{value}"') filters.append(DatasetDocument.doc_metadata[metadata_name] == f'"{value}"')
@@ -955,9 +965,9 @@ class DatasetRetrieval:
filters.append( filters.append(
sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Integer) != value sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Integer) != value
) )
case "is empty": case "empty":
filters.append(DatasetDocument.doc_metadata[metadata_name].is_(None)) filters.append(DatasetDocument.doc_metadata[metadata_name].is_(None))
case "is not empty": case "not empty":
filters.append(DatasetDocument.doc_metadata[metadata_name].isnot(None)) filters.append(DatasetDocument.doc_metadata[metadata_name].isnot(None))
case "before" | "<": case "before" | "<":
filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Integer) < value) filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Integer) < value)

View File

@@ -4,7 +4,7 @@ from collections import defaultdict
from collections.abc import Mapping, Sequence from collections.abc import Mapping, Sequence
from typing import Any, Optional, cast from typing import Any, Optional, cast
from sqlalchemy import and_, func, or_ from sqlalchemy import and_, func, or_, text
from core.app.app_config.entities import DatasetRetrieveConfigEntity from core.app.app_config.entities import DatasetRetrieveConfigEntity
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
@@ -293,13 +293,14 @@ class KnowledgeRetrievalNode(LLMNode):
Document.enabled == True, Document.enabled == True,
Document.archived == False, Document.archived == False,
) )
filters = []
metadata_condition = None
if node_data.metadata_filtering_mode == "disabled": if node_data.metadata_filtering_mode == "disabled":
return None, None return None, None
elif node_data.metadata_filtering_mode == "automatic": elif node_data.metadata_filtering_mode == "automatic":
automatic_metadata_filters = self._automatic_metadata_filter_func(dataset_ids, query, node_data) automatic_metadata_filters = self._automatic_metadata_filter_func(dataset_ids, query, node_data)
if automatic_metadata_filters: if automatic_metadata_filters:
conditions = [] conditions = []
filters = []
for filter in automatic_metadata_filters: for filter in automatic_metadata_filters:
self._process_metadata_filter_func( self._process_metadata_filter_func(
filter.get("condition"), filter.get("metadata_name"), filter.get("value"), filters filter.get("condition"), filter.get("metadata_name"), filter.get("value"), filters
@@ -318,10 +319,9 @@ class KnowledgeRetrievalNode(LLMNode):
elif node_data.metadata_filtering_mode == "manual": elif node_data.metadata_filtering_mode == "manual":
if node_data.metadata_filtering_conditions: if node_data.metadata_filtering_conditions:
for condition in node_data.metadata_filtering_conditions.conditions: for condition in node_data.metadata_filtering_conditions.conditions:
filters = []
metadata_name = condition.name metadata_name = condition.name
expected_value = condition.value expected_value = condition.value
if expected_value: if expected_value or condition.comparison_operator in ("empty", "not empty"):
if isinstance(expected_value, str): if isinstance(expected_value, str):
expected_value = self.graph_runtime_state.variable_pool.convert_template( expected_value = self.graph_runtime_state.variable_pool.convert_template(
expected_value expected_value
@@ -407,16 +407,26 @@ class KnowledgeRetrievalNode(LLMNode):
return [] return []
return automatic_metadata_filters return automatic_metadata_filters
def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: str, filters: list): def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: Optional[str], filters: list):
match condition: match condition:
case "contains": case "contains":
filters.append(Document.doc_metadata[metadata_name].like(f'"%{value}%"')) filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"%{value}%")
)
case "not contains": case "not contains":
filters.append(Document.doc_metadata[metadata_name].notlike(f'"%{value}%"')) filters.append(
(text("documents.doc_metadata ->> :key NOT LIKE :value")).params(
key=metadata_name, value=f"%{value}%"
)
)
case "start with": case "start with":
filters.append(Document.doc_metadata[metadata_name].like(f'"{value}%"')) filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"{value}%")
)
case "end with": case "end with":
filters.append(Document.doc_metadata[metadata_name].like(f'"%{value}"')) filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"%{value}")
)
case "=" | "is": case "=" | "is":
if isinstance(value, str): if isinstance(value, str):
filters.append(Document.doc_metadata[metadata_name] == f'"{value}"') filters.append(Document.doc_metadata[metadata_name] == f'"{value}"')
@@ -427,9 +437,9 @@ class KnowledgeRetrievalNode(LLMNode):
filters.append(Document.doc_metadata[metadata_name] != f'"{value}"') filters.append(Document.doc_metadata[metadata_name] != f'"{value}"')
else: else:
filters.append(Document.doc_metadata[metadata_name] != value) filters.append(Document.doc_metadata[metadata_name] != value)
case "is empty": case "empty":
filters.append(Document.doc_metadata[metadata_name].is_(None)) filters.append(Document.doc_metadata[metadata_name].is_(None))
case "is not empty": case "not empty":
filters.append(Document.doc_metadata[metadata_name].isnot(None)) filters.append(Document.doc_metadata[metadata_name].isnot(None))
case "before" | "<": case "before" | "<":
filters.append(Document.doc_metadata[metadata_name] < value) filters.append(Document.doc_metadata[metadata_name] < value)