chore: extract retrieval method literal values into enum (#5060)
This commit is contained in:
parent
9d5a89eab6
commit
c923684edd
@ -17,6 +17,7 @@ from core.model_runtime.entities.model_entities import ModelType
|
|||||||
from core.provider_manager import ProviderManager
|
from core.provider_manager import ProviderManager
|
||||||
from core.rag.datasource.vdb.vector_type import VectorType
|
from core.rag.datasource.vdb.vector_type import VectorType
|
||||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from fields.app_fields import related_app_list
|
from fields.app_fields import related_app_list
|
||||||
from fields.dataset_fields import dataset_detail_fields, dataset_query_detail_fields
|
from fields.dataset_fields import dataset_detail_fields, dataset_query_detail_fields
|
||||||
@ -500,13 +501,15 @@ class DatasetRetrievalSettingApi(Resource):
|
|||||||
case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT:
|
case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT:
|
||||||
return {
|
return {
|
||||||
'retrieval_method': [
|
'retrieval_method': [
|
||||||
'semantic_search'
|
RetrievalMethod.SEMANTIC_SEARCH
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH:
|
case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH:
|
||||||
return {
|
return {
|
||||||
'retrieval_method': [
|
'retrieval_method': [
|
||||||
'semantic_search', 'full_text_search', 'hybrid_search'
|
RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
|
RetrievalMethod.FULL_TEXT_SEARCH,
|
||||||
|
RetrievalMethod.HYBRID_SEARCH,
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
case _:
|
case _:
|
||||||
@ -522,13 +525,15 @@ class DatasetRetrievalSettingMockApi(Resource):
|
|||||||
case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCEN:
|
case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCEN:
|
||||||
return {
|
return {
|
||||||
'retrieval_method': [
|
'retrieval_method': [
|
||||||
'semantic_search'
|
RetrievalMethod.SEMANTIC_SEARCH
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH:
|
case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH:
|
||||||
return {
|
return {
|
||||||
'retrieval_method': [
|
'retrieval_method': [
|
||||||
'semantic_search', 'full_text_search', 'hybrid_search'
|
RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
|
RetrievalMethod.FULL_TEXT_SEARCH,
|
||||||
|
RetrievalMethod.HYBRID_SEARCH,
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
case _:
|
case _:
|
||||||
|
|||||||
@ -6,11 +6,12 @@ from flask import Flask, current_app
|
|||||||
from core.rag.data_post_processor.data_post_processor import DataPostProcessor
|
from core.rag.data_post_processor.data_post_processor import DataPostProcessor
|
||||||
from core.rag.datasource.keyword.keyword_factory import Keyword
|
from core.rag.datasource.keyword.keyword_factory import Keyword
|
||||||
from core.rag.datasource.vdb.vector_factory import Vector
|
from core.rag.datasource.vdb.vector_factory import Vector
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from models.dataset import Dataset
|
from models.dataset import Dataset
|
||||||
|
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
@ -47,7 +48,7 @@ class RetrievalService:
|
|||||||
threads.append(keyword_thread)
|
threads.append(keyword_thread)
|
||||||
keyword_thread.start()
|
keyword_thread.start()
|
||||||
# retrieval_model source with semantic
|
# retrieval_model source with semantic
|
||||||
if retrival_method == 'semantic_search' or retrival_method == 'hybrid_search':
|
if RetrievalMethod.is_support_semantic_search(retrival_method):
|
||||||
embedding_thread = threading.Thread(target=RetrievalService.embedding_search, kwargs={
|
embedding_thread = threading.Thread(target=RetrievalService.embedding_search, kwargs={
|
||||||
'flask_app': current_app._get_current_object(),
|
'flask_app': current_app._get_current_object(),
|
||||||
'dataset_id': dataset_id,
|
'dataset_id': dataset_id,
|
||||||
@ -63,7 +64,7 @@ class RetrievalService:
|
|||||||
embedding_thread.start()
|
embedding_thread.start()
|
||||||
|
|
||||||
# retrieval source with full text
|
# retrieval source with full text
|
||||||
if retrival_method == 'full_text_search' or retrival_method == 'hybrid_search':
|
if RetrievalMethod.is_support_fulltext_search(retrival_method):
|
||||||
full_text_index_thread = threading.Thread(target=RetrievalService.full_text_index_search, kwargs={
|
full_text_index_thread = threading.Thread(target=RetrievalService.full_text_index_search, kwargs={
|
||||||
'flask_app': current_app._get_current_object(),
|
'flask_app': current_app._get_current_object(),
|
||||||
'dataset_id': dataset_id,
|
'dataset_id': dataset_id,
|
||||||
@ -85,7 +86,7 @@ class RetrievalService:
|
|||||||
exception_message = ';\n'.join(exceptions)
|
exception_message = ';\n'.join(exceptions)
|
||||||
raise Exception(exception_message)
|
raise Exception(exception_message)
|
||||||
|
|
||||||
if retrival_method == 'hybrid_search':
|
if retrival_method == RetrievalMethod.HYBRID_SEARCH:
|
||||||
data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False)
|
data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False)
|
||||||
all_documents = data_post_processor.invoke(
|
all_documents = data_post_processor.invoke(
|
||||||
query=query,
|
query=query,
|
||||||
@ -141,7 +142,7 @@ class RetrievalService:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if documents:
|
if documents:
|
||||||
if reranking_model and retrival_method == 'semantic_search':
|
if reranking_model and retrival_method == RetrievalMethod.SEMANTIC_SEARCH:
|
||||||
data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False)
|
data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False)
|
||||||
all_documents.extend(data_post_processor.invoke(
|
all_documents.extend(data_post_processor.invoke(
|
||||||
query=query,
|
query=query,
|
||||||
@ -173,7 +174,7 @@ class RetrievalService:
|
|||||||
top_k=top_k
|
top_k=top_k
|
||||||
)
|
)
|
||||||
if documents:
|
if documents:
|
||||||
if reranking_model and retrival_method == 'full_text_search':
|
if reranking_model and retrival_method == RetrievalMethod.FULL_TEXT_SEARCH:
|
||||||
data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False)
|
data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False)
|
||||||
all_documents.extend(data_post_processor.invoke(
|
all_documents.extend(data_post_processor.invoke(
|
||||||
query=query,
|
query=query,
|
||||||
|
|||||||
@ -15,6 +15,7 @@ from core.model_runtime.model_providers.__base.large_language_model import Large
|
|||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
from core.rag.rerank.rerank import RerankRunner
|
from core.rag.rerank.rerank import RerankRunner
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter
|
from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter
|
||||||
from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter
|
from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter
|
||||||
from core.tools.tool.dataset_retriever.dataset_multi_retriever_tool import DatasetMultiRetrieverTool
|
from core.tools.tool.dataset_retriever.dataset_multi_retriever_tool import DatasetMultiRetrieverTool
|
||||||
@ -25,7 +26,7 @@ from models.dataset import Dataset, DatasetQuery, DocumentSegment
|
|||||||
from models.dataset import Document as DatasetDocument
|
from models.dataset import Document as DatasetDocument
|
||||||
|
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
@ -419,7 +420,7 @@ class DatasetRetrieval:
|
|||||||
if retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE:
|
if retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE:
|
||||||
# get retrieval model config
|
# get retrieval model config
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
|
|||||||
15
api/core/rag/retrieval/retrival_methods.py
Normal file
15
api/core/rag/retrieval/retrival_methods.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class RetrievalMethod(str, Enum):
    """Closed set of search strategies used when retrieving from a dataset.

    The enum mixes in ``str`` so that each member compares equal to its raw
    string value (e.g. ``'semantic_search'``). The predicates below therefore
    accept either a member or the plain string.
    """

    SEMANTIC_SEARCH = 'semantic_search'
    FULL_TEXT_SEARCH = 'full_text_search'
    HYBRID_SEARCH = 'hybrid_search'

    @staticmethod
    def is_support_semantic_search(retrieval_method: str) -> bool:
        """Return True when ``retrieval_method`` involves semantic search.

        Hybrid search counts, because it includes the semantic strategy.
        """
        return retrieval_method in {RetrievalMethod.SEMANTIC_SEARCH, RetrievalMethod.HYBRID_SEARCH}

    @staticmethod
    def is_support_fulltext_search(retrieval_method: str) -> bool:
        """Return True when ``retrieval_method`` involves full-text search.

        Hybrid search counts, because it includes the full-text strategy.
        """
        return retrieval_method in {RetrievalMethod.FULL_TEXT_SEARCH, RetrievalMethod.HYBRID_SEARCH}
|
||||||
@ -8,12 +8,13 @@ from core.model_manager import ModelManager
|
|||||||
from core.model_runtime.entities.model_entities import ModelType
|
from core.model_runtime.entities.model_entities import ModelType
|
||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
from core.rag.rerank.rerank import RerankRunner
|
from core.rag.rerank.rerank import RerankRunner
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
|
from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
|
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
|
|||||||
@ -2,12 +2,13 @@
|
|||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
|
from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
|
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
|
|||||||
@ -11,6 +11,7 @@ from core.model_manager import ModelInstance, ModelManager
|
|||||||
from core.model_runtime.entities.model_entities import ModelFeature, ModelType
|
from core.model_runtime.entities.model_entities import ModelFeature, ModelType
|
||||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||||
from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
|
from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from core.workflow.entities.base_node_data_entities import BaseNodeData
|
from core.workflow.entities.base_node_data_entities import BaseNodeData
|
||||||
from core.workflow.entities.node_entities import NodeRunResult, NodeType
|
from core.workflow.entities.node_entities import NodeRunResult, NodeType
|
||||||
from core.workflow.entities.variable_pool import VariablePool
|
from core.workflow.entities.variable_pool import VariablePool
|
||||||
@ -21,7 +22,7 @@ from models.dataset import Dataset, Document, DocumentSegment
|
|||||||
from models.workflow import WorkflowNodeExecutionStatus
|
from models.workflow import WorkflowNodeExecutionStatus
|
||||||
|
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
|
|||||||
@ -13,6 +13,7 @@ from flask import current_app
|
|||||||
from sqlalchemy import func
|
from sqlalchemy import func
|
||||||
from sqlalchemy.dialects.postgresql import JSONB
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
|
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from extensions.ext_storage import storage
|
from extensions.ext_storage import storage
|
||||||
from models import StringUUID
|
from models import StringUUID
|
||||||
@ -116,7 +117,7 @@ class Dataset(db.Model):
|
|||||||
@property
|
@property
|
||||||
def retrieval_model_dict(self):
|
def retrieval_model_dict(self):
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
|
|||||||
@ -15,6 +15,7 @@ from core.model_manager import ModelManager
|
|||||||
from core.model_runtime.entities.model_entities import ModelType
|
from core.model_runtime.entities.model_entities import ModelType
|
||||||
from core.rag.datasource.keyword.keyword_factory import Keyword
|
from core.rag.datasource.keyword.keyword_factory import Keyword
|
||||||
from core.rag.models.document import Document as RAGDocument
|
from core.rag.models.document import Document as RAGDocument
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from events.dataset_event import dataset_was_deleted
|
from events.dataset_event import dataset_was_deleted
|
||||||
from events.document_event import document_was_deleted
|
from events.document_event import document_was_deleted
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
@ -602,7 +603,7 @@ class DocumentService:
|
|||||||
dataset.collection_binding_id = dataset_collection_binding.id
|
dataset.collection_binding_id = dataset_collection_binding.id
|
||||||
if not dataset.retrieval_model:
|
if not dataset.retrieval_model:
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
@ -959,7 +960,7 @@ class DocumentService:
|
|||||||
retrieval_model = document_data['retrieval_model']
|
retrieval_model = document_data['retrieval_model']
|
||||||
else:
|
else:
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
|
|||||||
@ -10,12 +10,13 @@ from core.model_runtime.entities.model_entities import ModelType
|
|||||||
from core.rag.datasource.entity.embedding import Embeddings
|
from core.rag.datasource.entity.embedding import Embeddings
|
||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
|
from core.rag.retrieval.retrival_methods import RetrievalMethod
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from models.account import Account
|
from models.account import Account
|
||||||
from models.dataset import Dataset, DatasetQuery, DocumentSegment
|
from models.dataset import Dataset, DatasetQuery, DocumentSegment
|
||||||
|
|
||||||
default_retrieval_model = {
|
default_retrieval_model = {
|
||||||
'search_method': 'semantic_search',
|
'search_method': RetrievalMethod.SEMANTIC_SEARCH,
|
||||||
'reranking_enable': False,
|
'reranking_enable': False,
|
||||||
'reranking_model': {
|
'reranking_model': {
|
||||||
'reranking_provider_name': '',
|
'reranking_provider_name': '',
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user