From 297b2d0ac986a4b177ab62d4fb6b08b9a9f231e2 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Fri, 27 Sep 2024 10:29:30 +0800 Subject: [PATCH] force eml file to be parsed by EMAIL (#2615) ### What problem does this PR solve? #2613 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/dataset_api.py | 2 ++ api/apps/document_app.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/api/apps/dataset_api.py b/api/apps/dataset_api.py index d00c52bb..f8e2d193 100644 --- a/api/apps/dataset_api.py +++ b/api/apps/dataset_api.py @@ -381,6 +381,8 @@ def upload_documents(dataset_id): doc["parser_id"] = ParserType.AUDIO.value if re.search(r"\.(ppt|pptx|pages)$", filename): doc["parser_id"] = ParserType.PRESENTATION.value + if re.search(r"\.(eml)$", filename): + doc["parser_id"] = ParserType.EMAIL.value DocumentService.insert(doc) FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id) diff --git a/api/apps/document_app.py b/api/apps/document_app.py index b33bff4c..0f69dc5e 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -139,6 +139,8 @@ def web_crawl(): doc["parser_id"] = ParserType.AUDIO.value if re.search(r"\.(ppt|pptx|pages)$", filename): doc["parser_id"] = ParserType.PRESENTATION.value + if re.search(r"\.(eml)$", filename): + doc["parser_id"] = ParserType.EMAIL.value DocumentService.insert(doc) FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id) except Exception as e: