From 76cd23eecff3fef95c9f103edd300527188d19d4 Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Tue, 24 Dec 2024 10:49:28 +0800
Subject: [PATCH] Catch the exception while parsing pptx. (#4202)

### What problem does this PR solve?

#4189

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 deepdoc/parser/ppt_parser.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/deepdoc/parser/ppt_parser.py b/deepdoc/parser/ppt_parser.py
index b2a08b11..eca952ba 100644
--- a/deepdoc/parser/ppt_parser.py
+++ b/deepdoc/parser/ppt_parser.py
@@ -10,7 +10,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
+import logging
 from io import BytesIO
 from pptx import Presentation
 
@@ -53,9 +53,12 @@ class RAGFlowPptParser(object):
             texts = []
             for shape in sorted(
                     slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
-                txt = self.__extract(shape)
-                if txt:
-                    texts.append(txt)
+                try:
+                    txt = self.__extract(shape)
+                    if txt:
+                        texts.append(txt)
+                except Exception as e:
+                    logging.exception(e)
             txts.append("\n".join(texts))
 
         return txts