refine error log while chunking (#1937)
### What problem does this PR solve?

### Type of change

- [x] Refactoring
This commit is contained in:
parent
d73a75506e
commit
da8802d010
@ -501,7 +501,9 @@ def upload_and_parse():
|
|||||||
"callback": dummy,
|
"callback": dummy,
|
||||||
"parser_config": parser_config,
|
"parser_config": parser_config,
|
||||||
"from_page": 0,
|
"from_page": 0,
|
||||||
"to_page": 100000
|
"to_page": 100000,
|
||||||
|
"tenant_id": kb.tenant_id,
|
||||||
|
"lang": kb.language
|
||||||
}
|
}
|
||||||
threads.append(exe.submit(FACTORY.get(d["parser_id"], naive).chunk, d["name"], blob, **kwargs))
|
threads.append(exe.submit(FACTORY.get(d["parser_id"], naive).chunk, d["name"], blob, **kwargs))
|
||||||
|
|
||||||
|
|||||||
@ -146,27 +146,32 @@ def build(row):
|
|||||||
binary = get_minio_binary(bucket, name)
|
binary = get_minio_binary(bucket, name)
|
||||||
cron_logger.info(
|
cron_logger.info(
|
||||||
"From minio({}) {}/{}".format(timer() - st, row["location"], row["name"]))
|
"From minio({}) {}/{}".format(timer() - st, row["location"], row["name"]))
|
||||||
|
except TimeoutError as e:
|
||||||
|
callback(-1, f"Internal server error: Fetch file from minio timeout. Could you try it again.")
|
||||||
|
cron_logger.error(
|
||||||
|
"Minio {}/{}: Fetch file from minio timeout.".format(row["location"], row["name"]))
|
||||||
|
return
|
||||||
|
except Exception as e:
|
||||||
|
if re.search("(No such file|not found)", str(e)):
|
||||||
|
callback(-1, "Can not find file <%s> from minio. Could you try it again?" % row["name"])
|
||||||
|
else:
|
||||||
|
callback(-1, f"Get file from minio: %s" %
|
||||||
|
str(e).replace("'", ""))
|
||||||
|
traceback.print_exc()
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
cks = chunker.chunk(row["name"], binary=binary, from_page=row["from_page"],
|
cks = chunker.chunk(row["name"], binary=binary, from_page=row["from_page"],
|
||||||
to_page=row["to_page"], lang=row["language"], callback=callback,
|
to_page=row["to_page"], lang=row["language"], callback=callback,
|
||||||
kb_id=row["kb_id"], parser_config=row["parser_config"], tenant_id=row["tenant_id"])
|
kb_id=row["kb_id"], parser_config=row["parser_config"], tenant_id=row["tenant_id"])
|
||||||
cron_logger.info(
|
cron_logger.info(
|
||||||
"Chunkking({}) {}/{}".format(timer() - st, row["location"], row["name"]))
|
"Chunking({}) {}/{}".format(timer() - st, row["location"], row["name"]))
|
||||||
except TimeoutError as e:
|
|
||||||
callback(-1, f"Internal server error: Fetch file timeout. Could you try it again.")
|
|
||||||
cron_logger.error(
|
|
||||||
"Chunkking {}/{}: Fetch file timeout.".format(row["location"], row["name"]))
|
|
||||||
return
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if re.search("(No such file|not found)", str(e)):
|
callback(-1, f"Internal server error while chunking: %s" %
|
||||||
callback(-1, "Can not find file <%s>" % row["name"])
|
|
||||||
else:
|
|
||||||
callback(-1, f"Internal server error: %s" %
|
|
||||||
str(e).replace("'", ""))
|
str(e).replace("'", ""))
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
cron_logger.error(
|
cron_logger.error(
|
||||||
"Chunkking {}/{}: {}".format(row["location"], row["name"], str(e)))
|
"Chunking {}/{}: {}".format(row["location"], row["name"], str(e)))
|
||||||
|
traceback.print_exc()
|
||||||
return
|
return
|
||||||
|
|
||||||
docs = []
|
docs = []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user