update text spliter
This commit is contained in:
parent
9ca453f7f7
commit
495163d5b3
@ -1467,7 +1467,9 @@ class SegmentService:
|
||||
if dataset.indexing_technique == "high_quality" and embedding_model:
|
||||
# calc embedding use tokens
|
||||
if document.doc_form == "qa_model":
|
||||
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment_item["answer"]])[0]
|
||||
tokens = embedding_model.get_text_embedding_num_tokens(
|
||||
texts=[content + segment_item["answer"]]
|
||||
)[0]
|
||||
else:
|
||||
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]
|
||||
segment_document = DocumentSegment(
|
||||
|
||||
@ -59,7 +59,9 @@ def batch_create_segment_to_index_task(
|
||||
)
|
||||
word_count_change = 0
|
||||
if embedding_model:
|
||||
tokens_list = embedding_model.get_text_embedding_num_tokens(texts=[segment["content"] for segment in content])
|
||||
tokens_list = embedding_model.get_text_embedding_num_tokens(
|
||||
texts=[segment["content"] for segment in content]
|
||||
)
|
||||
else:
|
||||
tokens_list = [0] * len(content)
|
||||
for segment, tokens in zip(content, tokens_list):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user