change chunk.status to chunk.available (#2646)

### What problem does this PR solve?

#1102

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
JobSmithManipulation 2024-09-29 10:13:07 +08:00 committed by GitHub
parent e82e8fde13
commit c103dd2746
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 21 additions and 13 deletions

View File

@ -609,8 +609,8 @@ def set(tenant_id):
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
d["important_kwd"] = req["important_keywords"] d["important_kwd"] = req["important_keywords"]
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"])) d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
if "available_int" in req: if "available" in req:
d["available_int"] = req["available_int"] d["available_int"] = req["available"]
try: try:
tenant_id = DocumentService.get_tenant_id(req["document_id"]) tenant_id = DocumentService.get_tenant_id(req["document_id"])

View File

@ -11,7 +11,7 @@ class Chunk(Base):
self.knowledgebase_id = None self.knowledgebase_id = None
self.document_name = "" self.document_name = ""
self.document_id = "" self.document_id = ""
self.status = "1" self.available = 1
for k in list(res_dict.keys()): for k in list(res_dict.keys()):
if k not in self.__dict__: if k not in self.__dict__:
res_dict.pop(k) res_dict.pop(k)
@ -39,7 +39,7 @@ class Chunk(Base):
"content": self.content, "content": self.content,
"important_keywords": self.important_keywords, "important_keywords": self.important_keywords,
"document_id": self.document_id, "document_id": self.document_id,
"status": self.status, "available": self.available,
}) })
res = res.json() res = res.json()
if res.get("retmsg") == "success": if res.get("retmsg") == "success":

View File

@ -151,14 +151,12 @@ class TestDocument(TestSdk):
name3 = 'westworld.pdf' name3 = 'westworld.pdf'
path = 'test_data/westworld.pdf' path = 'test_data/westworld.pdf'
# Create a document in the dataset using the file path # Create a document in the dataset using the file path
rag.create_document(ds, name=name3, blob=open(path, "rb").read()) rag.create_document(ds, name=name3, blob=open(path, "rb").read())
# Retrieve the document by name # Retrieve the document by name
doc = rag.get_document(name="westworld.pdf") doc = rag.get_document(name="westworld.pdf")
# Initiate asynchronous parsing # Initiate asynchronous parsing
doc.async_parse() doc.async_parse()
@ -279,7 +277,17 @@ class TestDocument(TestSdk):
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list" assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
chunk.content = "ragflow123" chunk.content = "ragflow123"
res = chunk.save() res = chunk.save()
assert res is True, f"Failed to update chunk, error: {res}" assert res is True, f"Failed to update chunk content, error: {res}"
def test_update_chunk_available(self):
    """Check that a chunk's ``available`` flag can be modified and saved.

    Fetches the document named 'story.txt', adds a chunk to it, sets the
    chunk's ``available`` attribute to 0 and asserts that ``save()``
    returns ``True``.
    """
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    # NOTE(review): relies on a 'story.txt' document already existing on the
    # server (presumably created by an earlier test) - confirm ordering.
    doc = rag.get_document(name='story.txt')
    chunk = doc.add_chunk(content="ragflow")
    assert chunk is not None, "Chunk is None"
    assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
    # Flip the flag to 0 so the update request carries a changed value;
    # presumably 0 marks the chunk as unavailable - confirm against the API.
    chunk.available = 0
    res = chunk.save()
    # The message names the field under test, which is `available`
    # (formerly `status`).
    assert res is True, f"Failed to update chunk availability, error: {res}"
def test_retrieval_chunks(self): def test_retrieval_chunks(self):
rag = RAGFlow(API_KEY, HOST_ADDRESS) rag = RAGFlow(API_KEY, HOST_ADDRESS)