diff --git a/README.md b/README.md
index 86c812fa..8fe306e9 100644
--- a/README.md
+++ b/README.md
@@ -69,11 +69,12 @@ data.
Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io).
## 🔥 Latest Updates
+- 2024-11-01 Adds keyword extraction and related question generation to parsed chunks to improve retrieval accuracy.
- 2024-09-29 Optimizes multi-round conversations.
- 2024-09-13 Adds search mode for knowledge base Q&A.
- 2024-09-09 Adds a medical consultant agent template.
diff --git a/README_ja.md b/README_ja.md
index a5c9e150..bdf887b3 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -47,12 +47,13 @@
デモをお試しください:[https://demo.ragflow.io](https://demo.ragflow.io)。
## 🔥 最新情報
+- 2024-11-01 検索精度を向上させるために、解析されたチャンクにキーワード抽出と関連質問の生成を追加しました。
- 2024-09-29 マルチラウンドダイアログを最適化。
- 2024-09-13 ナレッジベース Q&A の検索モードを追加しました。
- 2024-09-09 エージェントに医療相談テンプレートを追加しました。
diff --git a/README_ko.md b/README_ko.md
index 7d526691..5d6a8b67 100644
--- a/README_ko.md
+++ b/README_ko.md
@@ -49,12 +49,14 @@
데모를 [https://demo.ragflow.io](https://demo.ragflow.io)에서 실행해 보세요.
## 🔥 업데이트
+- 2024-11-01 파싱된 청크에 키워드 추출 및 관련 질문 생성을 추가하여 재현율을 향상시킵니다.
+
- 2024-09-29 다단계 대화를 최적화합니다.
- 2024-09-13 지식베이스 Q&A 검색 모드를 추가합니다.
diff --git a/README_zh.md b/README_zh.md
index 2806d0b4..4cdb1164 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -47,12 +47,13 @@
请登录网址 [https://demo.ragflow.io](https://demo.ragflow.io) 试用 demo。
## 🔥 近期更新
+- 2024-11-01 对解析后的 chunk 加入关键词抽取和相关问题生成以提高召回的准确度。
- 2024-09-29 优化多轮对话.
- 2024-09-13 增加知识库问答搜索模式。
- 2024-09-09 在 Agent 中加入医疗问诊模板。
diff --git a/agent/component/base.py b/agent/component/base.py
index 0ae965a8..1f1fd33d 100644
--- a/agent/component/base.py
+++ b/agent/component/base.py
@@ -36,6 +36,7 @@ class ComponentParamBase(ABC):
def __init__(self):
self.output_var_name = "output"
self.message_history_window_size = 22
+ self.query = []  # explicit input sources; ComponentBase.get_input reads each entry's "value" and "component_id" keys
def set_name(self, name: str):
self._name = name
@@ -436,6 +437,16 @@ class ComponentBase(ABC):
setattr(self._param, self._param.output_var_name, v)
def get_input(self):
+ if self._param.query:
+ outs = []
+ for q in self._param.query:
+ if q["value"]: outs.append(pd.DataFrame([{"content": q["value"]}]))
+ if q["component_id"]: outs.append(self._canvas.get_component(q["component_id"])["obj"].output(allow_partial=False)[1])
+ if outs:
+ df = pd.concat(outs, ignore_index=True)
+ if "content" in df: df = df.drop_duplicates(subset=['content']).reset_index(drop=True)
+ return df
+
upstream_outs = []
reversed_cpnts = []
if len(self._canvas.path) > 1:
diff --git a/agent/component/generate.py b/agent/component/generate.py
index bdc431ea..19fd9159 100644
--- a/agent/component/generate.py
+++ b/agent/component/generate.py
@@ -130,6 +130,7 @@ class Generate(ComponentBase):
msg = self._canvas.get_history(self._param.message_history_window_size)
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
+ if len(msg) < 2: msg.append({"role": "user", "content": ""})
ans = chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf())
if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns:
@@ -149,6 +150,7 @@ class Generate(ComponentBase):
msg = self._canvas.get_history(self._param.message_history_window_size)
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
+ if len(msg) < 2: msg.append({"role": "user", "content": ""})
answer = ""
for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf()):
res = {"content": ans, "reference": []}
diff --git a/agent/component/invoke.py b/agent/component/invoke.py
index d497be93..9b9053f5 100644
--- a/agent/component/invoke.py
+++ b/agent/component/invoke.py
@@ -51,6 +51,9 @@ class Invoke(ComponentBase, ABC):
for para in self._param.variables:
if para.get("component_id"):
cpn = self._canvas.get_component(para["component_id"])["obj"]
+ if cpn.component_name.lower() == "answer":
+ args[para["key"]] = self._canvas.get_history(1)[0]["content"]
+ continue
_, out = cpn.output(allow_partial=False)
args[para["key"]] = "\n".join(out["content"])
else:
diff --git a/agent/templates/HR_callout_zh.json b/agent/templates/HR_callout_zh.json
index 44643de4..b8cdbb43 100644
--- a/agent/templates/HR_callout_zh.json
+++ b/agent/templates/HR_callout_zh.json
@@ -152,7 +152,8 @@
"Generate:ToughLawsCheat",
"Generate:KindCarrotsSit",
"Generate:DirtyToolsTrain",
- "Generate:FluffyPillowsGrow"
+ "Generate:FluffyPillowsGrow",
+ "Generate:ProudEarsWorry"
]
},
"Retrieval:ShaggyRadiosRetire": {
@@ -212,7 +213,9 @@
"top_p": 0.3
}
},
- "downstream": [],
+ "downstream": [
+ "Answer:TwentyMugsDeny"
+ ],
"upstream": [
"categorize:0"
]
@@ -331,9 +334,9 @@
"message_history_window_size": 12,
"parameters": [
{
+ "component_id": "Retrieval:ColdEelsArrive",
"id": "5166a107-e859-4c71-99a2-3a216c775347",
- "key": "jd",
- "component_id": "Retrieval:ColdEelsArrive"
+ "key": "jd"
}
],
"presence_penalty": 0.4,
@@ -1266,9 +1269,9 @@
"parameter": "Precise",
"parameters": [
{
+ "component_id": "Retrieval:ColdEelsArrive",
"id": "5166a107-e859-4c71-99a2-3a216c775347",
- "key": "jd",
- "component_id": "Retrieval:ColdEelsArrive"
+ "key": "jd"
}
],
"presencePenaltyEnabled": true,
@@ -1541,6 +1544,19 @@
"target": "Answer:TwentyMugsDeny",
"targetHandle": "c",
"type": "buttonEdge"
+ },
+ {
+ "type": "buttonEdge",
+ "markerEnd": "logo",
+ "style": {
+ "strokeWidth": 2,
+ "stroke": "rgb(202 197 245)"
+ },
+ "source": "Generate:ProudEarsWorry",
+ "sourceHandle": "b",
+ "target": "Answer:TwentyMugsDeny",
+ "targetHandle": "c",
+ "id": "reactflow__edge-Generate:ProudEarsWorryb-Answer:TwentyMugsDenyc"
}
]
},