Feat/kb index (#20868)

Co-authored-by: twwu <twwu@dify.ai>
This commit is contained in:
Dongyu Li 2025-06-25 17:52:59 +08:00 committed by GitHub
commit 00f0b569cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 80 additions and 85 deletions

View file

@ -534,7 +534,7 @@ class IndexingRunner:
# chunk nodes by chunk size
indexing_start_at = time.perf_counter()
tokens = 0
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
# create keyword index
create_keyword_thread = threading.Thread(
target=self._process_keyword_index,
@ -572,7 +572,7 @@ class IndexingRunner:
for future in futures:
tokens += future.result()
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
create_keyword_thread.join()
indexing_end_at = time.perf_counter()

View file

@ -76,6 +76,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
if dataset.indexing_technique == "high_quality":
vector = Vector(dataset)
vector.create(documents)
with_keywords = False
if with_keywords:
keywords_list = kwargs.get("keywords_list")
keyword = Keyword(dataset)
@ -91,6 +92,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
vector.delete_by_ids(node_ids)
else:
vector.delete()
with_keywords = False
if with_keywords:
keyword = Keyword(dataset)
if node_ids: