Support extracting images and tables from docx (#4619)

This commit is contained in:
Jyong 2024-05-23 18:05:23 +08:00 committed by GitHub
commit 233c4150d1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 163 additions and 23 deletions

View file

@@ -99,9 +99,9 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
float('inf')))
for segment in sorted_segments:
if segment.answer:
document_context_list.append(f'question:{segment.content} answer:{segment.answer}')
document_context_list.append(f'question:{segment.get_sign_content()} answer:{segment.answer}')
else:
document_context_list.append(segment.content)
document_context_list.append(segment.get_sign_content())
if self.return_resource:
context_list = []
resource_number = 1

View file

@@ -105,9 +105,9 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool):
float('inf')))
for segment in sorted_segments:
if segment.answer:
document_context_list.append(f'question:{segment.content} answer:{segment.answer}')
document_context_list.append(f'question:{segment.get_sign_content()} answer:{segment.answer}')
else:
document_context_list.append(segment.content)
document_context_list.append(segment.get_sign_content())
if self.return_resource:
context_list = []
resource_number = 1