feat:api Add support for extracting EPUB files in ExtractProcessor (#3254)

Co-authored-by: crazywoola <427733928@qq.com>
This commit is contained in:
LiuVaayne 2024-04-12 11:25:02 +08:00 committed by GitHub
commit b00466f025
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 44 additions and 2 deletions

View file

@@ -22,7 +22,7 @@ IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
-'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
+'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml', 'epub']
PREVIEW_WORDS_LIMIT = 3000