Refactor file loader component
This commit is contained in:
parent
6fea589778
commit
ccfd8e8acd
1 changed files with 6 additions and 118 deletions
|
|
@ -1,119 +1,7 @@
|
|||
from langflow import CustomComponent
|
||||
from langchain.schema import Document
|
||||
from typing import Any, Dict, List
|
||||
|
||||
loaders_info: List[Dict[str, Any]] = [
|
||||
{
|
||||
"loader": "AirbyteJSONLoader",
|
||||
"name": "Airbyte JSON (.jsonl)",
|
||||
"import": "langchain.document_loaders.AirbyteJSONLoader",
|
||||
"defaultFor": ["jsonl"],
|
||||
"allowdTypes": ["jsonl"],
|
||||
},
|
||||
{
|
||||
"loader": "JSONLoader",
|
||||
"name": "JSON (.json)",
|
||||
"import": "langchain.document_loaders.JSONLoader",
|
||||
"defaultFor": ["json"],
|
||||
"allowdTypes": ["json"],
|
||||
},
|
||||
{
|
||||
"loader": "BSHTMLLoader",
|
||||
"name": "BeautifulSoup4 HTML (.html, .htm)",
|
||||
"import": "langchain.document_loaders.BSHTMLLoader",
|
||||
"allowdTypes": ["html", "htm"],
|
||||
},
|
||||
{
|
||||
"loader": "CSVLoader",
|
||||
"name": "CSV (.csv)",
|
||||
"import": "langchain.document_loaders.CSVLoader",
|
||||
"defaultFor": ["csv"],
|
||||
"allowdTypes": ["csv"],
|
||||
},
|
||||
{
|
||||
"loader": "CoNLLULoader",
|
||||
"name": "CoNLL-U (.conllu)",
|
||||
"import": "langchain.document_loaders.CoNLLULoader",
|
||||
"defaultFor": ["conllu"],
|
||||
"allowdTypes": ["conllu"],
|
||||
},
|
||||
{
|
||||
"loader": "EverNoteLoader",
|
||||
"name": "EverNote (.enex)",
|
||||
"import": "langchain.document_loaders.EverNoteLoader",
|
||||
"defaultFor": ["enex"],
|
||||
"allowdTypes": ["enex"],
|
||||
},
|
||||
{
|
||||
"loader": "FacebookChatLoader",
|
||||
"name": "Facebook Chat (.json)",
|
||||
"import": "langchain.document_loaders.FacebookChatLoader",
|
||||
"allowdTypes": ["json"],
|
||||
},
|
||||
{
|
||||
"loader": "OutlookMessageLoader",
|
||||
"name": "Outlook Message (.msg)",
|
||||
"import": "langchain.document_loaders.OutlookMessageLoader",
|
||||
"defaultFor": ["msg"],
|
||||
"allowdTypes": ["msg"],
|
||||
},
|
||||
{
|
||||
"loader": "PyPDFLoader",
|
||||
"name": "PyPDF (.pdf)",
|
||||
"import": "langchain.document_loaders.PyPDFLoader",
|
||||
"defaultFor": ["pdf"],
|
||||
"allowdTypes": ["pdf"],
|
||||
},
|
||||
{
|
||||
"loader": "STRLoader",
|
||||
"name": "Subtitle (.str)",
|
||||
"import": "langchain.document_loaders.STRLoader",
|
||||
"defaultFor": ["str"],
|
||||
"allowdTypes": ["str"],
|
||||
},
|
||||
{
|
||||
"loader": "TextLoader",
|
||||
"name": "Text (.txt)",
|
||||
"import": "langchain.document_loaders.TextLoader",
|
||||
"defaultFor": ["txt"],
|
||||
"allowdTypes": ["txt"],
|
||||
},
|
||||
{
|
||||
"loader": "UnstructuredEmailLoader",
|
||||
"name": "Unstructured Email (.eml)",
|
||||
"import": "langchain.document_loaders.UnstructuredEmailLoader",
|
||||
"defaultFor": ["eml"],
|
||||
"allowdTypes": ["eml"],
|
||||
},
|
||||
{
|
||||
"loader": "UnstructuredHTMLLoader",
|
||||
"name": "Unstructured HTML (.html, .htm)",
|
||||
"import": "langchain.document_loaders.UnstructuredHTMLLoader",
|
||||
"defaultFor": ["html", "htm"],
|
||||
"allowdTypes": ["html", "htm"],
|
||||
},
|
||||
{
|
||||
"loader": "UnstructuredMarkdownLoader",
|
||||
"name": "Unstructured Markdown (.md)",
|
||||
"import": "langchain.document_loaders.UnstructuredMarkdownLoader",
|
||||
"defaultFor": ["md"],
|
||||
"allowdTypes": ["md"],
|
||||
},
|
||||
{
|
||||
"loader": "UnstructuredPowerPointLoader",
|
||||
"name": "Unstructured PowerPoint (.pptx)",
|
||||
"import": "langchain.document_loaders.UnstructuredPowerPointLoader",
|
||||
"defaultFor": ["pptx"],
|
||||
"allowdTypes": ["pptx"],
|
||||
},
|
||||
{
|
||||
"loader": "UnstructuredWordLoader",
|
||||
"name": "Unstructured Word (.docx)",
|
||||
"import": "langchain.document_loaders.UnstructuredWordLoader",
|
||||
"defaultFor": ["docx"],
|
||||
"allowdTypes": ["docx"],
|
||||
},
|
||||
]
|
||||
from langflow import CustomComponent
|
||||
from langflow.utils.constants import LOADERS_INFO
|
||||
|
||||
|
||||
class FileLoaderComponent(CustomComponent):
|
||||
|
|
@ -122,12 +10,12 @@ class FileLoaderComponent(CustomComponent):
|
|||
beta = True
|
||||
|
||||
def build_config(self):
|
||||
loader_options = ["Automatic"] + [loader_info["name"] for loader_info in loaders_info]
|
||||
loader_options = ["Automatic"] + [loader_info["name"] for loader_info in LOADERS_INFO]
|
||||
|
||||
file_types = []
|
||||
suffixes = []
|
||||
|
||||
for loader_info in loaders_info:
|
||||
for loader_info in LOADERS_INFO:
|
||||
if "allowedTypes" in loader_info:
|
||||
file_types.extend(loader_info["allowedTypes"])
|
||||
suffixes.extend([f".{ext}" for ext in loader_info["allowedTypes"]])
|
||||
|
|
@ -189,7 +77,7 @@ class FileLoaderComponent(CustomComponent):
|
|||
|
||||
# Mapeie o nome do loader selecionado para suas informações
|
||||
selected_loader_info = None
|
||||
for loader_info in loaders_info:
|
||||
for loader_info in LOADERS_INFO:
|
||||
if loader_info["name"] == loader:
|
||||
selected_loader_info = loader_info
|
||||
break
|
||||
|
|
@ -200,7 +88,7 @@ class FileLoaderComponent(CustomComponent):
|
|||
if loader == "Automatic":
|
||||
# Determine o loader automaticamente com base na extensão do arquivo
|
||||
default_loader_info = None
|
||||
for info in loaders_info:
|
||||
for info in LOADERS_INFO:
|
||||
if "defaultFor" in info and file_type in info["defaultFor"]:
|
||||
default_loader_info = info
|
||||
break
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue