feat(documentloaders.py): add metadata field to document loader template

The function build_template was renamed to build_file_field to improve naming consistency. A metadata field was added to the document loader template so that additional information can be passed along with the document; it is declared with field_type "code" and defaults to "{}" (an empty dict). The format_field method now marks the field named "metadata" as shown and non-advanced.
🔨 refactor(documentloaders.py): rename build_template function to build_file_field for better semantics
This commit is contained in:
Gabriel Luiz Freitas Almeida 2023-06-22 13:47:43 -03:00
commit 796ce298f6

View file

@ -2,7 +2,7 @@ from langflow.template.field.base import TemplateField
from langflow.template.frontend_node.base import FrontendNode
def build_template(
def build_file_field(
suffixes: list, fileTypes: list, name: str = "file_path"
) -> TemplateField:
"""Build a template field for a document loader."""
@ -19,32 +19,34 @@ def build_template(
class DocumentLoaderFrontNode(FrontendNode):
file_path_templates = {
"AirbyteJSONLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
"CoNLLULoader": build_template(suffixes=[".csv"], fileTypes=["csv"]),
"CSVLoader": build_template(suffixes=[".csv"], fileTypes=["csv"]),
"UnstructuredEmailLoader": build_template(suffixes=[".eml"], fileTypes=["eml"]),
"EverNoteLoader": build_template(suffixes=[".xml"], fileTypes=["xml"]),
"FacebookChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
"GutenbergLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]),
"BSHTMLLoader": build_template(suffixes=[".html"], fileTypes=["html"]),
"UnstructuredHTMLLoader": build_template(
"AirbyteJSONLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]),
"CoNLLULoader": build_file_field(suffixes=[".csv"], fileTypes=["csv"]),
"CSVLoader": build_file_field(suffixes=[".csv"], fileTypes=["csv"]),
"UnstructuredEmailLoader": build_file_field(
suffixes=[".eml"], fileTypes=["eml"]
),
"EverNoteLoader": build_file_field(suffixes=[".xml"], fileTypes=["xml"]),
"FacebookChatLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]),
"GutenbergLoader": build_file_field(suffixes=[".txt"], fileTypes=["txt"]),
"BSHTMLLoader": build_file_field(suffixes=[".html"], fileTypes=["html"]),
"UnstructuredHTMLLoader": build_file_field(
suffixes=[".html"], fileTypes=["html"]
),
"UnstructuredImageLoader": build_template(
"UnstructuredImageLoader": build_file_field(
suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"],
fileTypes=["jpg", "jpeg", "png", "gif", "bmp"],
),
"UnstructuredMarkdownLoader": build_template(
"UnstructuredMarkdownLoader": build_file_field(
suffixes=[".md"], fileTypes=["md"]
),
"PyPDFLoader": build_template(suffixes=[".pdf"], fileTypes=["pdf"]),
"UnstructuredPowerPointLoader": build_template(
"PyPDFLoader": build_file_field(suffixes=[".pdf"], fileTypes=["pdf"]),
"UnstructuredPowerPointLoader": build_file_field(
suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"]
),
"SRTLoader": build_template(suffixes=[".srt"], fileTypes=["srt"]),
"TelegramChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
"TextLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]),
"UnstructuredWordDocumentLoader": build_template(
"SRTLoader": build_file_field(suffixes=[".srt"], fileTypes=["srt"]),
"TelegramChatLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]),
"TextLoader": build_file_field(suffixes=[".txt"], fileTypes=["txt"]),
"UnstructuredWordDocumentLoader": build_file_field(
suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"]
),
}
@ -90,3 +92,22 @@ class DocumentLoaderFrontNode(FrontendNode):
display_name="glob",
)
)
# add a metadata field holding a dict (declared as a "code" field, default "{}")
self.template.add_field(
TemplateField(
field_type="code",
required=True,
show=True,
name="metadata",
value="{}",
display_name="Metadata",
multiline=False,
)
)
@staticmethod
def format_field(field: TemplateField, name: str | None = None) -> None:
    """Format *field*, keeping the ``metadata`` field visible.

    The generic formatting is delegated to ``FrontendNode.format_field``.
    Afterwards, a field whose name is ``"metadata"`` is marked as shown
    and non-advanced; every other field is left as the base formatter
    produced it.

    Args:
        field: The template field to format; it is mutated in place.
        name: Passed through unchanged to ``FrontendNode.format_field``.
    """
    FrontendNode.format_field(field, name)

    # Only the metadata field needs an adjustment after base formatting.
    if field.name != "metadata":
        return

    field.show = True
    field.advanced = False