diff --git a/src/backend/langflow/template/frontend_node/documentloaders.py b/src/backend/langflow/template/frontend_node/documentloaders.py index 59909d5d9..42ed0f601 100644 --- a/src/backend/langflow/template/frontend_node/documentloaders.py +++ b/src/backend/langflow/template/frontend_node/documentloaders.py @@ -2,7 +2,7 @@ from langflow.template.field.base import TemplateField from langflow.template.frontend_node.base import FrontendNode -def build_template( +def build_file_field( suffixes: list, fileTypes: list, name: str = "file_path" ) -> TemplateField: """Build a template field for a document loader.""" @@ -19,32 +19,34 @@ def build_template( class DocumentLoaderFrontNode(FrontendNode): file_path_templates = { - "AirbyteJSONLoader": build_template(suffixes=[".json"], fileTypes=["json"]), - "CoNLLULoader": build_template(suffixes=[".csv"], fileTypes=["csv"]), - "CSVLoader": build_template(suffixes=[".csv"], fileTypes=["csv"]), - "UnstructuredEmailLoader": build_template(suffixes=[".eml"], fileTypes=["eml"]), - "EverNoteLoader": build_template(suffixes=[".xml"], fileTypes=["xml"]), - "FacebookChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]), - "GutenbergLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]), - "BSHTMLLoader": build_template(suffixes=[".html"], fileTypes=["html"]), - "UnstructuredHTMLLoader": build_template( + "AirbyteJSONLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]), + "CoNLLULoader": build_file_field(suffixes=[".csv"], fileTypes=["csv"]), + "CSVLoader": build_file_field(suffixes=[".csv"], fileTypes=["csv"]), + "UnstructuredEmailLoader": build_file_field( + suffixes=[".eml"], fileTypes=["eml"] + ), + "EverNoteLoader": build_file_field(suffixes=[".xml"], fileTypes=["xml"]), + "FacebookChatLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]), + "GutenbergLoader": build_file_field(suffixes=[".txt"], fileTypes=["txt"]), + "BSHTMLLoader": build_file_field(suffixes=[".html"], fileTypes=["html"]), + "UnstructuredHTMLLoader": build_file_field( suffixes=[".html"], fileTypes=["html"] ), - "UnstructuredImageLoader": build_template( + "UnstructuredImageLoader": build_file_field( suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"], fileTypes=["jpg", "jpeg", "png", "gif", "bmp"], ), - "UnstructuredMarkdownLoader": build_template( + "UnstructuredMarkdownLoader": build_file_field( suffixes=[".md"], fileTypes=["md"] ), - "PyPDFLoader": build_template(suffixes=[".pdf"], fileTypes=["pdf"]), - "UnstructuredPowerPointLoader": build_template( + "PyPDFLoader": build_file_field(suffixes=[".pdf"], fileTypes=["pdf"]), + "UnstructuredPowerPointLoader": build_file_field( suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"] ), - "SRTLoader": build_template(suffixes=[".srt"], fileTypes=["srt"]), - "TelegramChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]), - "TextLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]), - "UnstructuredWordDocumentLoader": build_template( + "SRTLoader": build_file_field(suffixes=[".srt"], fileTypes=["srt"]), + "TelegramChatLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]), + "TextLoader": build_file_field(suffixes=[".txt"], fileTypes=["txt"]), + "UnstructuredWordDocumentLoader": build_file_field( suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"] ), } @@ -90,3 +92,22 @@ class DocumentLoaderFrontNode(FrontendNode): display_name="glob", ) ) + # add a metadata field of type dict + self.template.add_field( + TemplateField( + field_type="code", + required=True, + show=True, + name="metadata", + value="{}", + display_name="Metadata", + multiline=False, + ) + ) + + @staticmethod + def format_field(field: TemplateField, name: str | None = None) -> None: + FrontendNode.format_field(field, name) + if field.name == "metadata": + field.show = True + field.advanced = False