From 796ce298f6ba37d6fed8b85755b70c65956eb99d Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Thu, 22 Jun 2023 13:47:43 -0300 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat(documentloaders.py):=20add=20m?= =?UTF-8?q?etadata=20field=20to=20document=20loader=20template=20The=20fun?= =?UTF-8?q?ction=20build=5Ftemplate=20was=20renamed=20to=20build=5Ffile=5F?= =?UTF-8?q?field=20to=20improve=20naming=20consistency.=20A=20metadata=20f?= =?UTF-8?q?ield=20of=20type=20dict=20was=20added=20to=20the=20document=20l?= =?UTF-8?q?oader=20template=20to=20allow=20for=20additional=20information?= =?UTF-8?q?=20to=20be=20passed=20along=20with=20the=20document.=20The=20fo?= =?UTF-8?q?rmat=5Ffield=20method=20was=20updated=20to=20show=20the=20metad?= =?UTF-8?q?ata=20field=20when=20it=20is=20present.=20=F0=9F=94=A8=20refact?= =?UTF-8?q?or(documentloaders.py):=20rename=20build=5Ftemplate=20function?= =?UTF-8?q?=20to=20build=5Ffile=5Ffield=20for=20better=20semantics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../template/frontend_node/documentloaders.py | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/src/backend/langflow/template/frontend_node/documentloaders.py b/src/backend/langflow/template/frontend_node/documentloaders.py index 59909d5d9..42ed0f601 100644 --- a/src/backend/langflow/template/frontend_node/documentloaders.py +++ b/src/backend/langflow/template/frontend_node/documentloaders.py @@ -2,7 +2,7 @@ from langflow.template.field.base import TemplateField from langflow.template.frontend_node.base import FrontendNode -def build_template( +def build_file_field( suffixes: list, fileTypes: list, name: str = "file_path" ) -> TemplateField: """Build a template field for a document loader.""" @@ -19,32 +19,34 @@ def build_template( class DocumentLoaderFrontNode(FrontendNode): file_path_templates = { - "AirbyteJSONLoader": build_template(suffixes=[".json"], fileTypes=["json"]), - "CoNLLULoader": build_template(suffixes=[".csv"], fileTypes=["csv"]), - "CSVLoader": build_template(suffixes=[".csv"], fileTypes=["csv"]), - "UnstructuredEmailLoader": build_template(suffixes=[".eml"], fileTypes=["eml"]), - "EverNoteLoader": build_template(suffixes=[".xml"], fileTypes=["xml"]), - "FacebookChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]), - "GutenbergLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]), - "BSHTMLLoader": build_template(suffixes=[".html"], fileTypes=["html"]), - "UnstructuredHTMLLoader": build_template( + "AirbyteJSONLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]), + "CoNLLULoader": build_file_field(suffixes=[".csv"], fileTypes=["csv"]), + "CSVLoader": build_file_field(suffixes=[".csv"], fileTypes=["csv"]), + "UnstructuredEmailLoader": build_file_field( + suffixes=[".eml"], fileTypes=["eml"] + ), + "EverNoteLoader": build_file_field(suffixes=[".xml"], fileTypes=["xml"]), + "FacebookChatLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]), + "GutenbergLoader": build_file_field(suffixes=[".txt"], fileTypes=["txt"]), + "BSHTMLLoader": build_file_field(suffixes=[".html"], fileTypes=["html"]), + "UnstructuredHTMLLoader": build_file_field( suffixes=[".html"], fileTypes=["html"] ), - "UnstructuredImageLoader": build_template( + "UnstructuredImageLoader": build_file_field( suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"], fileTypes=["jpg", "jpeg", "png", "gif", "bmp"], ), - "UnstructuredMarkdownLoader": build_template( + "UnstructuredMarkdownLoader": build_file_field( suffixes=[".md"], fileTypes=["md"] ), - "PyPDFLoader": build_template(suffixes=[".pdf"], fileTypes=["pdf"]), - "UnstructuredPowerPointLoader": build_template( + "PyPDFLoader": build_file_field(suffixes=[".pdf"], fileTypes=["pdf"]), + "UnstructuredPowerPointLoader": build_file_field( suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"] ), - "SRTLoader": build_template(suffixes=[".srt"], fileTypes=["srt"]), - "TelegramChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]), - "TextLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]), - "UnstructuredWordDocumentLoader": build_template( + "SRTLoader": build_file_field(suffixes=[".srt"], fileTypes=["srt"]), + "TelegramChatLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]), + "TextLoader": build_file_field(suffixes=[".txt"], fileTypes=["txt"]), + "UnstructuredWordDocumentLoader": build_file_field( suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"] ), } @@ -90,3 +92,22 @@ class DocumentLoaderFrontNode(FrontendNode): display_name="glob", ) ) + # add a metadata field of type dict + self.template.add_field( + TemplateField( + field_type="code", + required=True, + show=True, + name="metadata", + value="{}", + display_name="Metadata", + multiline=False, + ) + ) + + @staticmethod + def format_field(field: TemplateField, name: str | None = None) -> None: + FrontendNode.format_field(field, name) + if field.name == "metadata": + field.show = True + field.advanced = False