From cd2944bac60f1d71d4f6b4f969de19e9c40e42cc Mon Sep 17 00:00:00 2001 From: Alexandre Henrique Date: Wed, 31 May 2023 15:40:09 -0300 Subject: [PATCH] added add_extra_fields method to textsplitters --- src/backend/langflow/interface/chains/base.py | 1 - .../interface/document_loaders/base.py | 10 +- .../langflow/interface/text_splitters/base.py | 6 +- .../template/frontend_node/documentloaders.py | 114 ++++++------------ .../template/frontend_node/textsplitters.py | 82 ++++++------- 5 files changed, 83 insertions(+), 130 deletions(-) diff --git a/src/backend/langflow/interface/chains/base.py b/src/backend/langflow/interface/chains/base.py index ad66f07b2..1b95d4118 100644 --- a/src/backend/langflow/interface/chains/base.py +++ b/src/backend/langflow/interface/chains/base.py @@ -10,7 +10,6 @@ from langflow.utils.util import build_template_from_class # Assuming necessary imports for Field, Template, and FrontendNode classes - class ChainCreator(LangChainTypeCreator): type_name: str = "chains" diff --git a/src/backend/langflow/interface/document_loaders/base.py b/src/backend/langflow/interface/document_loaders/base.py index 0a0efbb71..5219fbd13 100644 --- a/src/backend/langflow/interface/document_loaders/base.py +++ b/src/backend/langflow/interface/document_loaders/base.py @@ -1,30 +1,28 @@ from typing import Dict, List, Optional, Type from langflow.interface.base import LangChainTypeCreator -from langflow.template.field.base import TemplateField from langflow.template.frontend_node.documentloaders import DocumentLoaderFrontNode from langflow.interface.custom_lists import documentloaders_type_to_cls_dict from langflow.settings import settings from langflow.utils.logger import logger from langflow.utils.util import build_template_from_class + class DocumentLoaderCreator(LangChainTypeCreator): type_name: str = "documentloaders" @property - def frontend_node_class(self) -> Type[DocumentLoaderFrontNode]: + def frontend_node_class(self) -> Type[DocumentLoaderFrontNode]: return DocumentLoaderFrontNode @property def type_to_loader_dict(self) -> Dict: return documentloaders_type_to_cls_dict - + def get_signature(self, name: str) -> Optional[Dict]: """Get the signature of a document loader.""" try: - return build_template_from_class( - name, documentloaders_type_to_cls_dict - ) + return build_template_from_class(name, documentloaders_type_to_cls_dict) except ValueError as exc: raise ValueError(f"Documment Loader {name} not found") from exc except AttributeError as exc: diff --git a/src/backend/langflow/interface/text_splitters/base.py b/src/backend/langflow/interface/text_splitters/base.py index 11dfaca88..203f30086 100644 --- a/src/backend/langflow/interface/text_splitters/base.py +++ b/src/backend/langflow/interface/text_splitters/base.py @@ -1,7 +1,7 @@ from typing import Dict, List, Optional, Type from langflow.interface.base import LangChainTypeCreator -from langflow.template.frontend_node.textsplitters import VectorStoreFrontendNode +from langflow.template.frontend_node.textsplitters import TextSplittersFrontendNode from langflow.interface.custom_lists import textsplitter_type_to_cls_dict from langflow.settings import settings from langflow.utils.logger import logger @@ -12,8 +12,8 @@ class TextSplitterCreator(LangChainTypeCreator): type_name: str = "textsplitters" @property - def frontend_node_class(self) -> Type[VectorStoreFrontendNode]: - return VectorStoreFrontendNode + def frontend_node_class(self) -> Type[TextSplittersFrontendNode]: + return TextSplittersFrontendNode @property def type_to_loader_dict(self) -> Dict: diff --git a/src/backend/langflow/template/frontend_node/documentloaders.py b/src/backend/langflow/template/frontend_node/documentloaders.py index 8d924f0fe..48c07a188 100644 --- a/src/backend/langflow/template/frontend_node/documentloaders.py +++ b/src/backend/langflow/template/frontend_node/documentloaders.py @@ -1,49 +1,32 @@ -from typing import Dict, List, Optional, Type - from langflow.template.field.base import TemplateField from langflow.template.frontend_node.base import FrontendNode -class DocumentLoaderFrontNode(FrontendNode): +class DocumentLoaderFrontNode(FrontendNode): @staticmethod - def build_template(suffixes: list, fileTypes: list, name: str = "file_path" - ) -> TemplateField: - """Build a template field for a document loader.""" - return TemplateField( - field_type="file", - required=True, - show=True, - name=name, - value="", - suffixes=suffixes, - fileTypes=fileTypes, - ) - + def build_template( + suffixes: list, fileTypes: list, name: str = "file_path" + ) -> TemplateField: + """Build a template field for a document loader.""" + return TemplateField( + field_type="file", + required=True, + show=True, + name=name, + value="", + suffixes=suffixes, + fileTypes=fileTypes, + ) + file_path_templates = { - "AirbyteJSONLoader": build_template( - suffixes=[".json"], fileTypes=["json"] - ), - "CoNLLULoader": build_template( - suffixes=[".csv"], fileTypes=["csv"] - ), - "CSVLoader": build_template( - suffixes=[".csv"], fileTypes=["csv"] - ), - "UnstructuredEmailLoader": build_template( - suffixes=[".eml"], fileTypes=["eml"] - ), - "EverNoteLoader": build_template( - suffixes=[".xml"], fileTypes=["xml"] - ), - "FacebookChatLoader": build_template( - suffixes=[".json"], fileTypes=["json"] - ), - "GutenbergLoader": build_template( - suffixes=[".txt"], fileTypes=["txt"] - ), - "BSHTMLLoader": build_template( - suffixes=[".html"], fileTypes=["html"] - ), + "AirbyteJSONLoader": build_template(suffixes=[".json"], fileTypes=["json"]), + "CoNLLULoader": build_template(suffixes=[".csv"], fileTypes=["csv"]), + "CSVLoader": build_template(suffixes=[".csv"], fileTypes=["csv"]), + "UnstructuredEmailLoader": build_template(suffixes=[".eml"], fileTypes=["eml"]), + "EverNoteLoader": build_template(suffixes=[".xml"], fileTypes=["xml"]), + "FacebookChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]), + "GutenbergLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]), + "BSHTMLLoader": build_template(suffixes=[".html"], fileTypes=["html"]), "UnstructuredHTMLLoader": build_template( suffixes=[".html"], fileTypes=["html"] ), @@ -54,28 +37,21 @@ class DocumentLoaderFrontNode(FrontendNode): "UnstructuredMarkdownLoader": build_template( suffixes=[".md"], fileTypes=["md"] ), - "PyPDFLoader": build_template( - suffixes=[".pdf"], fileTypes=["pdf"] - ), + "PyPDFLoader": build_template(suffixes=[".pdf"], fileTypes=["pdf"]), "UnstructuredPowerPointLoader": build_template( suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"] ), - "SRTLoader": build_template( - suffixes=[".srt"], fileTypes=["srt"] - ), - "TelegramChatLoader": build_template( - suffixes=[".json"], fileTypes=["json"] - ), - "TextLoader": build_template( - suffixes=[".txt"], fileTypes=["txt"] - ), + "SRTLoader": build_template(suffixes=[".srt"], fileTypes=["srt"]), + "TelegramChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]), + "TextLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]), "UnstructuredWordDocumentLoader": build_template( suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"] ), } - - def add_extra_fields(self) -> None: - if self.template.type_name in self.file_path_templates: + + def add_extra_fields(self) -> None: + name = None + if self.template.type_name in self.file_path_templates: self.template.add_field(self.file_path_templates[self.template.type_name]) elif self.template.type_name in { "WebBaseLoader", @@ -85,37 +61,19 @@ class DocumentLoaderFrontNode(FrontendNode): "IFixitLoader", "IMSDbLoader", }: - self.template.add_field( - TemplateField( - field_type="str", - required=True, - show=True, - name="web_path", - value="", - display_name="Web Page", - ) - ) + name = "web_path" elif self.template.type_name in {"GitbookLoader"}: - self.template.add_field( - TemplateField( - field_type="str", - required=True, - show=True, - name="web_page", - value="", - display_name="Web Page", - ) - ) + name = "web_page" elif self.template.type_name in {"ReadTheDocsLoader"}: + name = "path" + if name: self.template.add_field( TemplateField( field_type="str", required=True, show=True, - name="path", + name=name, value="", display_name="Web Page", ) ) - - \ No newline at end of file diff --git a/src/backend/langflow/template/frontend_node/textsplitters.py b/src/backend/langflow/template/frontend_node/textsplitters.py index 49d7d67f3..f0bed10b0 100644 --- a/src/backend/langflow/template/frontend_node/textsplitters.py +++ b/src/backend/langflow/template/frontend_node/textsplitters.py @@ -1,46 +1,44 @@ -from typing import Dict, List, Optional, Type - from langflow.template.field.base import TemplateField from langflow.template.frontend_node.base import FrontendNode -class VectorStoreFrontendNode(FrontendNode): - + +class TextSplittersFrontendNode(FrontendNode): def add_extra_fields(self) -> None: - pass - - """ - signature["template"]["documents"] = { - "type": "BaseLoader", - "required": True, - "show": True, - "name": "documents", - } - - signature["template"]["separator"] = { - "type": "str", - "required": True, - "show": True, - "value": ".", - "name": "separator", - "display_name": "Separator", - } - - signature["template"]["chunk_size"] = { - "type": "int", - "required": True, - "show": True, - "value": 4000, - "name": "chunk_size", - "display_name": "Chunk Size", - } - - signature["template"]["chunk_overlap"] = { - "type": "int", - "required": True, - "show": True, - "value": 200, - "name": "chunk_overlap", - "display_name": "Chunk Overlap", - } - """ - \ No newline at end of file + self.template.add_field( + TemplateField( + field_type="BaseLoader", + required=True, + show=True, + name="documents", + ) + ) + self.template.add_field( + TemplateField( + field_type="str", + required=True, + show=True, + value=".", + name="separator", + display_name="Separator", + ) + ) + self.template.add_field( + TemplateField( + field_type="int", + required=True, + show=True, + value=1000, + name="chunk_size", + display_name="Chunk Size", + ) + ) + self.template.add_field( + TemplateField( + field_type="int", + required=True, + show=True, + value=200, + name="chunk_overlap", + display_name="Chunk Overlap", + ) + )