diff --git a/src/backend/langflow/config.yaml b/src/backend/langflow/config.yaml index 5e02e2984..a8c1643f4 100644 --- a/src/backend/langflow/config.yaml +++ b/src/backend/langflow/config.yaml @@ -53,7 +53,7 @@ llms: # - AzureOpenAI # - AzureChatOpenAI - ChatOpenAI - - LlamaCpp + - LlamaCpp - CTransformers - Cohere - Anthropic @@ -69,7 +69,7 @@ prompts: - ZeroShotPrompt textsplitters: - CharacterTextSplitter - # - RecursiveCharacterTextSplitter + - RecursiveCharacterTextSplitter # - LatexTextSplitter # - PythonCodeTextSplitter toolkits: diff --git a/src/backend/langflow/interface/document_loaders/base.py b/src/backend/langflow/interface/document_loaders/base.py index cb02ebe28..5219fbd13 100644 --- a/src/backend/langflow/interface/document_loaders/base.py +++ b/src/backend/langflow/interface/document_loaders/base.py @@ -1,30 +1,20 @@ -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Type from langflow.interface.base import LangChainTypeCreator +from langflow.template.frontend_node.documentloaders import DocumentLoaderFrontNode from langflow.interface.custom_lists import documentloaders_type_to_cls_dict from langflow.settings import settings from langflow.utils.logger import logger from langflow.utils.util import build_template_from_class -def build_file_path_template( - suffixes: list, fileTypes: list, name: str = "file_path" -) -> Dict: - """Build a file path template for a document loader.""" - return { - "type": "file", - "required": True, - "show": True, - "name": name, - "value": "", - "suffixes": suffixes, - "fileTypes": fileTypes, - } - - class DocumentLoaderCreator(LangChainTypeCreator): type_name: str = "documentloaders" + @property + def frontend_node_class(self) -> Type[DocumentLoaderFrontNode]: + return DocumentLoaderFrontNode + @property def type_to_loader_dict(self) -> Dict: return documentloaders_type_to_cls_dict @@ -32,106 +22,7 @@ class DocumentLoaderCreator(LangChainTypeCreator): def get_signature(self, name: str) 
-> Optional[Dict]: """Get the signature of a document loader.""" try: - signature = build_template_from_class( - name, documentloaders_type_to_cls_dict - ) - - file_path_templates = { - "AirbyteJSONLoader": build_file_path_template( - suffixes=[".json"], fileTypes=["json"] - ), - "CoNLLULoader": build_file_path_template( - suffixes=[".csv"], fileTypes=["csv"] - ), - "CSVLoader": build_file_path_template( - suffixes=[".csv"], fileTypes=["csv"] - ), - "UnstructuredEmailLoader": build_file_path_template( - suffixes=[".eml"], fileTypes=["eml"] - ), - "EverNoteLoader": build_file_path_template( - suffixes=[".xml"], fileTypes=["xml"] - ), - "FacebookChatLoader": build_file_path_template( - suffixes=[".json"], fileTypes=["json"] - ), - "GutenbergLoader": build_file_path_template( - suffixes=[".txt"], fileTypes=["txt"] - ), - "BSHTMLLoader": build_file_path_template( - suffixes=[".html"], fileTypes=["html"] - ), - "UnstructuredHTMLLoader": build_file_path_template( - suffixes=[".html"], fileTypes=["html"] - ), - "UnstructuredImageLoader": build_file_path_template( - suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"], - fileTypes=["jpg", "jpeg", "png", "gif", "bmp"], - ), - "UnstructuredMarkdownLoader": build_file_path_template( - suffixes=[".md"], fileTypes=["md"] - ), - "PyPDFLoader": build_file_path_template( - suffixes=[".pdf"], fileTypes=["pdf"] - ), - "UnstructuredPowerPointLoader": build_file_path_template( - suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"] - ), - "SRTLoader": build_file_path_template( - suffixes=[".srt"], fileTypes=["srt"] - ), - "TelegramChatLoader": build_file_path_template( - suffixes=[".json"], fileTypes=["json"] - ), - "TextLoader": build_file_path_template( - suffixes=[".txt"], fileTypes=["txt"] - ), - "UnstructuredWordDocumentLoader": build_file_path_template( - suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"] - ), - "SlackDirectoryLoader": build_file_path_template( - suffixes=[".zip"], fileTypes=["zip"] - ), - } - - if name in 
file_path_templates: - signature["template"]["file_path"] = file_path_templates[name] - elif name in { - "WebBaseLoader", - "AZLyricsLoader", - "CollegeConfidentialLoader", - "HNLoader", - "IFixitLoader", - "IMSDbLoader", - }: - signature["template"]["web_path"] = { - "type": "str", - "required": True, - "show": True, - "name": "web_path", - "value": "", - "display_name": "Web Page", - } - elif name in {"GitbookLoader"}: - signature["template"]["web_page"] = { - "type": "str", - "required": True, - "show": True, - "name": "web_page", - "value": "", - "display_name": "Web Page", - } - elif name in {"ReadTheDocsLoader", "NotionDirectoryLoader"}: - signature["template"]["path"] = { - "type": "str", - "required": True, - "show": True, - "name": "path", - "value": "", - "display_name": "Web Page", - } - - return signature + return build_template_from_class(name, documentloaders_type_to_cls_dict) except ValueError as exc: raise ValueError(f"Documment Loader {name} not found") from exc except AttributeError as exc: diff --git a/src/backend/langflow/interface/text_splitters/base.py b/src/backend/langflow/interface/text_splitters/base.py index fbacae4f9..203f30086 100644 --- a/src/backend/langflow/interface/text_splitters/base.py +++ b/src/backend/langflow/interface/text_splitters/base.py @@ -1,6 +1,7 @@ -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Type from langflow.interface.base import LangChainTypeCreator +from langflow.template.frontend_node.textsplitters import TextSplittersFrontendNode from langflow.interface.custom_lists import textsplitter_type_to_cls_dict from langflow.settings import settings from langflow.utils.logger import logger @@ -10,6 +11,10 @@ from langflow.utils.util import build_template_from_class class TextSplitterCreator(LangChainTypeCreator): type_name: str = "textsplitters" + @property + def frontend_node_class(self) -> Type[TextSplittersFrontendNode]: + return TextSplittersFrontendNode + @property def 
type_to_loader_dict(self) -> Dict: return textsplitter_type_to_cls_dict @@ -17,43 +22,7 @@ class TextSplitterCreator(LangChainTypeCreator): def get_signature(self, name: str) -> Optional[Dict]: """Get the signature of a text splitter.""" try: - signature = build_template_from_class(name, textsplitter_type_to_cls_dict) - - signature["template"]["documents"] = { - "type": "BaseLoader", - "required": True, - "show": True, - "name": "documents", - } - - signature["template"]["separator"] = { - "type": "str", - "required": True, - "show": True, - "value": ".", - "name": "separator", - "display_name": "Separator", - } - - signature["template"]["chunk_size"] = { - "type": "int", - "required": True, - "show": True, - "value": 1000, - "name": "chunk_size", - "display_name": "Chunk Size", - } - - signature["template"]["chunk_overlap"] = { - "type": "int", - "required": True, - "show": True, - "value": 200, - "name": "chunk_overlap", - "display_name": "Chunk Overlap", - } - - return signature + return build_template_from_class(name, textsplitter_type_to_cls_dict) except ValueError as exc: raise ValueError(f"Text Splitter {name} not found") from exc except AttributeError as exc: diff --git a/src/backend/langflow/template/frontend_node/__init__.py b/src/backend/langflow/template/frontend_node/__init__.py index 1aa946d41..c36234364 100644 --- a/src/backend/langflow/template/frontend_node/__init__.py +++ b/src/backend/langflow/template/frontend_node/__init__.py @@ -7,6 +7,8 @@ from langflow.template.frontend_node import ( prompts, tools, vectorstores, + documentloaders, + textsplitters, ) __all__ = [ @@ -18,4 +20,6 @@ __all__ = [ "llms", "prompts", "vectorstores", + "documentloaders", + "textsplitters", ] diff --git a/src/backend/langflow/template/frontend_node/documentloaders.py b/src/backend/langflow/template/frontend_node/documentloaders.py new file mode 100644 index 000000000..48c07a188 --- /dev/null +++ b/src/backend/langflow/template/frontend_node/documentloaders.py @@ -0,0 
# --- src/backend/langflow/template/frontend_node/documentloaders.py ---
from langflow.template.field.base import TemplateField
from langflow.template.frontend_node.base import FrontendNode


def _build_file_field(
    suffixes: list, fileTypes: list, name: str = "file_path"
) -> TemplateField:
    """Build the required, visible ``file`` field for a document loader.

    This lives at module level (not as a static method) because it is
    called while the ``DocumentLoaderFrontNode`` class body is still being
    executed. Before Python 3.10 a ``staticmethod`` object is not callable
    from inside its own class body, so calling it there raises
    ``TypeError`` at import time.

    Args:
        suffixes: Accepted file suffixes, including the dot (e.g. ``".csv"``).
        fileTypes: Accepted file types, without the dot (e.g. ``"csv"``).
        name: Name of the template field; defaults to ``"file_path"``.

    Returns:
        A ``TemplateField`` of type ``"file"`` with an empty default value.
    """
    return TemplateField(
        field_type="file",
        required=True,
        show=True,
        name=name,
        value="",
        suffixes=suffixes,
        fileTypes=fileTypes,
    )


# Loader name -> name of the extra string field added for that loader.
# NOTE(review): the pre-refactor ``get_signature`` also handled
# "NotionDirectoryLoader" (field "path") and "SlackDirectoryLoader"
# (a ".zip" file field); confirm that dropping them is intentional.
_PATH_FIELD_BY_LOADER = {
    "WebBaseLoader": "web_path",
    "AZLyricsLoader": "web_path",
    "CollegeConfidentialLoader": "web_path",
    "HNLoader": "web_path",
    "IFixitLoader": "web_path",
    "IMSDbLoader": "web_path",
    "GitbookLoader": "web_page",
    "ReadTheDocsLoader": "path",
}


class DocumentLoaderFrontNode(FrontendNode):
    """Frontend node that adds the input field a document loader needs."""

    @staticmethod
    def build_template(
        suffixes: list, fileTypes: list, name: str = "file_path"
    ) -> TemplateField:
        """Build a template field for a document loader."""
        # Kept for callers that use ``DocumentLoaderFrontNode.build_template``;
        # the class body itself uses the module-level helper directly.
        return _build_file_field(suffixes=suffixes, fileTypes=fileTypes, name=name)

    # File-based loaders mapped to the file field describing what they accept.
    file_path_templates = {
        "AirbyteJSONLoader": _build_file_field(
            suffixes=[".json"], fileTypes=["json"]
        ),
        "CoNLLULoader": _build_file_field(suffixes=[".csv"], fileTypes=["csv"]),
        "CSVLoader": _build_file_field(suffixes=[".csv"], fileTypes=["csv"]),
        "UnstructuredEmailLoader": _build_file_field(
            suffixes=[".eml"], fileTypes=["eml"]
        ),
        "EverNoteLoader": _build_file_field(suffixes=[".xml"], fileTypes=["xml"]),
        "FacebookChatLoader": _build_file_field(
            suffixes=[".json"], fileTypes=["json"]
        ),
        "GutenbergLoader": _build_file_field(suffixes=[".txt"], fileTypes=["txt"]),
        "BSHTMLLoader": _build_file_field(suffixes=[".html"], fileTypes=["html"]),
        "UnstructuredHTMLLoader": _build_file_field(
            suffixes=[".html"], fileTypes=["html"]
        ),
        "UnstructuredImageLoader": _build_file_field(
            suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"],
            fileTypes=["jpg", "jpeg", "png", "gif", "bmp"],
        ),
        "UnstructuredMarkdownLoader": _build_file_field(
            suffixes=[".md"], fileTypes=["md"]
        ),
        "PyPDFLoader": _build_file_field(suffixes=[".pdf"], fileTypes=["pdf"]),
        "UnstructuredPowerPointLoader": _build_file_field(
            suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"]
        ),
        "SRTLoader": _build_file_field(suffixes=[".srt"], fileTypes=["srt"]),
        "TelegramChatLoader": _build_file_field(
            suffixes=[".json"], fileTypes=["json"]
        ),
        "TextLoader": _build_file_field(suffixes=[".txt"], fileTypes=["txt"]),
        "UnstructuredWordDocumentLoader": _build_file_field(
            suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"]
        ),
    }

    def add_extra_fields(self) -> None:
        """Add the loader-specific input field to ``self.template``.

        File-based loaders get their entry from ``file_path_templates``.
        Loaders listed in ``_PATH_FIELD_BY_LOADER`` get a required string
        field with the mapped name. Any other loader gets no extra field.
        """
        loader_name = self.template.type_name
        if loader_name in self.file_path_templates:
            self.template.add_field(self.file_path_templates[loader_name])
            return

        field_name = _PATH_FIELD_BY_LOADER.get(loader_name)
        if field_name:
            self.template.add_field(
                TemplateField(
                    field_type="str",
                    required=True,
                    show=True,
                    name=field_name,
                    value="",
                    # Same label for every string field, as before.
                    display_name="Web Page",
                )
            )


# --- src/backend/langflow/template/frontend_node/textsplitters.py ---
# (imports TemplateField and FrontendNode exactly as the module above does)


class TextSplittersFrontendNode(FrontendNode):
    """Frontend node that adds the common input fields of a text splitter."""

    def add_extra_fields(self) -> None:
        """Add documents, separator and chunking fields to ``self.template``.

        Fields are added in this order: ``documents``, the separator field,
        ``chunk_size`` and ``chunk_overlap``.
        """
        self.template.add_field(
            TemplateField(
                field_type="BaseLoader",
                required=True,
                show=True,
                name="documents",
            )
        )

        # Only RecursiveCharacterTextSplitter uses the plural field name;
        # every other splitter (CharacterTextSplitter included) uses
        # "separator".
        separator_name = (
            "separators"
            if self.template.type_name == "RecursiveCharacterTextSplitter"
            else "separator"
        )
        self.template.add_field(
            TemplateField(
                field_type="str",
                required=True,
                show=True,
                value=".",
                name=separator_name,
                display_name="Separator",
            )
        )

        # Both chunking fields differ only in name, label and default.
        for int_name, int_label, int_default in (
            ("chunk_size", "Chunk Size", 1000),
            ("chunk_overlap", "Chunk Overlap", 200),
        ):
            self.template.add_field(
                TemplateField(
                    field_type="int",
                    required=True,
                    show=True,
                    value=int_default,
                    name=int_name,
                    display_name=int_label,
                )
            )