Delete document loader components

2024-01-31 15:50:24 -03:00 · 2024-01-31 15:50:24 -03:00 · cbf80c9fcd
commit cbf80c9fcd
parent c54c7246b6
20 changed files with 0 additions and 635 deletions
--- a/src/backend/langflow/components/documentloaders/AZLyricsLoader.py
+++ b/src/backend/langflow/components/documentloaders/AZLyricsLoader.py
@@ -1,26 +0,0 @@
-from langflow import CustomComponent
-from langflow.field_typing import Document
-from typing import List, Optional, Dict
-from langchain_community.document_loaders.azlyrics import AZLyricsLoader
-
-
-class AZLyricsLoaderComponent(CustomComponent):
-    display_name = "AZLyricsLoader"
-    description = "Load `AZLyrics` webpages."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/azlyrics"
-
-    def build_config(self):
-        return {
-            "metadata": {"display_name": "Metadata", "field_type": "dict", "value": {}, "show": True},
-            "web_path": {"display_name": "Web Page", "type": "str", "required": True, "show": True},
-        }
-
-    def build(self, metadata: Optional[Dict] = None, web_path: str = "") -> List[Document]:
-        documents = AZLyricsLoader(web_path=web_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py
+++ b/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py
@@ -1,37 +0,0 @@
-from langflow import CustomComponent
-from langflow.field_typing import Document
-from typing import List, Optional, Dict
-from langchain_community.document_loaders.airbyte_json import AirbyteJSONLoader
-
-
-class AirbyteJSONLoaderComponent(CustomComponent):
-    display_name = "AirbyteJSONLoader"
-    description = "Load local `Airbyte` json files."
-    documentation = (
-        "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/airbyte_json"
-    )
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "file_types": [".json"],
-                "required": True,
-                "field_type": "file",
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "field_type": "dict",
-                "required": False,
-            },
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
-        documents = AirbyteJSONLoader(file_path=file_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/BSHTMLLoader.py
+++ b/src/backend/langflow/components/documentloaders/BSHTMLLoader.py
@@ -1,34 +0,0 @@
-from langflow import CustomComponent
-from langflow.field_typing import Document
-from typing import Optional, Dict
-
-
-class BSHTMLLoaderComponent(CustomComponent):
-    display_name = "BSHTMLLoader"
-    description = "Load `HTML` files and parse them with `beautiful soup`."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/html"
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "show": True,
-                "type": "file",
-                "suffixes": [".html"],
-                "file_types": ["html"],
-                "field_type": "file",
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "required": False,
-                "show": True,
-                "field_type": "dict",
-            },
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
-        # Assuming there is a class or function named BSHTMLLoader that takes a file path and optional metadata
-        # and returns a Document object after parsing HTML. Since the actual implementation of BSHTMLLoader is not provided,
-        # this is a placeholder and should be replaced with the actual logic.
-        raise NotImplementedError("The BSHTMLLoader function or class needs to be implemented.")
--- a/src/backend/langflow/components/documentloaders/CSVLoader.py
+++ b/src/backend/langflow/components/documentloaders/CSVLoader.py
@@ -1,34 +0,0 @@
-from langflow import CustomComponent
-from typing import List
-from langchain_community.document_loaders.csv_loader import CSVLoader
-from langchain.docstore.document import Document
-
-
-class CSVLoaderComponent(CustomComponent):
-    display_name = "CSVLoader"
-    description = "Load a `CSV` file into a list of Documents."
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "suffixes": [".csv"],
-                "file_types": ["csv"],
-                "field_type": "file",
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "required": False,
-            },
-        }
-
-    def build(self, file_path: str, metadata: dict) -> List[Document]:
-        documents = CSVLoader(file_path=file_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/CoNLLULoader.py
+++ b/src/backend/langflow/components/documentloaders/CoNLLULoader.py
@@ -1,36 +0,0 @@
-from typing import List
-from langflow import CustomComponent
-from langchain.docstore.document import Document
-from langchain_community.document_loaders.conllu import CoNLLULoader
-
-
-class CoNLLULoaderComponent(CustomComponent):
-    display_name = "CoNLLULoader"
-    description = "Load `CoNLL-U` files."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/conll-u"
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "suffixes": [".conllu"],
-                "file_types": ["conllu"],
-                "field_type": "file",
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "field_type": "dict",
-                "required": False,
-            },
-        }
-
-    def build(self, file_path: str, metadata: dict) -> List[Document]:
-        documents = CoNLLULoader(file_path=file_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py
+++ b/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py
@@ -1,28 +0,0 @@
-from langflow import CustomComponent
-from langchain.docstore.document import Document
-from typing import Optional, List
-from langchain_community.document_loaders.college_confidential import CollegeConfidentialLoader
-
-
-class CollegeConfidentialLoaderComponent(CustomComponent):
-    display_name = "CollegeConfidentialLoader"
-    description = "Load `College Confidential` webpages."
-    documentation = (
-        "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/college_confidential"
-    )
-
-    def build_config(self):
-        return {
-            "metadata": {"display_name": "Metadata", "values": {}},
-            "web_path": {"display_name": "Web Page", "required": True},
-        }
-
-    def build(self, web_path: str, metadata: Optional[dict] = {}) -> List[Document]:
-        documents = CollegeConfidentialLoader(web_path=web_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/EverNoteLoader.py
+++ b/src/backend/langflow/components/documentloaders/EverNoteLoader.py
@@ -1,38 +0,0 @@
-from langflow import CustomComponent
-from langflow.field_typing import Document
-from typing import List, Optional, Dict
-from langchain_community.document_loaders.evernote import EverNoteLoader
-
-
-class EverNoteLoaderComponent(CustomComponent):
-    display_name = "EverNoteLoader"
-    description = "Load from `EverNote`."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/evernote"
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "show": True,
-                "type": "file",
-                "file_types": [".xml"],
-                "field_type": "file",
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "required": False,
-                "show": True,
-                "field_type": "dict",
-            },
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
-        documents = EverNoteLoader(file_path=file_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/FacebookChatLoader.py
+++ b/src/backend/langflow/components/documentloaders/FacebookChatLoader.py
@@ -1,37 +0,0 @@
-from langflow import CustomComponent
-from langchain.docstore.document import Document
-from typing import List, Optional, Dict
-from langchain_community.document_loaders.facebook_chat import FacebookChatLoader
-
-
-class FacebookChatLoaderComponent(CustomComponent):
-    display_name = "FacebookChatLoader"
-    description = "Load `Facebook Chat` messages directory dump."
-    documentation = (
-        "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/facebook_chat"
-    )
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "file_types": [".json"],
-                "field_type": "file",
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "required": False,
-                "field_type": "dict",
-            },
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
-        documents = FacebookChatLoader(path=file_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/GitbookLoader.py
+++ b/src/backend/langflow/components/documentloaders/GitbookLoader.py
@@ -1,32 +0,0 @@
-from langflow import CustomComponent
-from langflow.field_typing import Document
-from typing import List, Optional, Dict
-from langchain_community.document_loaders.gitbook import GitbookLoader
-
-
-class GitbookLoaderComponent(CustomComponent):
-    display_name = "GitbookLoader"
-    description = "Load `GitBook` data."
-
-    def build_config(self):
-        return {
-            "metadata": {
-                "display_name": "Metadata",
-                "field_type": "dict",
-                "value": {},
-            },
-            "web_page": {
-                "display_name": "Web Page",
-                "required": True,
-            },
-        }
-
-    def build(self, metadata: Optional[Dict] = None, web_page: str = "") -> List[Document]:
-        documents = GitbookLoader(web_page=web_page).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/HNLoader.py
+++ b/src/backend/langflow/components/documentloaders/HNLoader.py
@@ -1,29 +0,0 @@
-from langflow import CustomComponent
-from typing import List, Optional, Dict
-from langchain_community.document_loaders.hn import HNLoader
-from langflow.field_typing import Document
-
-
-class HNLoaderComponent(CustomComponent):
-    display_name = "HNLoader"
-    description = "Load `Hacker News` data."
-
-    def build_config(self):
-        return {
-            "metadata": {"display_name": "Metadata", "value": {}, "required": False, "field_type": "dict"},
-            "web_path": {"display_name": "Web Page", "required": True},
-        }
-
-    def build(
-        self,
-        web_path: str,
-        metadata: Optional[Dict] = None,
-    ) -> List[Document]:
-        documents = HNLoader(web_path=web_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/IFixitLoader.py
+++ b/src/backend/langflow/components/documentloaders/IFixitLoader.py
@@ -1,32 +0,0 @@
-from typing import Dict, List, Optional
-
-from langchain_community.document_loaders.ifixit import IFixitLoader
-from langflow import CustomComponent
-from langflow.field_typing import Document
-
-
-class IFixitLoaderComponent(CustomComponent):
-    display_name = "IFixitLoader"
-    description = "Load `iFixit` repair guides, device wikis and answers."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/ifixit"
-
-    def build_config(self):
-        return {
-            "metadata": {"display_name": "Metadata", "type": "dict", "default": {}},
-            "web_path": {"display_name": "Web Page", "type": "str"},
-        }
-
-    def build(self, web_path: str, metadata: Optional[Dict] = None) -> List[Document]:
-        # Assuming IFixitLoader is the correct class name from the langchain library,
-        # and it has a load method that returns a Document object.
-        if metadata is None:
-            metadata = {}
-
-        docs = IFixitLoader(web_path=web_path).load()
-
-        if metadata:
-            for doc in docs:
-                if doc.metadata is None:
-                    doc.metadata = {}
-                doc.metadata.update(metadata)
-        return docs
--- a/src/backend/langflow/components/documentloaders/IMSDbLoader.py
+++ b/src/backend/langflow/components/documentloaders/IMSDbLoader.py
@@ -1,30 +0,0 @@
-from langflow import CustomComponent
-from langflow.field_typing import Document
-from langchain_community.document_loaders.imsdb import IMSDbLoader
-
-from typing import Dict, List, Optional
-
-
-class IMSDbLoaderComponent(CustomComponent):
-    display_name = "IMSDbLoader"
-    description = "Load `IMSDb` webpages."
-
-    def build_config(self):
-        return {
-            "metadata": {"display_name": "Metadata", "field_type": "dict"},
-            "web_path": {"display_name": "Web Page", "field_type": "str"},
-        }
-
-    def build(
-        self,
-        metadata: Optional[Dict] = None,
-        web_path: str = "",
-    ) -> List[Document]:
-        documents = IMSDbLoader(web_path=web_path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/PyPDFLoader.py
+++ b/src/backend/langflow/components/documentloaders/PyPDFLoader.py
@@ -1,41 +0,0 @@
-from typing import Dict, List, Optional
-
-from langchain_community.document_loaders.pdf import PyPDFLoader
-from langchain_core.documents import Document
-
-from langflow import CustomComponent
-
-
-class PyPDFLoaderComponent(CustomComponent):
-    display_name = "PyPDFLoader"
-    description = "Load PDF using pypdf into list of documents"
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf"
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "type": "file",
-                "fileTypes": ["pdf"],
-                "show": True,
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "required": False,
-                "type": "dict",
-                "show": True,
-            },
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
-        # Assuming there is a PyPDFLoader class that takes file_path and metadata as parameters
-        # and inherits from BaseLoader
-        docs = PyPDFLoader(file_path=file_path).load()
-
-        if metadata:
-            for doc in docs:
-                if doc.metadata is None:
-                    doc.metadata = {}
-                doc.metadata.update(metadata)
-        return docs
--- a/src/backend/langflow/components/documentloaders/ReadTheDocsLoader.py
+++ b/src/backend/langflow/components/documentloaders/ReadTheDocsLoader.py
@@ -1,29 +0,0 @@
-from langflow import CustomComponent
-from typing import Dict, Optional, List
-from langchain_core.documents import Document
-from langchain_community.document_loaders.readthedocs import ReadTheDocsLoader
-
-
-class ReadTheDocsLoaderComponent(CustomComponent):
-    display_name = "ReadTheDocsLoader"
-    description = "Load `ReadTheDocs` documentation directory."
-
-    def build_config(self):
-        return {
-            "metadata": {"display_name": "Metadata", "default": {}, "field_type": "dict"},
-            "path": {"display_name": "Local directory", "required": True},
-        }
-
-    def build(
-        self,
-        path: str,
-        metadata: Optional[Dict] = None,
-    ) -> List[Document]:
-        documents = ReadTheDocsLoader(path=path).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/SRTLoader.py
+++ b/src/backend/langflow/components/documentloaders/SRTLoader.py
@@ -1,27 +0,0 @@
-from typing import Dict, Optional
-
-from langchain_core.documents import Document
-
-from langflow import CustomComponent
-
-
-class SRTLoaderComponent(CustomComponent):
-    display_name = "SRTLoader"
-    description = "Load `.srt` (subtitle) files."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/subtitle"
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "fileTypes": ["srt"],
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "required": False,
-            },
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
-        return Document(file_path=file_path, metadata=metadata)
--- a/src/backend/langflow/components/documentloaders/SlackDirectoryLoader.py
+++ b/src/backend/langflow/components/documentloaders/SlackDirectoryLoader.py
@@ -1,32 +0,0 @@
-from langflow import CustomComponent
-from typing import Optional, Dict, List
-from langchain_core.documents import Document
-from langchain_community.document_loaders.slack_directory import SlackDirectoryLoader
-
-
-class SlackDirectoryLoaderComponent(CustomComponent):
-    display_name = "SlackDirectoryLoader"
-    description = "Load from a `Slack` directory dump."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/slack"
-
-    def build_config(self):
-        return {
-            "zip_path": {"display_name": "Path to zip file", "field_type": "file", "file_types": [".zip"]},
-            "metadata": {"display_name": "Metadata", "field_type": "dict"},
-            "workspace_url": {"display_name": "Workspace URL"},
-        }
-
-    def build(
-        self,
-        zip_path: str,
-        metadata: Optional[Dict] = None,
-        workspace_url: Optional[str] = None,
-    ) -> List[Document]:
-        documents = SlackDirectoryLoader(zip_path=zip_path, workspace_url=workspace_url).load()
-        if metadata:
-            for document in documents:
-                if not document.metadata:
-                    document.metadata = metadata
-                else:
-                    document.metadata.update(metadata)
-        return documents
--- a/src/backend/langflow/components/documentloaders/TextLoader.py
+++ b/src/backend/langflow/components/documentloaders/TextLoader.py
@@ -1,28 +0,0 @@
-from langflow import CustomComponent
-from langflow.field_typing import Document
-from typing import Optional, Dict
-
-
-class TextLoaderComponent(CustomComponent):
-    display_name = "TextLoader"
-    description = "Load text file."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/"
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "type": "file",
-                "suffixes": [".txt"],
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "required": False,
-                "type": "dict",
-                "default": {},
-            },
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
-        return Document(file_path=file_path, metadata=metadata)
--- a/src/backend/langflow/components/documentloaders/UnstructuredHTMLLoader.py
+++ b/src/backend/langflow/components/documentloaders/UnstructuredHTMLLoader.py
@@ -1,29 +0,0 @@
-from typing import Dict, List, Optional
-
-from langchain import CustomComponent
-from langchain_community.document_loaders import UnstructuredHTMLLoader
-from langchain_core.documents import Document
-
-
-class UnstructuredHTMLLoaderComponent(CustomComponent):
-    display_name = "UnstructuredHTMLLoader"
-    description = "Load `HTML` files using `Unstructured`."
-    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/html"
-
-    def build_config(self):
-        return {
-            "file_path": {"display_name": "File Path", "type": "file", "fileTypes": ["html"]},
-            "metadata": {"display_name": "Metadata"},
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
-        # Assuming the existence of a function or class named UnstructuredHTMLLoader that
-        # loads HTML and creates a Document object; Replace with actual implementation.
-        docs = UnstructuredHTMLLoader(file_path=file_path).load()
-
-        if metadata:
-            for doc in docs:
-                if doc.metadata is None:
-                    doc.metadata = {}
-                doc.metadata.update(metadata)
-        return docs
--- a/src/backend/langflow/components/documentloaders/UnstructuredPowerPointLoader.py
+++ b/src/backend/langflow/components/documentloaders/UnstructuredPowerPointLoader.py
@@ -1,31 +0,0 @@
-from langflow import CustomComponent
-from langchain.document_loaders import Document
-from typing import Optional, Dict
-
-
-class UnstructuredPowerPointLoaderComponent(CustomComponent):
-    display_name = "UnstructuredPowerPointLoader"
-    description = "Load `Microsoft PowerPoint` files using `Unstructured`."
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "type": "file",
-                "fileTypes": ["pptx", "ppt"],
-            },
-            "metadata": {
-                "display_name": "Metadata",
-                "type": "dict",
-            },
-        }
-
-    def build(
-        self,
-        file_path: str,
-        metadata: Optional[Dict] = None,
-    ) -> Document:
-        # Assuming there is a loader class `UnstructuredPowerPointLoader` that takes these parameters
-        # Since the actual loader class is not provided, this is a placeholder for the actual implementation
-        loader_class = self.get_loader_class()  # Placeholder method to obtain the correct loader class
-        return loader_class(file_path=file_path, metadata=metadata)
--- a/src/backend/langflow/components/documentloaders/UnstructuredWordDocumentLoader.py
+++ b/src/backend/langflow/components/documentloaders/UnstructuredWordDocumentLoader.py
@@ -1,25 +0,0 @@
-from langflow import CustomComponent
-from langflow.field_typing import Document
-from typing import Optional, Dict
-
-
-class UnstructuredWordDocumentLoaderComponent(CustomComponent):
-    display_name = "UnstructuredWordDocumentLoader"
-    description = "Load `Microsoft Word` file using `Unstructured`."
-    documentation = (
-        "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/microsoft_word"
-    )
-
-    def build_config(self):
-        return {
-            "file_path": {
-                "display_name": "File Path",
-                "required": True,
-                "type": "file",
-                "suffixes": [".docx", ".doc"],
-            },
-            "metadata": {"display_name": "Metadata", "required": False, "type": "dict"},
-        }
-
-    def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
-        return Document(file_path=file_path, metadata=metadata)