From cbf80c9fcd89b6d34f34d8643e92b194b3ea0866 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Wed, 31 Jan 2024 15:50:24 -0300 Subject: [PATCH] Delete document loader components --- .../documentloaders/AZLyricsLoader.py | 26 ------------ .../documentloaders/AirbyteJSONLoader.py | 37 ----------------- .../documentloaders/BSHTMLLoader.py | 34 --------------- .../components/documentloaders/CSVLoader.py | 34 --------------- .../documentloaders/CoNLLULoader.py | 36 ---------------- .../CollegeConfidentialLoader.py | 28 ------------- .../documentloaders/EverNoteLoader.py | 38 ----------------- .../documentloaders/FacebookChatLoader.py | 37 ----------------- .../documentloaders/GitbookLoader.py | 32 --------------- .../components/documentloaders/HNLoader.py | 29 ------------- .../documentloaders/IFixitLoader.py | 32 --------------- .../components/documentloaders/IMSDbLoader.py | 30 -------------- .../components/documentloaders/PyPDFLoader.py | 41 ------------------- .../documentloaders/ReadTheDocsLoader.py | 29 ------------- .../components/documentloaders/SRTLoader.py | 27 ------------ .../documentloaders/SlackDirectoryLoader.py | 32 --------------- .../components/documentloaders/TextLoader.py | 28 ------------- .../documentloaders/UnstructuredHTMLLoader.py | 29 ------------- .../UnstructuredPowerPointLoader.py | 31 -------------- .../UnstructuredWordDocumentLoader.py | 25 ----------- 20 files changed, 635 deletions(-) delete mode 100644 src/backend/langflow/components/documentloaders/AZLyricsLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/BSHTMLLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/CSVLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/CoNLLULoader.py delete mode 100644 src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/EverNoteLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/FacebookChatLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/GitbookLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/HNLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/IFixitLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/IMSDbLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/PyPDFLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/ReadTheDocsLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/SRTLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/SlackDirectoryLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/TextLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/UnstructuredHTMLLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/UnstructuredPowerPointLoader.py delete mode 100644 src/backend/langflow/components/documentloaders/UnstructuredWordDocumentLoader.py diff --git a/src/backend/langflow/components/documentloaders/AZLyricsLoader.py b/src/backend/langflow/components/documentloaders/AZLyricsLoader.py deleted file mode 100644 index 82d507d68..000000000 --- a/src/backend/langflow/components/documentloaders/AZLyricsLoader.py +++ /dev/null @@ -1,26 +0,0 @@ -from langflow import CustomComponent -from langflow.field_typing import Document -from typing import List, Optional, Dict -from langchain_community.document_loaders.azlyrics import AZLyricsLoader - - -class AZLyricsLoaderComponent(CustomComponent): - display_name = "AZLyricsLoader" - description = "Load `AZLyrics` webpages." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/azlyrics" - - def build_config(self): - return { - "metadata": {"display_name": "Metadata", "field_type": "dict", "value": {}, "show": True}, - "web_path": {"display_name": "Web Page", "type": "str", "required": True, "show": True}, - } - - def build(self, metadata: Optional[Dict] = None, web_path: str = "") -> List[Document]: - documents = AZLyricsLoader(web_path=web_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py b/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py deleted file mode 100644 index 8c670a8c0..000000000 --- a/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py +++ /dev/null @@ -1,37 +0,0 @@ -from langflow import CustomComponent -from langflow.field_typing import Document -from typing import List, Optional, Dict -from langchain_community.document_loaders.airbyte_json import AirbyteJSONLoader - - -class AirbyteJSONLoaderComponent(CustomComponent): - display_name = "AirbyteJSONLoader" - description = "Load local `Airbyte` json files." - documentation = ( - "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/airbyte_json" - ) - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "file_types": [".json"], - "required": True, - "field_type": "file", - }, - "metadata": { - "display_name": "Metadata", - "field_type": "dict", - "required": False, - }, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]: - documents = AirbyteJSONLoader(file_path=file_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/BSHTMLLoader.py b/src/backend/langflow/components/documentloaders/BSHTMLLoader.py deleted file mode 100644 index 672e1e24d..000000000 --- a/src/backend/langflow/components/documentloaders/BSHTMLLoader.py +++ /dev/null @@ -1,34 +0,0 @@ -from langflow import CustomComponent -from langflow.field_typing import Document -from typing import Optional, Dict - - -class BSHTMLLoaderComponent(CustomComponent): - display_name = "BSHTMLLoader" - description = "Load `HTML` files and parse them with `beautiful soup`." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/html" - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "show": True, - "type": "file", - "suffixes": [".html"], - "file_types": ["html"], - "field_type": "file", - }, - "metadata": { - "display_name": "Metadata", - "required": False, - "show": True, - "field_type": "dict", - }, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document: - # Assuming there is a class or function named BSHTMLLoader that takes a file path and optional metadata - # and returns a Document object after parsing HTML. Since the actual implementation of BSHTMLLoader is not provided, - # this is a placeholder and should be replaced with the actual logic. - raise NotImplementedError("The BSHTMLLoader function or class needs to be implemented.") diff --git a/src/backend/langflow/components/documentloaders/CSVLoader.py b/src/backend/langflow/components/documentloaders/CSVLoader.py deleted file mode 100644 index ac2b271e9..000000000 --- a/src/backend/langflow/components/documentloaders/CSVLoader.py +++ /dev/null @@ -1,34 +0,0 @@ -from langflow import CustomComponent -from typing import List -from langchain_community.document_loaders.csv_loader import CSVLoader -from langchain.docstore.document import Document - - -class CSVLoaderComponent(CustomComponent): - display_name = "CSVLoader" - description = "Load a `CSV` file into a list of Documents." - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "suffixes": [".csv"], - "file_types": ["csv"], - "field_type": "file", - }, - "metadata": { - "display_name": "Metadata", - "required": False, - }, - } - - def build(self, file_path: str, metadata: dict) -> List[Document]: - documents = CSVLoader(file_path=file_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/CoNLLULoader.py b/src/backend/langflow/components/documentloaders/CoNLLULoader.py deleted file mode 100644 index 0cd2de50c..000000000 --- a/src/backend/langflow/components/documentloaders/CoNLLULoader.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import List -from langflow import CustomComponent -from langchain.docstore.document import Document -from langchain_community.document_loaders.conllu import CoNLLULoader - - -class CoNLLULoaderComponent(CustomComponent): - display_name = "CoNLLULoader" - description = "Load `CoNLL-U` files." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/conll-u" - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "suffixes": [".conllu"], - "file_types": ["conllu"], - "field_type": "file", - }, - "metadata": { - "display_name": "Metadata", - "field_type": "dict", - "required": False, - }, - } - - def build(self, file_path: str, metadata: dict) -> List[Document]: - documents = CoNLLULoader(file_path=file_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py b/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py deleted file mode 100644 index 8fea6e5d2..000000000 --- a/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py +++ /dev/null @@ -1,28 +0,0 @@ -from langflow import CustomComponent -from langchain.docstore.document import Document -from typing import Optional, List -from langchain_community.document_loaders.college_confidential import CollegeConfidentialLoader - - -class CollegeConfidentialLoaderComponent(CustomComponent): - display_name = "CollegeConfidentialLoader" - description = "Load `College Confidential` webpages." - documentation = ( - "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/college_confidential" - ) - - def build_config(self): - return { - "metadata": {"display_name": "Metadata", "values": {}}, - "web_path": {"display_name": "Web Page", "required": True}, - } - - def build(self, web_path: str, metadata: Optional[dict] = {}) -> List[Document]: - documents = CollegeConfidentialLoader(web_path=web_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/EverNoteLoader.py b/src/backend/langflow/components/documentloaders/EverNoteLoader.py deleted file mode 100644 index 6f7431fcb..000000000 --- a/src/backend/langflow/components/documentloaders/EverNoteLoader.py +++ /dev/null @@ -1,38 +0,0 @@ -from langflow import CustomComponent -from langflow.field_typing import Document -from typing import List, Optional, Dict -from langchain_community.document_loaders.evernote import EverNoteLoader - - -class EverNoteLoaderComponent(CustomComponent): - display_name = "EverNoteLoader" - description = "Load from `EverNote`." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/evernote" - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "show": True, - "type": "file", - "file_types": [".xml"], - "field_type": "file", - }, - "metadata": { - "display_name": "Metadata", - "required": False, - "show": True, - "field_type": "dict", - }, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]: - documents = EverNoteLoader(file_path=file_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/FacebookChatLoader.py b/src/backend/langflow/components/documentloaders/FacebookChatLoader.py deleted file mode 100644 index ecb99eea4..000000000 --- a/src/backend/langflow/components/documentloaders/FacebookChatLoader.py +++ /dev/null @@ -1,37 +0,0 @@ -from langflow import CustomComponent -from langchain.docstore.document import Document -from typing import List, Optional, Dict -from langchain_community.document_loaders.facebook_chat import FacebookChatLoader - - -class FacebookChatLoaderComponent(CustomComponent): - display_name = "FacebookChatLoader" - description = "Load `Facebook Chat` messages directory dump." - documentation = ( - "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/facebook_chat" - ) - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "file_types": [".json"], - "field_type": "file", - }, - "metadata": { - "display_name": "Metadata", - "required": False, - "field_type": "dict", - }, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]: - documents = FacebookChatLoader(path=file_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/GitbookLoader.py b/src/backend/langflow/components/documentloaders/GitbookLoader.py deleted file mode 100644 index 05b03e611..000000000 --- a/src/backend/langflow/components/documentloaders/GitbookLoader.py +++ /dev/null @@ -1,32 +0,0 @@ -from langflow import CustomComponent -from langflow.field_typing import Document -from typing import List, Optional, Dict -from langchain_community.document_loaders.gitbook import GitbookLoader - - -class GitbookLoaderComponent(CustomComponent): - display_name = "GitbookLoader" - description = "Load `GitBook` data." - - def build_config(self): - return { - "metadata": { - "display_name": "Metadata", - "field_type": "dict", - "value": {}, - }, - "web_page": { - "display_name": "Web Page", - "required": True, - }, - } - - def build(self, metadata: Optional[Dict] = None, web_page: str = "") -> List[Document]: - documents = GitbookLoader(web_page=web_page).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/HNLoader.py b/src/backend/langflow/components/documentloaders/HNLoader.py deleted file mode 100644 index 23f77d570..000000000 --- a/src/backend/langflow/components/documentloaders/HNLoader.py +++ /dev/null @@ -1,29 +0,0 @@ -from langflow import CustomComponent -from typing import List, Optional, Dict -from langchain_community.document_loaders.hn import HNLoader -from langflow.field_typing import Document - - -class HNLoaderComponent(CustomComponent): - display_name = "HNLoader" - description = "Load `Hacker News` data." - - def build_config(self): - return { - "metadata": {"display_name": "Metadata", "value": {}, "required": False, "field_type": "dict"}, - "web_path": {"display_name": "Web Page", "required": True}, - } - - def build( - self, - web_path: str, - metadata: Optional[Dict] = None, - ) -> List[Document]: - documents = HNLoader(web_path=web_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/IFixitLoader.py b/src/backend/langflow/components/documentloaders/IFixitLoader.py deleted file mode 100644 index 48c5f7b47..000000000 --- a/src/backend/langflow/components/documentloaders/IFixitLoader.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import Dict, List, Optional - -from langchain_community.document_loaders.ifixit import IFixitLoader -from langflow import CustomComponent -from langflow.field_typing import Document - - -class IFixitLoaderComponent(CustomComponent): - display_name = "IFixitLoader" - description = "Load `iFixit` repair guides, device wikis and answers." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/ifixit" - - def build_config(self): - return { - "metadata": {"display_name": "Metadata", "type": "dict", "default": {}}, - "web_path": {"display_name": "Web Page", "type": "str"}, - } - - def build(self, web_path: str, metadata: Optional[Dict] = None) -> List[Document]: - # Assuming IFixitLoader is the correct class name from the langchain library, - # and it has a load method that returns a Document object. - if metadata is None: - metadata = {} - - docs = IFixitLoader(web_path=web_path).load() - - if metadata: - for doc in docs: - if doc.metadata is None: - doc.metadata = {} - doc.metadata.update(metadata) - return docs diff --git a/src/backend/langflow/components/documentloaders/IMSDbLoader.py b/src/backend/langflow/components/documentloaders/IMSDbLoader.py deleted file mode 100644 index bd16f854b..000000000 --- a/src/backend/langflow/components/documentloaders/IMSDbLoader.py +++ /dev/null @@ -1,30 +0,0 @@ -from langflow import CustomComponent -from langflow.field_typing import Document -from langchain_community.document_loaders.imsdb import IMSDbLoader - -from typing import Dict, List, Optional - - -class IMSDbLoaderComponent(CustomComponent): - display_name = "IMSDbLoader" - description = "Load `IMSDb` webpages." - - def build_config(self): - return { - "metadata": {"display_name": "Metadata", "field_type": "dict"}, - "web_path": {"display_name": "Web Page", "field_type": "str"}, - } - - def build( - self, - metadata: Optional[Dict] = None, - web_path: str = "", - ) -> List[Document]: - documents = IMSDbLoader(web_path=web_path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/PyPDFLoader.py b/src/backend/langflow/components/documentloaders/PyPDFLoader.py deleted file mode 100644 index b44a90577..000000000 --- a/src/backend/langflow/components/documentloaders/PyPDFLoader.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Dict, List, Optional - -from langchain_community.document_loaders.pdf import PyPDFLoader -from langchain_core.documents import Document - -from langflow import CustomComponent - - -class PyPDFLoaderComponent(CustomComponent): - display_name = "PyPDFLoader" - description = "Load PDF using pypdf into list of documents" - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf" - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "type": "file", - "fileTypes": ["pdf"], - "show": True, - }, - "metadata": { - "display_name": "Metadata", - "required": False, - "type": "dict", - "show": True, - }, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]: - # Assuming there is a PyPDFLoader class that takes file_path and metadata as parameters - # and inherits from BaseLoader - docs = PyPDFLoader(file_path=file_path).load() - - if metadata: - for doc in docs: - if doc.metadata is None: - doc.metadata = {} - doc.metadata.update(metadata) - return docs diff --git a/src/backend/langflow/components/documentloaders/ReadTheDocsLoader.py b/src/backend/langflow/components/documentloaders/ReadTheDocsLoader.py deleted file mode 100644 index 55284b9bd..000000000 --- a/src/backend/langflow/components/documentloaders/ReadTheDocsLoader.py +++ /dev/null @@ -1,29 +0,0 @@ -from langflow import CustomComponent -from typing import Dict, Optional, List -from langchain_core.documents import Document -from langchain_community.document_loaders.readthedocs import ReadTheDocsLoader - - -class ReadTheDocsLoaderComponent(CustomComponent): - display_name = "ReadTheDocsLoader" - description = "Load `ReadTheDocs` documentation directory." - - def build_config(self): - return { - "metadata": {"display_name": "Metadata", "default": {}, "field_type": "dict"}, - "path": {"display_name": "Local directory", "required": True}, - } - - def build( - self, - path: str, - metadata: Optional[Dict] = None, - ) -> List[Document]: - documents = ReadTheDocsLoader(path=path).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/SRTLoader.py b/src/backend/langflow/components/documentloaders/SRTLoader.py deleted file mode 100644 index 931660947..000000000 --- a/src/backend/langflow/components/documentloaders/SRTLoader.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import Dict, Optional - -from langchain_core.documents import Document - -from langflow import CustomComponent - - -class SRTLoaderComponent(CustomComponent): - display_name = "SRTLoader" - description = "Load `.srt` (subtitle) files." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/subtitle" - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "fileTypes": ["srt"], - }, - "metadata": { - "display_name": "Metadata", - "required": False, - }, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document: - return Document(file_path=file_path, metadata=metadata) diff --git a/src/backend/langflow/components/documentloaders/SlackDirectoryLoader.py b/src/backend/langflow/components/documentloaders/SlackDirectoryLoader.py deleted file mode 100644 index 782f1cea9..000000000 --- a/src/backend/langflow/components/documentloaders/SlackDirectoryLoader.py +++ /dev/null @@ -1,32 +0,0 @@ -from langflow import CustomComponent -from typing import Optional, Dict, List -from langchain_core.documents import Document -from langchain_community.document_loaders.slack_directory import SlackDirectoryLoader - - -class SlackDirectoryLoaderComponent(CustomComponent): - display_name = "SlackDirectoryLoader" - description = "Load from a `Slack` directory dump." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/slack" - - def build_config(self): - return { - "zip_path": {"display_name": "Path to zip file", "field_type": "file", "file_types": [".zip"]}, - "metadata": {"display_name": "Metadata", "field_type": "dict"}, - "workspace_url": {"display_name": "Workspace URL"}, - } - - def build( - self, - zip_path: str, - metadata: Optional[Dict] = None, - workspace_url: Optional[str] = None, - ) -> List[Document]: - documents = SlackDirectoryLoader(zip_path=zip_path, workspace_url=workspace_url).load() - if metadata: - for document in documents: - if not document.metadata: - document.metadata = metadata - else: - document.metadata.update(metadata) - return documents diff --git a/src/backend/langflow/components/documentloaders/TextLoader.py b/src/backend/langflow/components/documentloaders/TextLoader.py deleted file mode 100644 index d3fa8e7ad..000000000 --- a/src/backend/langflow/components/documentloaders/TextLoader.py +++ /dev/null @@ -1,28 +0,0 @@ -from langflow import CustomComponent -from langflow.field_typing import Document -from typing import Optional, Dict - - -class TextLoaderComponent(CustomComponent): - display_name = "TextLoader" - description = "Load text file." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/" - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "type": "file", - "suffixes": [".txt"], - }, - "metadata": { - "display_name": "Metadata", - "required": False, - "type": "dict", - "default": {}, - }, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document: - return Document(file_path=file_path, metadata=metadata) diff --git a/src/backend/langflow/components/documentloaders/UnstructuredHTMLLoader.py b/src/backend/langflow/components/documentloaders/UnstructuredHTMLLoader.py deleted file mode 100644 index 41e5a468e..000000000 --- a/src/backend/langflow/components/documentloaders/UnstructuredHTMLLoader.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Dict, List, Optional - -from langchain import CustomComponent -from langchain_community.document_loaders import UnstructuredHTMLLoader -from langchain_core.documents import Document - - -class UnstructuredHTMLLoaderComponent(CustomComponent): - display_name = "UnstructuredHTMLLoader" - description = "Load `HTML` files using `Unstructured`." - documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/html" - - def build_config(self): - return { - "file_path": {"display_name": "File Path", "type": "file", "fileTypes": ["html"]}, - "metadata": {"display_name": "Metadata"}, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]: - # Assuming the existence of a function or class named UnstructuredHTMLLoader that - # loads HTML and creates a Document object; Replace with actual implementation. - docs = UnstructuredHTMLLoader(file_path=file_path).load() - - if metadata: - for doc in docs: - if doc.metadata is None: - doc.metadata = {} - doc.metadata.update(metadata) - return docs diff --git a/src/backend/langflow/components/documentloaders/UnstructuredPowerPointLoader.py b/src/backend/langflow/components/documentloaders/UnstructuredPowerPointLoader.py deleted file mode 100644 index 797ffbc12..000000000 --- a/src/backend/langflow/components/documentloaders/UnstructuredPowerPointLoader.py +++ /dev/null @@ -1,31 +0,0 @@ -from langflow import CustomComponent -from langchain.document_loaders import Document -from typing import Optional, Dict - - -class UnstructuredPowerPointLoaderComponent(CustomComponent): - display_name = "UnstructuredPowerPointLoader" - description = "Load `Microsoft PowerPoint` files using `Unstructured`." - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "type": "file", - "fileTypes": ["pptx", "ppt"], - }, - "metadata": { - "display_name": "Metadata", - "type": "dict", - }, - } - - def build( - self, - file_path: str, - metadata: Optional[Dict] = None, - ) -> Document: - # Assuming there is a loader class `UnstructuredPowerPointLoader` that takes these parameters - # Since the actual loader class is not provided, this is a placeholder for the actual implementation - loader_class = self.get_loader_class() # Placeholder method to obtain the correct loader class - return loader_class(file_path=file_path, metadata=metadata) diff --git a/src/backend/langflow/components/documentloaders/UnstructuredWordDocumentLoader.py b/src/backend/langflow/components/documentloaders/UnstructuredWordDocumentLoader.py deleted file mode 100644 index 48ff3a24e..000000000 --- a/src/backend/langflow/components/documentloaders/UnstructuredWordDocumentLoader.py +++ /dev/null @@ -1,25 +0,0 @@ -from langflow import CustomComponent -from langflow.field_typing import Document -from typing import Optional, Dict - - -class UnstructuredWordDocumentLoaderComponent(CustomComponent): - display_name = "UnstructuredWordDocumentLoader" - description = "Load `Microsoft Word` file using `Unstructured`." - documentation = ( - "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/microsoft_word" - ) - - def build_config(self): - return { - "file_path": { - "display_name": "File Path", - "required": True, - "type": "file", - "suffixes": [".docx", ".doc"], - }, - "metadata": {"display_name": "Metadata", "required": False, "type": "dict"}, - } - - def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document: - return Document(file_path=file_path, metadata=metadata)