Delete document loader components

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-01-31 15:50:24 -03:00
commit cbf80c9fcd
20 changed files with 0 additions and 635 deletions

View file

@ -1,26 +0,0 @@
from langflow import CustomComponent
from langflow.field_typing import Document
from typing import List, Optional, Dict
from langchain_community.document_loaders.azlyrics import AZLyricsLoader
class AZLyricsLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``AZLyricsLoader``."""

    display_name = "AZLyricsLoader"
    description = "Load `AZLyrics` webpages."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/azlyrics"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "metadata": {"display_name": "Metadata", "field_type": "dict", "value": {}, "show": True},
            "web_path": {"display_name": "Web Page", "type": "str", "required": True, "show": True},
        }

    def build(self, metadata: Optional[Dict] = None, web_path: str = "") -> List[Document]:
        """Load documents from ``web_path`` and merge ``metadata`` into each.

        Args:
            metadata: Optional extra key/value pairs. On key collisions these
                override the values produced by the loader.
            web_path: Page address handed to ``AZLyricsLoader``.

        Returns:
            The list returned by the loader's ``load()`` call.
        """
        documents = AZLyricsLoader(web_path=web_path).load()
        if metadata:
            for document in documents:
                # Build a fresh dict per document. Assigning ``metadata``
                # itself would make every document (and the caller) share a
                # single mutable object.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,37 +0,0 @@
from langflow import CustomComponent
from langflow.field_typing import Document
from typing import List, Optional, Dict
from langchain_community.document_loaders.airbyte_json import AirbyteJSONLoader
class AirbyteJSONLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``AirbyteJSONLoader``."""

    display_name = "AirbyteJSONLoader"
    description = "Load local `Airbyte` json files."
    documentation = (
        "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/airbyte_json"
    )

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "file_types": [".json"],
                "required": True,
                "field_type": "file",
            },
            "metadata": {
                "display_name": "Metadata",
                "field_type": "dict",
                "required": False,
            },
        }

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
        """Load the JSON file at ``file_path`` and apply ``metadata``.

        Args:
            file_path: Path passed to ``AirbyteJSONLoader``.
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys.

        Returns:
            The documents produced by the loader.
        """
        documents = AirbyteJSONLoader(file_path=file_path).load()
        if metadata:
            for document in documents:
                # A new dict per document avoids all documents aliasing the
                # caller's ``metadata`` object.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,34 +0,0 @@
from langflow import CustomComponent
from langflow.field_typing import Document
from typing import Optional, Dict
class BSHTMLLoaderComponent(CustomComponent):
    """Langflow component that loads an HTML file through ``BSHTMLLoader``."""

    display_name = "BSHTMLLoader"
    description = "Load `HTML` files and parse them with `beautiful soup`."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/html"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "required": True,
                "show": True,
                "type": "file",
                "suffixes": [".html"],
                "file_types": ["html"],
                "field_type": "file",
            },
            "metadata": {
                "display_name": "Metadata",
                "required": False,
                "show": True,
                "field_type": "dict",
            },
        }

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
        """Parse the HTML file at ``file_path`` into a single document.

        Args:
            file_path: Path of the HTML file to load.
            metadata: Optional key/value pairs merged into the document's
                metadata; these win over loader-produced keys.

        Returns:
            The first document produced by the loader.

        Raises:
            ValueError: If the loader returns no documents.
        """
        # Imported here because this module has no top-level loader import.
        from langchain_community.document_loaders import BSHTMLLoader

        documents = BSHTMLLoader(file_path=file_path).load()
        if not documents:
            raise ValueError(f"No content could be loaded from {file_path!r}")
        # NOTE(review): the loader is expected to yield one document per
        # file; only the first is returned to honour the ``Document``
        # return annotation. Confirm against the loader's documentation.
        document = documents[0]
        if metadata:
            document.metadata = {**(document.metadata or {}), **metadata}
        return document

View file

@ -1,34 +0,0 @@
from langflow import CustomComponent
from typing import List
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.docstore.document import Document
class CSVLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``CSVLoader``."""

    display_name = "CSVLoader"
    description = "Load a `CSV` file into a list of Documents."

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "required": True,
                "suffixes": [".csv"],
                "file_types": ["csv"],
                "field_type": "file",
            },
            "metadata": {
                "display_name": "Metadata",
                "required": False,
            },
        }

    def build(self, file_path: str, metadata: dict) -> List[Document]:
        """Load the CSV file at ``file_path`` and apply ``metadata``.

        Args:
            file_path: Path passed to ``CSVLoader``.
            metadata: Key/value pairs merged into every document's metadata
                (skipped when empty or ``None``); these win over
                loader-produced keys.

        Returns:
            The documents produced by the loader.
        """
        documents = CSVLoader(file_path=file_path).load()
        if metadata:
            for document in documents:
                # Merge into a fresh dict so documents never share the
                # caller's ``metadata`` object.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,36 +0,0 @@
from typing import List
from langflow import CustomComponent
from langchain.docstore.document import Document
from langchain_community.document_loaders.conllu import CoNLLULoader
class CoNLLULoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``CoNLLULoader``."""

    display_name = "CoNLLULoader"
    description = "Load `CoNLL-U` files."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/conll-u"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "required": True,
                "suffixes": [".conllu"],
                "file_types": ["conllu"],
                "field_type": "file",
            },
            "metadata": {
                "display_name": "Metadata",
                "field_type": "dict",
                "required": False,
            },
        }

    def build(self, file_path: str, metadata: dict) -> List[Document]:
        """Load the CoNLL-U file at ``file_path`` and apply ``metadata``.

        Args:
            file_path: Path passed to ``CoNLLULoader``.
            metadata: Key/value pairs merged into every document's metadata
                (skipped when empty or ``None``); these win over
                loader-produced keys.

        Returns:
            The documents produced by the loader.
        """
        documents = CoNLLULoader(file_path=file_path).load()
        if metadata:
            for document in documents:
                # Each document gets its own dict; the original assignment
                # shared one object across documents and the caller.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,28 +0,0 @@
from langflow import CustomComponent
from langchain.docstore.document import Document
from typing import Optional, List
from langchain_community.document_loaders.college_confidential import CollegeConfidentialLoader
class CollegeConfidentialLoaderComponent(CustomComponent):
    """Langflow component that wraps ``CollegeConfidentialLoader``."""

    display_name = "CollegeConfidentialLoader"
    description = "Load `College Confidential` webpages."
    documentation = (
        "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/college_confidential"
    )

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            # NOTE(review): the key here is "values"; confirm whether
            # "value" was intended.
            "metadata": {"display_name": "Metadata", "values": {}},
            "web_path": {"display_name": "Web Page", "required": True},
        }

    def build(self, web_path: str, metadata: Optional[dict] = None) -> List[Document]:
        """Load documents from ``web_path`` and merge ``metadata`` into each.

        Args:
            web_path: Page address handed to ``CollegeConfidentialLoader``.
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys. Defaults to
                ``None`` rather than a shared mutable ``{}``.

        Returns:
            The documents produced by the loader.
        """
        documents = CollegeConfidentialLoader(web_path=web_path).load()
        if metadata:
            for document in documents:
                # Fresh dict per document: no aliasing between documents or
                # with the caller's argument.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,38 +0,0 @@
from langflow import CustomComponent
from langflow.field_typing import Document
from typing import List, Optional, Dict
from langchain_community.document_loaders.evernote import EverNoteLoader
class EverNoteLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``EverNoteLoader``."""

    display_name = "EverNoteLoader"
    description = "Load from `EverNote`."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/evernote"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "required": True,
                "show": True,
                "type": "file",
                "file_types": [".xml"],
                "field_type": "file",
            },
            "metadata": {
                "display_name": "Metadata",
                "required": False,
                "show": True,
                "field_type": "dict",
            },
        }

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
        """Load the export file at ``file_path`` and apply ``metadata``.

        Args:
            file_path: Path passed to ``EverNoteLoader``.
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys.

        Returns:
            The documents produced by the loader.
        """
        documents = EverNoteLoader(file_path=file_path).load()
        if metadata:
            for document in documents:
                # Merge into a new dict so no two documents share state.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,37 +0,0 @@
from langflow import CustomComponent
from langchain.docstore.document import Document
from typing import List, Optional, Dict
from langchain_community.document_loaders.facebook_chat import FacebookChatLoader
class FacebookChatLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``FacebookChatLoader``."""

    display_name = "FacebookChatLoader"
    description = "Load `Facebook Chat` messages directory dump."
    documentation = (
        "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/facebook_chat"
    )

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "required": True,
                "file_types": [".json"],
                "field_type": "file",
            },
            "metadata": {
                "display_name": "Metadata",
                "required": False,
                "field_type": "dict",
            },
        }

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
        """Load the chat dump at ``file_path`` and apply ``metadata``.

        Args:
            file_path: Forwarded to ``FacebookChatLoader`` as its ``path``
                argument.
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys.

        Returns:
            The documents produced by the loader.
        """
        documents = FacebookChatLoader(path=file_path).load()
        if metadata:
            for document in documents:
                # New dict per document prevents cross-document aliasing of
                # the caller's ``metadata``.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,32 +0,0 @@
from langflow import CustomComponent
from langflow.field_typing import Document
from typing import List, Optional, Dict
from langchain_community.document_loaders.gitbook import GitbookLoader
class GitbookLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``GitbookLoader``."""

    display_name = "GitbookLoader"
    description = "Load `GitBook` data."

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "metadata": {
                "display_name": "Metadata",
                "field_type": "dict",
                "value": {},
            },
            "web_page": {
                "display_name": "Web Page",
                "required": True,
            },
        }

    def build(self, metadata: Optional[Dict] = None, web_page: str = "") -> List[Document]:
        """Load documents from ``web_page`` and merge ``metadata`` into each.

        Args:
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys.
            web_page: Page address handed to ``GitbookLoader``.

        Returns:
            The documents produced by the loader.
        """
        documents = GitbookLoader(web_page=web_page).load()
        if metadata:
            for document in documents:
                # Avoid assigning the caller's dict directly; every document
                # would otherwise point at the same mutable object.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,29 +0,0 @@
from langflow import CustomComponent
from typing import List, Optional, Dict
from langchain_community.document_loaders.hn import HNLoader
from langflow.field_typing import Document
class HNLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``HNLoader``."""

    display_name = "HNLoader"
    description = "Load `Hacker News` data."

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "metadata": {"display_name": "Metadata", "value": {}, "required": False, "field_type": "dict"},
            "web_path": {"display_name": "Web Page", "required": True},
        }

    def build(
        self,
        web_path: str,
        metadata: Optional[Dict] = None,
    ) -> List[Document]:
        """Load documents from ``web_path`` and merge ``metadata`` into each.

        Args:
            web_path: Page address handed to ``HNLoader``.
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys.

        Returns:
            The documents produced by the loader.
        """
        documents = HNLoader(web_path=web_path).load()
        if metadata:
            for document in documents:
                # One fresh dict per document, so later edits to a single
                # document's metadata cannot leak into the others.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,32 +0,0 @@
from typing import Dict, List, Optional
from langchain_community.document_loaders.ifixit import IFixitLoader
from langflow import CustomComponent
from langflow.field_typing import Document
class IFixitLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``IFixitLoader``."""

    display_name = "IFixitLoader"
    description = "Load `iFixit` repair guides, device wikis and answers."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/ifixit"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        metadata_field = {"display_name": "Metadata", "type": "dict", "default": {}}
        web_path_field = {"display_name": "Web Page", "type": "str"}
        return {"metadata": metadata_field, "web_path": web_path_field}

    def build(self, web_path: str, metadata: Optional[Dict] = None) -> List[Document]:
        """Load documents from ``web_path`` and update each with ``metadata``.

        Args:
            web_path: Page address handed to ``IFixitLoader``.
            metadata: Optional key/value pairs applied to every document's
                metadata via ``dict.update``.

        Returns:
            The documents produced by the loader.
        """
        extra = {} if metadata is None else metadata
        loaded = IFixitLoader(web_path=web_path).load()
        if not extra:
            return loaded
        for item in loaded:
            if item.metadata is None:
                item.metadata = {}
            item.metadata.update(extra)
        return loaded

View file

@ -1,30 +0,0 @@
from langflow import CustomComponent
from langflow.field_typing import Document
from langchain_community.document_loaders.imsdb import IMSDbLoader
from typing import Dict, List, Optional
class IMSDbLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``IMSDbLoader``."""

    display_name = "IMSDbLoader"
    description = "Load `IMSDb` webpages."

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "metadata": {"display_name": "Metadata", "field_type": "dict"},
            "web_path": {"display_name": "Web Page", "field_type": "str"},
        }

    def build(
        self,
        metadata: Optional[Dict] = None,
        web_path: str = "",
    ) -> List[Document]:
        """Load documents from ``web_path`` and merge ``metadata`` into each.

        Args:
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys.
            web_path: Page address handed to ``IMSDbLoader``.

        Returns:
            The documents produced by the loader.
        """
        documents = IMSDbLoader(web_path=web_path).load()
        if metadata:
            for document in documents:
                # Build a new dict instead of sharing the caller's object
                # across all documents.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,41 +0,0 @@
from typing import Dict, List, Optional
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_core.documents import Document
from langflow import CustomComponent
class PyPDFLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``PyPDFLoader``."""

    display_name = "PyPDFLoader"
    description = "Load PDF using pypdf into list of documents"
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        config = {}
        config["file_path"] = {
            "display_name": "File Path",
            "required": True,
            "type": "file",
            "fileTypes": ["pdf"],
            "show": True,
        }
        config["metadata"] = {
            "display_name": "Metadata",
            "required": False,
            "type": "dict",
            "show": True,
        }
        return config

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
        """Load the PDF at ``file_path`` and update each document's metadata.

        Args:
            file_path: Path passed to ``PyPDFLoader``.
            metadata: Optional key/value pairs applied to every document's
                metadata via ``dict.update``.

        Returns:
            The documents produced by the loader.
        """
        pages = PyPDFLoader(file_path=file_path).load()
        if not metadata:
            return pages
        for page in pages:
            if page.metadata is None:
                page.metadata = {}
            page.metadata.update(metadata)
        return pages

View file

@ -1,29 +0,0 @@
from langflow import CustomComponent
from typing import Dict, Optional, List
from langchain_core.documents import Document
from langchain_community.document_loaders.readthedocs import ReadTheDocsLoader
class ReadTheDocsLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``ReadTheDocsLoader``."""

    display_name = "ReadTheDocsLoader"
    description = "Load `ReadTheDocs` documentation directory."

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "metadata": {"display_name": "Metadata", "default": {}, "field_type": "dict"},
            "path": {"display_name": "Local directory", "required": True},
        }

    def build(
        self,
        path: str,
        metadata: Optional[Dict] = None,
    ) -> List[Document]:
        """Load documents from the directory ``path`` and apply ``metadata``.

        Args:
            path: Directory handed to ``ReadTheDocsLoader``.
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys.

        Returns:
            The documents produced by the loader.
        """
        documents = ReadTheDocsLoader(path=path).load()
        if metadata:
            for document in documents:
                # Per-document copy: assigning ``metadata`` directly would
                # alias one dict across every document.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,27 +0,0 @@
from typing import Dict, Optional
from langchain_core.documents import Document
from langflow import CustomComponent
class SRTLoaderComponent(CustomComponent):
    """Langflow component that loads a subtitle file through ``SRTLoader``."""

    display_name = "SRTLoader"
    description = "Load `.srt` (subtitle) files."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/subtitle"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "required": True,
                "fileTypes": ["srt"],
            },
            "metadata": {
                "display_name": "Metadata",
                "required": False,
            },
        }

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
        """Load the subtitle file at ``file_path`` into a single document.

        The file is read through a loader so that the returned document
        carries ``page_content``.

        Args:
            file_path: Path of the ``.srt`` file to load.
            metadata: Optional key/value pairs merged into the document's
                metadata; these win over loader-produced keys.

        Returns:
            The first document produced by the loader.

        Raises:
            ValueError: If the loader returns no documents.
        """
        # Imported here because this module has no top-level loader import.
        from langchain_community.document_loaders import SRTLoader

        documents = SRTLoader(file_path).load()
        if not documents:
            raise ValueError(f"No content could be loaded from {file_path!r}")
        # NOTE(review): the loader is expected to yield one document per
        # file; confirm against the loader's documentation.
        document = documents[0]
        if metadata:
            document.metadata = {**(document.metadata or {}), **metadata}
        return document

View file

@ -1,32 +0,0 @@
from langflow import CustomComponent
from typing import Optional, Dict, List
from langchain_core.documents import Document
from langchain_community.document_loaders.slack_directory import SlackDirectoryLoader
class SlackDirectoryLoaderComponent(CustomComponent):
    """Langflow component that wraps LangChain's ``SlackDirectoryLoader``."""

    display_name = "SlackDirectoryLoader"
    description = "Load from a `Slack` directory dump."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/slack"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "zip_path": {"display_name": "Path to zip file", "field_type": "file", "file_types": [".zip"]},
            "metadata": {"display_name": "Metadata", "field_type": "dict"},
            "workspace_url": {"display_name": "Workspace URL"},
        }

    def build(
        self,
        zip_path: str,
        metadata: Optional[Dict] = None,
        workspace_url: Optional[str] = None,
    ) -> List[Document]:
        """Load documents from the Slack export at ``zip_path``.

        Args:
            zip_path: Path of the zip archive handed to the loader.
            metadata: Optional key/value pairs merged into every document's
                metadata; these win over loader-produced keys.
            workspace_url: Optional workspace URL forwarded to the loader.

        Returns:
            The documents produced by the loader.
        """
        documents = SlackDirectoryLoader(zip_path=zip_path, workspace_url=workspace_url).load()
        if metadata:
            for document in documents:
                # Merge into a fresh dict so documents do not share the
                # caller's ``metadata`` object.
                document.metadata = {**(document.metadata or {}), **metadata}
        return documents

View file

@ -1,28 +0,0 @@
from langflow import CustomComponent
from langflow.field_typing import Document
from typing import Optional, Dict
class TextLoaderComponent(CustomComponent):
    """Langflow component that reads a text file into a single document."""

    display_name = "TextLoader"
    description = "Load text file."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "required": True,
                "type": "file",
                "suffixes": [".txt"],
            },
            "metadata": {
                "display_name": "Metadata",
                "required": False,
                "type": "dict",
                "default": {},
            },
        }

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
        """Read ``file_path`` and return its text as one document.

        The file's text is passed as ``page_content`` so that the document
        carries the file's contents.

        Args:
            file_path: Path of the text file; decoded as UTF-8.
            metadata: Optional key/value pairs added to the document's
                metadata. A ``"source"`` entry holding ``file_path`` is
                always included and can be overridden by ``metadata``.

        Returns:
            A document whose ``page_content`` is the file's text.
        """
        from pathlib import Path

        text = Path(file_path).read_text(encoding="utf-8")
        # NOTE(review): assumes ``Document`` is LangChain's document class
        # accepting ``page_content`` and ``metadata``; confirm.
        return Document(page_content=text, metadata={"source": file_path, **(metadata or {})})

View file

@ -1,29 +0,0 @@
from typing import Dict, List, Optional
from langchain import CustomComponent
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_core.documents import Document
class UnstructuredHTMLLoaderComponent(CustomComponent):
    """Langflow component that wraps ``UnstructuredHTMLLoader``."""

    display_name = "UnstructuredHTMLLoader"
    description = "Load `HTML` files using `Unstructured`."
    documentation = "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/html"

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        file_field = {"display_name": "File Path", "type": "file", "fileTypes": ["html"]}
        return {
            "file_path": file_field,
            "metadata": {"display_name": "Metadata"},
        }

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> List[Document]:
        """Load the HTML file at ``file_path`` and update document metadata.

        Args:
            file_path: Path passed to ``UnstructuredHTMLLoader``.
            metadata: Optional key/value pairs applied to every document's
                metadata via ``dict.update``.

        Returns:
            The documents produced by the loader.
        """
        loader = UnstructuredHTMLLoader(file_path=file_path)
        results = loader.load()
        if metadata:
            for entry in results:
                # Documents without metadata get an empty dict first.
                if entry.metadata is None:
                    entry.metadata = {}
                entry.metadata.update(metadata)
        return results

View file

@ -1,31 +0,0 @@
from langflow import CustomComponent
from langchain.document_loaders import Document
from typing import Optional, Dict
class UnstructuredPowerPointLoaderComponent(CustomComponent):
    """Langflow component that loads a PowerPoint file via ``Unstructured``."""

    display_name = "UnstructuredPowerPointLoader"
    description = "Load `Microsoft PowerPoint` files using `Unstructured`."

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "type": "file",
                "fileTypes": ["pptx", "ppt"],
            },
            "metadata": {
                "display_name": "Metadata",
                "type": "dict",
            },
        }

    def build(
        self,
        file_path: str,
        metadata: Optional[Dict] = None,
    ) -> Document:
        """Load the presentation at ``file_path`` into a single document.

        Args:
            file_path: Path of the ``.pptx``/``.ppt`` file to load.
            metadata: Optional key/value pairs merged into the document's
                metadata; these win over loader-produced keys.

        Returns:
            The first document produced by the loader.

        Raises:
            ValueError: If the loader returns no documents.
        """
        # Imported here because this module has no top-level loader import.
        from langchain_community.document_loaders import UnstructuredPowerPointLoader

        documents = UnstructuredPowerPointLoader(file_path).load()
        if not documents:
            raise ValueError(f"No content could be loaded from {file_path!r}")
        # NOTE(review): assumes the loader's default mode yields a single
        # document per file; confirm against the loader's documentation.
        document = documents[0]
        if metadata:
            document.metadata = {**(document.metadata or {}), **metadata}
        return document

View file

@ -1,25 +0,0 @@
from langflow import CustomComponent
from langflow.field_typing import Document
from typing import Optional, Dict
class UnstructuredWordDocumentLoaderComponent(CustomComponent):
    """Langflow component that loads a Word file via ``Unstructured``."""

    display_name = "UnstructuredWordDocumentLoader"
    description = "Load `Microsoft Word` file using `Unstructured`."
    documentation = (
        "https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/microsoft_word"
    )

    def build_config(self):
        """Return the field configuration for the ``build`` parameters."""
        return {
            "file_path": {
                "display_name": "File Path",
                "required": True,
                "type": "file",
                "suffixes": [".docx", ".doc"],
            },
            "metadata": {"display_name": "Metadata", "required": False, "type": "dict"},
        }

    def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
        """Load the Word file at ``file_path`` into a single document.

        The file is read through a loader so that the returned document
        carries ``page_content``.

        Args:
            file_path: Path of the ``.docx``/``.doc`` file to load.
            metadata: Optional key/value pairs merged into the document's
                metadata; these win over loader-produced keys.

        Returns:
            The first document produced by the loader.

        Raises:
            ValueError: If the loader returns no documents.
        """
        # Imported here because this module has no top-level loader import.
        from langchain_community.document_loaders import UnstructuredWordDocumentLoader

        documents = UnstructuredWordDocumentLoader(file_path).load()
        if not documents:
            raise ValueError(f"No content could be loaded from {file_path!r}")
        # NOTE(review): assumes the loader's default mode yields a single
        # document per file; confirm against the loader's documentation.
        document = documents[0]
        if metadata:
            document.metadata = {**(document.metadata or {}), **metadata}
        return document