From a9ff3add92b558da7a98e3de7a29c1b16d512ce8 Mon Sep 17 00:00:00 2001 From: Ibis Prevedello Date: Thu, 30 Mar 2023 14:25:23 -0300 Subject: [PATCH] feat: add embeddings, vectorstores and document loaders to list --- src/backend/langflow/config.yaml | 9 + .../langflow/interface/custom_lists.py | 162 ++++++++++++++++++ .../langflow/interface/documentloaders.py | 27 +++ src/backend/langflow/interface/embeddings.py | 27 +++ src/backend/langflow/interface/types.py | 9 + src/backend/langflow/interface/vectorstore.py | 27 +++ src/backend/langflow/settings.py | 3 + src/backend/langflow/utils/util.py | 44 ++--- 8 files changed, 287 insertions(+), 21 deletions(-) create mode 100644 src/backend/langflow/interface/documentloaders.py create mode 100644 src/backend/langflow/interface/embeddings.py create mode 100644 src/backend/langflow/interface/vectorstore.py diff --git a/src/backend/langflow/config.yaml b/src/backend/langflow/config.yaml index 301d89a34..602c89925 100644 --- a/src/backend/langflow/config.yaml +++ b/src/backend/langflow/config.yaml @@ -26,4 +26,13 @@ tools: memories: # - ConversationBufferMemory +embeddings: + # + +vectorstores: + # + +documentloaders: + # + dev: false diff --git a/src/backend/langflow/interface/custom_lists.py b/src/backend/langflow/interface/custom_lists.py index 3e8a800f9..7e79c8543 100644 --- a/src/backend/langflow/interface/custom_lists.py +++ b/src/backend/langflow/interface/custom_lists.py @@ -41,3 +41,165 @@ memory_type_to_cls_dict: dict[str, Any] = { # chain_type_to_cls_dict = type_to_loader_dict # chain_type_to_cls_dict["conversation_chain"] = ConversationChain + + +## Embeddings +from langchain.embeddings import ( + CohereEmbeddings, + FakeEmbeddings, + HuggingFaceEmbeddings, + HuggingFaceInstructEmbeddings, + HuggingFaceHubEmbeddings, + OpenAIEmbeddings, + # SagemakerEndpointEmbeddings, + TensorflowHubEmbeddings, + SelfHostedHuggingFaceEmbeddings, + SelfHostedHuggingFaceInstructEmbeddings, + SelfHostedEmbeddings, +) + +embedding_type_to_cls_dict = { + "OpenAIEmbeddings": OpenAIEmbeddings, + "HuggingFaceEmbeddings": HuggingFaceEmbeddings, + "CohereEmbeddings": CohereEmbeddings, + "HuggingFaceHubEmbeddings": HuggingFaceHubEmbeddings, + "TensorflowHubEmbeddings": TensorflowHubEmbeddings, + # "SagemakerEndpointEmbeddings": SagemakerEndpointEmbeddings, + "HuggingFaceInstructEmbeddings": HuggingFaceInstructEmbeddings, + "SelfHostedEmbeddings": SelfHostedEmbeddings, + "SelfHostedHuggingFaceEmbeddings": SelfHostedHuggingFaceEmbeddings, + "SelfHostedHuggingFaceInstructEmbeddings": SelfHostedHuggingFaceInstructEmbeddings, + "FakeEmbeddings": FakeEmbeddings, +} + +## Vector Stores +from langchain.vectorstores import ( + ElasticVectorSearch, + FAISS, + VectorStore, + Pinecone, + Weaviate, + Qdrant, + Milvus, + Chroma, + OpenSearchVectorSearch, + AtlasDB, + DeepLake, +) + +vectorstores_type_to_cls_dict = { + "ElasticVectorSearch": ElasticVectorSearch, + "FAISS": FAISS, + "VectorStore": VectorStore, + "Pinecone": Pinecone, + "Weaviate": Weaviate, + "Qdrant": Qdrant, + "Milvus": Milvus, + "Chroma": Chroma, + "OpenSearchVectorSearch": OpenSearchVectorSearch, + "AtlasDB": AtlasDB, + "DeepLake": DeepLake, +} + +## Document Loaders + +from langchain.document_loaders import ( + UnstructuredFileLoader, + UnstructuredFileIOLoader, + UnstructuredURLLoader, + DirectoryLoader, + NotionDirectoryLoader, + ReadTheDocsLoader, + GoogleDriveLoader, + UnstructuredHTMLLoader, + # BSHTMLLoader, + UnstructuredPowerPointLoader, + UnstructuredWordDocumentLoader, + UnstructuredPDFLoader, + UnstructuredImageLoader, + ObsidianLoader, + UnstructuredEmailLoader, + UnstructuredMarkdownLoader, + RoamLoader, + YoutubeLoader, + S3FileLoader, + TextLoader, + HNLoader, + GitbookLoader, + S3DirectoryLoader, + GCSFileLoader, + GCSDirectoryLoader, + WebBaseLoader, + IMSDbLoader, + AZLyricsLoader, + CollegeConfidentialLoader, + IFixitLoader, + GutenbergLoader, + PagedPDFSplitter, + PyPDFLoader, + EverNoteLoader, + AirbyteJSONLoader, + OnlinePDFLoader, + PDFMinerLoader, + PyMuPDFLoader, + TelegramChatLoader, + SRTLoader, + FacebookChatLoader, + NotebookLoader, + CoNLLULoader, + GoogleApiYoutubeLoader, + GoogleApiClient, + CSVLoader, + # BlackboardLoader +) + + +documentloaders_type_to_cls_dict = { + "UnstructuredFileLoader": UnstructuredFileLoader, + "UnstructuredFileIOLoader": UnstructuredFileIOLoader, + "UnstructuredURLLoader": UnstructuredURLLoader, + "DirectoryLoader": DirectoryLoader, + "NotionDirectoryLoader": NotionDirectoryLoader, + "ReadTheDocsLoader": ReadTheDocsLoader, + "GoogleDriveLoader": GoogleDriveLoader, + "UnstructuredHTMLLoader": UnstructuredHTMLLoader, + # "BSHTMLLoader": BSHTMLLoader, + "UnstructuredPowerPointLoader": UnstructuredPowerPointLoader, + "UnstructuredWordDocumentLoader": UnstructuredWordDocumentLoader, + "UnstructuredPDFLoader": UnstructuredPDFLoader, + "UnstructuredImageLoader": UnstructuredImageLoader, + "ObsidianLoader": ObsidianLoader, + "UnstructuredEmailLoader": UnstructuredEmailLoader, + "UnstructuredMarkdownLoader": UnstructuredMarkdownLoader, + "RoamLoader": RoamLoader, + "YoutubeLoader": YoutubeLoader, + "S3FileLoader": S3FileLoader, + "TextLoader": TextLoader, + "HNLoader": HNLoader, + "GitbookLoader": GitbookLoader, + "S3DirectoryLoader": S3DirectoryLoader, + "GCSFileLoader": GCSFileLoader, + "GCSDirectoryLoader": GCSDirectoryLoader, + "WebBaseLoader": WebBaseLoader, + "IMSDbLoader": IMSDbLoader, + "AZLyricsLoader": AZLyricsLoader, + "CollegeConfidentialLoader": CollegeConfidentialLoader, + "IFixitLoader": IFixitLoader, + "GutenbergLoader": GutenbergLoader, + "PagedPDFSplitter": PagedPDFSplitter, + "PyPDFLoader": PyPDFLoader, + "EverNoteLoader": EverNoteLoader, + "AirbyteJSONLoader": AirbyteJSONLoader, + "OnlinePDFLoader": OnlinePDFLoader, + "PDFMinerLoader": PDFMinerLoader, + "PyMuPDFLoader": PyMuPDFLoader, + "TelegramChatLoader": TelegramChatLoader, + "SRTLoader": SRTLoader, + "FacebookChatLoader": FacebookChatLoader, + "NotebookLoader": NotebookLoader, + "CoNLLULoader": CoNLLULoader, + "GoogleApiYoutubeLoader": GoogleApiYoutubeLoader, + "GoogleApiClient": GoogleApiClient, + "CSVLoader": CSVLoader, + # "BlackboardLoader", +} diff --git a/src/backend/langflow/interface/documentloaders.py b/src/backend/langflow/interface/documentloaders.py new file mode 100644 index 000000000..ce4dc79ae --- /dev/null +++ b/src/backend/langflow/interface/documentloaders.py @@ -0,0 +1,27 @@ +from langflow.interface.custom_lists import documentloaders_type_to_cls_dict +from langflow.settings import settings +from langflow.interface.base import LangChainTypeCreator +from langflow.utils.util import build_template_from_class +from typing import Dict, List + + +class DocumentLoaderCreator(LangChainTypeCreator): + type_name: str = "documentloader" + + @property + def type_to_loader_dict(self) -> Dict: + return documentloaders_type_to_cls_dict + + def get_signature(self, name: str) -> Dict | None: + """Get the signature of a document loader.""" + try: + return build_template_from_class(name, documentloaders_type_to_cls_dict) + except ValueError as exc: + raise ValueError(f"Documment Loader {name} not found") from exc + + def to_list(self) -> List[str]: + return [ + documentloader.__name__ + for documentloader in self.type_to_loader_dict.values() + if documentloader.__name__ in settings.documentloaders or settings.dev + ] diff --git a/src/backend/langflow/interface/embeddings.py b/src/backend/langflow/interface/embeddings.py new file mode 100644 index 000000000..61130305d --- /dev/null +++ b/src/backend/langflow/interface/embeddings.py @@ -0,0 +1,27 @@ +from langflow.interface.custom_lists import embedding_type_to_cls_dict +from langflow.settings import settings +from langflow.interface.base import LangChainTypeCreator +from langflow.utils.util import build_template_from_class +from typing import Dict, List + + +class EmbeddingCreator(LangChainTypeCreator): + type_name: str = "embeddings" + + @property + def type_to_loader_dict(self) -> Dict: + return embedding_type_to_cls_dict + + def get_signature(self, name: str) -> Dict | None: + """Get the signature of an embedding.""" + try: + return build_template_from_class(name, embedding_type_to_cls_dict) + except ValueError as exc: + raise ValueError(f"Embedding {name} not found") from exc + + def to_list(self) -> List[str]: + return [ + embedding.__name__ + for embedding in self.type_to_loader_dict.values() + if embedding.__name__ in settings.embeddings or settings.dev + ] diff --git a/src/backend/langflow/interface/types.py b/src/backend/langflow/interface/types.py index 726c2b01d..8f1f6da76 100644 --- a/src/backend/langflow/interface/types.py +++ b/src/backend/langflow/interface/types.py @@ -4,6 +4,9 @@ from langflow.interface.llms import LLMCreator from langflow.interface.memories import MemoryCreator from langflow.interface.prompts import PromptCreator from langflow.interface.signature import get_signature +from langflow.interface.embeddings import EmbeddingCreator +from langflow.interface.vectorstore import VectorstoreCreator +from langflow.interface.documentloaders import DocumentLoaderCreator from langchain import chains from langflow.interface.chains import ChainCreator from langflow.interface.tools import ToolCreator @@ -29,6 +32,9 @@ def build_langchain_types_dict(): tool_creator = ToolCreator() llm_creator = LLMCreator() memory_creator = MemoryCreator() + embedding_creator = EmbeddingCreator() + vectorstore_creator = VectorstoreCreator() + documentloader_creator = DocumentLoaderCreator() all_types = {} @@ -39,6 +45,9 @@ def build_langchain_types_dict(): llm_creator, memory_creator, tool_creator, + embedding_creator, + vectorstore_creator, + documentloader_creator, ] all_types = {} diff --git a/src/backend/langflow/interface/vectorstore.py b/src/backend/langflow/interface/vectorstore.py new file mode 100644 index 000000000..f97674ca4 --- /dev/null +++ b/src/backend/langflow/interface/vectorstore.py @@ -0,0 +1,27 @@ +from langflow.interface.custom_lists import vectorstores_type_to_cls_dict +from langflow.settings import settings +from langflow.interface.base import LangChainTypeCreator +from langflow.utils.util import build_template_from_class +from typing import Dict, List + + +class VectorstoreCreator(LangChainTypeCreator): + type_name: str = "vectorstore" + + @property + def type_to_loader_dict(self) -> Dict: + return vectorstores_type_to_cls_dict + + def get_signature(self, name: str) -> Dict | None: + """Get the signature of an embedding.""" + try: + return build_template_from_class(name, vectorstores_type_to_cls_dict) + except ValueError as exc: + raise ValueError(f"Vector Store {name} not found") from exc + + def to_list(self) -> List[str]: + return [ + vectorstore + for vectorstore in self.type_to_loader_dict.keys() + if vectorstore in settings.vectorstores or settings.dev + ] diff --git a/src/backend/langflow/settings.py b/src/backend/langflow/settings.py index f4dd4ae30..6c5c170eb 100644 --- a/src/backend/langflow/settings.py +++ b/src/backend/langflow/settings.py @@ -12,6 +12,9 @@ class Settings(BaseSettings): llms: Optional[List[str]] = Field(...) tools: Optional[List[str]] = Field(...) memories: Optional[List[str]] = Field(...) + embeddings: Optional[List[str]] = Field(...) + vectorstores: Optional[List[str]] = Field(...) + documentloaders: Optional[List[str]] = Field(...) dev: bool = Field(...) class Config: diff --git a/src/backend/langflow/utils/util.py b/src/backend/langflow/utils/util.py index 664630890..3717bcba6 100644 --- a/src/backend/langflow/utils/util.py +++ b/src/backend/langflow/utils/util.py @@ -88,28 +88,30 @@ def build_template_from_class( docs = get_class_doc(_class) variables = {"_type": _type} - for class_field_items, value in _class.__fields__.items(): - if class_field_items in ["callback_manager"]: - continue - variables[class_field_items] = {} - for name_, value_ in value.__repr_args__(): - if name_ == "default_factory": - try: - variables[class_field_items][ - "default" - ] = get_default_factory( - module=_class.__base__.__module__, function=value_ - ) - except Exception: - variables[class_field_items]["default"] = None - elif name_ not in ["name"]: - variables[class_field_items][name_] = value_ - variables[class_field_items]["placeholder"] = ( - docs["Attributes"][class_field_items] - if class_field_items in docs["Attributes"] - else "" - ) + if "__fields__" in _class.__dict__: + for class_field_items, value in _class.__fields__.items(): + if class_field_items in ["callback_manager"]: + continue + variables[class_field_items] = {} + for name_, value_ in value.__repr_args__(): + if name_ == "default_factory": + try: + variables[class_field_items][ + "default" + ] = get_default_factory( + module=_class.__base__.__module__, function=value_ + ) + except Exception: + variables[class_field_items]["default"] = None + elif name_ not in ["name"]: + variables[class_field_items][name_] = value_ + + variables[class_field_items]["placeholder"] = ( + docs["Attributes"][class_field_items] + if class_field_items in docs["Attributes"] + else "" + ) base_classes = get_base_classes(_class) # Adding function to base classes to allow # the output to be a function