feat: add embeddings, vectorstores and document loaders to list

This commit is contained in:
Ibis Prevedello 2023-03-30 14:25:23 -03:00
commit a9ff3add92
8 changed files with 287 additions and 21 deletions

View file

@ -26,4 +26,13 @@ tools:
memories:
# - ConversationBufferMemory
embeddings:
#
vectorstores:
#
documentloaders:
#
dev: false

View file

@ -41,3 +41,165 @@ memory_type_to_cls_dict: dict[str, Any] = {
# chain_type_to_cls_dict = type_to_loader_dict
# chain_type_to_cls_dict["conversation_chain"] = ConversationChain
## Embeddings
from langchain.embeddings import (
CohereEmbeddings,
FakeEmbeddings,
HuggingFaceEmbeddings,
HuggingFaceInstructEmbeddings,
HuggingFaceHubEmbeddings,
OpenAIEmbeddings,
# SagemakerEndpointEmbeddings,
TensorflowHubEmbeddings,
SelfHostedHuggingFaceEmbeddings,
SelfHostedHuggingFaceInstructEmbeddings,
SelfHostedEmbeddings,
)
# Registry of the embedding classes imported above, keyed by name.
# Each key is spelled exactly like the identifier of the class it maps to.
embedding_type_to_cls_dict: dict[str, Any] = {
    "OpenAIEmbeddings": OpenAIEmbeddings,
    "HuggingFaceEmbeddings": HuggingFaceEmbeddings,
    "CohereEmbeddings": CohereEmbeddings,
    "HuggingFaceHubEmbeddings": HuggingFaceHubEmbeddings,
    "TensorflowHubEmbeddings": TensorflowHubEmbeddings,
    # Disabled: the matching import is commented out as well.
    # "SagemakerEndpointEmbeddings": SagemakerEndpointEmbeddings,
    "HuggingFaceInstructEmbeddings": HuggingFaceInstructEmbeddings,
    "SelfHostedEmbeddings": SelfHostedEmbeddings,
    "SelfHostedHuggingFaceEmbeddings": SelfHostedHuggingFaceEmbeddings,
    "SelfHostedHuggingFaceInstructEmbeddings": SelfHostedHuggingFaceInstructEmbeddings,
    "FakeEmbeddings": FakeEmbeddings,
}
## Vector Stores
from langchain.vectorstores import (
ElasticVectorSearch,
FAISS,
VectorStore,
Pinecone,
Weaviate,
Qdrant,
Milvus,
Chroma,
OpenSearchVectorSearch,
AtlasDB,
DeepLake,
)
# Registry of the vector store classes imported above, keyed by name.
# Each key is spelled exactly like the identifier of the class it maps to.
vectorstores_type_to_cls_dict: dict[str, Any] = {
    "ElasticVectorSearch": ElasticVectorSearch,
    "FAISS": FAISS,
    "VectorStore": VectorStore,
    "Pinecone": Pinecone,
    "Weaviate": Weaviate,
    "Qdrant": Qdrant,
    "Milvus": Milvus,
    "Chroma": Chroma,
    "OpenSearchVectorSearch": OpenSearchVectorSearch,
    "AtlasDB": AtlasDB,
    "DeepLake": DeepLake,
}
## Document Loaders
from langchain.document_loaders import (
UnstructuredFileLoader,
UnstructuredFileIOLoader,
UnstructuredURLLoader,
DirectoryLoader,
NotionDirectoryLoader,
ReadTheDocsLoader,
GoogleDriveLoader,
UnstructuredHTMLLoader,
# BSHTMLLoader,
UnstructuredPowerPointLoader,
UnstructuredWordDocumentLoader,
UnstructuredPDFLoader,
UnstructuredImageLoader,
ObsidianLoader,
UnstructuredEmailLoader,
UnstructuredMarkdownLoader,
RoamLoader,
YoutubeLoader,
S3FileLoader,
TextLoader,
HNLoader,
GitbookLoader,
S3DirectoryLoader,
GCSFileLoader,
GCSDirectoryLoader,
WebBaseLoader,
IMSDbLoader,
AZLyricsLoader,
CollegeConfidentialLoader,
IFixitLoader,
GutenbergLoader,
PagedPDFSplitter,
PyPDFLoader,
EverNoteLoader,
AirbyteJSONLoader,
OnlinePDFLoader,
PDFMinerLoader,
PyMuPDFLoader,
TelegramChatLoader,
SRTLoader,
FacebookChatLoader,
NotebookLoader,
CoNLLULoader,
GoogleApiYoutubeLoader,
GoogleApiClient,
CSVLoader,
# BlackboardLoader
)
# Registry of the document loader classes imported above, keyed by name.
# Each key is spelled exactly like the identifier of the class it maps to.
# NOTE(review): consumers that read `cls.__name__` from the values instead of
# the keys will see duplicates if any imported name is an alias of another
# class (PagedPDFSplitter / PyPDFLoader look like candidates) - confirm upstream.
documentloaders_type_to_cls_dict: dict[str, Any] = {
    "UnstructuredFileLoader": UnstructuredFileLoader,
    "UnstructuredFileIOLoader": UnstructuredFileIOLoader,
    "UnstructuredURLLoader": UnstructuredURLLoader,
    "DirectoryLoader": DirectoryLoader,
    "NotionDirectoryLoader": NotionDirectoryLoader,
    "ReadTheDocsLoader": ReadTheDocsLoader,
    "GoogleDriveLoader": GoogleDriveLoader,
    "UnstructuredHTMLLoader": UnstructuredHTMLLoader,
    # Disabled: the matching import is commented out as well.
    # "BSHTMLLoader": BSHTMLLoader,
    "UnstructuredPowerPointLoader": UnstructuredPowerPointLoader,
    "UnstructuredWordDocumentLoader": UnstructuredWordDocumentLoader,
    "UnstructuredPDFLoader": UnstructuredPDFLoader,
    "UnstructuredImageLoader": UnstructuredImageLoader,
    "ObsidianLoader": ObsidianLoader,
    "UnstructuredEmailLoader": UnstructuredEmailLoader,
    "UnstructuredMarkdownLoader": UnstructuredMarkdownLoader,
    "RoamLoader": RoamLoader,
    "YoutubeLoader": YoutubeLoader,
    "S3FileLoader": S3FileLoader,
    "TextLoader": TextLoader,
    "HNLoader": HNLoader,
    "GitbookLoader": GitbookLoader,
    "S3DirectoryLoader": S3DirectoryLoader,
    "GCSFileLoader": GCSFileLoader,
    "GCSDirectoryLoader": GCSDirectoryLoader,
    "WebBaseLoader": WebBaseLoader,
    "IMSDbLoader": IMSDbLoader,
    "AZLyricsLoader": AZLyricsLoader,
    "CollegeConfidentialLoader": CollegeConfidentialLoader,
    "IFixitLoader": IFixitLoader,
    "GutenbergLoader": GutenbergLoader,
    "PagedPDFSplitter": PagedPDFSplitter,
    "PyPDFLoader": PyPDFLoader,
    "EverNoteLoader": EverNoteLoader,
    "AirbyteJSONLoader": AirbyteJSONLoader,
    "OnlinePDFLoader": OnlinePDFLoader,
    "PDFMinerLoader": PDFMinerLoader,
    "PyMuPDFLoader": PyMuPDFLoader,
    "TelegramChatLoader": TelegramChatLoader,
    "SRTLoader": SRTLoader,
    "FacebookChatLoader": FacebookChatLoader,
    "NotebookLoader": NotebookLoader,
    "CoNLLULoader": CoNLLULoader,
    "GoogleApiYoutubeLoader": GoogleApiYoutubeLoader,
    "GoogleApiClient": GoogleApiClient,
    "CSVLoader": CSVLoader,
    # Disabled: the matching import is commented out as well.
    # "BlackboardLoader",
}

View file

@ -0,0 +1,27 @@
from langflow.interface.custom_lists import documentloaders_type_to_cls_dict
from langflow.settings import settings
from langflow.interface.base import LangChainTypeCreator
from langflow.utils.util import build_template_from_class
from typing import Dict, List
class DocumentLoaderCreator(LangChainTypeCreator):
    """Type creator backed by ``documentloaders_type_to_cls_dict``.

    Maps document loader names to their classes, builds the signature
    template for a single loader and lists the loaders enabled in settings.
    """

    type_name: str = "documentloader"

    @property
    def type_to_loader_dict(self) -> Dict:
        """Return the mapping from document loader names to loader classes."""
        return documentloaders_type_to_cls_dict

    def get_signature(self, name: str) -> Dict | None:
        """Get the signature of a document loader.

        Args:
            name: Name of the document loader to build the template for.

        Returns:
            Whatever ``build_template_from_class`` produces for ``name``.

        Raises:
            ValueError: If ``build_template_from_class`` raises ``ValueError``;
                the original exception is chained as the cause.
        """
        try:
            return build_template_from_class(name, documentloaders_type_to_cls_dict)
        except ValueError as exc:
            raise ValueError(f"Document Loader {name} not found") from exc

    def to_list(self) -> List[str]:
        """Return the class names of the document loaders that are enabled.

        A loader is listed when its class name appears in
        ``settings.documentloaders`` or when ``settings.dev`` is truthy, in
        which case every registered loader is listed.
        """
        # The setting is Optional: an empty key in the config yields None, and
        # `name in None` would raise TypeError before `settings.dev` is read.
        enabled = settings.documentloaders or []
        return [
            documentloader.__name__
            for documentloader in self.type_to_loader_dict.values()
            if settings.dev or documentloader.__name__ in enabled
        ]

View file

@ -0,0 +1,27 @@
from langflow.interface.custom_lists import embedding_type_to_cls_dict
from langflow.settings import settings
from langflow.interface.base import LangChainTypeCreator
from langflow.utils.util import build_template_from_class
from typing import Dict, List
class EmbeddingCreator(LangChainTypeCreator):
    """Type creator backed by ``embedding_type_to_cls_dict``.

    Maps embedding names to their classes, builds the signature template for
    a single embedding and lists the embeddings enabled in settings.
    """

    type_name: str = "embeddings"

    @property
    def type_to_loader_dict(self) -> Dict:
        """Return the mapping from embedding names to embedding classes."""
        return embedding_type_to_cls_dict

    def get_signature(self, name: str) -> Dict | None:
        """Get the signature of an embedding.

        Args:
            name: Name of the embedding to build the template for.

        Returns:
            Whatever ``build_template_from_class`` produces for ``name``.

        Raises:
            ValueError: If ``build_template_from_class`` raises ``ValueError``;
                the original exception is chained as the cause.
        """
        try:
            return build_template_from_class(name, embedding_type_to_cls_dict)
        except ValueError as exc:
            raise ValueError(f"Embedding {name} not found") from exc

    def to_list(self) -> List[str]:
        """Return the class names of the embeddings that are enabled.

        An embedding is listed when its class name appears in
        ``settings.embeddings`` or when ``settings.dev`` is truthy, in which
        case every registered embedding is listed.
        """
        # The setting is Optional: an empty key in the config yields None, and
        # `name in None` would raise TypeError before `settings.dev` is read.
        enabled = settings.embeddings or []
        return [
            embedding.__name__
            for embedding in self.type_to_loader_dict.values()
            if settings.dev or embedding.__name__ in enabled
        ]

View file

@ -4,6 +4,9 @@ from langflow.interface.llms import LLMCreator
from langflow.interface.memories import MemoryCreator
from langflow.interface.prompts import PromptCreator
from langflow.interface.signature import get_signature
from langflow.interface.embeddings import EmbeddingCreator
from langflow.interface.vectorstore import VectorstoreCreator
from langflow.interface.documentloaders import DocumentLoaderCreator
from langchain import chains
from langflow.interface.chains import ChainCreator
from langflow.interface.tools import ToolCreator
@ -29,6 +32,9 @@ def build_langchain_types_dict():
tool_creator = ToolCreator()
llm_creator = LLMCreator()
memory_creator = MemoryCreator()
embedding_creator = EmbeddingCreator()
vectorstore_creator = VectorstoreCreator()
documentloader_creator = DocumentLoaderCreator()
all_types = {}
@ -39,6 +45,9 @@ def build_langchain_types_dict():
llm_creator,
memory_creator,
tool_creator,
embedding_creator,
vectorstore_creator,
documentloader_creator,
]
all_types = {}

View file

@ -0,0 +1,27 @@
from langflow.interface.custom_lists import vectorstores_type_to_cls_dict
from langflow.settings import settings
from langflow.interface.base import LangChainTypeCreator
from langflow.utils.util import build_template_from_class
from typing import Dict, List
class VectorstoreCreator(LangChainTypeCreator):
    """Type creator backed by ``vectorstores_type_to_cls_dict``.

    Maps vector store names to their classes, builds the signature template
    for a single vector store and lists the vector stores enabled in settings.
    """

    type_name: str = "vectorstore"

    @property
    def type_to_loader_dict(self) -> Dict:
        """Return the mapping from vector store names to vector store classes."""
        return vectorstores_type_to_cls_dict

    def get_signature(self, name: str) -> Dict | None:
        """Get the signature of a vector store.

        Args:
            name: Name of the vector store to build the template for.

        Returns:
            Whatever ``build_template_from_class`` produces for ``name``.

        Raises:
            ValueError: If ``build_template_from_class`` raises ``ValueError``;
                the original exception is chained as the cause.
        """
        try:
            return build_template_from_class(name, vectorstores_type_to_cls_dict)
        except ValueError as exc:
            raise ValueError(f"Vector Store {name} not found") from exc

    def to_list(self) -> List[str]:
        """Return the registry keys of the vector stores that are enabled.

        A vector store is listed when its key appears in
        ``settings.vectorstores`` or when ``settings.dev`` is truthy, in which
        case every registered vector store is listed.
        """
        # The setting is Optional: an empty key in the config yields None, and
        # `name in None` would raise TypeError before `settings.dev` is read.
        enabled = settings.vectorstores or []
        return [
            vectorstore
            for vectorstore in self.type_to_loader_dict
            if settings.dev or vectorstore in enabled
        ]

View file

@ -12,6 +12,9 @@ class Settings(BaseSettings):
llms: Optional[List[str]] = Field(...)
tools: Optional[List[str]] = Field(...)
memories: Optional[List[str]] = Field(...)
embeddings: Optional[List[str]] = Field(...)
vectorstores: Optional[List[str]] = Field(...)
documentloaders: Optional[List[str]] = Field(...)
dev: bool = Field(...)
class Config:

View file

@ -88,28 +88,30 @@ def build_template_from_class(
docs = get_class_doc(_class)
variables = {"_type": _type}
for class_field_items, value in _class.__fields__.items():
if class_field_items in ["callback_manager"]:
continue
variables[class_field_items] = {}
for name_, value_ in value.__repr_args__():
if name_ == "default_factory":
try:
variables[class_field_items][
"default"
] = get_default_factory(
module=_class.__base__.__module__, function=value_
)
except Exception:
variables[class_field_items]["default"] = None
elif name_ not in ["name"]:
variables[class_field_items][name_] = value_
variables[class_field_items]["placeholder"] = (
docs["Attributes"][class_field_items]
if class_field_items in docs["Attributes"]
else ""
)
if "__fields__" in _class.__dict__:
for class_field_items, value in _class.__fields__.items():
if class_field_items in ["callback_manager"]:
continue
variables[class_field_items] = {}
for name_, value_ in value.__repr_args__():
if name_ == "default_factory":
try:
variables[class_field_items][
"default"
] = get_default_factory(
module=_class.__base__.__module__, function=value_
)
except Exception:
variables[class_field_items]["default"] = None
elif name_ not in ["name"]:
variables[class_field_items][name_] = value_
variables[class_field_items]["placeholder"] = (
docs["Attributes"][class_field_items]
if class_field_items in docs["Attributes"]
else ""
)
base_classes = get_base_classes(_class)
# Adding function to base classes to allow
# the output to be a function