From 7f330177f307a3539a47852f772338c5492e221c Mon Sep 17 00:00:00 2001 From: Ibis Prevedello Date: Wed, 5 Apr 2023 19:40:52 -0300 Subject: [PATCH] feat: add vecstores, docloaders and embedding --- src/backend/langflow/config.yaml | 1 + src/backend/langflow/graph/base.py | 3 +++ src/backend/langflow/graph/graph.py | 12 +++++++++ src/backend/langflow/graph/utils.py | 3 +++ .../interface/documentLoaders/base.py | 8 +++++- .../interface/documentLoaders/custom.py | 27 +++++++++++++++++++ .../langflow/interface/importing/utils.py | 22 +++++++++++++++ src/backend/langflow/interface/loading.py | 8 ++++++ .../langflow/interface/vectorStore/base.py | 2 -- src/frontend/src/utils.ts | 12 ++++----- 10 files changed, 89 insertions(+), 9 deletions(-) create mode 100644 src/backend/langflow/interface/documentLoaders/custom.py diff --git a/src/backend/langflow/config.yaml b/src/backend/langflow/config.yaml index 970e160be..8563b9533 100644 --- a/src/backend/langflow/config.yaml +++ b/src/backend/langflow/config.yaml @@ -45,5 +45,6 @@ vectorstores: documentloaders: - TextLoader + - Text dev: false diff --git a/src/backend/langflow/graph/base.py b/src/backend/langflow/graph/base.py index fa93ed8ed..167f1ea93 100644 --- a/src/backend/langflow/graph/base.py +++ b/src/backend/langflow/graph/base.py @@ -153,6 +153,9 @@ class Node: result = result.run # type: ignore elif hasattr(result, "get_function"): result = result.get_function() # type: ignore + elif key == "Document Loader": + result = result.load() + self.params[key] = result elif isinstance(value, list) and all( isinstance(node, Node) for node in value diff --git a/src/backend/langflow/graph/graph.py b/src/backend/langflow/graph/graph.py index 022deac95..2c86ae90e 100644 --- a/src/backend/langflow/graph/graph.py +++ b/src/backend/langflow/graph/graph.py @@ -10,6 +10,9 @@ from langflow.graph.nodes import ( ToolkitNode, ToolNode, WrapperNode, + EmbeddingNode, + VectorStoreNode, + DocumentLoaderNode, ) from langflow.interface.agents.base import agent_creator from langflow.interface.chains.base import chain_creator @@ -20,6 +23,9 @@ from langflow.interface.tools.base import tool_creator from langflow.interface.tools.constants import FILE_TOOLS from langflow.interface.tools.util import get_tools_dict from langflow.interface.wrappers.base import wrapper_creator +from langflow.interface.embeddings.base import embedding_creator +from langflow.interface.vectorStore.base import vectorstore_creator +from langflow.interface.documentLoaders.base import documentloader_creator from langflow.utils import payload @@ -130,6 +136,12 @@ class Graph: or node_lc_type in llm_creator.to_list() ): nodes.append(LLMNode(node)) + elif node_type in embedding_creator.to_list(): + nodes.append(EmbeddingNode(node)) + elif node_type in vectorstore_creator.to_list(): + nodes.append(VectorStoreNode(node)) + elif node_type in documentloader_creator.to_list(): + nodes.append(DocumentLoaderNode(node)) else: nodes.append(Node(node)) return nodes diff --git a/src/backend/langflow/graph/utils.py b/src/backend/langflow/graph/utils.py index e7dc9e593..2fe047c2d 100644 --- a/src/backend/langflow/graph/utils.py +++ b/src/backend/langflow/graph/utils.py @@ -31,6 +31,9 @@ def load_file(file_name, file_content, accepted_types) -> Any: # Load the csv content csv_reader = csv.DictReader(io.StringIO(decoded_string)) return list(csv_reader) + elif suffix == "txt": + # Return the text content + return decoded_string else: raise ValueError(f"File {file_name} is not accepted") diff --git a/src/backend/langflow/interface/documentLoaders/base.py b/src/backend/langflow/interface/documentLoaders/base.py index b16f31e9f..fd700f27a 100644 --- a/src/backend/langflow/interface/documentLoaders/base.py +++ b/src/backend/langflow/interface/documentLoaders/base.py @@ -4,6 +4,7 @@ from langflow.interface.base import LangChainTypeCreator from langflow.interface.custom_lists import documentloaders_type_to_cls_dict from langflow.settings import settings from langflow.utils.util import build_template_from_class +from langflow.interface.documentLoaders.custom import CUSTOM_DOCUMENTLOADERS class DocumentLoaderCreator(LangChainTypeCreator): @@ -11,7 +12,12 @@ class DocumentLoaderCreator(LangChainTypeCreator): @property def type_to_loader_dict(self) -> Dict: - return documentloaders_type_to_cls_dict + types = documentloaders_type_to_cls_dict + + for name, documentloader in CUSTOM_DOCUMENTLOADERS.items(): + types[name] = documentloader + + return types def get_signature(self, name: str) -> Optional[Dict]: """Get the signature of a document loader.""" diff --git a/src/backend/langflow/interface/documentLoaders/custom.py b/src/backend/langflow/interface/documentLoaders/custom.py new file mode 100644 index 000000000..9e2d40f3e --- /dev/null +++ b/src/backend/langflow/interface/documentLoaders/custom.py @@ -0,0 +1,27 @@ +"""Load text files.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.text_splitter import CharacterTextSplitter + + +class Text(BaseLoader): + """Load Text files.""" + + def __init__(self, file: str): + """Initialize with file path.""" + self.file = file + + def load(self) -> List[Document]: + """Load from file path.""" + documents = [Document(page_content=self.file, metadata={"source": "loaded"})] + + text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) + + return text_splitter.split_documents(documents) + + +CUSTOM_DOCUMENTLOADERS = { + "Text": Text, +} diff --git a/src/backend/langflow/interface/importing/utils.py b/src/backend/langflow/interface/importing/utils.py index 0ada410e4..16073735a 100644 --- a/src/backend/langflow/interface/importing/utils.py +++ b/src/backend/langflow/interface/importing/utils.py @@ -9,6 +9,7 @@ from langchain.chains.base import Chain from langchain.chat_models.base import BaseChatModel from langchain.llms.base import BaseLLM from langchain.tools import BaseTool +from langflow.interface.documentLoaders.custom import CUSTOM_DOCUMENTLOADERS from langflow.interface.tools.util import get_tool_by_name @@ -38,6 +39,9 @@ def import_by_type(_type: str, name: str) -> Any: "toolkits": import_toolkit, "wrappers": import_wrapper, "memory": import_memory, + "embeddings": import_embedding, + "vectorstores": import_vectorstore, + "documentloaders": import_documentloader, } if _type == "llms": key = "chat" if "chat" in name.lower() else "llm" @@ -103,3 +107,21 @@ def import_tool(tool: str) -> BaseTool: def import_chain(chain: str) -> Chain: """Import chain from chain name""" return import_class(f"langchain.chains.{chain}") + + +def import_embedding(embedding: str) -> Any: + """Import embedding from embedding name""" + return import_class(f"langchain.embeddings.{embedding}") + + +def import_vectorstore(vectorstore: str) -> Any: + """Import vectorstore from vectorstore name""" + return import_class(f"langchain.vectorstores.{vectorstore}") + + +def import_documentloader(documentloader: str) -> Any: + """Import documentloader from documentloader name""" + if documentloader in CUSTOM_DOCUMENTLOADERS: + return CUSTOM_DOCUMENTLOADERS[documentloader] + + return import_class(f"langchain.document_loaders.{documentloader}") diff --git a/src/backend/langflow/interface/loading.py b/src/backend/langflow/interface/loading.py index 9a8d28d05..f4b11edf0 100644 --- a/src/backend/langflow/interface/loading.py +++ b/src/backend/langflow/interface/loading.py @@ -57,6 +57,14 @@ def instantiate_class(node_type: str, base_type: str, params: Dict) -> Any: if toolkits_creator.has_create_function(node_type): return load_toolkits_executor(node_type, loaded_toolkit, params) return loaded_toolkit + elif base_type == "embeddings": + params.pop("model") + return class_object(**params) + elif base_type == "vectorstores": + # Rename dict key + params["documents"] = params.pop("Document Loader") + params["embedding"] = params.pop("Embedding") + return class_object.from_documents(**params) else: return class_object(**params) diff --git a/src/backend/langflow/interface/vectorStore/base.py b/src/backend/langflow/interface/vectorStore/base.py index c8b42677f..e04881976 100644 --- a/src/backend/langflow/interface/vectorStore/base.py +++ b/src/backend/langflow/interface/vectorStore/base.py @@ -24,14 +24,12 @@ class VectorstoreCreator(LangChainTypeCreator): "required": True, "show": True, "name": "Document Loader", - "value": "", }, "Embedding": { "type": "Embeddings", "required": True, "show": True, "name": "Embedding", - "value": "", }, } return signature diff --git a/src/frontend/src/utils.ts b/src/frontend/src/utils.ts index dcbbdf833..88a1d9b06 100644 --- a/src/frontend/src/utils.ts +++ b/src/frontend/src/utils.ts @@ -77,8 +77,8 @@ export const nodeColors: {[char: string]: string} = { chat: "#454173", thought:"#272541", embeddings:"#FF9135", - documentloader:"#FF9135", - vectorstore: "#FF9135", + documentloaders:"#FF9135", + vectorstores: "#FF9135", unknown:"#9CA3AF" }; @@ -92,8 +92,8 @@ export const nodeNames:{[char: string]: string} = { advanced: "Advanced", chat: "Chat", embeddings: "Embeddings", - documentloader: "Document Loaders", - vectorstore: "Vector Stores", + documentloaders: "Document Loaders", + vectorstores: "Vector Stores", toolkits:"Toolkits", wrappers:"Wrappers", unknown:"Unknown" @@ -109,8 +109,8 @@ export const nodeIcons:{[char: string]: React.ForwardRefExoticComponent