From 380aba22deb5a7466b3f1f27514fd3bd9f5072a0 Mon Sep 17 00:00:00 2001 From: Ibis Prevedello Date: Fri, 7 Apr 2023 13:01:03 -0300 Subject: [PATCH] feat: add multi vectore stores --- src/backend/langflow/config.yaml | 5 +- src/backend/langflow/custom/customs.py | 1 + src/backend/langflow/graph/base.py | 11 ++- src/backend/langflow/graph/graph.py | 14 ++-- src/backend/langflow/graph/nodes.py | 25 +----- .../langflow/interface/agents/custom.py | 78 ++++++++++++++----- .../interface/documentLoaders/base.py | 34 +++++--- .../interface/documentLoaders/custom.py | 19 ++++- .../langflow/interface/importing/utils.py | 2 +- src/backend/langflow/interface/loading.py | 3 - src/backend/langflow/interface/run.py | 4 +- .../langflow/interface/toolkits/custom.py | 0 .../langflow/interface/tools/custom.py | 8 +- .../langflow/interface/vectorStore/base.py | 10 ++- src/backend/langflow/template/nodes.py | 50 +++++++----- 15 files changed, 164 insertions(+), 100 deletions(-) create mode 100644 src/backend/langflow/interface/toolkits/custom.py diff --git a/src/backend/langflow/config.yaml b/src/backend/langflow/config.yaml index fc7958193..8036dba25 100644 --- a/src/backend/langflow/config.yaml +++ b/src/backend/langflow/config.yaml @@ -13,6 +13,7 @@ agents: - CSVAgent - initialize_agent - VectorStoreAgent + - VectorStoreRouterAgent prompts: - PromptTemplate @@ -43,6 +44,8 @@ wrappers: toolkits: - OpenAPIToolkit - JsonToolkit + - VectorStoreInfo + - VectorStoreRouterToolkit memories: - ConversationBufferMemory @@ -57,6 +60,6 @@ vectorstores: documentloaders: - TextLoader - - Text + - WebBaseLoader dev: false diff --git a/src/backend/langflow/custom/customs.py b/src/backend/langflow/custom/customs.py index b5ba47e83..000bf890b 100644 --- a/src/backend/langflow/custom/customs.py +++ b/src/backend/langflow/custom/customs.py @@ -9,6 +9,7 @@ CUSTOM_NODES = { "CSVAgent": nodes.CSVAgentNode(), "initialize_agent": nodes.InitializeAgentNode(), "VectorStoreAgent": nodes.VectorStoreAgentNode(), + "VectorStoreRouterAgent": nodes.VectorStoreRouterAgentNode(), }, } diff --git a/src/backend/langflow/graph/base.py b/src/backend/langflow/graph/base.py index 0b89e2f0a..57bf797eb 100644 --- a/src/backend/langflow/graph/base.py +++ b/src/backend/langflow/graph/base.py @@ -153,7 +153,7 @@ class Node: result = result.run # type: ignore elif hasattr(result, "get_function"): result = result.get_function() # type: ignore - elif key == "Document Loader": + elif value.base_type == "documentloaders": result = result.load() self.params[key] = result @@ -185,9 +185,14 @@ class Node: def build(self, force: bool = False) -> Any: if not self._built or force: self._build() - + #! Deepcopy is breaking for vectorstores - if self.base_type == 'vectorstores': + if self.base_type in [ + "vectorstores", + "VectorStoreRouterAgent", + "VectorStoreAgent", + "VectorStoreInfo", + ] or self.node_type in ["VectorStoreInfo", "VectorStoreRouterToolkit"]: return self._built_object return deepcopy(self._built_object) diff --git a/src/backend/langflow/graph/graph.py b/src/backend/langflow/graph/graph.py index a58f5e4db..9cbeb94a3 100644 --- a/src/backend/langflow/graph/graph.py +++ b/src/backend/langflow/graph/graph.py @@ -4,30 +4,30 @@ from langflow.graph.base import Edge, Node from langflow.graph.nodes import ( AgentNode, ChainNode, + DocumentLoaderNode, + EmbeddingNode, FileToolNode, LLMNode, MemoryNode, PromptNode, ToolkitNode, ToolNode, - WrapperNode, - EmbeddingNode, VectorStoreNode, - DocumentLoaderNode, + WrapperNode, ) from langflow.interface.agents.base import agent_creator from langflow.interface.chains.base import chain_creator +from langflow.interface.documentLoaders.base import documentloader_creator +from langflow.interface.embeddings.base import embedding_creator from langflow.interface.llms.base import llm_creator +from langflow.interface.memories.base import memory_creator from langflow.interface.prompts.base import prompt_creator from langflow.interface.toolkits.base import toolkits_creator from langflow.interface.tools.base import tool_creator from langflow.interface.tools.constants import FILE_TOOLS from langflow.interface.tools.util import get_tools_dict -from langflow.interface.wrappers.base import wrapper_creator -from langflow.interface.embeddings.base import embedding_creator from langflow.interface.vectorStore.base import vectorstore_creator -from langflow.interface.documentLoaders.base import documentloader_creator -from langflow.interface.memories.base import memory_creator +from langflow.interface.wrappers.base import wrapper_creator from langflow.utils import payload diff --git a/src/backend/langflow/graph/nodes.py b/src/backend/langflow/graph/nodes.py index 3e1090148..0d90fe333 100644 --- a/src/backend/langflow/graph/nodes.py +++ b/src/backend/langflow/graph/nodes.py @@ -34,7 +34,7 @@ class AgentNode(Node): self._build() #! Cannot deepcopy VectorStore - if self.node_type == "VectorStoreAgent": + if self.node_type in ["VectorStoreAgent", "VectorStoreRouterAgent"]: return self._built_object return deepcopy(self._built_object) @@ -43,11 +43,6 @@ class ToolNode(Node): def __init__(self, data: Dict): super().__init__(data, base_type="tools") - def build(self, force: bool = False) -> Any: - if not self._built or force: - self._build() - return deepcopy(self._built_object) - class PromptNode(Node): def __init__(self, data: Dict): @@ -111,32 +106,16 @@ class LLMNode(Node): def __init__(self, data: Dict): super().__init__(data, base_type="llms") - def build(self, force: bool = False) -> Any: - if not self._built or force: - self._build() - return deepcopy(self._built_object) - class ToolkitNode(Node): def __init__(self, data: Dict): super().__init__(data, base_type="toolkits") - def build(self, force: bool = False) -> Any: - if not self._built or force: - self._build() - - return deepcopy(self._built_object) - class FileToolNode(ToolNode): def __init__(self, data: Dict): super().__init__(data) - def build(self, force: bool = False) -> Any: - if not self._built or force: - self._build() - return deepcopy(self._built_object) - class WrapperNode(Node): def __init__(self, data: Dict): @@ -155,7 +134,6 @@ class DocumentLoaderNode(Node): super().__init__(data, base_type="documentloaders") - class EmbeddingNode(Node): def __init__(self, data: Dict): super().__init__(data, base_type="embeddings") @@ -169,4 +147,3 @@ class VectorStoreNode(Node): class MemoryNode(Node): def __init__(self, data: Dict): super().__init__(data, base_type="memory") - diff --git a/src/backend/langflow/interface/agents/custom.py b/src/backend/langflow/interface/agents/custom.py index 316e9b151..f06c11562 100644 --- a/src/backend/langflow/interface/agents/custom.py +++ b/src/backend/langflow/interface/agents/custom.py @@ -2,21 +2,27 @@ from typing import Any, List, Optional from langchain import LLMChain from langchain.agents import AgentExecutor, Tool, ZeroShotAgent, initialize_agent +from langchain.agents.agent_toolkits import ( + VectorStoreInfo, + VectorStoreRouterToolkit, + VectorStoreToolkit, +) from langchain.agents.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit from langchain.agents.agent_toolkits.pandas.prompt import PREFIX as PANDAS_PREFIX from langchain.agents.agent_toolkits.pandas.prompt import SUFFIX as PANDAS_SUFFIX +from langchain.agents.agent_toolkits.vectorstore.prompt import ( + PREFIX as VECTORSTORE_PREFIX, +) +from langchain.agents.agent_toolkits.vectorstore.prompt import ( + ROUTER_PREFIX as VECTORSTORE_ROUTER_PREFIX, +) from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS from langchain.llms.base import BaseLLM from langchain.memory.chat_memory import BaseChatMemory from langchain.schema import BaseLanguageModel from langchain.tools.python.tool import PythonAstREPLTool -from langchain.agents.agent_toolkits import ( - VectorStoreToolkit, - VectorStoreInfo, -) -from langchain.vectorstores.base import VectorStore -from langchain.agents.agent_toolkits.vectorstore.prompt import PREFIX as VECTORSTORE_PREFIX, ROUTER_PREFIX as VECTORSTORE_ROUTER_PREFIX + class JsonAgent(AgentExecutor): """Json agent""" @@ -118,33 +124,62 @@ class VectorStoreAgent(AgentExecutor): @classmethod def from_toolkit_and_llm( - cls, - llm: BaseLanguageModel, - name: str, - description: str, - vectorstore: VectorStore, - **kwargs: Any + cls, llm: BaseLLM, vectorstoreinfo: VectorStoreInfo, **kwargs: Any ): """Construct a vectorstore agent from an LLM and tools.""" - vectorstore_info = VectorStoreInfo( - name=name, - description=description, - vectorstore=vectorstore - ) - toolkit = VectorStoreToolkit(vectorstore_info=vectorstore_info, llm=llm) - + toolkit = VectorStoreToolkit(vectorstore_info=vectorstoreinfo, llm=llm) tools = toolkit.get_tools() prompt = ZeroShotAgent.create_prompt(tools, prefix=VECTORSTORE_PREFIX) llm_chain = LLMChain( llm=llm, prompt=prompt, - callback_manager=None, ) tool_names = [tool.name for tool in tools] agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) - return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True) + return AgentExecutor.from_agent_and_tools( + agent=agent, tools=tools, verbose=True + ) + + def run(self, *args, **kwargs): + return super().run(*args, **kwargs) + + +class VectorStoreRouterAgent(AgentExecutor): + """Vector Store Router Agent""" + + @staticmethod + def function_name(): + return "VectorStoreRouterAgent" + + @classmethod + def initialize(cls, *args, **kwargs): + return cls.from_toolkit_and_llm(*args, **kwargs) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @classmethod + def from_toolkit_and_llm( + cls, + llm: BaseLanguageModel, + vectorstoreroutertoolkit: VectorStoreRouterToolkit, + **kwargs: Any + ): + """Construct a vector store router agent from an LLM and tools.""" + + tools = vectorstoreroutertoolkit.get_tools() + prompt = ZeroShotAgent.create_prompt(tools, prefix=VECTORSTORE_ROUTER_PREFIX) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, tools=tools, verbose=True + ) def run(self, *args, **kwargs): return super().run(*args, **kwargs) @@ -182,4 +217,5 @@ CUSTOM_AGENTS = { "CSVAgent": CSVAgent, "initialize_agent": InitializeAgent, "VectorStoreAgent": VectorStoreAgent, + "VectorStoreRouterAgent": VectorStoreRouterAgent, } diff --git a/src/backend/langflow/interface/documentLoaders/base.py b/src/backend/langflow/interface/documentLoaders/base.py index fd700f27a..5aa729b62 100644 --- a/src/backend/langflow/interface/documentLoaders/base.py +++ b/src/backend/langflow/interface/documentLoaders/base.py @@ -2,9 +2,9 @@ from typing import Dict, List, Optional from langflow.interface.base import LangChainTypeCreator from langflow.interface.custom_lists import documentloaders_type_to_cls_dict +from langflow.interface.documentLoaders.custom import CUSTOM_DOCUMENTLOADERS from langflow.settings import settings from langflow.utils.util import build_template_from_class -from langflow.interface.documentLoaders.custom import CUSTOM_DOCUMENTLOADERS class DocumentLoaderCreator(LangChainTypeCreator): @@ -14,6 +14,10 @@ class DocumentLoaderCreator(LangChainTypeCreator): def type_to_loader_dict(self) -> Dict: types = documentloaders_type_to_cls_dict + # Drop some types that are reimplemented with the same name + types.pop("TextLoader") + types.pop("WebBaseLoader") + for name, documentloader in CUSTOM_DOCUMENTLOADERS.items(): types[name] = documentloader @@ -26,15 +30,25 @@ class DocumentLoaderCreator(LangChainTypeCreator): name, documentloaders_type_to_cls_dict ) - signature["template"]["file"] = { - "type": "file", - "required": True, - "show": True, - "name": "path", - "value": "", - "suffixes": [".txt"], - "fileTypes": ["txt"], - } + if name == "TextLoader": + signature["template"]["file"] = { + "type": "file", + "required": True, + "show": True, + "name": "path", + "value": "", + "suffixes": [".txt"], + "fileTypes": ["txt"], + } + elif name == "WebBaseLoader": + signature["template"]["web_path"] = { + "type": "str", + "required": True, + "show": True, + "name": "web_path", + "value": "", + "display_name": "Web Path", + } return signature except ValueError as exc: diff --git a/src/backend/langflow/interface/documentLoaders/custom.py b/src/backend/langflow/interface/documentLoaders/custom.py index 9e2d40f3e..f142314fa 100644 --- a/src/backend/langflow/interface/documentLoaders/custom.py +++ b/src/backend/langflow/interface/documentLoaders/custom.py @@ -3,10 +3,11 @@ from typing import List from langchain.docstore.document import Document from langchain.document_loaders.base import BaseLoader +from langchain.document_loaders.web_base import WebBaseLoader as LCWebBaseLoader from langchain.text_splitter import CharacterTextSplitter -class Text(BaseLoader): +class TextLoader(BaseLoader): """Load Text files.""" def __init__(self, file: str): @@ -22,6 +23,20 @@ class Text(BaseLoader): return text_splitter.split_documents(documents) +class WebBaseLoader(LCWebBaseLoader): + def load(self) -> List[Document]: + """Load data into document objects.""" + soup = self.scrape() + text = soup.get_text() + metadata = {"source": self.web_path} + documents = [Document(page_content=text, metadata=metadata)] + + text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) + + return text_splitter.split_documents(documents) + + CUSTOM_DOCUMENTLOADERS = { - "Text": Text, + "TextLoader": TextLoader, + "WebBaseLoader": WebBaseLoader, } diff --git a/src/backend/langflow/interface/importing/utils.py b/src/backend/langflow/interface/importing/utils.py index bd0c9c99d..62f81a90a 100644 --- a/src/backend/langflow/interface/importing/utils.py +++ b/src/backend/langflow/interface/importing/utils.py @@ -9,8 +9,8 @@ from langchain.chains.base import Chain from langchain.chat_models.base import BaseChatModel from langchain.llms.base import BaseLLM from langchain.tools import BaseTool -from langflow.interface.documentLoaders.custom import CUSTOM_DOCUMENTLOADERS +from langflow.interface.documentLoaders.custom import CUSTOM_DOCUMENTLOADERS from langflow.interface.tools.util import get_tool_by_name diff --git a/src/backend/langflow/interface/loading.py b/src/backend/langflow/interface/loading.py index f4b11edf0..4426644b9 100644 --- a/src/backend/langflow/interface/loading.py +++ b/src/backend/langflow/interface/loading.py @@ -61,9 +61,6 @@ def instantiate_class(node_type: str, base_type: str, params: Dict) -> Any: params.pop("model") return class_object(**params) elif base_type == "vectorstores": - # Rename dict key - params["documents"] = params.pop("Document Loader") - params["embedding"] = params.pop("Embedding") return class_object.from_documents(**params) else: return class_object(**params) diff --git a/src/backend/langflow/interface/run.py b/src/backend/langflow/interface/run.py index c413dfc4f..8f7765ef2 100644 --- a/src/backend/langflow/interface/run.py +++ b/src/backend/langflow/interface/run.py @@ -2,7 +2,7 @@ import contextlib import io from typing import Any, Dict -from langflow.cache.utils import compute_hash, load_cache, save_cache +from langflow.cache.utils import compute_hash, load_cache from langflow.graph.graph import Graph from langflow.interface import loading from langflow.utils.logger import logger @@ -67,7 +67,7 @@ def process_graph(data_graph: Dict[str, Any]): # We have to save it here because if the # memory is updated we need to keep the new values logger.debug("Saving langchain object to cache") - save_cache(computed_hash, langchain_object, is_first_message) + # save_cache(computed_hash, langchain_object, is_first_message) logger.debug("Saved langchain object to cache") return {"result": str(result), "thought": thought.strip()} diff --git a/src/backend/langflow/interface/toolkits/custom.py b/src/backend/langflow/interface/toolkits/custom.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/backend/langflow/interface/tools/custom.py b/src/backend/langflow/interface/tools/custom.py index 6ce61821a..4c641f388 100644 --- a/src/backend/langflow/interface/tools/custom.py +++ b/src/backend/langflow/interface/tools/custom.py @@ -1,9 +1,9 @@ -from langflow.utils import validate - - from typing import Callable, Optional + from pydantic import BaseModel, validator +from langflow.utils import validate + class Function(BaseModel): code: str @@ -30,8 +30,8 @@ class Function(BaseModel): return validate.create_function(self.code, function_name) + class PythonFunction(Function): """Python function""" code: str - diff --git a/src/backend/langflow/interface/vectorStore/base.py b/src/backend/langflow/interface/vectorStore/base.py index e04881976..10e3a8768 100644 --- a/src/backend/langflow/interface/vectorStore/base.py +++ b/src/backend/langflow/interface/vectorStore/base.py @@ -19,17 +19,19 @@ class VectorstoreCreator(LangChainTypeCreator): signature = build_template_from_class(name, vectorstores_type_to_cls_dict) signature["template"] = { - "Document Loader": { + "documents": { "type": "BaseLoader", "required": True, "show": True, - "name": "Document Loader", + "name": "documents", + "display_name": "Document Loader", }, - "Embedding": { + "embedding": { "type": "Embeddings", "required": True, "show": True, - "name": "Embedding", + "name": "embedding", + "display_name": "Embedding", }, } return signature diff --git a/src/backend/langflow/template/nodes.py b/src/backend/langflow/template/nodes.py index 6d41ee3dc..3777eb80f 100644 --- a/src/backend/langflow/template/nodes.py +++ b/src/backend/langflow/template/nodes.py @@ -254,25 +254,11 @@ class VectorStoreAgentNode(FrontendNode): type_name="vectorstore_agent", fields=[ TemplateField( - field_type="str", + field_type="VectorStoreInfo", required=True, show=True, - name="name", - value="", - ), - TemplateField( - field_type="str", - required=True, - show=True, - name="description", - value="", - ), - TemplateField( - field_type="VectorStore", - required=True, - show=True, - name="vectorstore", - display_name="Vector Store", + name="vectorstoreinfo", + display_name="Vector Store Info", ), TemplateField( field_type="BaseLanguageModel", @@ -283,7 +269,35 @@ class VectorStoreAgentNode(FrontendNode): ), ], ) - description: str = """Construct a json agent from a CSV and tools.""" + description: str = """Construct an agent from a Vector Store.""" + base_classes: list[str] = ["AgentExecutor"] + + def to_dict(self): + return super().to_dict() + + +class VectorStoreRouterAgentNode(FrontendNode): + name: str = "VectorStoreRouterAgent" + template: Template = Template( + type_name="vectorstorerouter_agent", + fields=[ + TemplateField( + field_type="VectorStoreRouterToolkit", + required=True, + show=True, + name="vectorstoreroutertoolkit", + display_name="Vector Store Router Toolkit", + ), + TemplateField( + field_type="BaseLanguageModel", + required=True, + show=True, + name="llm", + display_name="LLM", + ), + ], + ) + description: str = """Construct an agent from a Vector Store Router.""" base_classes: list[str] = ["AgentExecutor"] def to_dict(self):