From 4cc2fae52b8bd3d759074df54a9450a5a3fe9c61 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 23 Jun 2023 12:38:02 -0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=80=20feat(langflow):=20add=20support?= =?UTF-8?q?=20for=20MongoDB=20Atlas=20Vector=20Search=20in=20vectorstores?= =?UTF-8?q?=20=E2=9C=A8=20feat(langflow):=20add=20support=20for=20search?= =?UTF-8?q?=5Fkwargs=20field=20in=20VectorStoreFrontendNode=20The=20change?= =?UTF-8?q?s=20add=20support=20for=20MongoDB=20Atlas=20Vector=20Search=20i?= =?UTF-8?q?n=20the=20vectorstores.=20The=20`MongoDBAtlasVectorSearch`=20cl?= =?UTF-8?q?ass=20is=20now=20imported=20and=20initialized=20in=20`vector=5F?= =?UTF-8?q?store.py`.=20The=20`initialize=5Fmongodb`=20function=20is=20add?= =?UTF-8?q?ed=20to=20initialize=20the=20MongoDB=20Atlas=20Vector=20Search?= =?UTF-8?q?=20class.=20The=20`VectorStoreFrontendNode`=20class=20is=20upda?= =?UTF-8?q?ted=20to=20add=20the=20`mongodb=5Fatlas=5Fcluster=5Furi`,=20`co?= =?UTF-8?q?llection=5Fname`,=20and=20`db=5Fname`=20fields.=20The=20`search?= =?UTF-8?q?=5Fkwargs`=20field=20is=20also=20added=20to=20the=20`VectorStor?= =?UTF-8?q?eFrontendNode`=20class=20to=20allow=20users=20to=20pass=20addit?= =?UTF-8?q?ional=20search=20parameters=20to=20the=20vector=20store.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/backend/langflow/config.yaml | 1 + .../interface/initialize/vector_store.py | 40 +++++++++++++ .../template/frontend_node/vectorstores.py | 59 +++++++++++++++++++ 3 files changed, 100 insertions(+) diff --git a/src/backend/langflow/config.yaml b/src/backend/langflow/config.yaml index f3b5efaf3..d8cd4a325 100644 --- a/src/backend/langflow/config.yaml +++ b/src/backend/langflow/config.yaml @@ -133,6 +133,7 @@ vectorstores: - FAISS - Pinecone - SupabaseVectorStore + - MongoDBAtlasVectorSearch wrappers: - RequestsWrapper # - ChatPromptTemplate diff --git a/src/backend/langflow/interface/initialize/vector_store.py b/src/backend/langflow/interface/initialize/vector_store.py index a5149b922..cc887dfd7 100644 --- a/src/backend/langflow/interface/initialize/vector_store.py +++ b/src/backend/langflow/interface/initialize/vector_store.py @@ -7,7 +7,9 @@ from langchain.vectorstores import ( FAISS, Weaviate, SupabaseVectorStore, + MongoDBAtlasVectorSearch, ) +import os def docs_in_params(params: dict) -> bool: @@ -18,6 +20,38 @@ def docs_in_params(params: dict) -> bool: ) +def initialize_mongodb(class_object: Type[MongoDBAtlasVectorSearch], params: dict): + """Initialize mongodb and return the class object""" + + MONGODB_ATLAS_CLUSTER_URI = params.get("mongodb_atlas_cluster_uri") + if not MONGODB_ATLAS_CLUSTER_URI: + raise ValueError("Mongodb atlas cluster uri must be provided in the params") + from pymongo import MongoClient + + client = MongoClient(MONGODB_ATLAS_CLUSTER_URI) + db_name = "lanchain_db" + collection_name = "langchain_col" + collection = client[db_name][collection_name] + index_name = "langchain_demo" + if not docs_in_params(params): + # __init__ requires collection, embedding and index_name + init_args = { + "collection": collection, + "index_name": index_name, + "embedding": params.get("embedding"), + } + + return class_object(**init_args) + + if "texts" in params: + params["documents"] = params.pop("texts") + + params["collection"] = collection + params["index_name"] = index_name + + return class_object.from_documents(**params) + + def initialize_supabase(class_object: Type[SupabaseVectorStore], params: dict): """Initialize supabase and return the class object""" from supabase.client import Client, create_client @@ -89,6 +123,12 @@ def initialize_pinecone(class_object: Type[Pinecone], params: dict): pinecone_api_key = params.get("pinecone_api_key") pinecone_env = params.get("pinecone_env") + if pinecone_api_key is None or pinecone_env is None: + if os.getenv("PINECONE_API_KEY") is not None: + pinecone_api_key = os.getenv("PINECONE_API_KEY") + if os.getenv("PINECONE_ENV") is not None: + pinecone_env = os.getenv("PINECONE_ENV") + if pinecone_api_key is None or pinecone_env is None: raise ValueError( "Pinecone API key and environment must be provided in the params" diff --git a/src/backend/langflow/template/frontend_node/vectorstores.py b/src/backend/langflow/template/frontend_node/vectorstores.py index c35bd338c..2b9aaecc8 100644 --- a/src/backend/langflow/template/frontend_node/vectorstores.py +++ b/src/backend/langflow/template/frontend_node/vectorstores.py @@ -7,6 +7,18 @@ from langflow.template.frontend_node.base import FrontendNode class VectorStoreFrontendNode(FrontendNode): def add_extra_fields(self) -> None: extra_fields: List[TemplateField] = [] + # Add search_kwargs field + extra_field = TemplateField( + name="search_kwargs", + field_type="code", + required=False, + placeholder="", + show=True, + advanced=True, + multiline=False, + value="{}", + ) + extra_fields.append(extra_field) if self.template.type_name == "Weaviate": extra_field = TemplateField( name="weaviate_url", @@ -134,6 +146,45 @@ class VectorStoreFrontendNode(FrontendNode): ) extra_fields.extend((extra_field, extra_field2, extra_field3, extra_field4)) + elif self.template.type_name == "MongoDBAtlasVectorSearch": + # add "mongodb_atlas_cluster_uri", + # "collection_name", + # "db_name", + extra_field = TemplateField( + name="mongodb_atlas_cluster_uri", + field_type="str", + required=False, + placeholder="", + show=True, + advanced=True, + multiline=False, + display_name="MongoDB Atlas Cluster URI", + value="", + ) + extra_field2 = TemplateField( + name="collection_name", + field_type="str", + required=False, + placeholder="", + show=True, + advanced=True, + multiline=False, + display_name="Collection Name", + value="", + ) + extra_field3 = TemplateField( + name="db_name", + field_type="str", + required=False, + placeholder="", + show=True, + advanced=True, + multiline=False, + display_name="Database Name", + value="", + ) + extra_fields.extend((extra_field, extra_field2, extra_field3)) + if extra_fields: for field in extra_fields: self.template.add_field(field) @@ -160,6 +211,9 @@ class VectorStoreFrontendNode(FrontendNode): "query_name", "supabase_url", "supabase_service_key", + "mongodb_atlas_cluster_uri", + "collection_name", + "db_name", ] advanced_fields = [ "n_dim", @@ -179,10 +233,15 @@ class VectorStoreFrontendNode(FrontendNode): "pinecone_api_key", "pinecone_env", "client_kwargs", + "search_kwargs", ] # Check and set field attributes if field.name == "texts": + # if field.name is "texts" it has to be replaced + # when instantiating the vectorstores + field.name = "documents" + field.field_type = "TextSplitter" field.display_name = "Documents" field.required = False