Fix: add insert mode MongoDB (#7394)

* add insert mode dropdown, create method __insert_mode

* fix unit_test mongodb

* add info to index_name

* to overwrite, delete_many from collection

* create verify_search_index

* fix SIMILARITY_OPTIONS

* fix documentation components-vector-stores.md
This commit is contained in:
Gustavo Costa 2025-04-03 16:03:17 -03:00 committed by GitHub
commit f9a7c9bcef
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 136 additions and 58 deletions

View file

@ -430,16 +430,22 @@ For more information, see the [MongoDB Atlas documentation](https://www.mongodb.
### Inputs
| Name | Type | Description |
| ------------------------ | ------------ | ----------------------------------------- |
| mongodb_atlas_cluster_uri | SecretString | MongoDB Atlas Cluster URI |
| db_name | String | Database name |
| collection_name | String | Collection name |
| index_name | String | Index name |
| search_query | String | Query for similarity search |
| ingest_data | Data | Data to be ingested into the vector store |
| embedding | Embeddings | Embedding function to use |
| number_of_results | Integer | Number of results to return in search |
| Name | Type | Description |
| ------------------------- | ------------ | ----------------------------------------- |
| mongodb_atlas_cluster_uri | SecretString | The connection URI for your MongoDB Atlas cluster (required) |
| enable_mtls | Boolean | Enable mutual TLS authentication (default: false) |
| mongodb_atlas_client_cert | SecretString | Client certificate combined with private key for mTLS authentication (required if mTLS is enabled) |
| db_name | String | The name of the database to use (required) |
| collection_name | String | The name of the collection to use (required) |
| index_name | String | The name of the Atlas Search index; it must be a Vector Search index (required) |
| insert_mode | String | How to insert new documents into the collection (options: "append", "overwrite", default: "append") |
| embedding | Embeddings | The embedding model to use |
| number_of_results | Integer | Number of results to return in similarity search (default: 4) |
| index_field | String | The field to index (default: "embedding") |
| filter_field | String | The field to filter the index |
| number_dimensions | Integer | Number of dimensions of the embedding vectors stored in the index (default: 1536) |
| similarity | String | The method used to measure similarity between vectors (options: "cosine", "euclidean", "dotProduct", default: "cosine") |
| quantization | String | Quantization reduces memory costs by converting 32-bit floats to smaller data types (options: "scalar", "binary") |
### Outputs

View file

@ -1,11 +1,14 @@
import tempfile
import time
import certifi
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from pymongo.collection import Collection
from pymongo.operations import SearchIndexModel
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.helpers.data import docs_to_data
from langflow.io import BoolInput, HandleInput, IntInput, SecretStrInput, StrInput
from langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, SecretStrInput, StrInput
from langflow.schema import Data
@ -14,7 +17,9 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
description = "MongoDB Atlas Vector Store with search capabilities"
name = "MongoDBAtlasVector"
icon = "MongoDB"
INSERT_MODES = ["append", "overwrite"]
SIMILARITY_OPTIONS = ["cosine", "euclidean", "dotProduct"]
QUANTIZATION_OPTIONS = ["scalar", "binary"]
inputs = [
SecretStrInput(name="mongodb_atlas_cluster_uri", display_name="MongoDB Atlas Cluster URI", required=True),
BoolInput(name="enable_mtls", display_name="Enable mTLS", value=False, advanced=True, required=True),
@ -28,8 +33,21 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
),
StrInput(name="db_name", display_name="Database Name", required=True),
StrInput(name="collection_name", display_name="Collection Name", required=True),
StrInput(name="index_name", display_name="Index Name", required=True),
StrInput(
name="index_name",
display_name="Index Name",
required=True,
info="The name of Atlas Search index, it should be a Vector Search.",
),
*LCVectorStoreComponent.inputs,
DropdownInput(
name="insert_mode",
display_name="Insert Mode",
options=INSERT_MODES,
value=INSERT_MODES[0],
info="How to insert new documents into the collection.",
advanced=True,
),
HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
IntInput(
name="number_of_results",
@ -38,6 +56,41 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
value=4,
advanced=True,
),
StrInput(
name="index_field",
display_name="Index Field",
advanced=True,
required=True,
info="The field to index.",
value="embedding",
),
StrInput(
name="filter_field", display_name="Filter Field", advanced=True, info="The field to filter the index."
),
IntInput(
name="number_dimensions",
display_name="Number of Dimensions",
info="Embedding Context Length.",
value=1536,
advanced=True,
required=True,
),
DropdownInput(
name="similarity",
display_name="Similarity",
options=SIMILARITY_OPTIONS,
value=SIMILARITY_OPTIONS[0],
info="The method used to measure the similarity between vectors.",
advanced=True,
),
DropdownInput(
name="quantization",
display_name="Quantization",
options=QUANTIZATION_OPTIONS,
value=None,
info="Quantization reduces memory costs converting 32-bit floats to smaller data types",
advanced=True,
),
]
@check_cached_vector_store
@ -96,21 +149,20 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
documents.append(_input)
if documents:
collection.drop() # Drop collection to override the vector store
self.__insert_mode(collection)
return MongoDBAtlasVectorSearch.from_documents(
documents=documents, embedding=self.embedding, collection=collection, index_name=self.index_name
)
return MongoDBAtlasVectorSearch(
embedding=self.embedding,
collection=collection,
index_name=self.index_name,
)
return MongoDBAtlasVectorSearch(embedding=self.embedding, collection=collection, index_name=self.index_name)
def search_documents(self) -> list[Data]:
from bson.objectid import ObjectId
vector_store = self.build_vector_store()
self.verify_search_index(vector_store._collection)
if self.search_query and isinstance(self.search_query, str):
docs = vector_store.similarity_search(
query=self.search_query,
@ -125,3 +177,37 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
self.status = data
return data
return []
def __insert_mode(self, collection: Collection) -> None:
    """Prepare ``collection`` for ingestion according to ``self.insert_mode``.

    Args:
        collection (Collection): The collection that is about to receive documents.
    """
    if self.insert_mode != "overwrite":
        # Any other mode leaves the existing documents untouched.
        return
    # Remove every document but keep the collection itself in place.
    collection.delete_many({})
def verify_search_index(
    self, collection: Collection, *, timeout: float = 20.0, poll_interval: float = 1.0
) -> None:
    """Ensure a search index named ``self.index_name`` exists, creating it if needed.

    An index that already exists under that name is reused as-is, whatever its
    type. When the index is missing it is created from the component settings,
    and this method then waits until the index reports itself as queryable or
    until ``timeout`` seconds have elapsed, whichever comes first.

    Args:
        collection (Collection): The collection to verify the search index on.
        timeout (float): Maximum number of seconds to wait for a newly created
            index to become queryable.
        poll_interval (float): Seconds to wait between readiness checks.
    """
    existing_names = {idx["name"] for idx in collection.list_search_indexes()}
    if self.index_name in existing_names:
        return

    collection.create_search_index(self.__create_index_definition())

    # Poll for readiness instead of sleeping a fixed amount: return as soon as
    # the index is usable and never wait longer than ``timeout``.
    deadline = time.monotonic() + timeout
    while True:
        if any(idx.get("queryable") for idx in collection.list_search_indexes(self.index_name)):
            return
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return
        time.sleep(min(poll_interval, remaining))
def __create_index_definition(self) -> SearchIndexModel:
    """Build the vector search index definition from the component settings.

    The definition always contains one ``vector`` field on ``self.index_field``.
    A ``filter`` field is added only when ``self.filter_field`` is set.

    Returns:
        SearchIndexModel: A ``vectorSearch`` index model named ``self.index_name``.
    """
    vector_field = {
        "type": "vector",
        "path": self.index_field,
        "numDimensions": self.number_dimensions,
        "similarity": self.similarity,
    }
    # The quantization input defaults to None; leave the key out in that case
    # rather than sending an explicit null in the index definition.
    if self.quantization:
        vector_field["quantization"] = self.quantization

    fields = [vector_field]
    if self.filter_field:
        fields.append({"type": "filter", "path": self.filter_field})
    return SearchIndexModel(definition={"fields": fields}, name=self.index_name, type="vectorSearch")

View file

@ -1,12 +1,12 @@
import os
import time
from typing import Any
import pytest
from langchain_community.embeddings.fake import DeterministicFakeEmbedding
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langflow.components.vectorstores.mongodb_atlas import MongoVectorStoreComponent
from langflow.schema.data import Data
from pymongo.operations import SearchIndexModel
from pymongo.collection import Collection
from tests.base import ComponentTestBaseWithoutClient, VersionComponentMapping
@ -30,6 +30,13 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
"index_name": "test_index",
"enable_mtls": False,
"embedding": DeterministicFakeEmbedding(size=8),
"index_field": "embedding",
"filter_field": "text",
"number_dimensions": 8,
"similarity": "cosine",
"quantization": "scalar",
"insert_mode": "append",
"ingest_data": [Data(data={"text": "test data 1"}), Data(data={"text": "test data 2"})],
}
@pytest.fixture
@ -41,42 +48,18 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
{"version": "1.1.1", "module": "vectorstores", "file_name": "mongodb_atlas"},
]
def __create_search_index(self, vector_store: MongoDBAtlasVectorSearch, default_kwargs: dict[str, Any]) -> None:
def __create_search_index(
self, component_class: type[MongoVectorStoreComponent], collection: Collection, default_kwargs: dict[str, Any]
) -> None:
"""Create a vector search index if it doesn't exist."""
try:
index_definition = SearchIndexModel(
definition={
"fields": [
{
"type": "vector",
"path": "embedding",
"numDimensions": 8,
"similarity": "cosine",
"quantization": "scalar",
},
{"type": "filter", "path": "text"},
]
},
name=default_kwargs["index_name"],
type="vectorSearch",
)
component_class().set(**default_kwargs).verify_search_index(collection)
vector_store._collection.create_search_index(index_definition)
# Wait for index to be ready
import time
time.sleep(40) # Give some time for index to be ready
# Verify index was created
indexes = vector_store._collection.list_search_indexes()
index_names = [idx["name"] for idx in indexes]
assert default_kwargs["index_name"] in index_names
except Exception as e:
# Index might already exist, which is fine
if "AlreadyExists" not in str(e):
raise
# Verify index was created
indexes = collection.list_search_indexes()
index_names = {idx["name"]: idx["type"] for idx in indexes}
index_type = index_names.get(default_kwargs["index_name"])
assert default_kwargs["index_name"] in index_names
assert index_type == "vectorSearch"
def test_create_db(self, component_class: type[MongoVectorStoreComponent], default_kwargs: dict[str, Any]) -> None:
"""Test creating a MongoDB Atlas vector store."""
@ -93,6 +76,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
"""Test creating a collection with data."""
test_texts = ["test data 1", "test data 2", "something completely different"]
default_kwargs["ingest_data"] = [Data(data={"text": text}) for text in test_texts]
default_kwargs["insert_mode"] = "overwrite"
component: MongoVectorStoreComponent = component_class().set(**default_kwargs)
vector_store = component.build_vector_store()
@ -115,6 +99,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
]
default_kwargs["ingest_data"] = [Data(data={"text": text, "metadata": {}}) for text in test_data]
default_kwargs["number_of_results"] = 2
default_kwargs["insert_mode"] = "overwrite"
# Create and initialize the component
component: MongoVectorStoreComponent = component_class().set(**default_kwargs)
@ -131,7 +116,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
assert isinstance(doc["embedding"], list)
assert len(doc["embedding"]) == 8 # Should match our embedding size
self.__create_search_index(vector_store, default_kwargs)
self.__create_search_index(component_class, vector_store._collection, default_kwargs)
# Verify index was created
indexes = vector_store._collection.list_search_indexes()
@ -141,6 +126,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
# Test similarity search through the component
component.set(search_query="dog")
results = component.search_documents()
time.sleep(5) # wait the results come from API
assert len(results) == 2, "Expected 2 results for 'lazy dog' query"
# The most relevant results should be about dogs
@ -168,8 +154,8 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
self, component_class: type[MongoVectorStoreComponent], default_kwargs: dict[str, Any]
) -> None:
"""Test search with empty query."""
default_kwargs["insert_mode"] = "overwrite"
component: MongoVectorStoreComponent = component_class().set(**default_kwargs)
component.build_vector_store()
# Test with empty search query
component.set(search_query="")
@ -191,7 +177,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
component: MongoVectorStoreComponent = component_class().set(**default_kwargs)
vector_store = component.build_vector_store()
self.__create_search_index(vector_store, default_kwargs)
self.__create_search_index(component_class, vector_store._collection, default_kwargs)
# Test search and verify metadata is preserved
component.set(search_query="Document", number_of_results=2)