Fix: add insert mode MongoDB (#7394)
* add dropdown insert mode, create method __insert_mode * fix unit_test mongodb * add info to index_name * to overwrite, delete_many from collection * create verify_search_index * fix SIMILARITY_OPTIONS * fix documentation components-vector-stores.md
This commit is contained in:
parent
f4a07fac69
commit
f9a7c9bcef
3 changed files with 136 additions and 58 deletions
|
|
@ -430,16 +430,22 @@ For more information, see the [MongoDB Atlas documentation](https://www.mongodb.
|
|||
|
||||
### Inputs
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------------------------ | ------------ | ----------------------------------------- |
|
||||
| mongodb_atlas_cluster_uri | SecretString | MongoDB Atlas Cluster URI |
|
||||
| db_name | String | Database name |
|
||||
| collection_name | String | Collection name |
|
||||
| index_name | String | Index name |
|
||||
| search_query | String | Query for similarity search |
|
||||
| ingest_data | Data | Data to be ingested into the vector store |
|
||||
| embedding | Embeddings | Embedding function to use |
|
||||
| number_of_results | Integer | Number of results to return in search |
|
||||
| Name | Type | Description |
|
||||
| ------------------------- | ------------ | ----------------------------------------- |
|
||||
| mongodb_atlas_cluster_uri | SecretString | The connection URI for your MongoDB Atlas cluster (required) |
|
||||
| enable_mtls | Boolean | Enable mutual TLS authentication (default: false) |
|
||||
| mongodb_atlas_client_cert | SecretString | Client certificate combined with private key for mTLS authentication (required if mTLS is enabled) |
|
||||
| db_name | String | The name of the database to use (required) |
|
||||
| collection_name | String | The name of the collection to use (required) |
|
||||
| index_name | String | The name of the Atlas Search index; it must be a Vector Search index (required) |
|
||||
| insert_mode | String | How to insert new documents into the collection (options: "append", "overwrite", default: "append") |
|
||||
| embedding | Embeddings | The embedding model to use |
|
||||
| number_of_results | Integer | Number of results to return in similarity search (default: 4) |
|
||||
| index_field | String | The field to index (default: "embedding") |
|
||||
| filter_field | String | The field to filter the index |
|
||||
| number_dimensions | Integer | Number of dimensions of the embedding vectors (default: 1536) |
|
||||
| similarity | String | The method used to measure similarity between vectors (options: "cosine", "euclidean", "dotProduct", default: "cosine") |
|
||||
| quantization | String | Quantization reduces memory costs by converting 32-bit floats to smaller data types (options: "scalar", "binary") |
|
||||
|
||||
### Outputs
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
import tempfile
|
||||
import time
|
||||
|
||||
import certifi
|
||||
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
|
||||
from pymongo.collection import Collection
|
||||
from pymongo.operations import SearchIndexModel
|
||||
|
||||
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
|
||||
from langflow.helpers.data import docs_to_data
|
||||
from langflow.io import BoolInput, HandleInput, IntInput, SecretStrInput, StrInput
|
||||
from langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, SecretStrInput, StrInput
|
||||
from langflow.schema import Data
|
||||
|
||||
|
||||
|
|
@ -14,7 +17,9 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
|
|||
description = "MongoDB Atlas Vector Store with search capabilities"
|
||||
name = "MongoDBAtlasVector"
|
||||
icon = "MongoDB"
|
||||
|
||||
INSERT_MODES = ["append", "overwrite"]
|
||||
SIMILARITY_OPTIONS = ["cosine", "euclidean", "dotProduct"]
|
||||
QUANTIZATION_OPTIONS = ["scalar", "binary"]
|
||||
inputs = [
|
||||
SecretStrInput(name="mongodb_atlas_cluster_uri", display_name="MongoDB Atlas Cluster URI", required=True),
|
||||
BoolInput(name="enable_mtls", display_name="Enable mTLS", value=False, advanced=True, required=True),
|
||||
|
|
@ -28,8 +33,21 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
|
|||
),
|
||||
StrInput(name="db_name", display_name="Database Name", required=True),
|
||||
StrInput(name="collection_name", display_name="Collection Name", required=True),
|
||||
StrInput(name="index_name", display_name="Index Name", required=True),
|
||||
StrInput(
|
||||
name="index_name",
|
||||
display_name="Index Name",
|
||||
required=True,
|
||||
info="The name of Atlas Search index, it should be a Vector Search.",
|
||||
),
|
||||
*LCVectorStoreComponent.inputs,
|
||||
DropdownInput(
|
||||
name="insert_mode",
|
||||
display_name="Insert Mode",
|
||||
options=INSERT_MODES,
|
||||
value=INSERT_MODES[0],
|
||||
info="How to insert new documents into the collection.",
|
||||
advanced=True,
|
||||
),
|
||||
HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
|
||||
IntInput(
|
||||
name="number_of_results",
|
||||
|
|
@ -38,6 +56,41 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
|
|||
value=4,
|
||||
advanced=True,
|
||||
),
|
||||
StrInput(
|
||||
name="index_field",
|
||||
display_name="Index Field",
|
||||
advanced=True,
|
||||
required=True,
|
||||
info="The field to index.",
|
||||
value="embedding",
|
||||
),
|
||||
StrInput(
|
||||
name="filter_field", display_name="Filter Field", advanced=True, info="The field to filter the index."
|
||||
),
|
||||
IntInput(
|
||||
name="number_dimensions",
|
||||
display_name="Number of Dimensions",
|
||||
info="Embedding Context Length.",
|
||||
value=1536,
|
||||
advanced=True,
|
||||
required=True,
|
||||
),
|
||||
DropdownInput(
|
||||
name="similarity",
|
||||
display_name="Similarity",
|
||||
options=SIMILARITY_OPTIONS,
|
||||
value=SIMILARITY_OPTIONS[0],
|
||||
info="The method used to measure the similarity between vectors.",
|
||||
advanced=True,
|
||||
),
|
||||
DropdownInput(
|
||||
name="quantization",
|
||||
display_name="Quantization",
|
||||
options=QUANTIZATION_OPTIONS,
|
||||
value=None,
|
||||
info="Quantization reduces memory costs converting 32-bit floats to smaller data types",
|
||||
advanced=True,
|
||||
),
|
||||
]
|
||||
|
||||
@check_cached_vector_store
|
||||
|
|
@ -96,21 +149,20 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
|
|||
documents.append(_input)
|
||||
|
||||
if documents:
|
||||
collection.drop() # Drop collection to override the vector store
|
||||
self.__insert_mode(collection)
|
||||
|
||||
return MongoDBAtlasVectorSearch.from_documents(
|
||||
documents=documents, embedding=self.embedding, collection=collection, index_name=self.index_name
|
||||
)
|
||||
return MongoDBAtlasVectorSearch(
|
||||
embedding=self.embedding,
|
||||
collection=collection,
|
||||
index_name=self.index_name,
|
||||
)
|
||||
return MongoDBAtlasVectorSearch(embedding=self.embedding, collection=collection, index_name=self.index_name)
|
||||
|
||||
def search_documents(self) -> list[Data]:
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
vector_store = self.build_vector_store()
|
||||
|
||||
self.verify_search_index(vector_store._collection)
|
||||
|
||||
if self.search_query and isinstance(self.search_query, str):
|
||||
docs = vector_store.similarity_search(
|
||||
query=self.search_query,
|
||||
|
|
@ -125,3 +177,37 @@ class MongoVectorStoreComponent(LCVectorStoreComponent):
|
|||
self.status = data
|
||||
return data
|
||||
return []
|
||||
|
||||
def __insert_mode(self, collection: Collection) -> None:
    """Apply the configured insert mode to ``collection`` before new documents are added.

    Only the "overwrite" mode has an effect here; any other value of
    ``self.insert_mode`` leaves the collection untouched.

    Args:
        collection (Collection): The collection that is about to receive documents.
    """
    if self.insert_mode != "overwrite":
        return
    # Remove every document (empty filter) instead of dropping the collection,
    # so the collection itself is preserved.
    collection.delete_many({})
|
||||
|
||||
def verify_search_index(self, collection: Collection) -> None:
    """Ensure a search index named ``self.index_name`` exists on ``collection``.

    When no listed search index carries that name, a vector search index is
    created from the component's index settings, and the call then waits, for
    roughly 20 seconds at most, until the new index reports itself as queryable.
    When an index with that name is already listed, nothing is done.

    Args:
        collection (Collection): The collection to verify the search index on.
    """
    # ``.get`` keeps the lookup from failing on an index document without a "type" key.
    existing_types = {idx["name"]: idx.get("type") for idx in collection.list_search_indexes()}
    if self.index_name in existing_types:
        # NOTE(review): an index with this name but a type other than "vectorSearch"
        # is left untouched, as before; presumably Atlas would reject a second index
        # with the same name anyway. Confirm whether this case should raise.
        return

    collection.create_search_index(self.__create_index_definition())

    # Poll until the index is queryable instead of always sleeping a fixed 20 s;
    # the deadline keeps the previous upper bound on the wait.
    deadline = time.monotonic() + 20
    while time.monotonic() < deadline:
        listed = collection.list_search_indexes(self.index_name)
        if any(idx.get("queryable") is True for idx in listed):
            return
        time.sleep(1)
|
||||
|
||||
def __create_index_definition(self) -> SearchIndexModel:
    """Build the vector search index model from the component's settings.

    The definition always contains one "vector" field built from
    ``index_field``, ``number_dimensions`` and ``similarity``. The
    ``quantization`` key is added only when a quantization option is selected,
    and a "filter" field is appended only when ``filter_field`` is set.

    Returns:
        SearchIndexModel: A "vectorSearch" index model named ``self.index_name``.
    """
    vector_field = {
        "type": "vector",
        "path": self.index_field,
        "numDimensions": self.number_dimensions,
        "similarity": self.similarity,
    }
    # The quantization input may be unset; leave the key out rather than sending
    # a null value, so the service falls back to its own default.
    if self.quantization:
        vector_field["quantization"] = self.quantization

    fields = [vector_field]
    if self.filter_field:
        fields.append({"type": "filter", "path": self.filter_field})
    return SearchIndexModel(definition={"fields": fields}, name=self.index_name, type="vectorSearch")
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
import os
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from langchain_community.embeddings.fake import DeterministicFakeEmbedding
|
||||
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
|
||||
from langflow.components.vectorstores.mongodb_atlas import MongoVectorStoreComponent
|
||||
from langflow.schema.data import Data
|
||||
from pymongo.operations import SearchIndexModel
|
||||
from pymongo.collection import Collection
|
||||
|
||||
from tests.base import ComponentTestBaseWithoutClient, VersionComponentMapping
|
||||
|
||||
|
|
@ -30,6 +30,13 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
|
|||
"index_name": "test_index",
|
||||
"enable_mtls": False,
|
||||
"embedding": DeterministicFakeEmbedding(size=8),
|
||||
"index_field": "embedding",
|
||||
"filter_field": "text",
|
||||
"number_dimensions": 8,
|
||||
"similarity": "cosine",
|
||||
"quantization": "scalar",
|
||||
"insert_mode": "append",
|
||||
"ingest_data": [Data(data={"text": "test data 1"}), Data(data={"text": "test data 2"})],
|
||||
}
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -41,42 +48,18 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
|
|||
{"version": "1.1.1", "module": "vectorstores", "file_name": "mongodb_atlas"},
|
||||
]
|
||||
|
||||
def __create_search_index(self, vector_store: MongoDBAtlasVectorSearch, default_kwargs: dict[str, Any]) -> None:
|
||||
def __create_search_index(
|
||||
self, component_class: type[MongoVectorStoreComponent], collection: Collection, default_kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
"""Create a vector search index if it doesn't exist."""
|
||||
try:
|
||||
index_definition = SearchIndexModel(
|
||||
definition={
|
||||
"fields": [
|
||||
{
|
||||
"type": "vector",
|
||||
"path": "embedding",
|
||||
"numDimensions": 8,
|
||||
"similarity": "cosine",
|
||||
"quantization": "scalar",
|
||||
},
|
||||
{"type": "filter", "path": "text"},
|
||||
]
|
||||
},
|
||||
name=default_kwargs["index_name"],
|
||||
type="vectorSearch",
|
||||
)
|
||||
component_class().set(**default_kwargs).verify_search_index(collection)
|
||||
|
||||
vector_store._collection.create_search_index(index_definition)
|
||||
|
||||
# Wait for index to be ready
|
||||
import time
|
||||
|
||||
time.sleep(40) # Give some time for index to be ready
|
||||
|
||||
# Verify index was created
|
||||
indexes = vector_store._collection.list_search_indexes()
|
||||
index_names = [idx["name"] for idx in indexes]
|
||||
assert default_kwargs["index_name"] in index_names
|
||||
|
||||
except Exception as e:
|
||||
# Index might already exist, which is fine
|
||||
if "AlreadyExists" not in str(e):
|
||||
raise
|
||||
# Verify index was created
|
||||
indexes = collection.list_search_indexes()
|
||||
index_names = {idx["name"]: idx["type"] for idx in indexes}
|
||||
index_type = index_names.get(default_kwargs["index_name"])
|
||||
assert default_kwargs["index_name"] in index_names
|
||||
assert index_type == "vectorSearch"
|
||||
|
||||
def test_create_db(self, component_class: type[MongoVectorStoreComponent], default_kwargs: dict[str, Any]) -> None:
|
||||
"""Test creating a MongoDB Atlas vector store."""
|
||||
|
|
@ -93,6 +76,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
|
|||
"""Test creating a collection with data."""
|
||||
test_texts = ["test data 1", "test data 2", "something completely different"]
|
||||
default_kwargs["ingest_data"] = [Data(data={"text": text}) for text in test_texts]
|
||||
default_kwargs["insert_mode"] = "overwrite"
|
||||
|
||||
component: MongoVectorStoreComponent = component_class().set(**default_kwargs)
|
||||
vector_store = component.build_vector_store()
|
||||
|
|
@ -115,6 +99,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
|
|||
]
|
||||
default_kwargs["ingest_data"] = [Data(data={"text": text, "metadata": {}}) for text in test_data]
|
||||
default_kwargs["number_of_results"] = 2
|
||||
default_kwargs["insert_mode"] = "overwrite"
|
||||
|
||||
# Create and initialize the component
|
||||
component: MongoVectorStoreComponent = component_class().set(**default_kwargs)
|
||||
|
|
@ -131,7 +116,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
|
|||
assert isinstance(doc["embedding"], list)
|
||||
assert len(doc["embedding"]) == 8 # Should match our embedding size
|
||||
|
||||
self.__create_search_index(vector_store, default_kwargs)
|
||||
self.__create_search_index(component_class, vector_store._collection, default_kwargs)
|
||||
|
||||
# Verify index was created
|
||||
indexes = vector_store._collection.list_search_indexes()
|
||||
|
|
@ -141,6 +126,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
|
|||
# Test similarity search through the component
|
||||
component.set(search_query="dog")
|
||||
results = component.search_documents()
|
||||
time.sleep(5) # wait the results come from API
|
||||
|
||||
assert len(results) == 2, "Expected 2 results for 'lazy dog' query"
|
||||
# The most relevant results should be about dogs
|
||||
|
|
@ -168,8 +154,8 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
|
|||
self, component_class: type[MongoVectorStoreComponent], default_kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
"""Test search with empty query."""
|
||||
default_kwargs["insert_mode"] = "overwrite"
|
||||
component: MongoVectorStoreComponent = component_class().set(**default_kwargs)
|
||||
component.build_vector_store()
|
||||
|
||||
# Test with empty search query
|
||||
component.set(search_query="")
|
||||
|
|
@ -191,7 +177,7 @@ class TestMongoVectorStoreComponent(ComponentTestBaseWithoutClient):
|
|||
component: MongoVectorStoreComponent = component_class().set(**default_kwargs)
|
||||
vector_store = component.build_vector_store()
|
||||
|
||||
self.__create_search_index(vector_store, default_kwargs)
|
||||
self.__create_search_index(component_class, vector_store._collection, default_kwargs)
|
||||
|
||||
# Test search and verify metadata is preserved
|
||||
component.set(search_query="Document", number_of_results=2)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue