fix: add tests and adjustments to Chroma component (#5571)

* Refactor: Update EditNodeComponent to hide table options and block hide

* 🐛 (model.py): fix issue with search_documents method not returning empty list when search_query is empty
♻️ (model.py): refactor search_documents method to handle search_query logic more efficiently
📝 (chroma.py): add typing_extensions override import for build_vector_store method to improve code readability

* (tests): add unit tests for ChromaVectorStoreComponent

- Introduced new test suite for ChromaVectorStoreComponent, covering various functionalities including database creation, collection management, similarity and MMR searches, and duplicate handling.
- Implemented tests for creating collections with and without data, ensuring proper functionality and data integrity.
- Verified search capabilities with different query types and result limits, enhancing overall test coverage for the component.

* fix: remove unnecessary whitespace in model.py and add missing import in chroma.py

* fix: mypy error module has no attribute "timeout"

* ♻️ (async_helpers.py): Remove unnecessary type hint ignore comment from timeout_context function

* 📝 (async_helpers.py): add a comment with issue reference PGH003 to document the reason for ignoring type checking in timeout_context function

* ♻️ (async_helpers.py): Remove unnecessary type hint comment to improve code readability and maintainability

* ♻️ (async_helpers.py): Add type ignore comment to suppress miscellaneous type error for timeout_context function

* ♻️ (async_helpers.py): refactor timeout_context function to remove unnecessary type ignore comments and improve code readability

* [autofix.ci] apply automated fixes

* 📝 (async_helpers.py): add a blank line for better code readability and consistency

* fix: mypy error: incompatible redefinition

---------

Co-authored-by: anovazzi1 <otavio2204@gmail.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
Co-authored-by: italojohnny <italojohnnydosanjos@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Cristhian Zanforlin Lousa 2025-01-08 10:01:39 -03:00 committed by GitHub
commit f080049526
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 347 additions and 7 deletions

View file

@ -122,17 +122,17 @@ class LCVectorStoreComponent(Component):
def search_documents(self) -> list[Data]:
"""Search for documents in the vector store."""
search_query: str = self.search_query
if not search_query:
self.status = ""
return []
if self._cached_vector_store is not None:
vector_store = self._cached_vector_store
else:
vector_store = self.build_vector_store()
self._cached_vector_store = vector_store
search_query: str = self.search_query
if not search_query:
self.status = ""
return []
self.log(f"Search input: {search_query}")
self.log(f"Search type: {self.search_type}")
self.log(f"Number of results: {self.number_of_results}")

View file

@ -1,5 +1,4 @@
# from langflow.field_typing import Data
import asyncio
from contextlib import AsyncExitStack
import httpx
@ -11,6 +10,7 @@ from langflow.components.tools.mcp_stdio import create_input_schema_from_json_sc
from langflow.custom import Component
from langflow.field_typing import Tool
from langflow.io import MessageTextInput, Output
from langflow.utils.async_helpers import timeout_context
# Define constant for status code
HTTP_TEMPORARY_REDIRECT = 307
@ -39,7 +39,7 @@ class MCPSseClient:
headers = {}
url = await self.pre_check_redirect(url)
async with asyncio.timeout(timeout_seconds):
async with timeout_context(timeout_seconds):
sse_transport = await self.exit_stack.enter_async_context(
sse_client(url, headers, timeout_seconds, sse_read_timeout_seconds)
)

View file

@ -2,6 +2,7 @@ from copy import deepcopy
from chromadb.config import Settings
from langchain_chroma import Chroma
from typing_extensions import override
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.base.vectorstores.utils import chroma_collection_to_data
@ -82,6 +83,7 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
),
]
@override
@check_cached_vector_store
def build_vector_store(self) -> Chroma:
"""Builds the Chroma object."""

View file

@ -1,4 +1,21 @@
import asyncio
from contextlib import asynccontextmanager
if hasattr(asyncio, "timeout"):

    @asynccontextmanager
    async def timeout_context(timeout_seconds):
        """Bound the time spent inside an ``async with`` body.

        Thin wrapper around ``asyncio.timeout`` for interpreters that provide it.

        Args:
            timeout_seconds: Maximum number of seconds the body may run, or
                ``None`` to disable the deadline.

        Yields:
            The object returned by entering ``asyncio.timeout``.

        Raises:
            TimeoutError: If the body does not finish before the deadline.
        """
        # asyncio.timeout() only implements the asynchronous context-manager
        # protocol, so it has to be entered with `async with`, not `with`.
        async with asyncio.timeout(timeout_seconds) as ctx:
            yield ctx

else:

    @asynccontextmanager
    async def timeout_context(timeout_seconds):
        """Bound the time spent inside an ``async with`` body.

        Fallback for interpreters without ``asyncio.timeout``: a timer cancels
        the current task when the deadline passes and that cancellation is
        translated into ``TimeoutError``.

        Args:
            timeout_seconds: Maximum number of seconds the body may run, or
                ``None`` to disable the deadline.

        Yields:
            None.

        Raises:
            TimeoutError: If the body does not finish before the deadline.
        """
        loop = asyncio.get_running_loop()
        task = asyncio.current_task()
        timed_out = False

        def _cancel_on_deadline():
            nonlocal timed_out
            timed_out = True
            task.cancel()

        handle = None if timeout_seconds is None else loop.call_later(timeout_seconds, _cancel_on_deadline)
        try:
            yield None
        except asyncio.CancelledError as e:
            # Only cancellations triggered by our own timer become timeouts;
            # cancellations requested by anyone else must propagate untouched.
            if not timed_out:
                raise
            msg = f"Operation timed out after {timeout_seconds} seconds"
            raise TimeoutError(msg) from e
        finally:
            if handle is not None:
                handle.cancel()
def run_until_complete(coro):

View file

@ -0,0 +1,320 @@
import os
from pathlib import Path
from typing import Any
import pytest
from langflow.components.vectorstores.chroma import ChromaVectorStoreComponent
from langflow.schema.data import Data
from tests.base import ComponentTestBaseWithoutClient, VersionComponentMapping
@pytest.mark.api_key_required
class TestChromaVectorStoreComponent(ComponentTestBaseWithoutClient):
    """Tests for ChromaVectorStoreComponent backed by a per-test temporary directory.

    Every test depends on the ``default_kwargs`` fixture, which skips when
    OPENAI_API_KEY is not set in the environment.
    """

    @pytest.fixture
    def component_class(self) -> type[Any]:
        """Provide the component class under test."""
        return ChromaVectorStoreComponent

    @pytest.fixture
    def default_kwargs(self, tmp_path: Path) -> dict[str, Any]:
        """Provide the baseline keyword arguments used to configure the component."""
        from langflow.components.embeddings.openai import OpenAIEmbeddingsComponent

        openai_key = os.getenv("OPENAI_API_KEY")
        if openai_key is None:
            pytest.skip("OPENAI_API_KEY is not set")

        embeddings = OpenAIEmbeddingsComponent(openai_api_key=openai_key).build_embeddings()
        return {
            "embedding": embeddings,
            "collection_name": "test_collection",
            "persist_directory": tmp_path,
        }

    @pytest.fixture
    def file_names_mapping(self) -> list[VersionComponentMapping]:
        """Provide the version-to-file-name mapping for this component."""
        return [
            {"version": "1.0.19", "module": "vectorstores", "file_name": "Chroma"},
            {"version": "1.1.0", "module": "vectorstores", "file_name": "chroma"},
            {"version": "1.1.1", "module": "vectorstores", "file_name": "chroma"},
        ]

    def test_create_db(self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]) -> None:
        """Building the vector store creates the on-disk database files."""
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        db_dir = default_kwargs["persist_directory"]
        assert db_dir.exists()
        assert db_dir.is_dir()
        # The directory must contain at least one entry
        assert any(db_dir.iterdir())
        # ... and one of them is the chroma.sqlite3 file
        sqlite_file = db_dir / "chroma.sqlite3"
        assert sqlite_file.exists()
        assert sqlite_file.is_file()

    def test_create_collection_with_data(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Ingested Data objects end up in the named collection."""
        texts = ["test data 1", "test data 2", "something completely different"]
        default_kwargs.update(ingest_data=[Data(text=entry) for entry in texts])

        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # The collection carries the configured name and one entry per text
        stored = store._collection
        assert stored.name == default_kwargs["collection_name"]
        assert stored.count() == len(texts)

    def test_similarity_search(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Similarity search through the component honours number_of_results."""
        # Sentences on clearly different topics
        corpus = [
            "The quick brown fox jumps over the lazy dog",
            "Python is a popular programming language",
            "Machine learning models process data",
            "The lazy dog sleeps all day long",
        ]
        default_kwargs.update(
            ingest_data=[Data(text=sentence) for sentence in corpus],
            search_type="Similarity",
            number_of_results=2,
        )
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        # Query via the component
        chroma.set(search_query="dog sleeping")
        top_two = chroma.search_documents()
        assert len(top_two) == 2
        # At least one hit should mention dogs
        assert any("dog" in hit.text.lower() for hit in top_two)

        # Ask for a different number of results
        chroma.set(number_of_results=3)
        top_three = chroma.search_documents()
        assert len(top_three) == 3

    def test_mmr_search(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """MMR search through the component honours number_of_results."""
        # Three near-identical sentences plus one outlier
        corpus = [
            "The quick brown fox jumps",
            "The quick brown fox leaps",
            "The quick brown fox hops",
            "Something completely different about cats",
        ]
        default_kwargs.update(
            ingest_data=[Data(text=sentence) for sentence in corpus],
            search_type="MMR",
            number_of_results=3,
        )
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        # Query via the component
        chroma.set(search_query="quick fox")
        top_three = chroma.search_documents()
        assert len(top_three) == 3
        # At least one hit should mention the fox
        assert any("fox" in hit.text.lower() for hit in top_three)

        # Ask for a different number of results
        chroma.set(number_of_results=2)
        top_two = chroma.search_documents()
        assert len(top_two) == 2

    def test_search_with_different_types(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Switching search type on one component works; an empty query yields no results."""
        corpus = [
            "The quick brown fox jumps over the lazy dog",
            "Python is a popular programming language",
            "Machine learning models process data",
        ]
        default_kwargs.update(
            ingest_data=[Data(text=sentence) for sentence in corpus],
            number_of_results=2,
        )
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        # Similarity search
        chroma.set(search_type="Similarity", search_query="programming languages")
        by_similarity = chroma.search_documents()
        assert len(by_similarity) == 2
        assert any("python" in hit.text.lower() for hit in by_similarity)

        # MMR search
        chroma.set(search_type="MMR", search_query="programming languages")
        by_mmr = chroma.search_documents()
        assert len(by_mmr) == 2

        # Empty query
        chroma.set(search_query="")
        for_empty_query = chroma.search_documents()
        assert len(for_empty_query) == 0

    def test_search_with_score(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Search with the similarity_score_threshold search type through the component."""
        corpus = [
            "The quick brown fox jumps over the lazy dog",
            "Python is a popular programming language",
            "Machine learning models process data",
        ]
        default_kwargs.update(
            ingest_data=[Data(text=sentence) for sentence in corpus],
            number_of_results=2,
        )
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        # Query via the component
        chroma.set(
            search_type="similarity_score_threshold", search_query="programming languages", number_of_results=2
        )
        hits = chroma.search_documents()
        assert len(hits) == 2
        # The programming-related sentence should be among the hits
        assert any("python" in hit.text.lower() for hit in hits)
        assert any("programming" in hit.text.lower() for hit in hits)

        # Ask for a different number of results
        chroma.set(number_of_results=3)
        hits = chroma.search_documents()
        assert len(hits) == 3

    def test_duplicate_handling(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Ingest duplicate documents with allow_duplicates off and then on."""
        # Two identical documents followed by a distinct one
        first_batch = [
            Data(text_key="text", data={"text": body})
            for body in (
                "This is a test document",
                "This is a test document",
                "This is another document",
            )
        ]
        default_kwargs.update(
            ingest_data=first_batch,
            allow_duplicates=False,
            limit=100,  # High enough to fetch every document
        )
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Fetch everything that was stored
        stored_texts = store.get(limit=100)["documents"]
        # All three documents are stored, the duplicate included
        assert len(stored_texts) == 3
        # ... but only two distinct texts exist
        assert len(set(stored_texts)) == 2

        # Second round: allow_duplicates=True in a fresh collection
        second_batch = [
            Data(text_key="text", data={"text": body})
            for body in ("This is a test document", "This is a test document")
        ]
        default_kwargs.update(
            ingest_data=second_batch,
            allow_duplicates=True,
            collection_name="test_collection_2",  # Separate collection from the first round
        )
        chroma = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Fetch everything that was stored
        stored_texts = store.get(limit=100)["documents"]
        # Both copies must be present
        assert len(stored_texts) == 2
        assert all("test document" in body for body in stored_texts)
        # The collection count agrees
        assert store._collection.count() == 2

    def test_chroma_collection_to_data(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """chroma_collection_to_data converts documents with metadata into Data objects."""
        from langflow.base.vectorstores.utils import chroma_collection_to_data

        # Documents carrying an extra metadata field
        default_kwargs.update(
            ingest_data=[
                Data(data={"text": "Document 1", "metadata_field": "value1"}),
                Data(data={"text": "Document 2", "metadata_field": "value2"}),
            ]
        )
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Convert the raw collection dump
        converted = chroma_collection_to_data(store.get())

        assert len(converted) == 2
        for item in converted:
            assert isinstance(item, Data)
            payload = item.data
            assert "id" in payload
            assert "text" in payload
            assert payload["text"] in ("Document 1", "Document 2")
            assert "metadata_field" in payload
            assert payload["metadata_field"] in ("value1", "value2")

    def test_chroma_collection_to_data_without_metadata(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """chroma_collection_to_data converts documents that have no metadata."""
        from langflow.base.vectorstores.utils import chroma_collection_to_data

        # Documents with text only
        default_kwargs.update(
            ingest_data=[
                Data(data={"text": "Simple document 1"}),
                Data(data={"text": "Simple document 2"}),
            ]
        )
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Convert the raw collection dump
        converted = chroma_collection_to_data(store.get())

        assert len(converted) == 2
        for item in converted:
            assert isinstance(item, Data)
            payload = item.data
            assert "id" in payload
            assert "text" in payload
            assert payload["text"] in ("Simple document 1", "Simple document 2")

    def test_chroma_collection_to_data_empty_collection(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """chroma_collection_to_data returns nothing for an empty collection."""
        from langflow.base.vectorstores.utils import chroma_collection_to_data

        # No ingest_data, so the collection stays empty
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Convert the raw collection dump
        converted = chroma_collection_to_data(store.get())

        assert len(converted) == 0

View file

@ -34,6 +34,7 @@ export function EditNodeComponent({
<div className="h-full">
{nodeClass && (
<TableComponent
tableOptions={{ hide_options: true, block_hide: true }}
domLayout={autoHeight ? "autoHeight" : undefined}
key={"editNode"}
tooltipShowDelay={0.5}