fix: add tests and adjustments to Chroma component (#5571)
* Refactor: Update EditNodeComponent to hide table options and block hide
* 🐛 (model.py): fix issue with search_documents method not returning empty list when search_query is empty
  ♻️ (model.py): refactor search_documents method to handle search_query logic more efficiently
  📝 (chroma.py): add typing_extensions override import for build_vector_store method to improve code readability
* ✨ (tests): add unit tests for ChromaVectorStoreComponent
  - Introduced new test suite for ChromaVectorStoreComponent, covering various functionalities including database creation, collection management, similarity and MMR searches, and duplicate handling.
  - Implemented tests for creating collections with and without data, ensuring proper functionality and data integrity.
  - Verified search capabilities with different query types and result limits, enhancing overall test coverage for the component.
* fix: remove unnecessary whitespace in model.py and add missing import in chroma.py
* fix: mypy error module has no attribute "timeout"
* ♻️ (async_helpers.py): Remove unnecessary type hint ignore comment from timeout_context function
* 📝 (async_helpers.py): add a comment with issue reference PGH003 to document the reason for ignoring type checking in timeout_context function
* ♻️ (async_helpers.py): Remove unnecessary type hint comment to improve code readability and maintainability
* ♻️ (async_helpers.py): Add type ignore comment to suppress miscellaneous type error for timeout_context function
* ♻️ (async_helpers.py): refactor timeout_context function to remove unnecessary type ignore comments and improve code readability
* [autofix.ci] apply automated fixes
* 📝 (async_helpers.py): add a blank line for better code readability and consistency
* fix: mypy error: incompatible redefinition
---------
Co-authored-by: anovazzi1 <otavio2204@gmail.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
Co-authored-by: italojohnny <italojohnnydosanjos@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
cfaac694dc
commit
f080049526
7 changed files with 347 additions and 7 deletions
|
|
@ -122,17 +122,17 @@ class LCVectorStoreComponent(Component):
|
|||
|
||||
def search_documents(self) -> list[Data]:
|
||||
"""Search for documents in the vector store."""
|
||||
search_query: str = self.search_query
|
||||
if not search_query:
|
||||
self.status = ""
|
||||
return []
|
||||
|
||||
if self._cached_vector_store is not None:
|
||||
vector_store = self._cached_vector_store
|
||||
else:
|
||||
vector_store = self.build_vector_store()
|
||||
self._cached_vector_store = vector_store
|
||||
|
||||
search_query: str = self.search_query
|
||||
if not search_query:
|
||||
self.status = ""
|
||||
return []
|
||||
|
||||
self.log(f"Search input: {search_query}")
|
||||
self.log(f"Search type: {self.search_type}")
|
||||
self.log(f"Number of results: {self.number_of_results}")
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
# from langflow.field_typing import Data
|
||||
import asyncio
|
||||
from contextlib import AsyncExitStack
|
||||
|
||||
import httpx
|
||||
|
|
@ -11,6 +10,7 @@ from langflow.components.tools.mcp_stdio import create_input_schema_from_json_sc
|
|||
from langflow.custom import Component
|
||||
from langflow.field_typing import Tool
|
||||
from langflow.io import MessageTextInput, Output
|
||||
from langflow.utils.async_helpers import timeout_context
|
||||
|
||||
# Define constant for status code
|
||||
HTTP_TEMPORARY_REDIRECT = 307
|
||||
|
|
@ -39,7 +39,7 @@ class MCPSseClient:
|
|||
headers = {}
|
||||
url = await self.pre_check_redirect(url)
|
||||
|
||||
async with asyncio.timeout(timeout_seconds):
|
||||
async with timeout_context(timeout_seconds):
|
||||
sse_transport = await self.exit_stack.enter_async_context(
|
||||
sse_client(url, headers, timeout_seconds, sse_read_timeout_seconds)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ from copy import deepcopy
|
|||
|
||||
from chromadb.config import Settings
|
||||
from langchain_chroma import Chroma
|
||||
from typing_extensions import override
|
||||
|
||||
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
|
||||
from langflow.base.vectorstores.utils import chroma_collection_to_data
|
||||
|
|
@ -82,6 +83,7 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
|
|||
),
|
||||
]
|
||||
|
||||
@override
|
||||
@check_cached_vector_store
|
||||
def build_vector_store(self) -> Chroma:
|
||||
"""Builds the Chroma object."""
|
||||
|
|
|
|||
|
|
@ -1,4 +1,21 @@
|
|||
import asyncio
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
if hasattr(asyncio, "timeout"):

    @asynccontextmanager
    async def timeout_context(timeout_seconds):
        """Limit the enclosed ``async with`` body to ``timeout_seconds`` seconds.

        Thin wrapper around ``asyncio.timeout`` for interpreters that provide it.

        Args:
            timeout_seconds: Maximum duration in seconds, or ``None`` for no limit.

        Yields:
            The ``asyncio.Timeout`` object controlling the deadline.

        Raises:
            TimeoutError: If the body does not finish before the deadline.
        """
        # asyncio.timeout() implements only __aenter__/__aexit__, so it has to be
        # entered with ``async with``; a plain ``with`` fails before the body runs.
        async with asyncio.timeout(timeout_seconds) as ctx:
            yield ctx

else:

    @asynccontextmanager
    async def timeout_context(timeout_seconds):
        """Limit the enclosed ``async with`` body to ``timeout_seconds`` seconds.

        Fallback for interpreters without ``asyncio.timeout``: a timer cancels the
        current task when the deadline passes, and that cancellation is reported
        to the caller as ``TimeoutError``.

        Args:
            timeout_seconds: Maximum duration in seconds, or ``None`` for no limit.

        Yields:
            ``None``; there is no timeout handle object on this code path.

        Raises:
            TimeoutError: If the body does not finish before the deadline.
        """
        current = asyncio.current_task()
        expired = False
        timer = None

        def _expire():
            # Remember that the cancellation came from the deadline so it can be
            # told apart from a cancellation requested by someone else.
            nonlocal expired
            expired = True
            current.cancel()

        if timeout_seconds is not None:
            timer = asyncio.get_running_loop().call_later(timeout_seconds, _expire)
        try:
            yield None
        except asyncio.CancelledError as e:
            if not expired:
                # Genuine external cancellation: propagate it untouched.
                raise
            msg = f"Operation timed out after {timeout_seconds} seconds"
            raise TimeoutError(msg) from e
        finally:
            # Always disarm the timer so it cannot fire after the block has exited.
            if timer is not None:
                timer.cancel()
|
||||
|
||||
|
||||
def run_until_complete(coro):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,320 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from langflow.components.vectorstores.chroma import ChromaVectorStoreComponent
|
||||
from langflow.schema.data import Data
|
||||
|
||||
from tests.base import ComponentTestBaseWithoutClient, VersionComponentMapping
|
||||
|
||||
|
||||
@pytest.mark.api_key_required
class TestChromaVectorStoreComponent(ComponentTestBaseWithoutClient):
    """Tests for ``ChromaVectorStoreComponent`` that use OpenAI embeddings (API key required)."""

    @pytest.fixture
    def component_class(self) -> type[Any]:
        """Provide the component class exercised by this suite."""
        return ChromaVectorStoreComponent

    @pytest.fixture
    def default_kwargs(self, tmp_path: Path) -> dict[str, Any]:
        """Provide the baseline keyword arguments used to configure the component."""
        from langflow.components.embeddings.openai import OpenAIEmbeddingsComponent

        api_key = os.getenv("OPENAI_API_KEY")
        if api_key is None:
            pytest.skip("OPENAI_API_KEY is not set")

        embedding = OpenAIEmbeddingsComponent(openai_api_key=api_key).build_embeddings()
        return {
            "embedding": embedding,
            "collection_name": "test_collection",
            "persist_directory": tmp_path,
        }

    @pytest.fixture
    def file_names_mapping(self) -> list[VersionComponentMapping]:
        """Provide the per-version module/file-name mapping for this component."""
        versioned_names = (
            ("1.0.19", "Chroma"),
            ("1.1.0", "chroma"),
            ("1.1.1", "chroma"),
        )
        return [
            {"version": version, "module": "vectorstores", "file_name": file_name}
            for version, file_name in versioned_names
        ]

    def test_create_db(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Building the vector store populates the persist directory."""
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        storage_dir = default_kwargs["persist_directory"]
        assert storage_dir.exists()
        assert storage_dir.is_dir()
        # The directory must contain at least one entry ...
        assert any(storage_dir.iterdir())
        # ... and one of them must be the chroma.sqlite3 file.
        sqlite_file = storage_dir / "chroma.sqlite3"
        assert sqlite_file.exists()
        assert sqlite_file.is_file()

    def test_create_collection_with_data(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Ingested Data objects end up in the named collection."""
        texts = ["test data 1", "test data 2", "something completely different"]
        default_kwargs["ingest_data"] = [Data(text=entry) for entry in texts]

        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # The collection carries the configured name and one record per ingested text.
        stored = store._collection
        assert stored.name == default_kwargs["collection_name"]
        assert stored.count() == len(texts)

    def test_similarity_search(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Similarity search honours the query and the requested number of results."""
        # Sentences on clearly distinct topics.
        corpus = [
            "The quick brown fox jumps over the lazy dog",
            "Python is a popular programming language",
            "Machine learning models process data",
            "The lazy dog sleeps all day long",
        ]
        default_kwargs.update(
            ingest_data=[Data(text=sentence) for sentence in corpus],
            search_type="Similarity",
            number_of_results=2,
        )

        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        # Query through the component API.
        chroma.set(search_query="dog sleeping")
        hits = chroma.search_documents()

        assert len(hits) == 2
        # At least one hit is expected to mention dogs.
        assert any("dog" in hit.text.lower() for hit in hits)

        # Asking for more results returns more results.
        chroma.set(number_of_results=3)
        hits = chroma.search_documents()
        assert len(hits) == 3

    def test_mmr_search(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """MMR search honours the query and the requested number of results."""
        # Three near-identical sentences plus one unrelated sentence.
        corpus = [
            "The quick brown fox jumps",
            "The quick brown fox leaps",
            "The quick brown fox hops",
            "Something completely different about cats",
        ]
        default_kwargs.update(
            ingest_data=[Data(text=sentence) for sentence in corpus],
            search_type="MMR",
            number_of_results=3,
        )

        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        # Query through the component API.
        chroma.set(search_query="quick fox")
        hits = chroma.search_documents()

        assert len(hits) == 3
        # At least one hit is expected to mention the fox.
        assert any("fox" in hit.text.lower() for hit in hits)

        # A smaller result budget returns fewer results.
        chroma.set(number_of_results=2)
        fewer_hits = chroma.search_documents()
        assert len(fewer_hits) == 2

    def test_search_with_different_types(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Switching search type on one component works, and an empty query yields nothing."""
        corpus = [
            "The quick brown fox jumps over the lazy dog",
            "Python is a popular programming language",
            "Machine learning models process data",
        ]
        default_kwargs.update(
            ingest_data=[Data(text=sentence) for sentence in corpus],
            number_of_results=2,
        )

        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        query = "programming languages"

        # Similarity search.
        chroma.set(search_type="Similarity", search_query=query)
        similarity_hits = chroma.search_documents()
        assert len(similarity_hits) == 2
        assert any("python" in hit.text.lower() for hit in similarity_hits)

        # MMR search.
        chroma.set(search_type="MMR", search_query=query)
        mmr_hits = chroma.search_documents()
        assert len(mmr_hits) == 2

        # Empty query.
        chroma.set(search_query="")
        no_hits = chroma.search_documents()
        assert len(no_hits) == 0

    def test_search_with_score(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Search with the ``similarity_score_threshold`` search type through the component."""
        corpus = [
            "The quick brown fox jumps over the lazy dog",
            "Python is a popular programming language",
            "Machine learning models process data",
        ]
        default_kwargs.update(
            ingest_data=[Data(text=sentence) for sentence in corpus],
            number_of_results=2,
        )

        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        chroma.build_vector_store()

        # Query through the component API.
        chroma.set(
            search_type="similarity_score_threshold",
            search_query="programming languages",
            number_of_results=2,
        )
        hits = chroma.search_documents()

        assert len(hits) == 2
        # The programming-related sentence is expected among the hits.
        assert any("python" in hit.text.lower() for hit in hits)
        assert any("programming" in hit.text.lower() for hit in hits)

        # Asking for more results returns more results.
        chroma.set(number_of_results=3)
        hits = chroma.search_documents()
        assert len(hits) == 3

    def test_duplicate_handling(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """Check stored document counts with ``allow_duplicates`` off and on."""
        # Two identical documents plus one distinct document.
        records = [
            Data(text_key="text", data={"text": "This is a test document"}),
            Data(text_key="text", data={"text": "This is a test document"}),
            Data(text_key="text", data={"text": "This is another document"}),
        ]
        default_kwargs.update(
            ingest_data=records,
            allow_duplicates=False,
            limit=100,  # large enough to fetch every document
        )

        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Fetch everything that was stored.
        stored_texts = store.get(limit=100)["documents"]

        # All three documents are stored, the duplicate included ...
        assert len(stored_texts) == 3
        # ... which amounts to two distinct texts.
        assert len(set(stored_texts)) == 2

        # Second round: allow_duplicates=True, in a separate collection.
        records = [
            Data(text_key="text", data={"text": "This is a test document"}),
            Data(text_key="text", data={"text": "This is a test document"}),
        ]
        default_kwargs.update(
            ingest_data=records,
            allow_duplicates=True,
            collection_name="test_collection_2",
        )

        chroma = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Fetch everything that was stored.
        stored_texts = store.get(limit=100)["documents"]

        # Both copies are present.
        assert len(stored_texts) == 2
        assert all("test document" in text for text in stored_texts)

        # The collection's own count agrees.
        assert store._collection.count() == 2

    def test_chroma_collection_to_data(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """``chroma_collection_to_data`` keeps ids, text and metadata fields."""
        from langflow.base.vectorstores.utils import chroma_collection_to_data

        # Documents that each carry an extra metadata field.
        default_kwargs["ingest_data"] = [
            Data(data={"text": "Document 1", "metadata_field": "value1"}),
            Data(data={"text": "Document 2", "metadata_field": "value2"}),
        ]
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Convert the raw collection dump.
        converted = chroma_collection_to_data(store.get())

        assert len(converted) == 2
        for item in converted:
            assert isinstance(item, Data)
            payload = item.data
            assert "id" in payload
            assert "text" in payload
            assert payload["text"] in ["Document 1", "Document 2"]
            assert "metadata_field" in payload
            assert payload["metadata_field"] in ["value1", "value2"]

    def test_chroma_collection_to_data_without_metadata(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """``chroma_collection_to_data`` handles documents that have no metadata."""
        from langflow.base.vectorstores.utils import chroma_collection_to_data

        # Documents with text only.
        default_kwargs["ingest_data"] = [
            Data(data={"text": "Simple document 1"}),
            Data(data={"text": "Simple document 2"}),
        ]
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Convert the raw collection dump.
        converted = chroma_collection_to_data(store.get())

        assert len(converted) == 2
        for item in converted:
            assert isinstance(item, Data)
            payload = item.data
            assert "id" in payload
            assert "text" in payload
            assert payload["text"] in ["Simple document 1", "Simple document 2"]

    def test_chroma_collection_to_data_empty_collection(
        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
    ) -> None:
        """``chroma_collection_to_data`` returns nothing for an empty collection."""
        from langflow.base.vectorstores.utils import chroma_collection_to_data

        # No ingest_data is configured, so the collection stays empty.
        chroma: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
        store = chroma.build_vector_store()

        # Convert the raw collection dump.
        converted = chroma_collection_to_data(store.get())

        assert len(converted) == 0
|
||||
|
|
@ -34,6 +34,7 @@ export function EditNodeComponent({
|
|||
<div className="h-full">
|
||||
{nodeClass && (
|
||||
<TableComponent
|
||||
tableOptions={{ hide_options: true, block_hide: true }}
|
||||
domLayout={autoHeight ? "autoHeight" : undefined}
|
||||
key={"editNode"}
|
||||
tooltipShowDelay={0.5}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue