From f080049526cc4b7c68559dc893e6ff29da903467 Mon Sep 17 00:00:00 2001
From: Cristhian Zanforlin Lousa <cristhian.lousa@gmail.com>
Date: Wed, 8 Jan 2025 10:01:39 -0300
Subject: [PATCH] fix: add tests and adjustments to Chroma component (#5571)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Refactor: Update EditNodeComponent to enable the table's hide_options and block_hide options

* 🐛 (model.py): fix issue with search_documents method not returning empty list when search_query is empty
♻️ (model.py): refactor search_documents method to handle search_query logic more efficiently
📝 (chroma.py): add typing_extensions override import for build_vector_store method to improve code readability

* ✨ (tests): add unit tests for ChromaVectorStoreComponent

- Introduced new test suite for ChromaVectorStoreComponent, covering various functionalities including database creation, collection management, similarity and MMR searches, and duplicate handling.
- Implemented tests for creating collections with and without data, ensuring proper functionality and data integrity.
- Verified search capabilities with different query types and result limits, enhancing overall test coverage for the component.

* fix: remove unnecessary whitespace in model.py and add missing import in chroma.py

* fix: mypy error module has no attribute "timeout"

* ♻️ (async_helpers.py): Remove unnecessary type hint ignore comment from timeout_context function

* 📝 (async_helpers.py): add a comment with issue reference PGH003 to document the reason for ignoring type checking in timeout_context function

* ♻️ (async_helpers.py): Remove unnecessary type hint comment to improve code readability and maintainability

* ♻️ (async_helpers.py): Add type ignore comment to suppress miscellaneous type error for timeout_context function

* ♻️ (async_helpers.py): refactor timeout_context function to remove unnecessary type ignore comments and improve code readability

* [autofix.ci] apply automated fixes

* 📝 (async_helpers.py): add a blank line for better code readability and consistency

* fix: mypy error: incompatible redefinition

---------

Co-authored-by: anovazzi1 <otavio2204@gmail.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
Co-authored-by: italojohnny <italojohnnydosanjos@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 .../base/langflow/base/vectorstores/model.py  |  10 +-
 .../base/langflow/components/tools/mcp_sse.py |   4 +-
 .../components/vectorstores/chroma.py         |   2 +
 .../base/langflow/utils/async_helpers.py      |  17 +
 .../unit/components/vectorstores/__init__.py  |   0
 .../test_chroma_vector_store_component.py     | 320 ++++++++++++++++++
 .../components/editNodeComponent/index.tsx    |   1 +
 7 files changed, 347 insertions(+), 7 deletions(-)
 create mode 100644 src/backend/tests/unit/components/vectorstores/__init__.py
 create mode 100644 src/backend/tests/unit/components/vectorstores/test_chroma_vector_store_component.py

diff --git a/src/backend/base/langflow/base/vectorstores/model.py b/src/backend/base/langflow/base/vectorstores/model.py
index ee157c037..0c58c96fb 100644
--- a/src/backend/base/langflow/base/vectorstores/model.py
+++ b/src/backend/base/langflow/base/vectorstores/model.py
@@ -122,17 +122,17 @@ class LCVectorStoreComponent(Component):
 
     def search_documents(self) -> list[Data]:
         """Search for documents in the vector store."""
-        search_query: str = self.search_query
-        if not search_query:
-            self.status = ""
-            return []
-
         if self._cached_vector_store is not None:
             vector_store = self._cached_vector_store
         else:
             vector_store = self.build_vector_store()
             self._cached_vector_store = vector_store
 
+        search_query: str = self.search_query
+        if not search_query:
+            self.status = ""
+            return []
+
         self.log(f"Search input: {search_query}")
         self.log(f"Search type: {self.search_type}")
         self.log(f"Number of results: {self.number_of_results}")
diff --git a/src/backend/base/langflow/components/tools/mcp_sse.py b/src/backend/base/langflow/components/tools/mcp_sse.py
index 876c05c7d..8c813a574 100644
--- a/src/backend/base/langflow/components/tools/mcp_sse.py
+++ b/src/backend/base/langflow/components/tools/mcp_sse.py
@@ -1,5 +1,4 @@
 # from langflow.field_typing import Data
-import asyncio
 from contextlib import AsyncExitStack
 
 import httpx
@@ -11,6 +10,7 @@ from langflow.components.tools.mcp_stdio import create_input_schema_from_json_sc
 from langflow.custom import Component
 from langflow.field_typing import Tool
 from langflow.io import MessageTextInput, Output
+from langflow.utils.async_helpers import timeout_context
 
 # Define constant for status code
 HTTP_TEMPORARY_REDIRECT = 307
@@ -39,7 +39,7 @@ class MCPSseClient:
             headers = {}
         url = await self.pre_check_redirect(url)
 
-        async with asyncio.timeout(timeout_seconds):
+        async with timeout_context(timeout_seconds):
             sse_transport = await self.exit_stack.enter_async_context(
                 sse_client(url, headers, timeout_seconds, sse_read_timeout_seconds)
             )
diff --git a/src/backend/base/langflow/components/vectorstores/chroma.py b/src/backend/base/langflow/components/vectorstores/chroma.py
index c009443b0..970b53530 100644
--- a/src/backend/base/langflow/components/vectorstores/chroma.py
+++ b/src/backend/base/langflow/components/vectorstores/chroma.py
@@ -2,6 +2,7 @@ from copy import deepcopy
 
 from chromadb.config import Settings
 from langchain_chroma import Chroma
+from typing_extensions import override
 
 from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
 from langflow.base.vectorstores.utils import chroma_collection_to_data
@@ -82,6 +83,7 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
         ),
     ]
 
+    @override
     @check_cached_vector_store
     def build_vector_store(self) -> Chroma:
         """Builds the Chroma object."""
diff --git a/src/backend/base/langflow/utils/async_helpers.py b/src/backend/base/langflow/utils/async_helpers.py
index 1b0ba25b4..68b68e147 100644
--- a/src/backend/base/langflow/utils/async_helpers.py
+++ b/src/backend/base/langflow/utils/async_helpers.py
@@ -1,4 +1,58 @@
 import asyncio
+from contextlib import asynccontextmanager
+
+if hasattr(asyncio, "timeout"):
+
+    @asynccontextmanager
+    async def timeout_context(timeout_seconds):
+        """Bound the running time of an ``async with`` body.
+
+        Args:
+            timeout_seconds: Time limit in seconds; ``None`` disables the limit.
+
+        Raises:
+            TimeoutError: If the body is still running when the limit expires.
+        """
+        # asyncio.timeout() only implements the asynchronous context-manager
+        # protocol, so it has to be entered with ``async with``.
+        async with asyncio.timeout(timeout_seconds) as ctx:
+            yield ctx
+else:
+
+    @asynccontextmanager
+    async def timeout_context(timeout_seconds):
+        """Bound the running time of an ``async with`` body (pre-3.11 fallback).
+
+        Args:
+            timeout_seconds: Time limit in seconds; ``None`` disables the limit.
+
+        Raises:
+            TimeoutError: If the body is still running when the limit expires.
+        """
+        task = asyncio.current_task()
+        timed_out = False
+
+        def _expire():
+            nonlocal timed_out
+            timed_out = True
+            task.cancel()
+
+        # Cancel the enclosing task at the deadline instead of awaiting
+        # anything before the body gets a chance to run.
+        handle = None
+        if timeout_seconds is not None:
+            handle = asyncio.get_running_loop().call_later(timeout_seconds, _expire)
+        try:
+            yield
+        except asyncio.CancelledError as e:
+            # Only translate the cancellation this timer requested.
+            if not timed_out:
+                raise
+            msg = f"Operation timed out after {timeout_seconds} seconds"
+            raise TimeoutError(msg) from e
+        finally:
+            if handle is not None:
+                handle.cancel()
 
 
 def run_until_complete(coro):
diff --git a/src/backend/tests/unit/components/vectorstores/__init__.py b/src/backend/tests/unit/components/vectorstores/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/backend/tests/unit/components/vectorstores/test_chroma_vector_store_component.py b/src/backend/tests/unit/components/vectorstores/test_chroma_vector_store_component.py
new file mode 100644
index 000000000..5775233d8
--- /dev/null
+++ b/src/backend/tests/unit/components/vectorstores/test_chroma_vector_store_component.py
@@ -0,0 +1,320 @@
+import os
+from pathlib import Path
+from typing import Any
+
+import pytest
+from langflow.components.vectorstores.chroma import ChromaVectorStoreComponent
+from langflow.schema.data import Data
+
+from tests.base import ComponentTestBaseWithoutClient, VersionComponentMapping
+
+
+@pytest.mark.api_key_required
+class TestChromaVectorStoreComponent(ComponentTestBaseWithoutClient):
+    @pytest.fixture
+    def component_class(self) -> type[Any]:
+        """Return the component class under test (ChromaVectorStoreComponent)."""
+        return ChromaVectorStoreComponent
+
+    @pytest.fixture
+    def default_kwargs(self, tmp_path: Path) -> dict[str, Any]:
+        """Return the component's default kwargs, skipping the test when OPENAI_API_KEY is unset."""
+        from langflow.components.embeddings.openai import OpenAIEmbeddingsComponent
+
+        # Read the variable once so the skip check and the embeddings use the same value.
+        api_key = os.getenv("OPENAI_API_KEY")
+        if api_key is None:
+            pytest.skip("OPENAI_API_KEY is not set")
+
+        return {
+            "embedding": OpenAIEmbeddingsComponent(openai_api_key=api_key).build_embeddings(),
+            "collection_name": "test_collection",
+            "persist_directory": tmp_path,
+        }
+
+    @pytest.fixture
+    def file_names_mapping(self) -> list[VersionComponentMapping]:
+        """Return the (version, module, file_name) mapping entries for different versions."""
+        return [
+            {"version": "1.0.19", "module": "vectorstores", "file_name": "Chroma"},
+            {"version": "1.1.0", "module": "vectorstores", "file_name": "chroma"},
+            {"version": "1.1.1", "module": "vectorstores", "file_name": "chroma"},
+        ]
+
+    def test_create_db(self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]) -> None:
+        """Test that building the vector store creates the on-disk database files."""
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        component.build_vector_store()
+        persist_directory = default_kwargs["persist_directory"]
+        assert persist_directory.exists()
+        assert persist_directory.is_dir()
+        # Assert the persist directory isn't empty
+        assert len(list(persist_directory.iterdir())) > 0
+        # Assert there's a chroma.sqlite3 file
+        assert (persist_directory / "chroma.sqlite3").exists()
+        assert (persist_directory / "chroma.sqlite3").is_file()
+
+    def test_create_collection_with_data(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test that building the vector store with ingest_data populates the collection."""
+        # Set ingest_data in default_kwargs to a list of Data objects
+        test_texts = ["test data 1", "test data 2", "something completely different"]
+        default_kwargs["ingest_data"] = [Data(text=text) for text in test_texts]
+
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        vector_store = component.build_vector_store()
+
+        # Verify the collection has the expected name and document count
+        collection = vector_store._collection
+        assert collection.name == default_kwargs["collection_name"]
+        assert collection.count() == len(test_texts)
+
+    def test_similarity_search(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test the similarity search functionality through the component."""
+        # Create test data with distinct topics
+        test_data = [
+            "The quick brown fox jumps over the lazy dog",
+            "Python is a popular programming language",
+            "Machine learning models process data",
+            "The lazy dog sleeps all day long",
+        ]
+        default_kwargs["ingest_data"] = [Data(text=text) for text in test_data]
+        default_kwargs["search_type"] = "Similarity"
+        default_kwargs["number_of_results"] = 2
+
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        component.build_vector_store()
+
+        # Test similarity search through the component
+        component.set(search_query="dog sleeping")
+        results = component.search_documents()
+
+        assert len(results) == 2
+        # At least one of the returned results should mention dogs
+        assert any("dog" in result.text.lower() for result in results)
+
+        # Test with a larger number of results
+        component.set(number_of_results=3)
+        results = component.search_documents()
+        assert len(results) == 3
+
+    def test_mmr_search(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test the MMR search functionality through the component."""
+        # Create test data with some similar documents
+        test_data = [
+            "The quick brown fox jumps",
+            "The quick brown fox leaps",
+            "The quick brown fox hops",
+            "Something completely different about cats",
+        ]
+        default_kwargs["ingest_data"] = [Data(text=text) for text in test_data]
+        default_kwargs["search_type"] = "MMR"
+        default_kwargs["number_of_results"] = 3
+
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        component.build_vector_store()
+
+        # Test MMR search through the component
+        component.set(search_query="quick fox")
+        results = component.search_documents()
+
+        assert len(results) == 3
+        # At least one result should mention the fox (diversity itself is not asserted)
+        assert any("fox" in result.text.lower() for result in results)
+
+        # Test with a smaller number of results
+        component.set(number_of_results=2)
+        diverse_results = component.search_documents()
+        assert len(diverse_results) == 2
+
+    def test_search_with_different_types(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test search with different search types and with an empty query."""
+        test_data = [
+            "The quick brown fox jumps over the lazy dog",
+            "Python is a popular programming language",
+            "Machine learning models process data",
+        ]
+        default_kwargs["ingest_data"] = [Data(text=text) for text in test_data]
+        default_kwargs["number_of_results"] = 2
+
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        component.build_vector_store()
+
+        # Test similarity search
+        component.set(search_type="Similarity", search_query="programming languages")
+        similarity_results = component.search_documents()
+        assert len(similarity_results) == 2
+        assert any("python" in result.text.lower() for result in similarity_results)
+
+        # Test MMR search
+        component.set(search_type="MMR", search_query="programming languages")
+        mmr_results = component.search_documents()
+        assert len(mmr_results) == 2
+
+        # Test with empty query: no documents should be returned
+        component.set(search_query="")
+        empty_results = component.search_documents()
+        assert len(empty_results) == 0
+
+    def test_search_with_score(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test the search with score functionality through the component."""
+        test_data = [
+            "The quick brown fox jumps over the lazy dog",
+            "Python is a popular programming language",
+            "Machine learning models process data",
+        ]
+        default_kwargs["ingest_data"] = [Data(text=text) for text in test_data]
+        default_kwargs["number_of_results"] = 2
+
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        component.build_vector_store()
+
+        # Test search with score through the component
+        component.set(
+            search_type="similarity_score_threshold", search_query="programming languages", number_of_results=2
+        )
+        results = component.search_documents()
+
+        assert len(results) == 2
+        # At least one result should be the Python/programming document (ordering is not asserted)
+        assert any("python" in result.text.lower() for result in results)
+        assert any("programming" in result.text.lower() for result in results)
+
+        # Test with a larger number of results
+        component.set(number_of_results=3)
+        results = component.search_documents()
+        assert len(results) == 3
+
+    def test_duplicate_handling(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test handling of duplicate documents."""
+        # Create test data with duplicates
+        test_data = [
+            Data(text_key="text", data={"text": "This is a test document"}),
+            Data(text_key="text", data={"text": "This is a test document"}),  # Duplicate with exact same data
+            Data(text_key="text", data={"text": "This is another document"}),
+        ]
+        default_kwargs["ingest_data"] = test_data
+        default_kwargs["allow_duplicates"] = False
+        default_kwargs["limit"] = 100  # Set a high enough limit to get all documents
+
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        vector_store = component.build_vector_store()
+
+        # Get all documents
+        results = vector_store.get(limit=100)
+
+        documents = results["documents"]
+
+        # NOTE(review): in-batch duplicates are kept; presumably only stored documents are checked (confirm)
+        assert len(documents) == 3  # All documents are added, even duplicates
+
+        # Count unique texts
+        unique_texts = set(documents)
+        assert len(unique_texts) == 2  # Should have 2 unique texts
+
+        # Test with allow_duplicates=True
+        test_data = [
+            Data(text_key="text", data={"text": "This is a test document"}),
+            Data(text_key="text", data={"text": "This is a test document"}),  # Duplicate
+        ]
+        default_kwargs["ingest_data"] = test_data
+        default_kwargs["allow_duplicates"] = True
+        default_kwargs["collection_name"] = "test_collection_2"  # Use a different collection name
+
+        component = component_class().set(**default_kwargs)
+        vector_store = component.build_vector_store()
+
+        # Get all documents
+        results = vector_store.get(limit=100)
+        documents = results["documents"]
+
+        # With allow_duplicates=True, we should have both documents
+        assert len(documents) == 2
+        assert all("test document" in doc for doc in documents)
+
+        # Verify that the collection count matches the number of ingested documents
+        assert vector_store._collection.count() == 2
+
+    def test_chroma_collection_to_data(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test that chroma_collection_to_data converts documents and their metadata to Data objects."""
+        from langflow.base.vectorstores.utils import chroma_collection_to_data
+
+        # Create a collection with documents and metadata
+        test_data = [
+            Data(data={"text": "Document 1", "metadata_field": "value1"}),
+            Data(data={"text": "Document 2", "metadata_field": "value2"}),
+        ]
+        default_kwargs["ingest_data"] = test_data
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        vector_store = component.build_vector_store()
+
+        # Get the collection data
+        collection_dict = vector_store.get()
+        data_objects = chroma_collection_to_data(collection_dict)
+
+        # Verify each converted object exposes id, text and the metadata field
+        assert len(data_objects) == 2
+        for data_obj in data_objects:
+            assert isinstance(data_obj, Data)
+            assert "id" in data_obj.data
+            assert "text" in data_obj.data
+            assert data_obj.data["text"] in ["Document 1", "Document 2"]
+            assert "metadata_field" in data_obj.data
+            assert data_obj.data["metadata_field"] in ["value1", "value2"]
+
+    def test_chroma_collection_to_data_without_metadata(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test the chroma_collection_to_data function with documents that have no metadata."""
+        from langflow.base.vectorstores.utils import chroma_collection_to_data
+
+        # Create a collection with documents but no metadata
+        test_data = [
+            Data(data={"text": "Simple document 1"}),
+            Data(data={"text": "Simple document 2"}),
+        ]
+        default_kwargs["ingest_data"] = test_data
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        vector_store = component.build_vector_store()
+
+        # Get the collection data
+        collection_dict = vector_store.get()
+        data_objects = chroma_collection_to_data(collection_dict)
+
+        # Verify each converted object still exposes id and text
+        assert len(data_objects) == 2
+        for data_obj in data_objects:
+            assert isinstance(data_obj, Data)
+            assert "id" in data_obj.data
+            assert "text" in data_obj.data
+            assert data_obj.data["text"] in ["Simple document 1", "Simple document 2"]
+
+    def test_chroma_collection_to_data_empty_collection(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test the chroma_collection_to_data function with an empty collection."""
+        from langflow.base.vectorstores.utils import chroma_collection_to_data
+
+        # Create an empty collection (no ingest_data is provided)
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        vector_store = component.build_vector_store()
+
+        # Get the collection data
+        collection_dict = vector_store.get()
+        data_objects = chroma_collection_to_data(collection_dict)
+
+        # Verify the conversion yields no Data objects
+        assert len(data_objects) == 0
diff --git a/src/frontend/src/modals/editNodeModal/components/editNodeComponent/index.tsx b/src/frontend/src/modals/editNodeModal/components/editNodeComponent/index.tsx
index 5e7f23596..53bf2f4d8 100644
--- a/src/frontend/src/modals/editNodeModal/components/editNodeComponent/index.tsx
+++ b/src/frontend/src/modals/editNodeModal/components/editNodeComponent/index.tsx
@@ -34,6 +34,7 @@ export function EditNodeComponent({
         <div className="h-full">
           {nodeClass && (
             <TableComponent
+              tableOptions={{ hide_options: true, block_hide: true }}
               domLayout={autoHeight ? "autoHeight" : undefined}
               key={"editNode"}
               tooltipShowDelay={0.5}