diff --git a/src/backend/base/langflow/components/vectorstores/chroma.py b/src/backend/base/langflow/components/vectorstores/chroma.py
index 31cc1fb85..b035d5f58 100644
--- a/src/backend/base/langflow/components/vectorstores/chroma.py
+++ b/src/backend/base/langflow/components/vectorstores/chroma.py
@@ -154,6 +154,15 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
 
         if documents and self.embedding is not None:
             self.log(f"Adding {len(documents)} documents to the Vector Store.")
+            # Filter complex metadata to prevent ChromaDB errors.
+            # Only the import is guarded: an ImportError raised inside
+            # add_documents must propagate instead of triggering a second, unfiltered insert.
+            try:
+                from langchain_community.vectorstores.utils import filter_complex_metadata
+            except ImportError:
+                self.log("Warning: Could not import filter_complex_metadata. Adding documents without filtering.")
+            else:
+                documents = filter_complex_metadata(documents)
             vector_store.add_documents(documents)
         else:
             self.log("No documents to add to the Vector Store.")
diff --git a/src/backend/tests/unit/components/vectorstores/test_chroma_vector_store_component.py b/src/backend/tests/unit/components/vectorstores/test_chroma_vector_store_component.py
index 4fee4b194..ab45d6f2f 100644
--- a/src/backend/tests/unit/components/vectorstores/test_chroma_vector_store_component.py
+++ b/src/backend/tests/unit/components/vectorstores/test_chroma_vector_store_component.py
@@ -318,3 +318,84 @@ class TestChromaVectorStoreComponent(ComponentTestBaseWithoutClient):
 
         # Verify the conversion
         assert len(data_objects) == 0
+
+    def test_metadata_filtering_with_complex_data(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any]
+    ) -> None:
+        """Test that complex metadata is properly filtered and simple types are preserved."""
+        from langflow.base.vectorstores.utils import chroma_collection_to_data
+
+        # Create test data that covers the original error scenario and validation
+        test_data = [
+            Data(
+                data={
+                    "text": "Document with mixed metadata",
+                    "files": [],  # This empty list was causing the original ChromaDB error
+                    "tags": ["tag1", "tag2"],  # Lists should be filtered out
+                    "nested": {"key": "value"},  # Nested objects should be filtered out
+                    "simple_string": "preserved",
+                    "simple_int": 42,
+                    "simple_bool": True,
+                    "empty_string": "",  # Edge case: empty but valid
+                    "zero_value": 0,  # Edge case: falsy but valid
+                }
+            )
+        ]
+
+        default_kwargs["ingest_data"] = test_data
+        default_kwargs["collection_name"] = "test_metadata_filtering"
+
+        # This should not raise an error despite the complex metadata
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        vector_store = component.build_vector_store()
+
+        # Verify document was added successfully
+        collection_dict = vector_store.get()
+        assert len(collection_dict["documents"]) == 1
+        assert "Document with mixed metadata" in collection_dict["documents"][0]
+
+        # Verify metadata filtering: simple types preserved, complex types filtered out
+        data_objects = chroma_collection_to_data(collection_dict)
+        data_obj = data_objects[0]
+
+        # Simple types should be preserved
+        assert data_obj.data["simple_string"] == "preserved"
+        assert data_obj.data["simple_int"] == 42
+        assert data_obj.data["simple_bool"] is True
+        assert data_obj.data["empty_string"] == ""
+        assert data_obj.data["zero_value"] == 0
+
+        # Complex types should be filtered out
+        assert "files" not in data_obj.data
+        assert "tags" not in data_obj.data
+        assert "nested" not in data_obj.data
+
+    def test_metadata_filtering_fallback(
+        self, component_class: type[ChromaVectorStoreComponent], default_kwargs: dict[str, Any], monkeypatch
+    ) -> None:
+        """Test the fallback behavior when filter_complex_metadata import fails."""
+        import builtins
+
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "langchain_community.vectorstores.utils":
+                error_msg = "Mocked import error"
+                raise ImportError(error_msg)
+            return original_import(name, *args, **kwargs)
+
+        monkeypatch.setattr(builtins, "__import__", mock_import)
+
+        # Use simple test data to avoid ChromaDB errors when filtering is unavailable
+        test_data = [Data(data={"text": "Simple document", "simple_field": "simple_value"})]
+        default_kwargs["ingest_data"] = test_data
+        default_kwargs["collection_name"] = "test_fallback"
+
+        # Should work with fallback (no filtering)
+        component: ChromaVectorStoreComponent = component_class().set(**default_kwargs)
+        vector_store = component.build_vector_store()
+
+        # Verify document was added
+        collection_dict = vector_store.get()
+        assert len(collection_dict["documents"]) == 1
+        assert "Simple document" in collection_dict["documents"][0]