fix: silent error fix in KB components (#9433)

* silent error fix

* silent error fix

* Raise ValueError if a configured column is not available in the DataFrame

* [autofix.ci] apply automated fixes

* Remove the silent_errors tests now that the option is gone

* Update test_kb_ingest.py

---------

Co-authored-by: Eric Hare <ericrhare@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Edwin Jose 2025-08-19 16:03:54 -04:00 committed by GitHub
commit fbb0012045
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 8 additions and 44 deletions

View file

@ -187,9 +187,8 @@ class KBIngestionComponent(Component):
df_columns = set(df_source.columns)
for config in config_list:
col_name = config.get("column_name")
if col_name not in df_columns and not self.silent_errors:
if col_name not in df_columns:
msg = f"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}"
self.log(f"Warning: {msg}")
raise ValueError(msg)
return config_list
@ -295,9 +294,7 @@ class KBIngestionComponent(Component):
if not cfg_path.exists():
cfg_path.write_text(json.dumps(config_list, indent=2))
except Exception as e:
if not self.silent_errors:
raise
except (OSError, TypeError, ValueError) as e:
self.log(f"Error saving KB files: {e}")
def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:
@ -367,9 +364,7 @@ class KBIngestionComponent(Component):
chroma.add_documents(documents)
self.log(f"Added {len(documents)} documents to vector store '{self.knowledge_base}'")
except Exception as e:
if not self.silent_errors:
raise
except (OSError, ValueError, RuntimeError) as e:
self.log(f"Error creating vector store: {e}")
def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:
@ -526,9 +521,7 @@ class KBIngestionComponent(Component):
return Data(data=meta)
except Exception as e:
if not self.silent_errors:
raise
except (OSError, ValueError, RuntimeError, KeyError) as e:
self.log(f"Error in KB ingestion: {e}")
self.status = f"❌ KB ingestion failed: {e}"
return Data(data={"error": str(e), "kb_name": self.knowledge_base})

File diff suppressed because one or more lines are too long

View file

@ -1,5 +1,4 @@
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pandas as pd
@ -87,26 +86,14 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
invalid_config = [{"column_name": "nonexistent", "vectorize": True, "identifier": False}]
default_kwargs["column_config"] = invalid_config
# Instantiate the component with the modified config
component = component_class(**default_kwargs)
data_df = default_kwargs["input_df"]
# Should raise ValueError since column does not exist in DataFrame
with pytest.raises(ValueError, match="Column 'nonexistent' not found in DataFrame"):
component._validate_column_config(data_df)
def test_validate_column_config_silent_errors(self, component_class, default_kwargs):
"""Test column configuration validation with silent errors enabled."""
# Modify column config to include non-existent column
invalid_config = [{"column_name": "nonexistent", "vectorize": True, "identifier": False}]
default_kwargs["column_config"] = invalid_config
default_kwargs["silent_errors"] = True
component = component_class(**default_kwargs)
data_df = default_kwargs["input_df"]
# Should not raise exception with silent_errors=True
config_list = component._validate_column_config(data_df)
assert isinstance(config_list, list)
def test_get_embedding_provider(self, component_class, default_kwargs):
"""Test embedding provider detection."""
component = component_class(**default_kwargs)
@ -308,22 +295,6 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
assert "rows" in result.data
assert result.data["rows"] == 2
def test_build_kb_info_with_silent_errors(self, component_class, default_kwargs):
"""Test KB info building with silent errors enabled."""
default_kwargs["silent_errors"] = True
component = component_class(**default_kwargs)
# Remove the metadata file to cause an error
kb_path = Path(default_kwargs["kb_root_path"]) / default_kwargs["knowledge_base"]
metadata_file = kb_path / "embedding_metadata.json"
if metadata_file.exists():
metadata_file.unlink()
# Should not raise exception with silent_errors=True
result = component.build_kb_info()
assert isinstance(result, Data)
assert "error" in result.data
def test_get_knowledge_bases(self, component_class, default_kwargs, tmp_path):
"""Test getting list of knowledge bases."""
component = component_class(**default_kwargs)