fix: Use the identifier column as hash if available (#9405)
* fix: Use the identifier column as hash if available
* Update kb_ingest.py
* [autofix.ci] apply automated fixes
* Update src/backend/base/langflow/components/data/kb_ingest.py

  Co-authored-by: Edwin Jose <edwin.jose@datastax.com>
* [autofix.ci] apply automated fixes
* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Edwin Jose <edwin.jose@datastax.com>
This commit is contained in:
parent
0b78ccd4de
commit
e63e879af6
2 changed files with 13 additions and 7 deletions
|
|
@@ -139,8 +139,8 @@ class KBIngestionComponent(Component):
|
|||
{
|
||||
"column_name": "text",
|
||||
"vectorize": True,
|
||||
"identifier": False,
|
||||
}
|
||||
"identifier": True,
|
||||
},
|
||||
],
|
||||
),
|
||||
IntInput(
|
||||
|
|
@@ -402,16 +402,22 @@ class KBIngestionComponent(Component):
|
|||
|
||||
# Convert each row to a Data object
|
||||
for _, row in df_source.iterrows():
|
||||
# Build content text from vectorized columns using list comprehension
|
||||
content_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]
|
||||
# Build content text from identifier columns using list comprehension
|
||||
identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]
|
||||
|
||||
page_content = " ".join(content_parts)
|
||||
# Join all parts into a single string
|
||||
page_content = " ".join(identifier_parts)
|
||||
|
||||
# Build metadata from NON-vectorized columns only (simple key-value pairs)
|
||||
data_dict = {
|
||||
"text": page_content, # Main content for vectorization
|
||||
}
|
||||
|
||||
# Add identifier columns if they exist
|
||||
if identifier_cols:
|
||||
identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]
|
||||
page_content = " ".join(identifier_parts)
|
||||
|
||||
# Add metadata columns as simple key-value pairs
|
||||
for col in df_source.columns:
|
||||
if col not in content_cols and col in row and pd.notna(row[col]):
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue