feat: astra db chunks deletion based on metadata field (#5537)
* feat: Add deletion_field parameter to AstraDBVectorStoreComponent for document management
  - Introduced a new 'deletion_field' input to specify a metadata field for deleting documents before loading new data.
  - Enhanced the _add_documents_to_vector_store method to handle document deletion based on the specified field, improving data management capabilities.
* Merging with main
* [autofix.ci] apply automated fixes
* [autofix.ci] apply automated fixes
* - Enhanced the info string for the 'deletion_field' parameter to improve readability.
  - Optimized the deletion logic by using a set comprehension to eliminate duplicates when gathering delete values from documents.
* [autofix.ci] apply automated fixes
* Update src/backend/base/langflow/components/vectorstores/astradb.py
  Co-authored-by: Madhavan <msmygit@users.noreply.github.com>
* [autofix.ci] apply automated fixes
---------
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Eric Hare <ericrhare@gmail.com>
Co-authored-by: Madhavan <msmygit@users.noreply.github.com>
This commit is contained in:
parent
c5528c6f93
commit
3df81309bf
2 changed files with 60 additions and 3 deletions
|
|
@ -140,6 +140,14 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
|
|||
info="Field to use as the text content field for the vector store.",
|
||||
advanced=True,
|
||||
),
|
||||
StrInput(
|
||||
name="deletion_field",
|
||||
display_name="Deletion Based On Field",
|
||||
info="When this parameter is provided, documents in the target collection with "
|
||||
"metadata field values matching the input metadata field value will be deleted "
|
||||
"before new data is loaded.",
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="ignore_invalid_documents",
|
||||
display_name="Ignore Invalid Documents",
|
||||
|
|
@ -569,7 +577,8 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
|
|||
|
||||
# Bundle up the auto-detect parameters
|
||||
autodetect_params = {
|
||||
"autodetect_collection": not is_new_collection, # TODO: May want to expose this option
|
||||
# TODO: May want to expose this option
|
||||
"autodetect_collection": not is_new_collection,
|
||||
"content_field": self.content_field or None,
|
||||
"ignore_invalid_documents": self.ignore_invalid_documents,
|
||||
}
|
||||
|
|
@ -607,6 +616,18 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
|
|||
msg = "Vector Store Inputs must be Data objects."
|
||||
raise TypeError(msg)
|
||||
|
||||
if documents and self.deletion_field:
|
||||
self.log(f"Deleting documents where {self.deletion_field}")
|
||||
try:
|
||||
database = self.get_database()
|
||||
collection = database.get_collection(self.get_collection_choice(), keyspace=self.keyspace or None)
|
||||
delete_values = list({doc.metadata[self.deletion_field] for doc in documents})
|
||||
self.log(f"Deleting documents where {self.deletion_field} matches {delete_values}.")
|
||||
collection.delete_many({f"metadata.{self.deletion_field}": {"$in": delete_values}})
|
||||
except Exception as e:
|
||||
msg = f"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}"
|
||||
raise ValueError(msg) from e
|
||||
|
||||
if documents:
|
||||
self.log(f"Adding {len(documents)} documents to the Vector Store.")
|
||||
try:
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue