From 8e55a0e5f440dc7048a94e71cc2f2cb98a090004 Mon Sep 17 00:00:00 2001 From: Phil Miesle Date: Tue, 26 Nov 2024 04:48:45 +0000 Subject: [PATCH] feat: new AlterMetadataComponent enhances Data and Message objects with dict or Data object entries (#4634) * creating AddMetadataComponent * [autofix.ci] apply automated fixes * fixing lint * allow removal of metadata fields * [autofix.ci] apply automated fixes * Fix the name of alter metadata --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Hare --- .../components/processing/__init__.py | 2 + .../components/processing/alter_metadata.py | 90 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 src/backend/base/langflow/components/processing/alter_metadata.py diff --git a/src/backend/base/langflow/components/processing/__init__.py b/src/backend/base/langflow/components/processing/__init__.py index 6ce274f75..7eb82f5ba 100644 --- a/src/backend/base/langflow/components/processing/__init__.py +++ b/src/backend/base/langflow/components/processing/__init__.py @@ -1,3 +1,4 @@ +from .alter_metadata import AlterMetadataComponent from .combine_text import CombineTextComponent from .create_data import CreateDataComponent from .extract_key import ExtractDataKeyComponent @@ -12,6 +13,7 @@ from .split_text import SplitTextComponent from .update_data import UpdateDataComponent __all__ = [ + "AlterMetadataComponent", "CombineTextComponent", "CreateDataComponent", "DataFilterComponent", diff --git a/src/backend/base/langflow/components/processing/alter_metadata.py b/src/backend/base/langflow/components/processing/alter_metadata.py new file mode 100644 index 000000000..118f410a2 --- /dev/null +++ b/src/backend/base/langflow/components/processing/alter_metadata.py @@ -0,0 +1,90 @@ +from langflow.custom import Component +from langflow.inputs import MessageTextInput +from langflow.io import HandleInput, NestedDictInput, Output, StrInput +from langflow.schema 
class AlterMetadataComponent(Component):
    """Add metadata entries to, and/or remove fields from, the input objects.

    The component gathers its working set from two optional sources: the
    ``text_in`` string (wrapped in a new ``Data(text=...)``) and the objects
    supplied through ``input_value``. The cleaned ``metadata`` mapping is
    merged into each object's ``data`` dictionary, after which any keys named
    in ``remove_fields`` are dropped.

    Note: objects received through ``input_value`` are modified in place (their
    ``data`` mapping is updated and possibly replaced) and those same objects
    are returned.
    """

    display_name = "Alter Metadata"
    description = "Adds/Removes Metadata Dictionary on inputs"
    icon = "merge"
    name = "AlterMetadata"

    inputs = [
        HandleInput(
            name="input_value",
            display_name="Input",
            info="Object(s) to which Metadata should be added",
            required=False,
            input_types=["Message", "Data"],
            is_list=True,
        ),
        StrInput(
            name="text_in",
            display_name="User Text",
            info="Text input; value will be in 'text' attribute of Data object. Empty text entries are ignored.",
            required=False,
        ),
        NestedDictInput(
            name="metadata",
            display_name="Metadata",
            info="Metadata to add to each object",
            input_types=["Data"],
            required=True,
        ),
        MessageTextInput(
            name="remove_fields",
            display_name="Fields to Remove",
            info="Metadata Fields to Remove",
            required=False,
            is_list=True,
        ),
    ]

    outputs = [
        Output(
            name="data",
            display_name="Data",
            info="List of Input objects each with added Metadata",
            method="process_output",
        ),
    ]

    def _as_clean_dict(self, obj) -> dict:
        """Convert a Data object or a standard dictionary to a standard dictionary.

        Args:
            obj: Either a ``dict`` or a ``Data`` instance (its ``data``
                attribute is used).

        Returns:
            A new dictionary containing only the entries whose key is
            non-empty after stripping whitespace. A ``None``/empty source
            mapping yields ``{}``.

        Raises:
            TypeError: If ``obj`` is neither a ``dict`` nor a ``Data`` object.
        """
        if isinstance(obj, dict):
            as_dict = obj
        elif isinstance(obj, Data):
            as_dict = obj.data
        else:
            msg = f"Expected a Data object or a dictionary but got {type(obj)}."
            raise TypeError(msg)

        # Blank / whitespace-only keys are dropped rather than propagated.
        return {k: v for k, v in (as_dict or {}).items() if k and k.strip()}

    def process_output(self) -> list[Data]:
        """Apply the metadata additions and field removals to every input object.

        Returns:
            The list of processed objects: an optional ``Data`` built from
            ``text_in`` followed by the objects from ``input_value``. The same
            list is also stored in ``self.status``.

        Raises:
            TypeError: If ``self.metadata`` is neither a ``dict`` nor a
                ``Data`` object (raised by ``_as_clean_dict``).
        """
        # Ensure metadata is a dictionary, filtering out any empty keys
        metadata = self._as_clean_dict(self.metadata)

        # Convert text_in to a Data object if it exists, and initialize our list of Data objects
        data_objects = [Data(text=self.text_in)] if self.text_in else []

        # Append existing objects from input_value, if any. A single object
        # (not wrapped in a list) must be appended as one item: list.extend
        # would otherwise iterate over the object itself.
        input_value = self.input_value
        if input_value:
            if not isinstance(input_value, list):
                input_value = [input_value]
            data_objects.extend(input_value)

        # Update each object with the new metadata, preserving existing fields
        for data in data_objects:
            data.data.update(metadata)

        # Handle removal of fields specified in remove_fields
        remove_fields = self.remove_fields
        if remove_fields:
            # A bare string is one field name; iterating it directly would
            # treat every character as a separate field to remove.
            if isinstance(remove_fields, str):
                remove_fields = [remove_fields]
            fields_to_remove = {field.strip() for field in remove_fields if field.strip()}

            # Remove specified fields from each object's data mapping
            for data in data_objects:
                data.data = {k: v for k, v in data.data.items() if k not in fields_to_remove}

        # Set the status for tracking/debugging purposes
        self.status = data_objects
        return data_objects