feat: Enhance Data Components and Add SelectData Functionality (#3715)

## Changes Overview

This PR introduces several enhancements to our data-handling components and removes some legacy code:

1. **CreateData and UpdateData Components**:
   - Added text key name validation
   - Enhanced UpdateData to support updating and appending multiple data points
   - Implemented optional text key validator for dynamically loaded data
   - Updated component names, descriptions, and default values

2. **New SelectData Component**:
   - Implemented SelectDataComponent for data selection from a list
   - Added inputs for data list and index selection
   - Implemented error handling for out-of-range index selection

3. **Code Cleanup**:
   - Removed legacy post_code_processing methods from CreateDataComponent and UpdateDataComponent
   - Removed corresponding test cases

4. **Testing**:
   - Added unit tests for CreateData and UpdateData components
   - Updated test file names for consistency
This commit is contained in:
Edwin Jose 2024-09-22 14:19:51 -04:00 committed by GitHub
commit ab2df9ef3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 401 additions and 49 deletions

View file

@ -1,7 +1,7 @@
from typing import Any
from langflow.custom import Component
from langflow.inputs.inputs import IntInput, MessageTextInput, DictInput
from langflow.inputs.inputs import IntInput, MessageTextInput, DictInput, BoolInput
from langflow.io import Output
from langflow.field_typing.range_spec import RangeSpec
@ -20,10 +20,21 @@ class CreateDataComponent(Component):
display_name="Number of Fields",
info="Number of fields to be added to the record.",
real_time_refresh=True,
value=0,
value=1,
range_spec=RangeSpec(min=1, max=15, step=1, step_type="int"),
),
MessageTextInput(name="text_key", display_name="Text Key", info="Key to be used as text.", advanced=True),
MessageTextInput(
name="text_key",
display_name="Text Key",
info="Key that identifies the field to be used as the text content.",
advanced=True,
),
BoolInput(
name="text_key_validator",
display_name="Text Key Validator",
advanced=True,
info="If enabled, checks if the given 'Text Key' is present in the given 'Data'.",
),
]
outputs = [
@ -32,7 +43,7 @@ class CreateDataComponent(Component):
def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):
if field_name == "number_of_fields":
default_keys = ["code", "_type", "number_of_fields", "text_key"]
default_keys = ["code", "_type", "number_of_fields", "text_key", "text_key_validator"]
try:
field_value_int = int(field_value)
except ValueError:
@ -65,6 +76,15 @@ class CreateDataComponent(Component):
return build_config
async def build_data(self) -> Data:
    """Build the output ``Data`` from the collected field values.

    The result is stored in ``self.status`` before the optional Text Key
    validation runs, so a validation failure still leaves the status set.

    Returns:
        Data: wraps the dictionary returned by ``get_data`` and uses
        ``self.text_key`` as its text key.

    Raises:
        ValueError: propagated from ``validate_text_key`` when
        ``text_key_validator`` is enabled and the check fails.
    """
    result = Data(data=self.get_data(), text_key=self.text_key)
    self.status = result
    if not self.text_key_validator:
        return result
    self.validate_text_key()
    return result
def get_data(self):
"""Function to get the Data from the attributes"""
data = {}
for value_dict in self._attributes.values():
if isinstance(value_dict, dict):
@ -73,17 +93,11 @@ class CreateDataComponent(Component):
key: value.get_text() if isinstance(value, Data) else value for key, value in value_dict.items()
}
data.update(value_dict)
return_data = Data(data=data, text_key=self.text_key)
self.status = return_data
return return_data
return data
def post_code_processing(self, new_frontend_node: dict, current_frontend_node: dict):
"""
This function is called after the code validation is done.
"""
frontend_node = super().post_code_processing(new_frontend_node, current_frontend_node)
frontend_node["template"] = self.update_build_config(
frontend_node["template"], frontend_node["template"]["number_of_fields"]["value"], "number_of_fields"
)
frontend_node = super().post_code_processing(new_frontend_node, current_frontend_node)
return frontend_node
def validate_text_key(self):
    """Check that the configured Text Key is one of the keys in the data.

    An empty Text Key is accepted without any check.

    Raises:
        ValueError: if ``self.text_key`` is non-empty and is not a key of the
        dictionary returned by ``get_data``.
    """
    available_keys = self.get_data().keys()
    if self.text_key in available_keys or self.text_key == "":
        return
    listed_keys = ", ".join(available_keys)
    raise ValueError(f"Text Key: '{self.text_key}' not found in the Data keys: '{listed_keys}'")

View file

@ -0,0 +1,47 @@
from langflow.custom import Component
from langflow.inputs.inputs import DataInput, IntInput
from langflow.io import Output
from langflow.field_typing.range_spec import RangeSpec
from langflow.schema import Data
class SelectDataComponent(Component):
    """Component that returns one item of a list of ``Data`` objects, chosen by index."""

    display_name: str = "Select Data"
    description: str = "Select a single data from a list of data."
    name: str = "SelectData"
    icon = "prototypes"

    inputs = [
        DataInput(
            name="data_list",
            display_name="Data List",
            info="List of data to select from.",
            is_list=True,  # this input accepts a list of Data objects
        ),
        IntInput(
            name="data_index",
            display_name="Data Index",
            info="Index of the data to select.",
            value=0,  # default index
            range_spec=RangeSpec(min=0, max=15, step=1, step_type="int"),
        ),
    ]

    outputs = [
        Output(display_name="Selected Data", name="selected_data", method="select_data"),
    ]

    async def select_data(self) -> Data:
        """Return the element of ``data_list`` at position ``data_index``.

        The chosen element is also stored in ``self.status``.

        Raises:
            ValueError: if the index is negative or not smaller than the
            length of ``data_list``.
        """
        position = int(self.data_index)

        # Reject anything outside [0, len(data_list)) before indexing.
        if not (0 <= position < len(self.data_list)):
            raise ValueError(f"Selected index {position} is out of range.")

        chosen = self.data_list[position]
        self.status = chosen
        return chosen

View file

@ -1,40 +1,109 @@
from langflow.custom import CustomComponent
from typing import Any
from langflow.custom import Component
from langflow.inputs.inputs import IntInput, MessageTextInput, DictInput, DataInput, BoolInput
from langflow.io import Output
from langflow.field_typing.range_spec import RangeSpec
from langflow.schema import Data
from langflow.schema.dotdict import dotdict
class UpdateDataComponent(CustomComponent):
display_name = "Update Data"
description = "Update Data with text-based key/value pairs, similar to updating a Python dictionary."
name = "UpdateData"
class UpdateDataComponent(Component):
display_name: str = "Update data"
description: str = "Dynamically update or append data with the specified fields."
name: str = "UpdateData"
def build_config(self):
return {
"data": {
"display_name": "Data",
"info": "The record to update.",
},
"new_data": {
"display_name": "New Data",
"info": "The new data to update the record with.",
"input_types": ["Text"],
},
}
inputs = [
DataInput(
name="old_data",
display_name="Data",
info="The record to update.",
is_list=False,
),
IntInput(
name="number_of_fields",
display_name="Number of Fields",
info="Number of fields to be added to the record.",
real_time_refresh=True,
value=0,
range_spec=RangeSpec(min=1, max=15, step=1, step_type="int"),
),
MessageTextInput(
name="text_key",
display_name="Text Key",
info="Key that identifies the field to be used as the text content.",
advanced=True,
),
BoolInput(
name="text_key_validator",
display_name="Text Key Validator",
advanced=True,
info="If enabled, checks if the given 'Text Key' is present in the given 'Data'.",
),
]
def build(
self,
data: Data,
new_data: dict,
) -> Data:
"""
Updates a record with new data.
outputs = [
Output(display_name="Data", name="data", method="build_data"),
]
Args:
record (Data): The record to update.
new_data (dict): The new data to update the record with.
def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):
if field_name == "number_of_fields":
default_keys = ["code", "_type", "number_of_fields", "text_key", "old_data", "text_key_validator"]
try:
field_value_int = int(field_value)
except ValueError:
return build_config
existing_fields = {}
if field_value_int > 15:
build_config["number_of_fields"]["value"] = 15
raise ValueError("Number of fields cannot exceed 15. Try using a Component to combine two Data.")
if len(build_config) > len(default_keys):
# back up the existing template fields
for key in build_config.copy():
if key not in default_keys:
existing_fields[key] = build_config.pop(key)
Returns:
Data: The updated record.
"""
data.data.update(new_data)
self.status = data
for i in range(1, field_value_int + 1):
key = f"field_{i}_key"
if key in existing_fields:
field = existing_fields[key]
build_config[key] = field
else:
field = DictInput(
display_name=f"Field {i}",
name=key,
info=f"Key for field {i}.",
input_types=["Text", "Data"],
)
build_config[field.name] = field.to_dict()
build_config["number_of_fields"]["value"] = field_value_int
return build_config
async def build_data(self) -> Data:
    """Merge the dynamically configured fields into the incoming record.

    The fields returned by ``get_data`` are written into ``old_data.data``
    (existing keys are overwritten, new keys are appended). When a Text Key
    is configured it replaces the record's current ``text_key``.

    NOTE(review): the incoming ``old_data`` object is updated in place and
    returned, so whatever produced it sees the change as well.

    Returns:
        Data: the same ``old_data`` object, after the update.

    Raises:
        ValueError: propagated from ``validate_text_key`` when
        ``text_key_validator`` is enabled and the check fails.
    """
    new_data = self.get_data()
    self.old_data.data.update(new_data)
    if self.text_key:
        self.old_data.text_key = self.text_key
    self.status = self.old_data
    # Validation is opt-in: the "Text Key Validator" input is documented as
    # "If enabled, checks ...", so only validate when the flag is set.
    if self.text_key_validator:
        self.validate_text_key(self.old_data)
    return self.old_data
def get_data(self):
    """Collect the key/value pairs of every dict-valued attribute.

    Attributes whose value is not a dict are ignored. A ``Data`` value is
    replaced by the result of its ``get_text()``; other values are kept
    unchanged. Later attributes overwrite earlier ones on key collisions.

    Returns:
        dict: the merged key/value pairs.
    """
    merged = {}
    for attribute in self._attributes.values():
        if not isinstance(attribute, dict):
            continue
        for name, item in attribute.items():
            merged[name] = item.get_text() if isinstance(item, Data) else item
    return merged
def validate_text_key(self, data: Data):
    """Check that the configured Text Key is one of the keys of ``data.data``.

    An empty Text Key is accepted without any check.

    Args:
        data: the record whose ``data`` mapping is inspected.

    Raises:
        ValueError: if ``self.text_key`` is non-empty and not a key of
        ``data.data``.
    """
    available_keys = data.data.keys()
    if self.text_key == "" or self.text_key in available_keys:
        return
    raise ValueError(f"Text Key: {self.text_key} not found in the Data keys: {','.join(available_keys)}")

View file

@ -0,0 +1,117 @@
import pytest
from langflow.components.prototypes.CreateData import CreateDataComponent
from langflow.schema import Data
@pytest.fixture
def create_data_component():
    """Provide a newly constructed ``CreateDataComponent``."""
    component = CreateDataComponent()
    return component
def test_update_build_config(create_data_component):
    """Setting ``number_of_fields`` to 3 must yield three field entries and store the count."""
    template = {
        "number_of_fields": {"type": "int", "value": 2},
        "text_key": {"type": "str", "value": ""},
        "text_key_validator": {"type": "bool", "value": False},
    }

    result = create_data_component.update_build_config(
        build_config=template, field_value=3, field_name="number_of_fields"
    )

    for expected_key in ("field_1_key", "field_2_key", "field_3_key"):
        assert expected_key in result
    assert result["number_of_fields"]["value"] == 3
def test_update_build_config_exceed_limit(create_data_component):
    """Requesting more than 15 fields must raise ``ValueError`` with the limit message."""
    build_config = {
        "number_of_fields": {
            "type": "int",
            "value": 2,
        },
        "text_key": {
            "type": "str",
            "value": "",
        },
        "text_key_validator": {
            "type": "bool",
            "value": False,
        },
    }
    # ``match`` is a regular expression, so the literal dots are escaped;
    # an unescaped "." would accept any character in those positions.
    with pytest.raises(
        ValueError, match=r"Number of fields cannot exceed 15\. Try using a Component to combine two Data\."
    ):
        create_data_component.update_build_config(build_config, 16, "number_of_fields")
@pytest.mark.asyncio
async def test_build_data(create_data_component):
    """``build_data`` must merge the field dicts into one ``Data`` carrying the Text Key."""
    component = create_data_component
    component._attributes = {
        "field_1_key": {"key1": "value1"},
        "field_2_key": {"key2": "value2"},
    }
    component.text_key = "key1"
    component.text_key_validator = False

    built = await component.build_data()

    assert isinstance(built, Data)
    assert built.data == {"key1": "value1", "key2": "value2"}
    assert built.text_key == "key1"
def test_get_data(create_data_component):
    """``get_data`` must flatten the per-field dicts into a single dictionary."""
    component = create_data_component
    component._attributes = {
        "field_1_key": {"key1": "value1"},
        "field_2_key": {"key2": "value2"},
    }

    collected = component.get_data()

    assert collected == {"key1": "value1", "key2": "value2"}
def test_validate_text_key_valid(create_data_component):
    """A Text Key that is present in the data must pass validation without raising."""
    component = create_data_component

    # Arrange: two fields, with the Text Key pointing at the first one.
    component._attributes = {
        "field_1_key": {"key1": "value1"},
        "field_2_key": {"key2": "value2"},
    }
    component.text_key = "key1"

    # Act: validation must complete silently.
    try:
        component.validate_text_key()
    except ValueError:
        pytest.fail("validate_text_key() raised ValueError unexpectedly!")

    # Assert: the component state is unchanged and the key is really there.
    assert component.text_key == "key1"
    assert "key1" in component.get_data()
def test_validate_text_key_invalid(create_data_component):
    """A Text Key that is absent from the data must raise ``ValueError`` with the exact message."""
    component = create_data_component
    component._attributes = {
        "field_1_key": {"key1": "value1"},
        "field_2_key": {"key2": "value2"},
    }
    component.text_key = "invalid_key"

    with pytest.raises(ValueError) as raised:
        component.validate_text_key()

    # Rebuild the expected message from the component's own state.
    joined_keys = ", ".join(component.get_data().keys())
    expected = f"Text Key: '{component.text_key}' not found in the Data keys: '{joined_keys}'"
    assert str(raised.value) == expected

View file

@ -0,0 +1,105 @@
import pytest
from langflow.components.prototypes.UpdateData import UpdateDataComponent
from langflow.schema import Data
@pytest.fixture
def update_data_component():
    """Provide a newly constructed ``UpdateDataComponent``."""
    component = UpdateDataComponent()
    return component
def test_update_build_config(update_data_component):
    """Setting ``number_of_fields`` to 3 must yield three field entries and store the count."""
    template = {
        "number_of_fields": {"type": "int", "value": 2},
        "text_key": {"type": "str", "value": ""},
        "text_key_validator": {"type": "bool", "value": False},
    }

    result = update_data_component.update_build_config(
        build_config=template, field_value=3, field_name="number_of_fields"
    )

    for expected_key in ("field_1_key", "field_2_key", "field_3_key"):
        assert expected_key in result
    assert result["number_of_fields"]["value"] == 3
def test_update_build_config_exceed_limit(update_data_component):
    """Requesting more than 15 fields must raise ``ValueError`` with the limit message."""
    build_config = {
        "number_of_fields": {
            "type": "int",
            "value": 2,
        },
        "text_key": {
            "type": "str",
            "value": "",
        },
        "text_key_validator": {
            "type": "bool",
            "value": False,
        },
    }
    # ``match`` is a regular expression, so the literal dots are escaped;
    # an unescaped "." would accept any character in those positions.
    with pytest.raises(
        ValueError, match=r"Number of fields cannot exceed 15\. Try using a Component to combine two Data\."
    ):
        update_data_component.update_build_config(build_config, 16, "number_of_fields")
@pytest.mark.asyncio
async def test_build_data(update_data_component):
    """``build_data`` must overwrite existing keys, append new ones and apply the Text Key."""
    component = update_data_component
    component._attributes = {
        "field_1_key": {"key1": "new_value1"},
        "field_2_key": {"key3": "value3"},
    }
    component.text_key = "key1"
    component.text_key_validator = False
    component.old_data = Data(data={"key1": "old_value1", "key2": "value2"}, text_key="key2")

    updated = await component.build_data()

    assert isinstance(updated, Data)
    assert updated.data == {"key1": "new_value1", "key2": "value2", "key3": "value3"}
    assert updated.text_key == "key1"
def test_get_data(update_data_component):
    """``get_data`` must flatten the per-field dicts into a single dictionary."""
    component = update_data_component
    component._attributes = {
        "field_1_key": {"key1": "value1"},
        "field_2_key": {"key2": "value2"},
    }

    collected = component.get_data()

    assert collected == {"key1": "value1", "key2": "value2"}
def test_validate_text_key_valid(update_data_component):
    """A Text Key that is present in the record must pass validation without raising."""
    record = Data(data={"key1": "value1", "key2": "value2"}, text_key="key1")
    component = update_data_component
    component.text_key = "key1"

    try:
        component.validate_text_key(record)
    except ValueError:
        pytest.fail("validate_text_key() raised ValueError unexpectedly!")
def test_validate_text_key_invalid(update_data_component):
    """A Text Key that is absent from the record must raise ``ValueError`` with the exact message."""
    record = Data(data={"key1": "value1", "key2": "value2"}, text_key="key1")
    component = update_data_component
    component.text_key = "invalid_key"

    with pytest.raises(ValueError) as raised:
        component.validate_text_key(record)

    # Rebuild the expected message from the component and record state.
    joined_keys = ",".join(record.data.keys())
    expected = f"Text Key: {component.text_key} not found in the Data keys: {joined_keys}"
    assert str(raised.value) == expected