feat: adding list of Data output to Parse Data (#4598)

* adding list of Data output to Parse Data

* [autofix.ci] apply automated fixes

* enabling connection from more List outputs

* [autofix.ci] apply automated fixes

* refactor to have single return type

* [autofix.ci] apply automated fixes

* addressing linting comments

* [autofix.ci] apply automated fixes

* apparently lint was missed...

* sigh

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Eric Hare <ericrhare@gmail.com>
This commit is contained in:
Phil Miesle 2024-11-26 04:40:59 +00:00 committed by GitHub
commit e439d71bfa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 85 additions and 13 deletions

View file

@ -1,6 +1,7 @@
from langflow.custom import Component
from langflow.helpers.data import data_to_text
from langflow.helpers.data import data_to_text, data_to_text_list
from langflow.io import DataInput, MultilineInput, Output, StrInput
from langflow.schema import Data
from langflow.schema.message import Message
@ -11,7 +12,7 @@ class ParseDataComponent(Component):
name = "ParseData"
inputs = [
DataInput(name="data", display_name="Data", info="The data to convert to text."),
DataInput(name="data", display_name="Data", info="The data to convert to text.", is_list=True),
MultilineInput(
name="template",
display_name="Template",
@ -23,13 +24,36 @@ class ParseDataComponent(Component):
]
outputs = [
Output(display_name="Text", name="text", method="parse_data"),
Output(
display_name="Text",
name="text",
info="Data as a single Message, with each input Data separated by Separator",
method="parse_data",
),
Output(
display_name="Data List",
name="data_list",
info="Data as a list of new Data, each having `text` formatted by Template",
method="parse_data_as_list",
),
]
def parse_data(self) -> Message:
def _clean_args(self) -> tuple[list[Data], str, str]:
    """Gather the component inputs shared by the parse methods.

    Returns:
        tuple[list[Data], str, str]: ``(data, template, sep)`` where ``data`` is
        ``self.data`` itself when it is already a list, or ``self.data`` wrapped
        in a one-element list otherwise; ``template`` and ``sep`` are
        ``self.template`` and ``self.sep`` passed through unchanged.
    """
    raw = self.data
    # Downstream helpers are always handed a list, even for a single input.
    items = raw if isinstance(raw, list) else [raw]
    return items, self.template, self.sep
result_string = data_to_text(template, data, sep=self.sep)
def parse_data(self) -> Message:
    """Render every input through the template into one Message.

    The inputs come from ``_clean_args``; ``data_to_text`` turns them into a
    single string, which is stored on ``self.status`` and returned wrapped in
    a ``Message``.

    Returns:
        Message: A message whose ``text`` is the string produced by
        ``data_to_text``.
    """
    items, fmt, separator = self._clean_args()
    rendered = data_to_text(fmt, items, separator)
    self.status = rendered
    return Message(text=rendered)
def parse_data_as_list(self) -> list[Data]:
    """Return one Data per input, each with its text set from the template.

    ``data_to_text_list`` supplies a list of formatted strings and a list of
    Data objects; each Data has ``set_text`` called with its matching string.
    The separator from ``_clean_args`` is not used here. The resulting list is
    stored on ``self.status`` and returned.

    Returns:
        list[Data]: The Data objects returned by ``data_to_text_list``, after
        ``set_text`` has been applied to each.

    Raises:
        ValueError: If the two lists returned by ``data_to_text_list`` differ
            in length (``zip`` is called with ``strict=True``).
    """
    items, fmt, _sep = self._clean_args()
    texts, records = data_to_text_list(fmt, items)
    # NOTE(review): set_text mutates each record in place. Whether these are the
    # caller's input objects or fresh copies depends on data_to_text_list and is
    # not visible from here - confirm before relying on inputs staying untouched.
    for record, rendered in zip(records, texts, strict=True):
        record.set_text(rendered)
    self.status = records
    return records

View file

@ -16,17 +16,31 @@ def docs_to_data(documents: list[Document]) -> list[Data]:
return [Data.from_document(document) for document in documents]
def data_to_text(template: str, data: Data | list[Data], sep: str = "\n") -> str:
"""Converts a list of Data to a list of texts.
def data_to_text_list(template: str, data: Data | list[Data]) -> tuple[list[str], list[Data]]:
r"""Formats `text` within Data objects based on a given template.
Converts a Data object or a list of Data objects into a tuple containing a list of formatted strings
and a list of Data objects based on a given template.
Args:
template (str): The template to use for the conversion.
data (list[Data]): The list of Data to convert.
sep (str): The separator used to join the data.
template (str): The format string template to be used for formatting the data.
data (Data | list[Data]): A single Data object or a list of Data objects to be formatted.
Returns:
list[str]: The converted list of texts.
tuple[list[str], list[Data]]: A tuple containing a list of formatted strings based on the
provided template and data, and a list of Data objects.
"""
if data is None:
return [], []
if template is None:
msg = "Template must be a string, but got None."
raise ValueError(msg)
if not isinstance(template, str):
msg = f"Template must be a string, but got {type(template)}"
raise TypeError(msg)
if isinstance(data, (Data)):
data = [data]
# Check if there are any format strings in the template
@ -36,8 +50,24 @@ def data_to_text(template: str, data: Data | list[Data], sep: str = "\n") -> str
for value in data
]
formated_data = [template.format(data=value.data, **value.data) for value in _data]
return sep.join(formated_data)
formatted_text = [template.format(data=value.data, **value.data) for value in _data]
return formatted_text, _data
def data_to_text(template: str, data: Data | list[Data], sep: str = "\n") -> str:
    r"""Format each data item with a template and join the results into one string.

    Formatting is delegated to ``data_to_text_list``; only its list of strings
    is used here.

    Args:
        template (str): The template string applied to each data item.
        data (Data | list[Data]): A single data item or a list of data items.
        sep (str, optional): The text placed between formatted items. Defaults
            to "\n"; an explicit ``None`` is treated as "\n" as well.

    Returns:
        str: The formatted items joined by the separator.
    """
    texts, _records = data_to_text_list(template, data)
    joiner = sep if sep is not None else "\n"
    return joiner.join(texts)
def messages_to_text(template: str, messages: Message | list[Message]) -> str:

View file

@ -54,6 +54,24 @@ class Data(BaseModel):
"""
return self.data.get(self.text_key, self.default_value)
def set_text(self, text: str | None) -> str:
r"""Sets the text value in the data dictionary.
The object's `text` value is set to `text parameter as given, with the following modifications:
- `text` value of `None` is converted to an empty string.
- `text` value is converted to `str` type.
Args:
text (str): The text to be set in the data dictionary.
Returns:
str: The text value that was set in the data dictionary.
"""
new_text = "" if text is None else str(text)
self.data[self.text_key] = new_text
return new_text
@classmethod
def from_document(cls, document: Document) -> "Data":
"""Converts a Document to a Data.