From e439d71bfa9a9884029a1c56e6bd9bdda9839788 Mon Sep 17 00:00:00 2001 From: Phil Miesle Date: Tue, 26 Nov 2024 04:40:59 +0000 Subject: [PATCH] feat: adding list of Data output to Parse Data (#4598) * adding list of Data output to Parse Data * [autofix.ci] apply automated fixes * enabling connection from more List outputs * [autofix.ci] apply automated fixes * refactor to have single return type * [autofix.ci] apply automated fixes * addressing linting comments * [autofix.ci] apply automated fixes * apparently lint was missed... * sigh --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Hare --- .../components/processing/parse_data.py | 34 ++++++++++++-- src/backend/base/langflow/helpers/data.py | 46 +++++++++++++++---- src/backend/base/langflow/schema/data.py | 18 ++++++++ 3 files changed, 85 insertions(+), 13 deletions(-) diff --git a/src/backend/base/langflow/components/processing/parse_data.py b/src/backend/base/langflow/components/processing/parse_data.py index 8e1156bcc..95bd9075e 100644 --- a/src/backend/base/langflow/components/processing/parse_data.py +++ b/src/backend/base/langflow/components/processing/parse_data.py @@ -1,6 +1,7 @@ from langflow.custom import Component -from langflow.helpers.data import data_to_text +from langflow.helpers.data import data_to_text, data_to_text_list from langflow.io import DataInput, MultilineInput, Output, StrInput +from langflow.schema import Data from langflow.schema.message import Message @@ -11,7 +12,7 @@ class ParseDataComponent(Component): name = "ParseData" inputs = [ - DataInput(name="data", display_name="Data", info="The data to convert to text."), + DataInput(name="data", display_name="Data", info="The data to convert to text.", is_list=True), MultilineInput( name="template", display_name="Template", @@ -23,13 +24,36 @@ class ParseDataComponent(Component): ] outputs = [ - Output(display_name="Text", name="text", method="parse_data"), + Output( + display_name="Text", + name="text", + info="Data as a single Message, with each input Data separated by Separator", + method="parse_data", + ), + Output( + display_name="Data List", + name="data_list", + info="Data as a list of new Data, each having `text` formatted by Template", + method="parse_data_as_list", + ), ] - def parse_data(self) -> Message: + def _clean_args(self) -> tuple[list[Data], str, str]: data = self.data if isinstance(self.data, list) else [self.data] template = self.template + sep = self.sep + return data, template, sep - result_string = data_to_text(template, data, sep=self.sep) + def parse_data(self) -> Message: + data, template, sep = self._clean_args() + result_string = data_to_text(template, data, sep) self.status = result_string return Message(text=result_string) + + def parse_data_as_list(self) -> list[Data]: + data, template, _ = self._clean_args() + text_list, data_list = data_to_text_list(template, data) + for item, text in zip(data_list, text_list, strict=True): + item.set_text(text) + self.status = data_list + return data_list diff --git a/src/backend/base/langflow/helpers/data.py b/src/backend/base/langflow/helpers/data.py index d2cb88ba7..dd840867c 100644 --- a/src/backend/base/langflow/helpers/data.py +++ b/src/backend/base/langflow/helpers/data.py @@ -16,17 +16,31 @@ def docs_to_data(documents: list[Document]) -> list[Data]: return [Data.from_document(document) for document in documents] -def data_to_text(template: str, data: Data | list[Data], sep: str = "\n") -> str: - """Converts a list of Data to a list of texts. +def data_to_text_list(template: str, data: Data | list[Data]) -> tuple[list[str], list[Data]]: + r"""Formats `text` within Data objects based on a given template. + + Converts a Data object or a list of Data objects into a tuple containing a list of formatted strings + and a list of Data objects based on a given template. Args: - template (str): The template to use for the conversion. - data (list[Data]): The list of Data to convert. - sep (str): The separator used to join the data. + template (str): The format string template to be used for formatting the data. + data (Data | list[Data]): A single Data object or a list of Data objects to be formatted. Returns: - list[str]: The converted list of texts. + tuple[list[str], list[Data]]: A tuple containing a list of formatted strings based on the + provided template and data, and a list of Data objects. """ + if data is None: + return [], [] + + if template is None: + msg = "Template must be a string, but got None." + raise ValueError(msg) + + if not isinstance(template, str): + msg = f"Template must be a string, but got {type(template)}" + raise TypeError(msg) + if isinstance(data, (Data)): data = [data] # Check if there are any format strings in the template @@ -36,8 +50,24 @@ def data_to_text(template: str, data: Data | list[Data], sep: str = "\n") -> str for value in data ] - formated_data = [template.format(data=value.data, **value.data) for value in _data] - return sep.join(formated_data) + formatted_text = [template.format(data=value.data, **value.data) for value in _data] + return formatted_text, _data + + +def data_to_text(template: str, data: Data | list[Data], sep: str = "\n") -> str: + r"""Converts data into a formatted text string based on a given template. + + Args: + template (str): The template string used to format each data item. + data (Data | list[Data]): A single data item or a list of data items to be formatted. + sep (str, optional): The separator to use between formatted data items. Defaults to "\n". + + Returns: + str: A string containing the formatted data items separated by the specified separator. + """ + formatted_text, _ = data_to_text_list(template, data) + sep = "\n" if sep is None else sep + return sep.join(formatted_text) def messages_to_text(template: str, messages: Message | list[Message]) -> str: diff --git a/src/backend/base/langflow/schema/data.py b/src/backend/base/langflow/schema/data.py index 05367b2e5..e8a52a565 100644 --- a/src/backend/base/langflow/schema/data.py +++ b/src/backend/base/langflow/schema/data.py @@ -54,6 +54,24 @@ class Data(BaseModel): """ return self.data.get(self.text_key, self.default_value) + def set_text(self, text: str | None) -> str: + r"""Sets the text value in the data dictionary. + + The object's `text` value is set to `text parameter as given, with the following modifications: + + - `text` value of `None` is converted to an empty string. + - `text` value is converted to `str` type. + + Args: + text (str): The text to be set in the data dictionary. + + Returns: + str: The text value that was set in the data dictionary. + """ + new_text = "" if text is None else str(text) + self.data[self.text_key] = new_text + return new_text + @classmethod def from_document(cls, document: Document) -> "Data": """Converts a Document to a Data.