From 37bc0467b6c1333328af7214c861d8084aa9ff8d Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Fri, 14 Mar 2025 08:56:33 -0700 Subject: [PATCH] feat: Dynamic outputs for file component (#7005) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Dynamic outputs for file component * [autofix.ci] apply automated fixes * Update base_file.py * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Update base_file.py * [autofix.ci] apply automated fixes * feat: enhance BaseFileComponent with new data loading methods and separator input - Added `separator` input to specify the separator for concatenated outputs in Message format. - Refactored output methods: renamed and updated `load_csv`, `load_json`, and `load_others` to `load_data`, `load_dataframe`, and `load_message` respectively. - Updated JSON configuration files to reflect changes in output types and methods for starter projects. * Fix file loader --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Ítalo Johnny Co-authored-by: Rodrigo --- .../base/langflow/base/data/base_file.py | 95 ++++++++++++++++++- .../starter_projects/Document Q&A.json | 47 +++++++++ .../Portfolio Website Code Generator.json | 47 +++++++++ .../starter_projects/Vector Store RAG.json | 47 +++++++++ 4 files changed, 233 insertions(+), 3 deletions(-) diff --git a/src/backend/base/langflow/base/data/base_file.py b/src/backend/base/langflow/base/data/base_file.py index cd7848e67..cce3886ee 100644 --- a/src/backend/base/langflow/base/data/base_file.py +++ b/src/backend/base/langflow/base/data/base_file.py @@ -5,9 +5,12 @@ from pathlib import Path from tempfile import TemporaryDirectory from zipfile import ZipFile, is_zipfile +import pandas as pd + from langflow.custom import Component -from langflow.io import BoolInput, FileInput, HandleInput, Output +from langflow.io import BoolInput, FileInput, HandleInput, Output, StrInput from langflow.schema import Data +from langflow.schema.dataframe import DataFrame from langflow.schema.message import Message @@ -132,6 +135,14 @@ class BaseFileComponent(Component, ABC): is_list=True, advanced=True, ), + StrInput( + name="separator", + display_name="Separator", + value="\n\n", + show=True, + info="Specify the separator to use between multiple outputs in Message format.", + advanced=True, + ), BoolInput( name="silent_errors", display_name="Silent Errors", @@ -161,7 +172,11 @@ class BaseFileComponent(Component, ABC): ), ] - _base_outputs = [Output(display_name="Data", name="data", method="load_files")] + _base_outputs = [ + Output(display_name="Data", name="data", method="load_files"), + Output(display_name="DataFrame", name="dataframe", method="load_dataframe"), + Output(display_name="Message", name="message", method="load_message"), + ] @abstractmethod def process_files(self, file_list: list[BaseFile]) -> list[BaseFile]: @@ -174,7 +189,7 @@ class BaseFileComponent(Component, ABC): list[BaseFile]: A list of BaseFile objects with updated `data`. """ - def load_files(self) -> list[Data]: + def load_files_base(self) -> list[Data]: """Loads and parses file(s), including unpacked file bundles. Returns: @@ -211,6 +226,80 @@ class BaseFileComponent(Component, ABC): else: file.path.unlink() + def load_files(self) -> list[Data]: + """Load files and return as Data objects. + + Returns: + list[Data]: List of Data objects from all files + """ + data_list = self.load_files_base() + if not data_list: + return [Data()] + return data_list + + def load_dataframe(self) -> DataFrame: + """Load files and return as DataFrame. + + Returns: + DataFrame: DataFrame containing all file data + """ + data_list = self.load_files() + if not data_list: + return DataFrame() + + # First handle CSV files specially + csv_data = [] + non_csv_rows = [] + + for data in data_list: + file_path = data.data.get(self.SERVER_FILE_PATH_FIELDNAME) + if file_path and str(file_path).lower().endswith(".csv"): + try: + csv_data.extend(pd.read_csv(file_path).to_dict("records")) + except Exception as e: + self.log(f"Error processing CSV file {file_path}: {e}") + if not self.silent_errors: + raise + else: + # Handle non-CSV files as before + row = dict(data.data) if data.data else {} + if data.text: + row["text"] = data.text + if file_path: + row["file_path"] = file_path + non_csv_rows.append(row) + + # Combine CSV and non-CSV data + all_rows = csv_data + non_csv_rows + return DataFrame(all_rows) + + def load_message(self) -> Message: + """Load files and return as Message with concatenated content. + + Returns: + Message: Message containing concatenated file content + """ + data_list = self.load_files() + if not data_list: + return Message(text="") + + # Concatenate all text content + text_content = [] + for data in data_list: + content = data.text if data.text else "" + text_content.append(content) + + # Join with separator + final_text = self.separator.join(text_content) + + # Create message with all metadata + all_data = {} + for data in data_list: + if data.data: + all_data.update(data.data) + + return Message(text=final_text, data=all_data) + @property def valid_extensions(self) -> list[str]: """Returns valid file extensions for the class. diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json index db0a59256..e31043445 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json @@ -993,6 +993,34 @@ "Data" ], "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "method": "load_dataframe", + "name": "dataframe", + "required_inputs": [], + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Message", + "method": "load_message", + "name": "message", + "required_inputs": [], + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" } ], "pinned": false, @@ -1142,6 +1170,25 @@ "type": "file", "value": "" }, + "separator": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "Specify the separator to use between multiple outputs in Message format.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "\n\n" + }, "silent_errors": { "_input_type": "BoolInput", "advanced": true, diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json index ab8f07466..c4c026246 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json @@ -2149,6 +2149,34 @@ "Data" ], "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "method": "load_dataframe", + "name": "dataframe", + "required_inputs": [], + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Message", + "method": "load_message", + "name": "message", + "required_inputs": [], + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" } ], "pinned": false, @@ -2309,6 +2337,25 @@ "type": "file", "value": "" }, + "separator": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "Specify the separator to use between multiple outputs in Message format.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "\n\n" + }, "silent_errors": { "_input_type": "BoolInput", "advanced": true, diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 5c25f6bae..5fa80f209 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -2564,6 +2564,34 @@ "Data" ], "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "method": "load_dataframe", + "name": "dataframe", + "required_inputs": [], + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Message", + "method": "load_message", + "name": "message", + "required_inputs": [], + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" } ], "pinned": false, @@ -2713,6 +2741,25 @@ "type": "file", "value": "" }, + "separator": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "Specify the separator to use between multiple outputs in Message format.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "\n\n" + }, "silent_errors": { "_input_type": "BoolInput", "advanced": true,