diff --git a/src/backend/base/langflow/base/data/base_file.py b/src/backend/base/langflow/base/data/base_file.py index cd7848e67..cce3886ee 100644 --- a/src/backend/base/langflow/base/data/base_file.py +++ b/src/backend/base/langflow/base/data/base_file.py @@ -5,9 +5,12 @@ from pathlib import Path from tempfile import TemporaryDirectory from zipfile import ZipFile, is_zipfile +import pandas as pd + from langflow.custom import Component -from langflow.io import BoolInput, FileInput, HandleInput, Output +from langflow.io import BoolInput, FileInput, HandleInput, Output, StrInput from langflow.schema import Data +from langflow.schema.dataframe import DataFrame from langflow.schema.message import Message @@ -132,6 +135,14 @@ class BaseFileComponent(Component, ABC): is_list=True, advanced=True, ), + StrInput( + name="separator", + display_name="Separator", + value="\n\n", + show=True, + info="Specify the separator to use between multiple outputs in Message format.", + advanced=True, + ), BoolInput( name="silent_errors", display_name="Silent Errors", @@ -161,7 +172,11 @@ class BaseFileComponent(Component, ABC): ), ] - _base_outputs = [Output(display_name="Data", name="data", method="load_files")] + _base_outputs = [ + Output(display_name="Data", name="data", method="load_files"), + Output(display_name="DataFrame", name="dataframe", method="load_dataframe"), + Output(display_name="Message", name="message", method="load_message"), + ] @abstractmethod def process_files(self, file_list: list[BaseFile]) -> list[BaseFile]: @@ -174,7 +189,7 @@ class BaseFileComponent(Component, ABC): list[BaseFile]: A list of BaseFile objects with updated `data`. """ - def load_files(self) -> list[Data]: + def load_files_base(self) -> list[Data]: """Loads and parses file(s), including unpacked file bundles. Returns: @@ -211,6 +226,80 @@ class BaseFileComponent(Component, ABC): else: file.path.unlink() + def load_files(self) -> list[Data]: + """Load files and return as Data objects. + + Returns: + list[Data]: List of Data objects from all files + """ + data_list = self.load_files_base() + if not data_list: + return [Data()] + return data_list + + def load_dataframe(self) -> DataFrame: + """Load files and return as DataFrame. + + Returns: + DataFrame: DataFrame containing all file data + """ + data_list = self.load_files() + if not data_list: + return DataFrame() + + # First handle CSV files specially + csv_data = [] + non_csv_rows = [] + + for data in data_list: + file_path = data.data.get(self.SERVER_FILE_PATH_FIELDNAME) + if file_path and str(file_path).lower().endswith(".csv"): + try: + csv_data.extend(pd.read_csv(file_path).to_dict("records")) + except Exception as e: + self.log(f"Error processing CSV file {file_path}: {e}") + if not self.silent_errors: + raise + else: + # Handle non-CSV files as before + row = dict(data.data) if data.data else {} + if data.text: + row["text"] = data.text + if file_path: + row["file_path"] = file_path + non_csv_rows.append(row) + + # Combine CSV and non-CSV data + all_rows = csv_data + non_csv_rows + return DataFrame(all_rows) + + def load_message(self) -> Message: + """Load files and return as Message with concatenated content. + + Returns: + Message: Message containing concatenated file content + """ + data_list = self.load_files() + if not data_list: + return Message(text="") + + # Concatenate all text content + text_content = [] + for data in data_list: + content = data.text if data.text else "" + text_content.append(content) + + # Join with separator + final_text = self.separator.join(text_content) + + # Create message with all metadata + all_data = {} + for data in data_list: + if data.data: + all_data.update(data.data) + + return Message(text=final_text, data=all_data) + @property def valid_extensions(self) -> list[str]: """Returns valid file extensions for the class. diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json index db0a59256..e31043445 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json @@ -993,6 +993,34 @@ "Data" ], "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "method": "load_dataframe", + "name": "dataframe", + "required_inputs": [], + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Message", + "method": "load_message", + "name": "message", + "required_inputs": [], + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" } ], "pinned": false, @@ -1142,6 +1170,25 @@ "type": "file", "value": "" }, + "separator": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "Specify the separator to use between multiple outputs in Message format.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "\n\n" + }, "silent_errors": { "_input_type": "BoolInput", "advanced": true, diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json index ab8f07466..c4c026246 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json @@ -2149,6 +2149,34 @@ "Data" ], "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "method": "load_dataframe", + "name": "dataframe", + "required_inputs": [], + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Message", + "method": "load_message", + "name": "message", + "required_inputs": [], + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" } ], "pinned": false, @@ -2309,6 +2337,25 @@ "type": "file", "value": "" }, + "separator": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "Specify the separator to use between multiple outputs in Message format.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "\n\n" + }, "silent_errors": { "_input_type": "BoolInput", "advanced": true, diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 5c25f6bae..5fa80f209 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -2564,6 +2564,34 @@ "Data" ], "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "method": "load_dataframe", + "name": "dataframe", + "required_inputs": [], + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Message", + "method": "load_message", + "name": "message", + "required_inputs": [], + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" } ], "pinned": false, @@ -2713,6 +2741,25 @@ "type": "file", "value": "" }, + "separator": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "Specify the separator to use between multiple outputs in Message format.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "\n\n" + }, "silent_errors": { "_input_type": "BoolInput", "advanced": true,