feat: Dynamic outputs for file component (#7005)

* feat: Dynamic outputs for file component

* [autofix.ci] apply automated fixes

* Update base_file.py

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

* Update base_file.py

* [autofix.ci] apply automated fixes

* feat: enhance BaseFileComponent with new data loading methods and separator input

- Added `separator` input to specify the separator for concatenated outputs in Message format.
- Refactored output methods: renamed and updated `load_csv`, `load_json`, and `load_others` to `load_data`, `load_dataframe`, and `load_message` respectively.
- Updated JSON configuration files to reflect changes in output types and methods for starter projects.

* Fix file loader

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Ítalo Johnny <italojohnnydosanjos@gmail.com>
Co-authored-by: Rodrigo <rodrigosilvanader@gmail.com>
This commit is contained in:
Eric Hare 2025-03-14 08:56:33 -07:00 committed by GitHub
commit 37bc0467b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 233 additions and 3 deletions

View file

@ -5,9 +5,12 @@ from pathlib import Path
from tempfile import TemporaryDirectory
from zipfile import ZipFile, is_zipfile
import pandas as pd
from langflow.custom import Component
from langflow.io import BoolInput, FileInput, HandleInput, Output
from langflow.io import BoolInput, FileInput, HandleInput, Output, StrInput
from langflow.schema import Data
from langflow.schema.dataframe import DataFrame
from langflow.schema.message import Message
@ -132,6 +135,14 @@ class BaseFileComponent(Component, ABC):
is_list=True,
advanced=True,
),
StrInput(
name="separator",
display_name="Separator",
value="\n\n",
show=True,
info="Specify the separator to use between multiple outputs in Message format.",
advanced=True,
),
BoolInput(
name="silent_errors",
display_name="Silent Errors",
@ -161,7 +172,11 @@ class BaseFileComponent(Component, ABC):
),
]
_base_outputs = [Output(display_name="Data", name="data", method="load_files")]
_base_outputs = [
Output(display_name="Data", name="data", method="load_files"),
Output(display_name="DataFrame", name="dataframe", method="load_dataframe"),
Output(display_name="Message", name="message", method="load_message"),
]
@abstractmethod
def process_files(self, file_list: list[BaseFile]) -> list[BaseFile]:
@ -174,7 +189,7 @@ class BaseFileComponent(Component, ABC):
list[BaseFile]: A list of BaseFile objects with updated `data`.
"""
def load_files(self) -> list[Data]:
def load_files_base(self) -> list[Data]:
"""Loads and parses file(s), including unpacked file bundles.
Returns:
@ -211,6 +226,80 @@ class BaseFileComponent(Component, ABC):
else:
file.path.unlink()
def load_files(self) -> list[Data]:
    """Return the loaded files as a list of Data objects.

    Delegates the actual loading to `load_files_base` and substitutes a
    single empty `Data` placeholder when nothing was loaded, so this
    output is never an empty list.

    Returns:
        list[Data]: The Data objects produced by `load_files_base`, or
            `[Data()]` when that result is empty.
    """
    loaded = self.load_files_base()
    # An empty result is replaced by one blank Data placeholder.
    return loaded if loaded else [Data()]
def load_dataframe(self) -> DataFrame:
    """Load files and return their contents as a single DataFrame.

    CSV files (detected by a `.csv` suffix on the server file path) are
    re-read with pandas so that every CSV record becomes its own row.
    Every other file contributes exactly one row built from its `Data`
    payload, plus its `text` and `file_path` when available.

    Returns:
        DataFrame: CSV rows first, followed by the non-CSV rows. An empty
            DataFrame when no files were loaded.

    Raises:
        Exception: Whatever `pd.read_csv` raised for a CSV file, re-raised
            unchanged when `silent_errors` is false.
    """
    # Read from the base loader rather than `load_files`: the latter pads an
    # empty result with a placeholder `Data()`, which would make the guard
    # below unreachable and yield a frame with one empty row.
    data_list = self.load_files_base()
    if not data_list:
        return DataFrame()

    csv_rows = []
    other_rows = []
    for data in data_list:
        # Treat a missing/empty payload as "no path" instead of failing on `.get`.
        payload = data.data or {}
        file_path = payload.get(self.SERVER_FILE_PATH_FIELDNAME)

        if file_path and str(file_path).lower().endswith(".csv"):
            # NOTE(review): this re-reads the file from its server path; if the
            # base loader has already removed the file, the read fails here --
            # confirm against the delete-after-processing behaviour.
            try:
                csv_rows.extend(pd.read_csv(file_path).to_dict("records"))
            except Exception as e:
                # Deliberately broad: the `silent_errors` input decides whether
                # a bad CSV aborts the run or is just logged and skipped.
                self.log(f"Error processing CSV file {file_path}: {e}")
                if not self.silent_errors:
                    raise
            continue

        # Non-CSV file: one row per file, copied so the source Data is untouched.
        row = dict(payload)
        if data.text:
            row["text"] = data.text
        if file_path:
            row["file_path"] = file_path
        other_rows.append(row)

    # CSV records come first, then the per-file rows.
    return DataFrame(csv_rows + other_rows)
def load_message(self) -> Message:
    """Return all loaded file content as one Message.

    The text of each loaded item is joined with the component's
    `separator`; items without text contribute an empty string. The
    message's `data` is the merge of every item's payload, applied in
    load order, so later items overwrite keys set by earlier ones.

    Returns:
        Message: The concatenated text plus the merged payload, or a
            Message with empty text when nothing was loaded.
    """
    loaded = self.load_files()
    if not loaded:
        return Message(text="")

    # One entry per item so the separator count matches the item count.
    pieces = [item.text or "" for item in loaded]
    joined_text = self.separator.join(pieces)

    # Fold every non-empty payload into a single metadata dict.
    merged = {}
    for item in loaded:
        payload = item.data
        if payload:
            merged.update(payload)

    return Message(text=joined_text, data=merged)
@property
def valid_extensions(self) -> list[str]:
"""Returns valid file extensions for the class.

View file

@ -993,6 +993,34 @@
"Data"
],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "DataFrame",
"method": "load_dataframe",
"name": "dataframe",
"required_inputs": [],
"selected": "DataFrame",
"tool_mode": true,
"types": [
"DataFrame"
],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Message",
"method": "load_message",
"name": "message",
"required_inputs": [],
"selected": "Message",
"tool_mode": true,
"types": [
"Message"
],
"value": "__UNDEFINED__"
}
],
"pinned": false,
@ -1142,6 +1170,25 @@
"type": "file",
"value": ""
},
"separator": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Separator",
"dynamic": false,
"info": "Specify the separator to use between multiple outputs in Message format.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "separator",
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"type": "str",
"value": "\n\n"
},
"silent_errors": {
"_input_type": "BoolInput",
"advanced": true,

View file

@ -2149,6 +2149,34 @@
"Data"
],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "DataFrame",
"method": "load_dataframe",
"name": "dataframe",
"required_inputs": [],
"selected": "DataFrame",
"tool_mode": true,
"types": [
"DataFrame"
],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Message",
"method": "load_message",
"name": "message",
"required_inputs": [],
"selected": "Message",
"tool_mode": true,
"types": [
"Message"
],
"value": "__UNDEFINED__"
}
],
"pinned": false,
@ -2309,6 +2337,25 @@
"type": "file",
"value": ""
},
"separator": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Separator",
"dynamic": false,
"info": "Specify the separator to use between multiple outputs in Message format.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "separator",
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"type": "str",
"value": "\n\n"
},
"silent_errors": {
"_input_type": "BoolInput",
"advanced": true,

View file

@ -2564,6 +2564,34 @@
"Data"
],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "DataFrame",
"method": "load_dataframe",
"name": "dataframe",
"required_inputs": [],
"selected": "DataFrame",
"tool_mode": true,
"types": [
"DataFrame"
],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Message",
"method": "load_message",
"name": "message",
"required_inputs": [],
"selected": "Message",
"tool_mode": true,
"types": [
"Message"
],
"value": "__UNDEFINED__"
}
],
"pinned": false,
@ -2713,6 +2741,25 @@
"type": "file",
"value": ""
},
"separator": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Separator",
"dynamic": false,
"info": "Specify the separator to use between multiple outputs in Message format.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "separator",
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"type": "str",
"value": "\n\n"
},
"silent_errors": {
"_input_type": "BoolInput",
"advanced": true,