feat: Dynamic outputs for file component (#7005)
* feat: Dynamic outputs for file component
* [autofix.ci] apply automated fixes
* Update base_file.py
* [autofix.ci] apply automated fixes
* [autofix.ci] apply automated fixes (attempt 2/3)
* Update base_file.py
* [autofix.ci] apply automated fixes
* feat: enhance BaseFileComponent with new data loading methods and separator input
  - Added a `separator` input that specifies the separator placed between concatenated outputs in Message format.
  - Refactored output methods: renamed and updated `load_csv`, `load_json`, and `load_others` to `load_data`, `load_dataframe`, and `load_message`, respectively.
  - Updated the starter-project JSON configuration files to reflect the changed output types and methods.
* Fix file loader

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Ítalo Johnny <italojohnnydosanjos@gmail.com>
Co-authored-by: Rodrigo <rodrigosilvanader@gmail.com>
This commit is contained in:
parent
72f2528216
commit
37bc0467b6
4 changed files with 233 additions and 3 deletions
|
|
@ -5,9 +5,12 @@ from pathlib import Path
|
|||
from tempfile import TemporaryDirectory
|
||||
from zipfile import ZipFile, is_zipfile
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from langflow.custom import Component
|
||||
from langflow.io import BoolInput, FileInput, HandleInput, Output
|
||||
from langflow.io import BoolInput, FileInput, HandleInput, Output, StrInput
|
||||
from langflow.schema import Data
|
||||
from langflow.schema.dataframe import DataFrame
|
||||
from langflow.schema.message import Message
|
||||
|
||||
|
||||
|
|
@ -132,6 +135,14 @@ class BaseFileComponent(Component, ABC):
|
|||
is_list=True,
|
||||
advanced=True,
|
||||
),
|
||||
StrInput(
|
||||
name="separator",
|
||||
display_name="Separator",
|
||||
value="\n\n",
|
||||
show=True,
|
||||
info="Specify the separator to use between multiple outputs in Message format.",
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="silent_errors",
|
||||
display_name="Silent Errors",
|
||||
|
|
@ -161,7 +172,11 @@ class BaseFileComponent(Component, ABC):
|
|||
),
|
||||
]
|
||||
|
||||
_base_outputs = [Output(display_name="Data", name="data", method="load_files")]
|
||||
_base_outputs = [
|
||||
Output(display_name="Data", name="data", method="load_files"),
|
||||
Output(display_name="DataFrame", name="dataframe", method="load_dataframe"),
|
||||
Output(display_name="Message", name="message", method="load_message"),
|
||||
]
|
||||
|
||||
@abstractmethod
|
||||
def process_files(self, file_list: list[BaseFile]) -> list[BaseFile]:
|
||||
|
|
@ -174,7 +189,7 @@ class BaseFileComponent(Component, ABC):
|
|||
list[BaseFile]: A list of BaseFile objects with updated `data`.
|
||||
"""
|
||||
|
||||
def load_files(self) -> list[Data]:
|
||||
def load_files_base(self) -> list[Data]:
|
||||
"""Loads and parses file(s), including unpacked file bundles.
|
||||
|
||||
Returns:
|
||||
|
|
@ -211,6 +226,80 @@ class BaseFileComponent(Component, ABC):
|
|||
else:
|
||||
file.path.unlink()
|
||||
|
||||
def load_files(self) -> list[Data]:
    """Return the loaded files as Data objects, never as an empty list.

    Returns:
        list[Data]: Whatever `load_files_base()` produced, or a list holding a
            single empty `Data()` placeholder when that result is empty.
    """
    # A falsy result (empty list) is swapped for a one-element placeholder list.
    return self.load_files_base() or [Data()]
|
||||
|
||||
def load_dataframe(self) -> DataFrame:
    """Load files and return their contents as a single DataFrame.

    A file whose path (the value stored under `SERVER_FILE_PATH_FIELDNAME` in the
    Data payload) ends in ``.csv`` is re-read with pandas so that each CSV record
    becomes its own row. Every other file contributes exactly one row built from
    its Data payload. CSV rows are placed before non-CSV rows in the result.

    Returns:
        DataFrame: All rows combined, or an empty DataFrame when no files were loaded.

    Raises:
        Exception: Re-raises whatever `pd.read_csv` raised when `silent_errors`
            is falsy; otherwise the failing CSV is logged and skipped.
    """
    # Read from the raw loader: `load_files()` substitutes `[Data()]` for an empty
    # result, which made this guard unreachable and built a frame from a
    # placeholder row instead of returning an empty DataFrame.
    data_list = self.load_files_base()
    if not data_list:
        return DataFrame()

    csv_rows = []
    other_rows = []

    for data in data_list:
        file_path = data.data.get(self.SERVER_FILE_PATH_FIELDNAME)

        if file_path and str(file_path).lower().endswith(".csv"):
            # CSV files are expanded into one row per record.
            try:
                csv_rows.extend(pd.read_csv(file_path).to_dict("records"))
            except Exception as e:
                self.log(f"Error processing CSV file {file_path}: {e}")
                if not self.silent_errors:
                    raise
            continue

        # Non-CSV files: one row per file, carrying the payload plus text and path.
        row = dict(data.data) if data.data else {}
        if data.text:
            row["text"] = data.text
        if file_path:
            row["file_path"] = file_path
        other_rows.append(row)

    # CSV rows first, then the per-file rows.
    return DataFrame(csv_rows + other_rows)
|
||||
|
||||
def load_message(self) -> Message:
    """Load files and return a Message with their text concatenated.

    The text of every loaded Data object is joined with `self.separator`; an
    object with no text contributes an empty string, so the separator still
    appears around it. The payload dicts of all objects are merged into one
    dict, with later files overwriting keys set by earlier ones.

    Returns:
        Message: The concatenated text plus the merged payload, or a Message with
            empty text when no files were loaded.
    """
    # Read from the raw loader: `load_files()` substitutes `[Data()]` for an empty
    # result, which made this guard unreachable.
    data_list = self.load_files_base()
    if not data_list:
        return Message(text="")

    # Concatenate all text content, keeping a slot for files without text.
    final_text = self.separator.join(data.text or "" for data in data_list)

    # Merge metadata; on key collisions the last file wins.
    all_data = {}
    for data in data_list:
        if data.data:
            all_data.update(data.data)

    return Message(text=final_text, data=all_data)
|
||||
|
||||
@property
|
||||
def valid_extensions(self) -> list[str]:
|
||||
"""Returns valid file extensions for the class.
|
||||
|
|
|
|||
|
|
@ -993,6 +993,34 @@
|
|||
"Data"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
},
|
||||
{
|
||||
"allows_loop": false,
|
||||
"cache": true,
|
||||
"display_name": "DataFrame",
|
||||
"method": "load_dataframe",
|
||||
"name": "dataframe",
|
||||
"required_inputs": [],
|
||||
"selected": "DataFrame",
|
||||
"tool_mode": true,
|
||||
"types": [
|
||||
"DataFrame"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
},
|
||||
{
|
||||
"allows_loop": false,
|
||||
"cache": true,
|
||||
"display_name": "Message",
|
||||
"method": "load_message",
|
||||
"name": "message",
|
||||
"required_inputs": [],
|
||||
"selected": "Message",
|
||||
"tool_mode": true,
|
||||
"types": [
|
||||
"Message"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
}
|
||||
],
|
||||
"pinned": false,
|
||||
|
|
@ -1142,6 +1170,25 @@
|
|||
"type": "file",
|
||||
"value": ""
|
||||
},
|
||||
"separator": {
|
||||
"_input_type": "StrInput",
|
||||
"advanced": true,
|
||||
"display_name": "Separator",
|
||||
"dynamic": false,
|
||||
"info": "Specify the separator to use between multiple outputs in Message format.",
|
||||
"list": false,
|
||||
"list_add_label": "Add More",
|
||||
"load_from_db": false,
|
||||
"name": "separator",
|
||||
"placeholder": "",
|
||||
"required": false,
|
||||
"show": true,
|
||||
"title_case": false,
|
||||
"tool_mode": false,
|
||||
"trace_as_metadata": true,
|
||||
"type": "str",
|
||||
"value": "\n\n"
|
||||
},
|
||||
"silent_errors": {
|
||||
"_input_type": "BoolInput",
|
||||
"advanced": true,
|
||||
|
|
|
|||
|
|
@ -2149,6 +2149,34 @@
|
|||
"Data"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
},
|
||||
{
|
||||
"allows_loop": false,
|
||||
"cache": true,
|
||||
"display_name": "DataFrame",
|
||||
"method": "load_dataframe",
|
||||
"name": "dataframe",
|
||||
"required_inputs": [],
|
||||
"selected": "DataFrame",
|
||||
"tool_mode": true,
|
||||
"types": [
|
||||
"DataFrame"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
},
|
||||
{
|
||||
"allows_loop": false,
|
||||
"cache": true,
|
||||
"display_name": "Message",
|
||||
"method": "load_message",
|
||||
"name": "message",
|
||||
"required_inputs": [],
|
||||
"selected": "Message",
|
||||
"tool_mode": true,
|
||||
"types": [
|
||||
"Message"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
}
|
||||
],
|
||||
"pinned": false,
|
||||
|
|
@ -2309,6 +2337,25 @@
|
|||
"type": "file",
|
||||
"value": ""
|
||||
},
|
||||
"separator": {
|
||||
"_input_type": "StrInput",
|
||||
"advanced": true,
|
||||
"display_name": "Separator",
|
||||
"dynamic": false,
|
||||
"info": "Specify the separator to use between multiple outputs in Message format.",
|
||||
"list": false,
|
||||
"list_add_label": "Add More",
|
||||
"load_from_db": false,
|
||||
"name": "separator",
|
||||
"placeholder": "",
|
||||
"required": false,
|
||||
"show": true,
|
||||
"title_case": false,
|
||||
"tool_mode": false,
|
||||
"trace_as_metadata": true,
|
||||
"type": "str",
|
||||
"value": "\n\n"
|
||||
},
|
||||
"silent_errors": {
|
||||
"_input_type": "BoolInput",
|
||||
"advanced": true,
|
||||
|
|
|
|||
|
|
@ -2564,6 +2564,34 @@
|
|||
"Data"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
},
|
||||
{
|
||||
"allows_loop": false,
|
||||
"cache": true,
|
||||
"display_name": "DataFrame",
|
||||
"method": "load_dataframe",
|
||||
"name": "dataframe",
|
||||
"required_inputs": [],
|
||||
"selected": "DataFrame",
|
||||
"tool_mode": true,
|
||||
"types": [
|
||||
"DataFrame"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
},
|
||||
{
|
||||
"allows_loop": false,
|
||||
"cache": true,
|
||||
"display_name": "Message",
|
||||
"method": "load_message",
|
||||
"name": "message",
|
||||
"required_inputs": [],
|
||||
"selected": "Message",
|
||||
"tool_mode": true,
|
||||
"types": [
|
||||
"Message"
|
||||
],
|
||||
"value": "__UNDEFINED__"
|
||||
}
|
||||
],
|
||||
"pinned": false,
|
||||
|
|
@ -2713,6 +2741,25 @@
|
|||
"type": "file",
|
||||
"value": ""
|
||||
},
|
||||
"separator": {
|
||||
"_input_type": "StrInput",
|
||||
"advanced": true,
|
||||
"display_name": "Separator",
|
||||
"dynamic": false,
|
||||
"info": "Specify the separator to use between multiple outputs in Message format.",
|
||||
"list": false,
|
||||
"list_add_label": "Add More",
|
||||
"load_from_db": false,
|
||||
"name": "separator",
|
||||
"placeholder": "",
|
||||
"required": false,
|
||||
"show": true,
|
||||
"title_case": false,
|
||||
"tool_mode": false,
|
||||
"trace_as_metadata": true,
|
||||
"type": "str",
|
||||
"value": "\n\n"
|
||||
},
|
||||
"silent_errors": {
|
||||
"_input_type": "BoolInput",
|
||||
"advanced": true,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue