feat: add SaveToFile component for DataFrame, Data and Message exports (#6114)

* ✨ (save_to_file.py): Add a new component 'SaveToFileComponent' to save DataFrames, Data, or Messages to various file formats. This component allows users to select the input type, file format, and file path for saving the data.

* [autofix.ci] apply automated fixes

* 🔧 (save_to_file.py): refactor variable names for better readability and consistency
🐛 (save_to_file.py): handle unsupported input types and formats by raising ValueErrors with informative error messages

* [autofix.ci] apply automated fixes

* ✅ (test_save_to_file_component.py): Add unit tests for the SaveToFileComponent to ensure proper saving of data to various file formats and handling of different input types.

* [autofix.ci] apply automated fixes

* 📝 (save_to_file.py): Add support for handling different types of message text in the SaveToFileComponent class to ensure proper saving to file
🔧 (test_save_to_file_component.py): Refactor test cases in the SaveToFileComponent test file for better readability and maintainability

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Cristhian Zanforlin Lousa 2025-02-17 09:35:57 -03:00 committed by GitHub
commit 8c74ead3e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 337 additions and 0 deletions

View file

@ -0,0 +1,172 @@
import json
from collections.abc import AsyncIterator, Iterator
from pathlib import Path
import pandas as pd
from langflow.custom import Component
from langflow.io import (
DataFrameInput,
DataInput,
DropdownInput,
MessageInput,
Output,
StrInput,
)
from langflow.schema import Data, DataFrame, Message
class SaveToFileComponent(Component):
    """Component that writes a DataFrame, Data, or Message input to a file.

    The ``input_type`` dropdown selects which of the three inputs is read and
    ``file_format`` selects the serialization. DataFrame and Data inputs offer
    ``DATA_FORMAT_CHOICES``; Message inputs offer ``MESSAGE_FORMAT_CHOICES``.
    The single output returns a confirmation string naming the written path.
    """

    display_name = "Save to File"
    description = "Save DataFrames, Data, or Messages to various file formats."
    icon = "save"
    name = "SaveToFile"

    # File format options for different types
    DATA_FORMAT_CHOICES = ["csv", "excel", "json", "markdown"]
    MESSAGE_FORMAT_CHOICES = ["txt", "json", "markdown"]

    inputs = [
        DropdownInput(
            name="input_type",
            display_name="Input Type",
            options=["DataFrame", "Data", "Message"],
            info="Select the type of input to save.",
            value="DataFrame",
            real_time_refresh=True,
        ),
        DataFrameInput(
            name="df",
            display_name="DataFrame",
            info="The DataFrame to save.",
            dynamic=True,
            show=True,
        ),
        DataInput(
            name="data",
            display_name="Data",
            info="The Data object to save.",
            dynamic=True,
            show=False,
        ),
        MessageInput(
            name="message",
            display_name="Message",
            info="The Message to save.",
            dynamic=True,
            show=False,
        ),
        DropdownInput(
            name="file_format",
            display_name="File Format",
            options=DATA_FORMAT_CHOICES,
            info="Select the file format to save the input.",
            real_time_refresh=True,
        ),
        StrInput(
            name="file_path",
            display_name="File Path (including filename)",
            info="The full file path (including filename and extension).",
            value="./output",
        ),
    ]

    outputs = [
        Output(
            name="confirmation",
            display_name="Confirmation",
            method="save_to_file",
            info="Confirmation message after saving the file.",
        ),
    ]

    def update_build_config(self, build_config, field_value, field_name=None):
        """Show the input matching the selected input type and refresh the format list.

        Only changes to ``input_type`` are acted on; any other field leaves
        ``build_config`` untouched.

        Args:
            build_config: Mapping of field name to that field's config mapping.
            field_value: The new value of the field that changed.
            field_name: Name of the field that changed.

        Returns:
            The same ``build_config`` object, updated in place.
        """
        if field_name != "input_type":
            return build_config

        # Hide/show dynamic fields based on the selected input type
        build_config["df"]["show"] = field_value == "DataFrame"
        build_config["data"]["show"] = field_value == "Data"
        build_config["message"]["show"] = field_value == "Message"

        if field_value in ("DataFrame", "Data"):
            choices = self.DATA_FORMAT_CHOICES
        elif field_value == "Message":
            choices = self.MESSAGE_FORMAT_CHOICES
        else:
            return build_config

        format_field = build_config["file_format"]
        # Hand out a copy so later edits to the config cannot mutate the class-level list.
        format_field["options"] = list(choices)

        # A format chosen for the previous input type (e.g. "csv") may not be
        # offered for the new one; replace it so the save does not fail later
        # with an "Unsupported ... format" error.
        selected = format_field.get("value")
        if selected and selected not in choices:
            format_field["value"] = choices[0]
        return build_config

    def save_to_file(self) -> str:
        """Write the selected input to ``file_path`` in the selected format.

        The input type is validated before the filesystem is touched, so an
        unsupported type never creates directories as a side effect.

        Returns:
            A confirmation message containing the path that was written.

        Raises:
            ValueError: If ``input_type`` is not one of the supported types, or
                the chosen format is not supported for that type.
        """
        # input type -> (attribute holding the payload, method that writes it)
        handlers = {
            "DataFrame": ("df", self._save_dataframe),
            "Data": ("data", self._save_data),
            "Message": ("message", self._save_message),
        }
        input_type = self.input_type
        if input_type not in handlers:
            error_msg = f"Unsupported input type: {input_type}"
            raise ValueError(error_msg)
        attr_name, saver = handlers[input_type]

        file_path = Path(self.file_path).expanduser()
        # Ensure the directory exists; exist_ok makes a separate exists() check unnecessary.
        file_path.parent.mkdir(parents=True, exist_ok=True)

        # Only the attribute for the selected input type is read.
        return saver(getattr(self, attr_name), file_path, self.file_format)

    def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:
        """Write ``dataframe`` to ``path`` as csv, excel, json, or markdown.

        Raises:
            ValueError: If ``fmt`` is not one of the supported formats.
        """
        if fmt == "csv":
            dataframe.to_csv(path, index=False)
        elif fmt == "excel":
            dataframe.to_excel(path, index=False, engine="openpyxl")
        elif fmt == "json":
            dataframe.to_json(path, orient="records", indent=2)
        elif fmt == "markdown":
            path.write_text(dataframe.to_markdown(index=False), encoding="utf-8")
        else:
            error_msg = f"Unsupported DataFrame format: {fmt}"
            raise ValueError(error_msg)
        return f"DataFrame saved successfully as '{path}'"

    @staticmethod
    def _data_to_frame(data: Data) -> pd.DataFrame:
        """Build a pandas DataFrame from the payload of a Data object.

        ``pd.DataFrame`` rejects a dict made up only of scalar values (for
        example ``{"text": "hi"}``) with "If using all scalar values, you must
        pass an index". Such a payload is wrapped in a list so it becomes a
        single-row frame; every other payload is passed to pandas unchanged.
        """
        payload = data.data
        if (
            isinstance(payload, dict)
            and payload
            and not any(pd.api.types.is_list_like(value) for value in payload.values())
        ):
            return pd.DataFrame([payload])
        return pd.DataFrame(payload)

    def _save_data(self, data: Data, path: Path, fmt: str) -> str:
        """Write the payload of ``data`` to ``path`` as csv, excel, json, or markdown.

        The json format dumps ``data.data`` directly; the tabular formats first
        convert it to a pandas DataFrame.

        Raises:
            ValueError: If ``fmt`` is not one of the supported formats.
        """
        if fmt == "csv":
            self._data_to_frame(data).to_csv(path, index=False)
        elif fmt == "excel":
            self._data_to_frame(data).to_excel(path, index=False, engine="openpyxl")
        elif fmt == "json":
            path.write_text(json.dumps(data.data, indent=2), encoding="utf-8")
        elif fmt == "markdown":
            path.write_text(self._data_to_frame(data).to_markdown(index=False), encoding="utf-8")
        else:
            error_msg = f"Unsupported Data format: {fmt}"
            raise ValueError(error_msg)
        return f"Data saved successfully as '{path}'"

    def _save_message(self, message: Message, path: Path, fmt: str) -> str:
        """Write the text of ``message`` to ``path`` as txt, json, or markdown.

        Raises:
            ValueError: If the message text is an async iterator, or ``fmt`` is
                not one of the supported formats.
        """
        text = message.text
        if text is None:
            content = ""
        elif isinstance(text, AsyncIterator):
            # AsyncIterator needs to be handled differently
            error_msg = "AsyncIterator not supported"
            raise ValueError(error_msg)
        elif isinstance(text, Iterator):
            # Convert iterator to string.
            # NOTE(review): items are joined with a single space; confirm this
            # is the intended separator for iterator-backed message text.
            content = " ".join(str(item) for item in text)
        else:
            content = str(text)

        if fmt == "txt":
            path.write_text(content, encoding="utf-8")
        elif fmt == "json":
            path.write_text(json.dumps({"message": content}, indent=2), encoding="utf-8")
        elif fmt == "markdown":
            path.write_text(f"**Message:**\n\n{content}", encoding="utf-8")
        else:
            error_msg = f"Unsupported Message format: {fmt}"
            raise ValueError(error_msg)
        return f"Message saved successfully as '{path}'"

View file

@ -0,0 +1,165 @@
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pandas as pd
import pytest
from langflow.components.processing.save_to_file import SaveToFileComponent
from langflow.schema import Data, Message
from tests.base import ComponentTestBaseWithoutClient
class TestSaveToFileComponent(ComponentTestBaseWithoutClient):
    """Unit tests for SaveToFileComponent."""

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self):
        """Setup and teardown for each test."""
        # Setup
        test_files = [
            "./test_output.csv",
            "./test_output.xlsx",
            "./test_output.json",
            "./test_output.md",
            "./test_output.txt",
        ]
        # Teardown
        yield
        # Delete test files after each test; missing files are simply skipped.
        for file_path in test_files:
            Path(file_path).unlink(missing_ok=True)

    @pytest.fixture
    def component_class(self):
        """Return the component class to test."""
        return SaveToFileComponent

    @pytest.fixture
    def default_kwargs(self):
        """Return the default kwargs for the component."""
        sample_df = pd.DataFrame([{"col1": 1, "col2": "a"}, {"col1": 2, "col2": "b"}])
        return {"input_type": "DataFrame", "df": sample_df, "file_format": "csv", "file_path": "./test_output.csv"}

    @pytest.fixture
    def file_names_mapping(self):
        """Return the file names mapping for different versions."""
        return []  # New component

    @staticmethod
    def _make_mock_path(*, parent_exists: bool = True) -> MagicMock:
        """Return a mock standing in for the Path the component builds.

        ``expanduser()`` returns the mock itself so the component keeps working
        with the same object, and ``parent.exists()`` reports ``parent_exists``.
        """
        mock_file = MagicMock()
        mock_file.parent.exists.return_value = parent_exists
        mock_file.expanduser.return_value = mock_file
        return mock_file

    def test_basic_setup(self, component_class, default_kwargs):
        """Test basic component initialization."""
        component = component_class()
        component.set_attributes(default_kwargs)
        assert component.input_type == "DataFrame"
        assert component.file_format == "csv"
        assert component.file_path == "./test_output.csv"

    def test_update_build_config_dataframe(self, component_class):
        """Test build config update for DataFrame input type."""
        component = component_class()
        build_config = {
            "df": {"show": False},
            "data": {"show": False},
            "message": {"show": False},
            "file_format": {"options": []},
        }
        updated_config = component.update_build_config(build_config, "DataFrame", "input_type")
        assert updated_config["df"]["show"] is True
        assert updated_config["data"]["show"] is False
        assert updated_config["message"]["show"] is False
        assert set(updated_config["file_format"]["options"]) == set(component.DATA_FORMAT_CHOICES)

    # One test case per format, so a failure in one format does not hide the others.
    @pytest.mark.parametrize(
        ("fmt", "expected_content"),
        [
            ("txt", "Test message"),
            ("json", json.dumps({"message": "Test message"}, indent=2)),
            ("markdown", "**Message:**\n\nTest message"),
        ],
    )
    def test_save_message(self, component_class, fmt, expected_content):
        """Test saving Message to different formats."""
        mock_file = self._make_mock_path()
        # Mock Path at the module level where it's imported
        with patch("langflow.components.processing.save_to_file.Path") as mock_path:
            mock_path.return_value = mock_file
            component = component_class()
            component.set_attributes(
                {
                    "input_type": "Message",
                    "message": Message(text="Test message"),
                    "file_format": fmt,
                    "file_path": f"./test_output.{fmt}",
                }
            )
            result = component.save_to_file()
        mock_file.write_text.assert_called_once_with(expected_content, encoding="utf-8")
        assert "saved successfully" in result

    def test_save_data(self, component_class):
        """Test saving Data object to JSON."""
        test_data = {"col1": ["value1"], "col2": ["value2"]}
        mock_file = self._make_mock_path()
        with patch("langflow.components.processing.save_to_file.Path") as mock_path:
            mock_path.return_value = mock_file
            component = component_class()
            component.set_attributes(
                {
                    "input_type": "Data",
                    "data": Data(data=test_data),
                    "file_format": "json",
                    "file_path": "./test_output.json",
                }
            )
            result = component.save_to_file()
        expected_json = json.dumps(test_data, indent=2)
        mock_file.write_text.assert_called_once_with(expected_json, encoding="utf-8")
        assert "saved successfully" in result

    def test_directory_creation(self, component_class, default_kwargs):
        """Test directory creation if it doesn't exist."""
        mock_file = self._make_mock_path(parent_exists=False)
        with (
            patch("langflow.components.processing.save_to_file.Path") as mock_path,
            patch.object(pd.DataFrame, "to_csv") as mock_to_csv,
        ):
            mock_path.return_value = mock_file
            component = component_class()
            component.set_attributes(default_kwargs)
            result = component.save_to_file()
        mock_file.parent.mkdir.assert_called_once_with(parents=True, exist_ok=True)
        assert mock_to_csv.called
        assert "saved successfully" in result

    def test_invalid_input_type(self, component_class, default_kwargs):
        """Test handling of invalid input type."""
        component = component_class()
        invalid_kwargs = default_kwargs.copy()  # Create a copy to modify
        invalid_kwargs["input_type"] = "InvalidType"
        component.set_attributes(invalid_kwargs)
        with pytest.raises(ValueError, match="Unsupported input type"):
            component.save_to_file()