# NOTE: this patch adds two new files. They are reproduced below as plain,
# conventionally formatted Python, one after the other; the banner comment
# above each section names the file it belongs to.

# ---------------------------------------------------------------------------
# File: src/backend/base/langflow/components/processing/save_to_file.py
# ---------------------------------------------------------------------------
import json
from collections.abc import AsyncIterator, Iterator
from pathlib import Path

import pandas as pd

from langflow.custom import Component
from langflow.io import (
    DataFrameInput,
    DataInput,
    DropdownInput,
    MessageInput,
    Output,
    StrInput,
)
from langflow.schema import Data, DataFrame, Message


class SaveToFileComponent(Component):
    """Write a DataFrame, Data object, or Message to a file on disk.

    The ``input_type`` dropdown selects which of the three inputs is used and
    which file formats are offered; ``save_to_file`` performs the write and
    returns a human-readable confirmation string.
    """

    display_name = "Save to File"
    description = "Save DataFrames, Data, or Messages to various file formats."
    icon = "save"
    name = "SaveToFile"

    # File format options for different types
    DATA_FORMAT_CHOICES = ["csv", "excel", "json", "markdown"]
    MESSAGE_FORMAT_CHOICES = ["txt", "json", "markdown"]

    inputs = [
        DropdownInput(
            name="input_type",
            display_name="Input Type",
            options=["DataFrame", "Data", "Message"],
            info="Select the type of input to save.",
            value="DataFrame",
            real_time_refresh=True,
        ),
        DataFrameInput(
            name="df",
            display_name="DataFrame",
            info="The DataFrame to save.",
            dynamic=True,
            show=True,
        ),
        DataInput(
            name="data",
            display_name="Data",
            info="The Data object to save.",
            dynamic=True,
            show=False,
        ),
        MessageInput(
            name="message",
            display_name="Message",
            info="The Message to save.",
            dynamic=True,
            show=False,
        ),
        DropdownInput(
            name="file_format",
            display_name="File Format",
            options=DATA_FORMAT_CHOICES,
            info="Select the file format to save the input.",
            real_time_refresh=True,
        ),
        StrInput(
            name="file_path",
            display_name="File Path (including filename)",
            info="The full file path (including filename and extension).",
            value="./output",
        ),
    ]

    outputs = [
        Output(
            name="confirmation",
            display_name="Confirmation",
            method="save_to_file",
            info="Confirmation message after saving the file.",
        ),
    ]

    def update_build_config(self, build_config, field_value, field_name=None):
        """Show the input matching the selected type and refresh format options.

        Only reacts when ``field_name`` is ``"input_type"``; any other field
        leaves ``build_config`` untouched.

        Args:
            build_config: Mapping of field name to that field's config dict.
            field_value: The new value of the field that changed.
            field_name: Name of the field that changed.

        Returns:
            The same ``build_config`` object, updated in place.
        """
        if field_name != "input_type":
            return build_config

        # Exactly one of the three dynamic inputs is visible at a time.
        build_config["df"]["show"] = field_value == "DataFrame"
        build_config["data"]["show"] = field_value == "Data"
        build_config["message"]["show"] = field_value == "Message"

        if field_value in ("DataFrame", "Data"):
            options = self.DATA_FORMAT_CHOICES
        elif field_value == "Message":
            options = self.MESSAGE_FORMAT_CHOICES
        else:
            # Unknown input type: leave the format dropdown as it is.
            return build_config

        format_config = build_config["file_format"]
        # Copy so the build config never aliases the class-level constant.
        format_config["options"] = list(options)

        # A format chosen for the previous input type may not exist for the
        # new one (e.g. "csv" for a Message); fall back to the first valid
        # option instead of failing later in save_to_file().
        selected = format_config.get("value")
        if selected and selected not in options:
            format_config["value"] = options[0]

        return build_config

    def save_to_file(self) -> str:
        """Save the selected input to ``file_path`` in the selected format.

        Returns:
            A confirmation message naming the written path.

        Raises:
            ValueError: If ``input_type`` is not one of "DataFrame", "Data"
                or "Message", or if the chosen format is not supported for
                that input type.
        """
        input_type = self.input_type
        file_format = self.file_format

        # Validate before touching the filesystem so a bad input type does
        # not leave an empty directory behind.
        if input_type not in ("DataFrame", "Data", "Message"):
            error_msg = f"Unsupported input type: {input_type}"
            raise ValueError(error_msg)

        file_path = Path(self.file_path).expanduser()

        # Ensure the directory exists; exist_ok makes a prior exists() check
        # unnecessary.
        file_path.parent.mkdir(parents=True, exist_ok=True)

        if input_type == "DataFrame":
            return self._save_dataframe(self.df, file_path, file_format)
        if input_type == "Data":
            return self._save_data(self.data, file_path, file_format)
        return self._save_message(self.message, file_path, file_format)

    def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:
        """Write ``dataframe`` to ``path`` as csv, excel, json or markdown.

        Raises:
            ValueError: If ``fmt`` is not one of the supported formats.
        """
        if fmt == "csv":
            dataframe.to_csv(path, index=False)
        elif fmt == "excel":
            dataframe.to_excel(path, index=False, engine="openpyxl")
        elif fmt == "json":
            dataframe.to_json(path, orient="records", indent=2)
        elif fmt == "markdown":
            path.write_text(dataframe.to_markdown(index=False), encoding="utf-8")
        else:
            error_msg = f"Unsupported DataFrame format: {fmt}"
            raise ValueError(error_msg)

        return f"DataFrame saved successfully as '{path}'"

    @staticmethod
    def _data_to_frame(data: Data) -> pd.DataFrame:
        """Build a pandas DataFrame from the payload of ``data``.

        ``pd.DataFrame`` rejects a dict whose values are all scalars, so such
        a payload is treated as a single record (one row).
        """
        payload = data.data
        try:
            return pd.DataFrame(payload)
        except ValueError:
            return pd.DataFrame([payload])

    def _save_data(self, data: Data, path: Path, fmt: str) -> str:
        """Write the payload of ``data`` to ``path``.

        JSON output serialises the payload directly; the tabular formats
        (csv, excel, markdown) first convert it to a DataFrame.

        Raises:
            ValueError: If ``fmt`` is not one of the supported formats.
        """
        if fmt == "csv":
            self._data_to_frame(data).to_csv(path, index=False)
        elif fmt == "excel":
            self._data_to_frame(data).to_excel(path, index=False, engine="openpyxl")
        elif fmt == "json":
            path.write_text(json.dumps(data.data, indent=2), encoding="utf-8")
        elif fmt == "markdown":
            path.write_text(self._data_to_frame(data).to_markdown(index=False), encoding="utf-8")
        else:
            error_msg = f"Unsupported Data format: {fmt}"
            raise ValueError(error_msg)

        return f"Data saved successfully as '{path}'"

    def _save_message(self, message: Message, path: Path, fmt: str) -> str:
        """Write the text of ``message`` to ``path`` as txt, json or markdown.

        A ``None`` text is saved as an empty string. A synchronous iterator
        is consumed and its items joined into one string.

        Raises:
            ValueError: If the text is an ``AsyncIterator`` or ``fmt`` is not
                one of the supported formats.
        """
        text = message.text
        if text is None:
            content = ""
        elif isinstance(text, AsyncIterator):
            # An async stream cannot be consumed from this synchronous method.
            error_msg = "AsyncIterator not supported"
            raise ValueError(error_msg)
        elif isinstance(text, Iterator):
            # NOTE(review): items are joined with a single space; if the
            # iterator yields partial tokens this may insert unwanted
            # spaces - confirm against the producers of streamed messages.
            content = " ".join(str(item) for item in text)
        else:
            content = str(text)

        if fmt == "txt":
            path.write_text(content, encoding="utf-8")
        elif fmt == "json":
            path.write_text(json.dumps({"message": content}, indent=2), encoding="utf-8")
        elif fmt == "markdown":
            path.write_text(f"**Message:**\n\n{content}", encoding="utf-8")
        else:
            error_msg = f"Unsupported Message format: {fmt}"
            raise ValueError(error_msg)

        return f"Message saved successfully as '{path}'"


# ---------------------------------------------------------------------------
# File: src/backend/tests/unit/components/processing/test_save_to_file_component.py
# ---------------------------------------------------------------------------
import json
from pathlib import Path
from unittest.mock import MagicMock, patch

import pandas as pd
import pytest
from langflow.components.processing.save_to_file import SaveToFileComponent
from langflow.schema import Data, Message

from tests.base import ComponentTestBaseWithoutClient

# Patch Path where the component module looks it up, not in pathlib itself.
PATH_TARGET = "langflow.components.processing.save_to_file.Path"


def _make_mock_path(*, parent_exists: bool = True) -> MagicMock:
    """Return a MagicMock standing in for the Path built by the component."""
    mock_file = MagicMock()
    mock_parent = MagicMock()
    mock_parent.exists.return_value = parent_exists
    mock_file.parent = mock_parent
    mock_file.expanduser.return_value = mock_file
    return mock_file


class TestSaveToFileComponent(ComponentTestBaseWithoutClient):
    @pytest.fixture(autouse=True)
    def setup_and_teardown(self):
        """Remove any output files a test may have written."""
        test_files = [
            "./test_output.csv",
            "./test_output.xlsx",
            "./test_output.json",
            "./test_output.md",
            "./test_output.txt",
        ]
        yield
        # Teardown: delete test files after each test.
        for file_path in test_files:
            Path(file_path).unlink(missing_ok=True)

    @pytest.fixture
    def component_class(self):
        """Return the component class to test."""
        return SaveToFileComponent

    @pytest.fixture
    def default_kwargs(self):
        """Return the default kwargs for the component."""
        sample_df = pd.DataFrame([{"col1": 1, "col2": "a"}, {"col1": 2, "col2": "b"}])
        return {"input_type": "DataFrame", "df": sample_df, "file_format": "csv", "file_path": "./test_output.csv"}

    @pytest.fixture
    def file_names_mapping(self):
        """Return the file names mapping for different versions."""
        return []  # New component

    def test_basic_setup(self, component_class, default_kwargs):
        """Test basic component initialization."""
        component = component_class()
        component.set_attributes(default_kwargs)
        assert component.input_type == "DataFrame"
        assert component.file_format == "csv"
        assert component.file_path == "./test_output.csv"

    def test_update_build_config_dataframe(self, component_class):
        """Test build config update for DataFrame input type."""
        component = component_class()
        build_config = {
            "df": {"show": False},
            "data": {"show": False},
            "message": {"show": False},
            "file_format": {"options": []},
        }

        updated_config = component.update_build_config(build_config, "DataFrame", "input_type")

        assert updated_config["df"]["show"] is True
        assert updated_config["data"]["show"] is False
        assert updated_config["message"]["show"] is False
        assert set(updated_config["file_format"]["options"]) == set(component.DATA_FORMAT_CHOICES)

    def test_update_build_config_message_resets_stale_format(self, component_class):
        """Switching to Message replaces a format that Messages do not support."""
        component = component_class()
        build_config = {
            "df": {"show": True},
            "data": {"show": False},
            "message": {"show": False},
            "file_format": {"options": list(component.DATA_FORMAT_CHOICES), "value": "csv"},
        }

        updated_config = component.update_build_config(build_config, "Message", "input_type")

        assert updated_config["df"]["show"] is False
        assert updated_config["message"]["show"] is True
        assert updated_config["file_format"]["options"] == component.MESSAGE_FORMAT_CHOICES
        assert updated_config["file_format"]["value"] == "txt"

    @pytest.mark.parametrize(
        ("fmt", "expected_content"),
        [
            ("txt", "Test message"),
            ("json", json.dumps({"message": "Test message"}, indent=2)),
            ("markdown", "**Message:**\n\nTest message"),
        ],
    )
    def test_save_message(self, component_class, fmt, expected_content):
        """Test saving Message to different formats."""
        mock_file = _make_mock_path()

        with patch(PATH_TARGET) as mock_path:
            mock_path.return_value = mock_file

            component = component_class()
            component.set_attributes(
                {
                    "input_type": "Message",
                    "message": Message(text="Test message"),
                    "file_format": fmt,
                    "file_path": f"./test_output.{fmt}",
                }
            )

            result = component.save_to_file()

        mock_file.write_text.assert_called_once_with(expected_content, encoding="utf-8")
        assert "saved successfully" in result

    def test_save_data(self, component_class):
        """Test saving Data object to JSON."""
        test_data = {"col1": ["value1"], "col2": ["value2"]}
        mock_file = _make_mock_path()

        with patch(PATH_TARGET) as mock_path:
            mock_path.return_value = mock_file

            component = component_class()
            component.set_attributes(
                {
                    "input_type": "Data",
                    "data": Data(data=test_data),
                    "file_format": "json",
                    "file_path": "./test_output.json",
                }
            )

            result = component.save_to_file()

        expected_json = json.dumps(test_data, indent=2)
        mock_file.write_text.assert_called_once_with(expected_json, encoding="utf-8")
        assert "saved successfully" in result

    def test_save_data_scalar_values_to_csv(self, component_class):
        """A Data payload made only of scalar values is saved as a single row."""
        mock_file = _make_mock_path()

        with patch(PATH_TARGET) as mock_path, patch.object(pd.DataFrame, "to_csv") as mock_to_csv:
            mock_path.return_value = mock_file

            component = component_class()
            component.set_attributes(
                {
                    "input_type": "Data",
                    "data": Data(data={"name": "a", "count": 1}),
                    "file_format": "csv",
                    "file_path": "./test_output.csv",
                }
            )

            result = component.save_to_file()

        mock_to_csv.assert_called_once()
        assert "saved successfully" in result

    def test_directory_creation(self, component_class, default_kwargs):
        """Test directory creation if it doesn't exist."""
        mock_file = _make_mock_path(parent_exists=False)

        with patch(PATH_TARGET) as mock_path, patch.object(pd.DataFrame, "to_csv") as mock_to_csv:
            mock_path.return_value = mock_file

            component = component_class()
            component.set_attributes(default_kwargs)

            result = component.save_to_file()

        mock_file.parent.mkdir.assert_called_once_with(parents=True, exist_ok=True)
        assert mock_to_csv.called
        assert "saved successfully" in result

    def test_invalid_input_type(self, default_kwargs):
        """Test handling of invalid input type."""
        component = SaveToFileComponent()
        invalid_kwargs = default_kwargs.copy()  # Create a copy to modify
        invalid_kwargs["input_type"] = "InvalidType"
        component.set_attributes(invalid_kwargs)

        with pytest.raises(ValueError, match="Unsupported input type"):
            component.save_to_file()