From 8c74ead3e8ec15e2672d1a096e64e00250ab8b67 Mon Sep 17 00:00:00 2001 From: Cristhian Zanforlin Lousa Date: Mon, 17 Feb 2025 09:35:57 -0300 Subject: [PATCH] feat: add SaveToFile component for DataFrame, Data and Message exports (#6114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ (save_to_file.py): Add a new component 'SaveToFileComponent' to save DataFrames, Data, or Messages to various file formats. This component allows users to select the input type, file format, and file path for saving the data. * [autofix.ci] apply automated fixes * 🔧 (save_to_file.py): refactor variable names for better readability and consistency 🐛 (save_to_file.py): handle unsupported input types and formats by raising ValueErrors with informative error messages * [autofix.ci] apply automated fixes * ✨ (test_save_to_file_component.py): Add unit tests for the SaveToFileComponent to ensure proper saving of data to various file formats and handling of different input types. 
class SaveToFileComponent(Component):
    """Write a DataFrame, Data, or Message input to a file on disk.

    ``input_type`` selects which of the three inputs is read, ``file_format``
    selects the serialization, and ``file_path`` is the destination. The path
    is used exactly as given (after ``~`` expansion); no extension is added.
    """

    display_name = "Save to File"
    description = "Save DataFrames, Data, or Messages to various file formats."
    icon = "save"
    name = "SaveToFile"

    # File format options for different types
    DATA_FORMAT_CHOICES = ["csv", "excel", "json", "markdown"]
    MESSAGE_FORMAT_CHOICES = ["txt", "json", "markdown"]

    inputs = [
        DropdownInput(
            name="input_type",
            display_name="Input Type",
            options=["DataFrame", "Data", "Message"],
            info="Select the type of input to save.",
            value="DataFrame",
            real_time_refresh=True,
        ),
        DataFrameInput(
            name="df",
            display_name="DataFrame",
            info="The DataFrame to save.",
            dynamic=True,
            show=True,
        ),
        DataInput(
            name="data",
            display_name="Data",
            info="The Data object to save.",
            dynamic=True,
            show=False,
        ),
        MessageInput(
            name="message",
            display_name="Message",
            info="The Message to save.",
            dynamic=True,
            show=False,
        ),
        DropdownInput(
            name="file_format",
            display_name="File Format",
            options=DATA_FORMAT_CHOICES,
            info="Select the file format to save the input.",
            # Default to the first format so an untouched dropdown is still valid.
            value=DATA_FORMAT_CHOICES[0],
            real_time_refresh=True,
        ),
        StrInput(
            name="file_path",
            display_name="File Path (including filename)",
            info="The full file path (including filename and extension).",
            value="./output",
        ),
    ]

    outputs = [
        Output(
            name="confirmation",
            display_name="Confirmation",
            method="save_to_file",
            info="Confirmation message after saving the file.",
        ),
    ]

    def update_build_config(self, build_config, field_value, field_name=None):
        """Show the input matching the selected type and refresh format options.

        Only reacts to changes of the ``input_type`` field; any other field
        leaves ``build_config`` untouched.

        Args:
            build_config: Mapping of field name to that field's config mapping.
            field_value: The new value of the changed field.
            field_name: Name of the field that changed.

        Returns:
            The same ``build_config`` object, updated in place.
        """
        if field_name != "input_type":
            return build_config

        # Hide/show dynamic fields based on the selected input type
        build_config["df"]["show"] = field_value == "DataFrame"
        build_config["data"]["show"] = field_value == "Data"
        build_config["message"]["show"] = field_value == "Message"

        if field_value in ("DataFrame", "Data"):
            options = self.DATA_FORMAT_CHOICES
        elif field_value == "Message":
            options = self.MESSAGE_FORMAT_CHOICES
        else:
            # Unknown type: leave the format field as it is.
            return build_config

        format_field = build_config["file_format"]
        # Hand out a copy so later edits to the config cannot mutate the
        # class-level constant.
        format_field["options"] = list(options)
        # A format chosen for the previous input type (e.g. "csv") may not be
        # valid for the new one; fall back to the first valid choice.
        if format_field.get("value") not in options:
            format_field["value"] = options[0]

        return build_config

    def save_to_file(self) -> str:
        """Save the selected input to ``file_path`` in the selected format.

        Returns:
            A confirmation message naming the written path.

        Raises:
            ValueError: If ``input_type`` is not one of the supported types,
                if the selected input is missing, or if ``file_format`` is
                not supported for that input type.
        """
        input_type = self.input_type
        savers = {
            "DataFrame": ("df", self._save_dataframe),
            "Data": ("data", self._save_data),
            "Message": ("message", self._save_message),
        }

        # Validate the configuration before touching the filesystem.
        if input_type not in savers:
            error_msg = f"Unsupported input type: {input_type}"
            raise ValueError(error_msg)

        attribute, saver = savers[input_type]
        payload = getattr(self, attribute, None)
        # Use an identity check: the truth value of a DataFrame is ambiguous.
        if payload is None:
            error_msg = f"No {input_type} input was provided to save."
            raise ValueError(error_msg)

        file_path = Path(self.file_path).expanduser()
        # Ensure the directory exists; exist_ok makes a prior check unnecessary.
        file_path.parent.mkdir(parents=True, exist_ok=True)

        return saver(payload, file_path, self.file_format)

    def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:
        """Write ``dataframe`` to ``path`` as csv, excel, json, or markdown.

        Raises:
            ValueError: If ``fmt`` is not a supported DataFrame format.
        """
        if fmt == "csv":
            dataframe.to_csv(path, index=False)
        elif fmt == "excel":
            dataframe.to_excel(path, index=False, engine="openpyxl")
        elif fmt == "json":
            dataframe.to_json(path, orient="records", indent=2)
        elif fmt == "markdown":
            path.write_text(dataframe.to_markdown(index=False), encoding="utf-8")
        else:
            error_msg = f"Unsupported DataFrame format: {fmt}"
            raise ValueError(error_msg)

        return f"DataFrame saved successfully as '{path}'"

    def _data_to_frame(self, data: Data) -> pd.DataFrame:
        """Build a pandas DataFrame from the payload of ``data``.

        A non-empty dict whose values are all scalars becomes a single-row
        frame; passing it to ``pd.DataFrame`` directly raises ``ValueError``
        because no index can be inferred.
        """
        record = data.data
        if (
            isinstance(record, dict)
            and record
            and not any(pd.api.types.is_list_like(value) for value in record.values())
        ):
            return pd.DataFrame([record])
        return pd.DataFrame(record)

    def _save_data(self, data: Data, path: Path, fmt: str) -> str:
        """Write the payload of ``data`` to ``path``.

        csv, excel, and markdown go through a pandas DataFrame; json dumps the
        payload directly.

        Raises:
            ValueError: If ``fmt`` is not a supported Data format.
        """
        if fmt == "csv":
            self._data_to_frame(data).to_csv(path, index=False)
        elif fmt == "excel":
            self._data_to_frame(data).to_excel(path, index=False, engine="openpyxl")
        elif fmt == "json":
            path.write_text(json.dumps(data.data, indent=2), encoding="utf-8")
        elif fmt == "markdown":
            path.write_text(self._data_to_frame(data).to_markdown(index=False), encoding="utf-8")
        else:
            error_msg = f"Unsupported Data format: {fmt}"
            raise ValueError(error_msg)

        return f"Data saved successfully as '{path}'"

    def _save_message(self, message: Message, path: Path, fmt: str) -> str:
        """Write the text of ``message`` to ``path`` as txt, json, or markdown.

        Raises:
            ValueError: If the message text is an async iterator, or if
                ``fmt`` is not a supported Message format.
        """
        text = message.text
        if text is None:
            content = ""
        elif isinstance(text, AsyncIterator):
            # An async stream cannot be consumed from this synchronous method.
            error_msg = "AsyncIterator not supported"
            raise ValueError(error_msg)
        elif isinstance(text, Iterator):
            # NOTE(review): chunks are joined with a single space; confirm the
            # iterator yields whole words rather than token fragments.
            content = " ".join(str(item) for item in text)
        else:
            content = str(text)

        if fmt == "txt":
            path.write_text(content, encoding="utf-8")
        elif fmt == "json":
            path.write_text(json.dumps({"message": content}, indent=2), encoding="utf-8")
        elif fmt == "markdown":
            path.write_text(f"**Message:**\n\n{content}", encoding="utf-8")
        else:
            error_msg = f"Unsupported Message format: {fmt}"
            raise ValueError(error_msg)

        return f"Message saved successfully as '{path}'"
class TestSaveToFileComponent(ComponentTestBaseWithoutClient):
    """Unit tests for ``SaveToFileComponent``."""

    @staticmethod
    def _build_path_mock(*, parent_exists):
        """Return a mock standing in for the ``Path`` built by the component.

        ``expanduser()`` returns the mock itself and ``parent.exists()``
        reports ``parent_exists``.
        """
        fake_parent = MagicMock()
        fake_parent.exists.return_value = parent_exists
        fake_file = MagicMock()
        fake_file.parent = fake_parent
        fake_file.expanduser.return_value = fake_file
        return fake_file

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self):
        """Delete known output files from the working directory after each test."""
        candidates = [
            "./test_output.csv",
            "./test_output.xlsx",
            "./test_output.json",
            "./test_output.md",
            "./test_output.txt",
        ]
        yield
        # Everything below runs as teardown.
        for name in candidates:
            leftover = Path(name)
            if leftover.exists():
                leftover.unlink()

    @pytest.fixture
    def component_class(self):
        """Provide the component class under test."""
        return SaveToFileComponent

    @pytest.fixture
    def default_kwargs(self):
        """Provide attributes for a DataFrame-to-CSV save."""
        rows = [{"col1": 1, "col2": "a"}, {"col1": 2, "col2": "b"}]
        return {
            "input_type": "DataFrame",
            "df": pd.DataFrame(rows),
            "file_format": "csv",
            "file_path": "./test_output.csv",
        }

    @pytest.fixture
    def file_names_mapping(self):
        """Provide the per-version file name mapping (empty: new component)."""
        return []

    def test_basic_setup(self, component_class, default_kwargs):
        """Attributes passed to ``set_attributes`` are readable on the component."""
        instance = component_class()
        instance.set_attributes(default_kwargs)

        assert instance.input_type == "DataFrame"
        assert instance.file_format == "csv"
        assert instance.file_path == "./test_output.csv"

    def test_update_build_config_dataframe(self, component_class):
        """Selecting DataFrame shows only ``df`` and offers the data formats."""
        instance = component_class()
        config = {
            "df": {"show": False},
            "data": {"show": False},
            "message": {"show": False},
            "file_format": {"options": []},
        }

        result = instance.update_build_config(config, "DataFrame", "input_type")

        assert result["df"]["show"] is True
        assert result["data"]["show"] is False
        assert result["message"]["show"] is False
        assert set(result["file_format"]["options"]) == set(instance.DATA_FORMAT_CHOICES)

    def test_save_message(self, component_class):
        """A Message is written with the expected content for each format."""
        expected_by_format = {
            "txt": "Test message",
            "json": json.dumps({"message": "Test message"}, indent=2),
            "markdown": "**Message:**\n\nTest message",
        }

        for fmt, expected_content in expected_by_format.items():
            fake_file = self._build_path_mock(parent_exists=True)

            # Patch Path in the module that imported it.
            with patch("langflow.components.processing.save_to_file.Path", return_value=fake_file):
                instance = component_class()
                instance.set_attributes(
                    {
                        "input_type": "Message",
                        "message": Message(text="Test message"),
                        "file_format": fmt,
                        "file_path": f"./test_output.{fmt}",
                    }
                )

                outcome = instance.save_to_file()

                fake_file.write_text.assert_called_once_with(expected_content, encoding="utf-8")
                assert "saved successfully" in outcome

    def test_save_data(self, component_class):
        """A Data object is written as indented JSON."""
        payload = {"col1": ["value1"], "col2": ["value2"]}
        fake_file = self._build_path_mock(parent_exists=True)

        with patch("langflow.components.processing.save_to_file.Path", return_value=fake_file):
            instance = component_class()
            instance.set_attributes(
                {
                    "input_type": "Data",
                    "data": Data(data=payload),
                    "file_format": "json",
                    "file_path": "./test_output.json",
                }
            )

            outcome = instance.save_to_file()

            fake_file.write_text.assert_called_once_with(json.dumps(payload, indent=2), encoding="utf-8")
            assert "saved successfully" in outcome

    def test_directory_creation(self, component_class, default_kwargs):
        """A missing parent directory is created before saving."""
        fake_file = self._build_path_mock(parent_exists=False)

        with (
            patch("langflow.components.processing.save_to_file.Path", return_value=fake_file),
            patch.object(pd.DataFrame, "to_csv") as fake_to_csv,
        ):
            instance = component_class()
            instance.set_attributes(default_kwargs)

            outcome = instance.save_to_file()

            fake_file.parent.mkdir.assert_called_once_with(parents=True, exist_ok=True)
            assert fake_to_csv.called
            assert "saved successfully" in outcome

    def test_invalid_input_type(self, default_kwargs):
        """An unknown input type raises ``ValueError``."""
        instance = SaveToFileComponent()
        # Override on a copy so the fixture's dict is left unchanged.
        bad_kwargs = {**default_kwargs, "input_type": "InvalidType"}
        instance.set_attributes(bad_kwargs)

        with pytest.raises(ValueError, match="Unsupported input type"):
            instance.save_to_file()