diff --git a/src/backend/base/langflow/components/processing/save_to_file.py b/src/backend/base/langflow/components/processing/save_to_file.py index d595d74bf..b4804d94f 100644 --- a/src/backend/base/langflow/components/processing/save_to_file.py +++ b/src/backend/base/langflow/components/processing/save_to_file.py @@ -103,6 +103,8 @@ class SaveToFileComponent(Component): if not file_path.parent.exists(): file_path.parent.mkdir(parents=True, exist_ok=True) + file_path = self._adjust_file_path_with_format(file_path, file_format) + if input_type == "DataFrame": dataframe = self.df return self._save_dataframe(dataframe, file_path, file_format) @@ -116,6 +118,14 @@ class SaveToFileComponent(Component): error_msg = f"Unsupported input type: {input_type}" raise ValueError(error_msg) + def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path: + file_extension = path.suffix.lower().lstrip(".") + + if fmt == "excel": + return Path(f"{path}.xlsx").expanduser() if file_extension not in ["xlsx", "xls"] else path + + return Path(f"{path}.{fmt}").expanduser() if file_extension != fmt else path + def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str: if fmt == "csv": dataframe.to_csv(path, index=False) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json b/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json index 02e980f17..9e7d281ba 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json @@ -1669,7 +1669,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\n\nimport pandas as pd\n\nfrom langflow.custom import Component\nfrom langflow.io import (\n DataFrameInput,\n DataInput,\n DropdownInput,\n MessageInput,\n Output,\n StrInput,\n)\nfrom langflow.schema import Data, DataFrame, Message\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Save to File\"\n description = \"Save DataFrames, Data, or Messages to various file formats.\"\n icon = \"save\"\n name = \"SaveToFile\"\n\n # File format options for different types\n DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n\n inputs = [\n DropdownInput(\n name=\"input_type\",\n display_name=\"Input Type\",\n options=[\"DataFrame\", \"Data\", \"Message\"],\n info=\"Select the type of input to save.\",\n value=\"DataFrame\",\n real_time_refresh=True,\n ),\n DataFrameInput(\n name=\"df\",\n display_name=\"DataFrame\",\n info=\"The DataFrame to save.\",\n dynamic=True,\n show=True,\n ),\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The Data object to save.\",\n dynamic=True,\n show=False,\n ),\n MessageInput(\n name=\"message\",\n display_name=\"Message\",\n info=\"The Message to save.\",\n dynamic=True,\n show=False,\n ),\n DropdownInput(\n name=\"file_format\",\n display_name=\"File Format\",\n options=DATA_FORMAT_CHOICES,\n info=\"Select the file format to save the input.\",\n real_time_refresh=True,\n ),\n StrInput(\n name=\"file_path\",\n display_name=\"File Path (including filename)\",\n info=\"The full file path (including filename and extension).\",\n value=\"./output\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"confirmation\",\n display_name=\"Confirmation\",\n method=\"save_to_file\",\n info=\"Confirmation message after saving the file.\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n # Hide/show dynamic fields based on the selected input type\n if field_name == \"input_type\":\n build_config[\"df\"][\"show\"] = field_value == \"DataFrame\"\n build_config[\"data\"][\"show\"] = field_value == \"Data\"\n build_config[\"message\"][\"show\"] = field_value == \"Message\"\n\n if field_value in {\"DataFrame\", \"Data\"}:\n build_config[\"file_format\"][\"options\"] = self.DATA_FORMAT_CHOICES\n elif field_value == \"Message\":\n build_config[\"file_format\"][\"options\"] = self.MESSAGE_FORMAT_CHOICES\n\n return build_config\n\n def save_to_file(self) -> str:\n input_type = self.input_type\n file_format = self.file_format\n file_path = Path(self.file_path).expanduser()\n\n # Ensure the directory exists\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n\n if input_type == \"DataFrame\":\n dataframe = self.df\n return self._save_dataframe(dataframe, file_path, file_format)\n if input_type == \"Data\":\n data = self.data\n return self._save_data(data, file_path, file_format)\n if input_type == \"Message\":\n message = self.message\n return self._save_message(message, file_path, file_format)\n\n error_msg = f\"Unsupported input type: {input_type}\"\n raise ValueError(error_msg)\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n path.write_text(dataframe.to_markdown(index=False), encoding=\"utf-8\")\n else:\n error_msg = f\"Unsupported DataFrame format: {fmt}\"\n raise ValueError(error_msg)\n\n return f\"DataFrame saved successfully as '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(path, index=False)\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n path.write_text(json.dumps(data.data, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(pd.DataFrame(data.data).to_markdown(index=False), encoding=\"utf-8\")\n else:\n error_msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(error_msg)\n\n return f\"Data saved successfully as '{path}'\"\n\n def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n # AsyncIterator needs to be handled differently\n error_msg = \"AsyncIterator not supported\"\n raise ValueError(error_msg)\n elif isinstance(message.text, Iterator):\n # Convert iterator to string\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n if fmt == \"txt\":\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n path.write_text(json.dumps({\"message\": content}, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(f\"**Message:**\\n\\n{content}\", encoding=\"utf-8\")\n else:\n error_msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(error_msg)\n\n return f\"Message saved successfully as '{path}'\"\n" + "value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\n\nimport pandas as pd\n\nfrom langflow.custom import Component\nfrom langflow.io import (\n DataFrameInput,\n DataInput,\n DropdownInput,\n MessageInput,\n Output,\n StrInput,\n)\nfrom langflow.schema import Data, DataFrame, Message\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Save to File\"\n description = \"Save DataFrames, Data, or Messages to various file formats.\"\n icon = \"save\"\n name = \"SaveToFile\"\n\n # File format options for different types\n DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n\n inputs = [\n DropdownInput(\n name=\"input_type\",\n display_name=\"Input Type\",\n options=[\"DataFrame\", \"Data\", \"Message\"],\n info=\"Select the type of input to save.\",\n value=\"DataFrame\",\n real_time_refresh=True,\n ),\n DataFrameInput(\n name=\"df\",\n display_name=\"DataFrame\",\n info=\"The DataFrame to save.\",\n dynamic=True,\n show=True,\n ),\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The Data object to save.\",\n dynamic=True,\n show=False,\n ),\n MessageInput(\n name=\"message\",\n display_name=\"Message\",\n info=\"The Message to save.\",\n dynamic=True,\n show=False,\n ),\n DropdownInput(\n name=\"file_format\",\n display_name=\"File Format\",\n options=DATA_FORMAT_CHOICES,\n info=\"Select the file format to save the input.\",\n real_time_refresh=True,\n ),\n StrInput(\n name=\"file_path\",\n display_name=\"File Path (including filename)\",\n info=\"The full file path (including filename and extension).\",\n value=\"./output\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"confirmation\",\n display_name=\"Confirmation\",\n method=\"save_to_file\",\n info=\"Confirmation message after saving the file.\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n # Hide/show dynamic fields based on the selected input type\n if field_name == \"input_type\":\n build_config[\"df\"][\"show\"] = field_value == \"DataFrame\"\n build_config[\"data\"][\"show\"] = field_value == \"Data\"\n build_config[\"message\"][\"show\"] = field_value == \"Message\"\n\n if field_value in {\"DataFrame\", \"Data\"}:\n build_config[\"file_format\"][\"options\"] = self.DATA_FORMAT_CHOICES\n elif field_value == \"Message\":\n build_config[\"file_format\"][\"options\"] = self.MESSAGE_FORMAT_CHOICES\n\n return build_config\n\n def save_to_file(self) -> str:\n input_type = self.input_type\n file_format = self.file_format\n file_path = Path(self.file_path).expanduser()\n\n # Ensure the directory exists\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n\n file_path = self._adjust_file_path_with_format(file_path, file_format)\n\n if input_type == \"DataFrame\":\n dataframe = self.df\n return self._save_dataframe(dataframe, file_path, file_format)\n if input_type == \"Data\":\n data = self.data\n return self._save_data(data, file_path, file_format)\n if input_type == \"Message\":\n message = self.message\n return self._save_message(message, file_path, file_format)\n\n error_msg = f\"Unsupported input type: {input_type}\"\n raise ValueError(error_msg)\n\n def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path:\n file_extension = path.suffix.lower().lstrip(\".\")\n\n if fmt == \"excel\":\n return Path(f\"{path}.xlsx\").expanduser() if file_extension not in [\"xlsx\", \"xls\"] else path\n\n return Path(f\"{path}.{fmt}\").expanduser() if file_extension != fmt else path\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n path.write_text(dataframe.to_markdown(index=False), encoding=\"utf-8\")\n else:\n error_msg = f\"Unsupported DataFrame format: {fmt}\"\n raise ValueError(error_msg)\n\n return f\"DataFrame saved successfully as '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(path, index=False)\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n path.write_text(json.dumps(data.data, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(pd.DataFrame(data.data).to_markdown(index=False), encoding=\"utf-8\")\n else:\n error_msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(error_msg)\n\n return f\"Data saved successfully as '{path}'\"\n\n def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n # AsyncIterator needs to be handled differently\n error_msg = \"AsyncIterator not supported\"\n raise ValueError(error_msg)\n elif isinstance(message.text, Iterator):\n # Convert iterator to string\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n if fmt == \"txt\":\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n path.write_text(json.dumps({\"message\": content}, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(f\"**Message:**\\n\\n{content}\", encoding=\"utf-8\")\n else:\n error_msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(error_msg)\n\n return f\"Message saved successfully as '{path}'\"\n" }, "data": { "_input_type": "DataInput", diff --git a/src/backend/tests/unit/components/processing/test_save_to_file_component.py b/src/backend/tests/unit/components/processing/test_save_to_file_component.py index e3ef516e6..0d1d8ec06 100644 --- a/src/backend/tests/unit/components/processing/test_save_to_file_component.py +++ b/src/backend/tests/unit/components/processing/test_save_to_file_component.py @@ -163,3 +163,75 @@ class TestSaveToFileComponent(ComponentTestBaseWithoutClient): with pytest.raises(ValueError, match="Unsupported input type"): component.save_to_file() + + @pytest.mark.parametrize( + ("path_str", "fmt", "expected_suffix"), + [ + ("./test_output", "csv", ".csv"), + ("./test_output", "json", ".json"), + ("./test_output", "markdown", ".markdown"), + ("./test_output", "txt", ".txt"), + ], + ) + def test_adjust_path_adds_extension(self, component_class, path_str, fmt, expected_suffix): + """Test that the correct extension is added when none exists.""" + component = component_class() + input_path = Path(path_str) + expected_path = Path(f"{path_str}{expected_suffix}") + result = component._adjust_file_path_with_format(input_path, fmt) + assert str(result) == str(expected_path.expanduser()) + + @pytest.mark.parametrize( + ("path_str", "fmt"), + [ + ("./test_output.csv", "csv"), + ("./test_output.json", "json"), + ("./test_output.markdown", "markdown"), + ("./test_output.txt", "txt"), + ], + ) + def test_adjust_path_keeps_existing_correct_extension(self, component_class, path_str, fmt): + """Test that the existing correct extension is kept.""" + component = component_class() + input_path = Path(path_str) + result = component._adjust_file_path_with_format(input_path, fmt) + assert str(result) == str(input_path.expanduser()) + + @pytest.mark.parametrize( + ("path_str", "fmt", "expected_path_str"), + [ + ("./test_output.txt", "csv", "./test_output.txt.csv"), # Incorrect extension + ("./test_output", "excel", "./test_output.xlsx"), # Add .xlsx for excel + ("./test_output.txt", "excel", "./test_output.txt.xlsx"), # Incorrect extension for excel + ], + ) + def test_adjust_path_handles_incorrect_or_excel_add(self, component_class, path_str, fmt, expected_path_str): + """Test handling incorrect extensions and adding .xlsx for excel.""" + component = component_class() + input_path = Path(path_str) + expected_path = Path(expected_path_str) + result = component._adjust_file_path_with_format(input_path, fmt) + assert str(result) == str(expected_path.expanduser()) + + @pytest.mark.parametrize( + "path_str", + [ + "./test_output.xlsx", + "./test_output.xls", + ], + ) + def test_adjust_path_keeps_existing_excel_extension(self, component_class, path_str): + """Test that existing .xlsx or .xls extensions are kept for excel format.""" + component = component_class() + input_path = Path(path_str) + result = component._adjust_file_path_with_format(input_path, "excel") + assert str(result) == str(input_path.expanduser()) + + def test_adjust_path_expands_home(self, component_class): + """Test that the home directory symbol '~' is expanded.""" + component = component_class() + input_path = Path("~/test_output") + expected_path = Path("~/test_output.csv").expanduser() + result = component._adjust_file_path_with_format(input_path, "csv") + assert str(result) == str(expected_path) + assert "~" not in str(result) # Ensure ~ was expanded