import pandas as pd
import requests
from bs4 import BeautifulSoup

from langflow.custom import Component
from langflow.io import IntInput, MessageTextInput, Output
from langflow.logging import logger
from langflow.schema import DataFrame


class RSSReaderComponent(Component):
    """Fetch an RSS feed over HTTP and expose its ``<item>`` entries as a DataFrame.

    Inputs:
        rss_url: URL of the feed to download (required, usable in tool mode).
        timeout: Value passed as ``timeout`` to ``requests.get`` (default 5).

    Output:
        articles: Produced by :meth:`read_rss`; one row per ``<item>`` with the
        columns ``title``, ``link``, ``published`` and ``summary``.
    """

    display_name = "RSS Reader"
    description = "Fetches and parses an RSS feed."
    icon = "rss"
    name = "RSSReaderSimple"

    inputs = [
        MessageTextInput(
            name="rss_url",
            display_name="RSS Feed URL",
            info="URL of the RSS feed to parse.",
            tool_mode=True,
            input_types=[],
            required=True,
        ),
        IntInput(
            name="timeout",
            display_name="Timeout",
            info="Timeout for the RSS feed request.",
            value=5,
            advanced=True,
        ),
    ]

    outputs = [Output(name="articles", display_name="Articles", method="read_rss")]

    def read_rss(self) -> DataFrame:
        """Download ``self.rss_url`` and return its items as a DataFrame.

        Returns:
            A DataFrame with the columns ``title``, ``link``, ``published`` and
            ``summary``. A feed without ``<item>`` elements yields an empty
            frame that still has those columns. If the request fails, the body
            is empty, or the body cannot be parsed, ``self.status`` is set and
            a single row titled ``"Error"`` is returned whose ``summary`` holds
            the error text; no exception is raised for those cases.
        """
        # Output column -> name of the element read from each <item>.
        # Insertion order defines the DataFrame's column order.
        field_tags = {
            "title": "title",
            "link": "link",
            "published": "pubDate",
            "summary": "description",
        }

        def text_of(item, tag_name: str) -> str:
            """Return the text of the first ``tag_name`` element in ``item``, or ""."""
            node = item.find(tag_name)
            return node.text if node is not None else ""

        try:
            response = requests.get(self.rss_url, timeout=self.timeout)
            response.raise_for_status()
            if not response.content.strip():
                msg = "Empty response received"
                raise ValueError(msg)
            # Parse exactly once. Any parser failure is converted to ValueError
            # so that it is reported through the error row below.
            try:
                soup = BeautifulSoup(response.content, "xml")
            except Exception as e:
                msg = f"Invalid XML response: {e}"
                raise ValueError(msg) from e
            items = soup.find_all("item")
        except (requests.RequestException, ValueError) as e:
            self.status = f"Failed to fetch RSS: {e}"
            return DataFrame(pd.DataFrame([{"title": "Error", "link": "", "published": "", "summary": str(e)}]))

        articles = [{column: text_of(item, tag_name) for column, tag_name in field_tags.items()} for item in items]

        # Passing the columns explicitly keeps the schema stable even when the
        # feed contains no items.
        df_articles = pd.DataFrame(articles, columns=list(field_tags))
        logger.info(f"Fetched {len(df_articles)} articles.")
        return DataFrame(df_articles)
from unittest.mock import Mock, patch

import pytest
import requests
from langflow.components.data.rss import RSSReaderComponent
from langflow.schema import DataFrame

from tests.base import ComponentTestBaseWithoutClient


def _mock_feed_response(xml_text: str) -> Mock:
    """Build a stand-in for ``requests.Response`` whose body is ``xml_text``."""
    mock_response = Mock()
    mock_response.content = xml_text.encode("utf-8")
    mock_response.raise_for_status = Mock()
    return mock_response


class TestRSSReaderComponent(ComponentTestBaseWithoutClient):
    """Unit tests for ``RSSReaderComponent.read_rss`` with ``requests.get`` patched."""

    @pytest.fixture
    def component_class(self):
        """Return the component class to test."""
        return RSSReaderComponent

    @pytest.fixture
    def default_kwargs(self):
        """Return the default kwargs for the component."""
        return {
            "rss_url": "https://example.com/feed.xml",
        }

    @pytest.fixture
    def file_names_mapping(self):
        """Return an empty list since this component doesn't have version-specific files."""
        return []

    def test_successful_rss_fetch(self):
        """Two well-formed items produce two rows in document order."""
        # NOTE(review): the XML markup in these fixtures was reconstructed from
        # the element names read_rss looks up - confirm against the real feed
        # shape the tests were meant to cover.
        mock_rss_content = """
        <rss version="2.0">
            <channel>
                <item>
                    <title>Test Article 1</title>
                    <link>https://example.com/1</link>
                    <pubDate>2024-03-20</pubDate>
                    <description>Test summary 1</description>
                </item>
                <item>
                    <title>Test Article 2</title>
                    <link>https://example.com/2</link>
                    <pubDate>2024-03-21</pubDate>
                    <description>Test summary 2</description>
                </item>
            </channel>
        </rss>
        """

        with patch("requests.get", return_value=_mock_feed_response(mock_rss_content)):
            component = RSSReaderComponent(rss_url="https://example.com/feed.xml")
            result = component.read_rss()

        assert isinstance(result, DataFrame)
        assert len(result) == 2
        assert list(result.columns) == ["title", "link", "published", "summary"]
        assert result.iloc[0]["title"] == "Test Article 1"
        assert result.iloc[1]["title"] == "Test Article 2"

    def test_rss_fetch_with_missing_fields(self):
        """Items with empty link/description still yield a row with "" values."""
        mock_rss_content = """
        <rss version="2.0">
            <channel>
                <item>
                    <title>Test Article</title>
                    <link></link>
                    <pubDate>2024-03-20</pubDate>
                    <description></description>
                </item>
            </channel>
        </rss>
        """

        with patch("requests.get", return_value=_mock_feed_response(mock_rss_content)):
            component = RSSReaderComponent(rss_url="https://example.com/feed.xml")
            result = component.read_rss()

        assert isinstance(result, DataFrame)
        assert len(result) == 1
        assert result.iloc[0]["title"] == "Test Article"
        assert result.iloc[0]["link"] == ""
        assert result.iloc[0]["summary"] == ""

    def test_rss_fetch_error(self):
        """A request failure is reported as a single "Error" row, not raised."""
        with patch("requests.get", side_effect=requests.RequestException("Network error")):
            component = RSSReaderComponent(rss_url="https://example.com/feed.xml")
            result = component.read_rss()

        assert isinstance(result, DataFrame)
        assert len(result) == 1
        assert result.iloc[0]["title"] == "Error"
        assert result.iloc[0]["link"] == ""
        assert result.iloc[0]["published"] == ""
        assert "Network error" in result.iloc[0]["summary"]

    def test_empty_rss_feed(self):
        """A valid feed without items gives an empty frame with the full schema."""
        # The body must be non-blank: a whitespace-only response takes the
        # "Empty response received" error path instead of the zero-item path.
        mock_rss_content = """
        <rss version="2.0">
            <channel>
            </channel>
        </rss>
        """

        with patch("requests.get", return_value=_mock_feed_response(mock_rss_content)):
            component = RSSReaderComponent(rss_url="https://example.com/feed.xml")
            result = component.read_rss()

        assert isinstance(result, DataFrame)
        assert len(result) == 0
        assert list(result.columns) == ["title", "link", "published", "summary"]