feat: add rss component (#8134)
* add RSS component
* add tests
* [autofix.ci] apply automated fixes
* Update rss.py

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
44f680fb16
commit
a0a50eb3a1
3 changed files with 200 additions and 0 deletions
|
|
@ -3,6 +3,7 @@ from .csv_to_data import CSVToDataComponent
|
|||
from .directory import DirectoryComponent
|
||||
from .file import FileComponent
|
||||
from .json_to_data import JSONToDataComponent
|
||||
from .rss import RSSReaderComponent
|
||||
from .sql_executor import SQLComponent
|
||||
from .url import URLComponent
|
||||
from .webhook import WebhookComponent
|
||||
|
|
@ -13,6 +14,7 @@ __all__ = [
|
|||
"DirectoryComponent",
|
||||
"FileComponent",
|
||||
"JSONToDataComponent",
|
||||
"RSSReaderComponent",
|
||||
"SQLComponent",
|
||||
"URLComponent",
|
||||
"WebhookComponent",
|
||||
|
|
|
|||
69
src/backend/base/langflow/components/data/rss.py
Normal file
69
src/backend/base/langflow/components/data/rss.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from langflow.custom import Component
|
||||
from langflow.io import IntInput, MessageTextInput, Output
|
||||
from langflow.logging import logger
|
||||
from langflow.schema import DataFrame
|
||||
|
||||
|
||||
class RSSReaderComponent(Component):
    """Fetch an RSS feed over HTTP and return its ``<item>`` entries as a DataFrame.

    The component takes a feed URL and a request timeout as inputs and exposes a
    single ``articles`` output produced by :meth:`read_rss`.
    """

    display_name = "RSS Reader"
    description = "Fetches and parses an RSS feed."
    icon = "rss"
    name = "RSSReaderSimple"

    inputs = [
        MessageTextInput(
            name="rss_url",
            display_name="RSS Feed URL",
            info="URL of the RSS feed to parse.",
            tool_mode=True,
            input_types=[],
            required=True,
        ),
        IntInput(
            name="timeout",
            display_name="Timeout",
            info="Timeout for the RSS feed request.",
            value=5,
            advanced=True,
        ),
    ]

    outputs = [Output(name="articles", display_name="Articles", method="read_rss")]

    def read_rss(self) -> DataFrame:
        """Download the feed at ``self.rss_url`` and tabulate its items.

        Returns:
            A DataFrame with the columns ``title``, ``link``, ``published`` and
            ``summary``, one row per ``<item>`` element. A field that is missing
            from an item is reported as an empty string. A feed without items
            yields an empty DataFrame that still has the four columns.

            If the request fails, the body is empty, or parsing fails, the
            failure is not raised: ``self.status`` is set to a description of
            the failure and a single-row DataFrame is returned whose ``title``
            is ``"Error"`` and whose ``summary`` carries the error text.
        """
        try:
            # ``self.timeout`` is forwarded as-is to ``requests.get``.
            response = requests.get(self.rss_url, timeout=self.timeout)
            response.raise_for_status()
            if not response.content.strip():
                msg = "Empty response received"
                raise ValueError(msg)
            # Parse exactly once: the tree built here is both the validity check
            # and the document that is searched for items below.
            try:
                soup = BeautifulSoup(response.content, "xml")
            except Exception as e:
                msg = f"Invalid XML response: {e}"
                raise ValueError(msg) from e
            items = soup.find_all("item")
        except (requests.RequestException, ValueError) as e:
            self.status = f"Failed to fetch RSS: {e}"
            # Failures are reported in-band, using the same columns as a normal result.
            return DataFrame(pd.DataFrame([{"title": "Error", "link": "", "published": "", "summary": str(e)}]))

        articles = [
            {
                "title": item.title.text if item.title else "",
                "link": item.link.text if item.link else "",
                "published": item.pubDate.text if item.pubDate else "",
                "summary": item.description.text if item.description else "",
            }
            for item in items
        ]

        # Ensure the DataFrame has the correct columns even if empty
        df_articles = pd.DataFrame(articles, columns=["title", "link", "published", "summary"])
        logger.info(f"Fetched {len(df_articles)} articles.")
        return DataFrame(df_articles)
|
||||
129
src/backend/tests/unit/components/data/test_rss.py
Normal file
129
src/backend/tests/unit/components/data/test_rss.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from langflow.components.data.rss import RSSReaderComponent
|
||||
from langflow.schema import DataFrame
|
||||
|
||||
from tests.base import ComponentTestBaseWithoutClient
|
||||
|
||||
|
||||
class TestRSSReaderComponent(ComponentTestBaseWithoutClient):
    """Unit tests for ``RSSReaderComponent.read_rss`` with ``requests.get`` patched out."""

    @pytest.fixture
    def component_class(self):
        """Return the component class to test."""
        return RSSReaderComponent

    @pytest.fixture
    def default_kwargs(self):
        """Return the default kwargs for the component."""
        return {
            "rss_url": "https://example.com/feed.xml",
        }

    @pytest.fixture
    def file_names_mapping(self):
        """Return an empty list since this component doesn't have version-specific files."""
        return []

    @staticmethod
    def _read_feed(xml_text):
        """Run ``read_rss`` against a mocked HTTP response whose body is ``xml_text``.

        Surrounding whitespace is stripped before encoding so that the XML
        declaration is the very first thing in the payload.
        """
        mock_response = Mock()
        mock_response.content = xml_text.strip().encode("utf-8")
        mock_response.raise_for_status = Mock()

        with patch("requests.get", return_value=mock_response):
            component = RSSReaderComponent(rss_url="https://example.com/feed.xml")
            return component.read_rss()

    def test_successful_rss_fetch(self):
        # Feed with two complete items
        mock_rss_content = """
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <item>
                        <title>Test Article 1</title>
                        <link>https://example.com/1</link>
                        <pubDate>2024-03-20</pubDate>
                        <description>Test summary 1</description>
                    </item>
                    <item>
                        <title>Test Article 2</title>
                        <link>https://example.com/2</link>
                        <pubDate>2024-03-21</pubDate>
                        <description>Test summary 2</description>
                    </item>
                </channel>
            </rss>
        """

        result = self._read_feed(mock_rss_content)

        assert isinstance(result, DataFrame)
        assert len(result) == 2
        assert list(result.columns) == ["title", "link", "published", "summary"]
        assert result.iloc[0]["title"] == "Test Article 1"
        assert result.iloc[1]["title"] == "Test Article 2"

    def test_rss_fetch_with_missing_fields(self):
        # Item without <link> and <description>: both must come back as ""
        mock_rss_content = """
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <item>
                        <title>Test Article</title>
                        <!-- Missing link -->
                        <pubDate>2024-03-20</pubDate>
                        <!-- Missing description -->
                    </item>
                </channel>
            </rss>
        """

        result = self._read_feed(mock_rss_content)

        assert isinstance(result, DataFrame)
        assert len(result) == 1
        assert result.iloc[0]["title"] == "Test Article"
        assert result.iloc[0]["link"] == ""
        assert result.iloc[0]["summary"] == ""

    def test_rss_fetch_error(self):
        # A failed request is reported as a single "Error" row instead of raising
        with patch("requests.get", side_effect=requests.RequestException("Network error")):
            component = RSSReaderComponent(rss_url="https://example.com/feed.xml")
            result = component.read_rss()

        assert isinstance(result, DataFrame)
        assert len(result) == 1
        assert result.iloc[0]["title"] == "Error"
        assert result.iloc[0]["link"] == ""
        assert result.iloc[0]["published"] == ""
        assert "Network error" in result.iloc[0]["summary"]

    def test_empty_rss_feed(self):
        # Valid feed with no items: empty frame that still carries the four columns
        mock_rss_content = """
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                </channel>
            </rss>
        """

        result = self._read_feed(mock_rss_content)

        assert isinstance(result, DataFrame)
        assert len(result) == 0
        assert list(result.columns) == ["title", "link", "published", "summary"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue