diff --git a/src/backend/base/langflow/components/data/__init__.py b/src/backend/base/langflow/components/data/__init__.py
index cd5acd3fe..6e90f0426 100644
--- a/src/backend/base/langflow/components/data/__init__.py
+++ b/src/backend/base/langflow/components/data/__init__.py
@@ -3,6 +3,7 @@ from .csv_to_data import CSVToDataComponent
from .directory import DirectoryComponent
from .file import FileComponent
from .json_to_data import JSONToDataComponent
+from .news_search import NewsSearchComponent
from .rss import RSSReaderComponent
from .sql_executor import SQLComponent
from .url import URLComponent
@@ -15,6 +16,7 @@ __all__ = [
"DirectoryComponent",
"FileComponent",
"JSONToDataComponent",
+ "NewsSearchComponent",
"RSSReaderComponent",
"SQLComponent",
"URLComponent",
diff --git a/src/backend/base/langflow/components/data/news_search.py b/src/backend/base/langflow/components/data/news_search.py
new file mode 100644
index 000000000..df3100d56
--- /dev/null
+++ b/src/backend/base/langflow/components/data/news_search.py
@@ -0,0 +1,189 @@
+from urllib.parse import quote, quote_plus, urlencode
+
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+
+from langflow.custom import Component
+from langflow.io import IntInput, MessageTextInput, Output
+from langflow.schema import DataFrame
+
+
+class NewsSearchComponent(Component):
+    """Fetch a Google News RSS feed and return its articles as a DataFrame.
+
+    One feed is requested per run. ``topic`` takes precedence over ``location``,
+    which takes precedence over ``query``.
+    """
+
+    display_name = "News Search"
+    description = "Searches Google News via RSS. Returns clean article data."
+    icon = "newspaper"
+    name = "NewsSearch"
+
+    inputs = [
+        MessageTextInput(
+            name="query",
+            display_name="Search Query",
+            info="Search keywords for news articles.",
+            tool_mode=True,
+            input_types=[],
+            required=True,
+        ),
+        MessageTextInput(
+            name="hl",
+            display_name="Language (hl)",
+            info="Language code, e.g. en-US, fr, de. Default: en-US.",
+            tool_mode=False,
+            input_types=[],
+            required=False,
+            advanced=True,
+        ),
+        MessageTextInput(
+            name="gl",
+            display_name="Country (gl)",
+            info="Country code, e.g. US, FR, DE. Default: US.",
+            tool_mode=False,
+            input_types=[],
+            required=False,
+            advanced=True,
+        ),
+        MessageTextInput(
+            name="ceid",
+            display_name="Country:Language (ceid)",
+            info="e.g. US:en, FR:fr. Default: US:en.",
+            tool_mode=False,
+            value="US:en",
+            input_types=[],
+            required=False,
+            advanced=True,
+        ),
+        MessageTextInput(
+            name="topic",
+            display_name="Topic",
+            info="One of: WORLD, NATION, BUSINESS, TECHNOLOGY, ENTERTAINMENT, SCIENCE, SPORTS, HEALTH.",
+            tool_mode=False,
+            input_types=[],
+            required=False,
+            advanced=True,
+        ),
+        MessageTextInput(
+            name="location",
+            display_name="Location (Geo)",
+            info="City, state, or country for location-based news. Leave blank for keyword search.",
+            tool_mode=False,
+            input_types=[],
+            required=False,
+            advanced=True,
+        ),
+        IntInput(
+            name="timeout",
+            display_name="Timeout",
+            info="Timeout for the request in seconds.",
+            value=5,
+            required=False,
+            advanced=True,
+        ),
+    ]
+
+    outputs = [Output(name="articles", display_name="News Articles", method="search_news")]
+
+    def search_news(self) -> DataFrame:
+        """Fetch the selected feed and return one row per article.
+
+        Returns:
+            DataFrame: Columns ``title``, ``link``, ``published`` and ``summary``.
+                Failures are not raised. They are reported as a single row titled
+                ``"Error"`` (or ``"No articles found"`` for an empty feed) with
+                the reason in ``summary``.
+        """
+        rss_url = self._build_rss_url()
+        if not rss_url:
+            self.status = "No search query, topic, or location provided."
+            self.log(self.status)
+            return self._message_frame("Error", "No search query, topic, or location provided.")
+
+        # Fall back to the input's default when the timeout is unset or 0.
+        timeout = getattr(self, "timeout", None) or 5
+        try:
+            response = requests.get(rss_url, timeout=timeout)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.content, "xml")
+            items = soup.find_all("item")
+        except requests.RequestException as e:
+            self.status = f"Failed to fetch news: {e}"
+            self.log(self.status)
+            return self._message_frame("Error", str(e))
+        except (AttributeError, ValueError, TypeError) as e:
+            self.status = f"Unexpected error: {e!s}"
+            self.log(self.status)
+            return self._message_frame("Error", str(e))
+
+        if not items:
+            self.status = "No news articles found."
+            self.log(self.status)
+            return self._message_frame("No articles found", "")
+
+        articles = []
+        for item in items:
+            try:
+                articles.append(
+                    {
+                        "title": self.clean_html(item.title.text if item.title else ""),
+                        "link": item.link.text if item.link else "",
+                        "published": item.pubDate.text if item.pubDate else "",
+                        "summary": self.clean_html(item.description.text if item.description else ""),
+                    }
+                )
+            except (AttributeError, ValueError, TypeError) as e:
+                # One malformed item must not discard the rest of the feed.
+                self.log(f"Error parsing article: {e!s}")
+
+        self.log(f"Found {len(articles)} articles.")
+        return self._articles_frame(articles)
+
+    def clean_html(self, html_string: str) -> str:
+        """Return the text of ``html_string`` with markup removed."""
+        return BeautifulSoup(html_string or "", "html.parser").get_text(separator=" ", strip=True)
+
+    def _text_input(self, name: str) -> str:
+        """Return text input ``name`` without surrounding whitespace (empty string when unset)."""
+        value = getattr(self, name, None)
+        return str(value).strip() if value else ""
+
+    def _build_rss_url(self) -> str:
+        """Return the feed URL for the current inputs, or an empty string when none selects a feed."""
+        hl = self._text_input("hl") or "en-US"
+        gl = self._text_input("gl") or "US"
+        # The ceid input ships with a "US:en" default, so this derived fallback
+        # only applies when that field has been cleared.
+        ceid = self._text_input("ceid") or f"{gl}:{hl.split('-')[0]}"
+        topic = self._text_input("topic")
+        location = self._text_input("location")
+        query = self._text_input("query")
+
+        params = {"hl": hl, "gl": gl, "ceid": ceid}
+        if topic:
+            # Path segments need percent-encoding (space -> %20); quote_plus would
+            # emit "+", which is a literal plus sign outside the query string.
+            path = "headlines/section/topic/" + quote(topic.upper(), safe="")
+        elif location:
+            path = "headlines/section/geo/" + quote(location, safe="")
+        elif query:
+            path = "search"
+            params = {"q": query, **params}
+        else:
+            return ""
+
+        # Encode every parameter so user-supplied values cannot inject extra ones.
+        # ":" stays literal so ceid keeps its usual "US:en" form.
+        query_string = urlencode(params, safe=":", quote_via=quote_plus)
+        return f"https://news.google.com/rss/{path}?{query_string}"
+
+    def _message_frame(self, title: str, summary: str) -> DataFrame:
+        """Return a one-row frame reporting an error or an empty result."""
+        return self._articles_frame([{"title": title, "link": "", "published": "", "summary": summary}])
+
+    def _articles_frame(self, rows: list) -> DataFrame:
+        """Wrap ``rows`` in a DataFrame whose columns are fixed even when ``rows`` is empty."""
+        return DataFrame(pd.DataFrame(rows, columns=["title", "link", "published", "summary"]))
diff --git a/src/backend/tests/unit/components/data/test_news_search.py b/src/backend/tests/unit/components/data/test_news_search.py
new file mode 100644
index 000000000..bb58dcf55
--- /dev/null
+++ b/src/backend/tests/unit/components/data/test_news_search.py
@@ -0,0 +1,86 @@
+from unittest.mock import Mock, patch
+
+import pytest
+import requests
+from langflow.components.data.news_search import NewsSearchComponent
+from langflow.schema import DataFrame
+
+from tests.base import ComponentTestBaseWithoutClient
+
+
+def _mock_feed(channel_body: str) -> Mock:
+    """Return a stand-in for a ``requests`` response carrying an RSS feed."""
+    feed = f'<rss version="2.0"><channel>{channel_body}</channel></rss>'
+    response = Mock()
+    response.content = feed.encode("utf-8")
+    response.raise_for_status = Mock()
+    return response
+
+
+class TestNewsSearchComponent(ComponentTestBaseWithoutClient):
+    """Unit tests for NewsSearchComponent with the HTTP request mocked out."""
+
+    @pytest.fixture
+    def component_class(self):
+        return NewsSearchComponent
+
+    @pytest.fixture
+    def default_kwargs(self):
+        return {"query": "OpenAI"}
+
+    @pytest.fixture
+    def file_names_mapping(self):
+        return []
+
+    def test_successful_news_search(self):
+        """Each item element in the feed becomes one row with cleaned fields."""
+        # Element names are the ones search_news reads from every item.
+        items = """
+            <item>
+                <title>Test News 1</title>
+                <link>https://example.com/1</link>
+                <pubDate>2024-03-20</pubDate>
+                <description>Summary 1</description>
+            </item>
+            <item>
+                <title>Test News 2</title>
+                <link>https://example.com/2</link>
+                <pubDate>2024-03-21</pubDate>
+                <description>Summary 2</description>
+            </item>
+        """
+        with patch("requests.get", return_value=_mock_feed(items)) as mock_get:
+            component = NewsSearchComponent(query="OpenAI")
+            result = component.search_news()
+
+        mock_get.assert_called_once()
+        assert "q=OpenAI" in mock_get.call_args.args[0]
+        assert isinstance(result, DataFrame)
+        assert len(result) == 2
+        assert list(result.columns) == ["title", "link", "published", "summary"]
+        assert result.iloc[0]["title"] == "Test News 1"
+        assert result.iloc[0]["link"] == "https://example.com/1"
+        assert result.iloc[0]["published"] == "2024-03-20"
+        assert result.iloc[0]["summary"] == "Summary 1"
+        assert result.iloc[1]["title"] == "Test News 2"
+
+    def test_news_search_error(self):
+        """A request failure is reported as a single "Error" row."""
+        with patch("requests.get", side_effect=requests.RequestException("Network error")):
+            component = NewsSearchComponent(query="OpenAI")
+            result = component.search_news()
+
+        assert isinstance(result, DataFrame)
+        assert len(result) == 1
+        assert result.iloc[0]["title"] == "Error"
+        assert "Network error" in result.iloc[0]["summary"]
+
+    def test_empty_news_results(self):
+        """A feed without items yields the "No articles found" placeholder row."""
+        with patch("requests.get", return_value=_mock_feed("")):
+            component = NewsSearchComponent(query="OpenAI")
+            result = component.search_news()
+
+        assert isinstance(result, DataFrame)
+        assert len(result) == 1
+        assert result.iloc[0]["title"] == "No articles found"