diff --git a/pyproject.toml b/pyproject.toml index 76cf85d52..302f59a30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,7 +109,7 @@ dependencies = [ "mcp>=0.9.1", "uv>=0.5.7", "ag2>=0.1.0", - "scrapegraph-py>=1.10.2", + "scrapegraph-py>=1.12.0", "pydantic-ai>=0.0.19", ] diff --git a/src/backend/base/langflow/components/scrapegraph/__init__.py b/src/backend/base/langflow/components/scrapegraph/__init__.py index 683da9601..f14fca52f 100644 --- a/src/backend/base/langflow/components/scrapegraph/__init__.py +++ b/src/backend/base/langflow/components/scrapegraph/__init__.py @@ -1,4 +1,5 @@ from .scrapegraph_markdownify_api import ScrapeGraphMarkdownifyApi +from .scrapegraph_search_api import ScrapeGraphSearchApi from .scrapegraph_smart_scraper_api import ScrapeGraphSmartScraperApi -__all__ = ["ScrapeGraphMarkdownifyApi", "ScrapeGraphSmartScraperApi"] +__all__ = ["ScrapeGraphMarkdownifyApi", "ScrapeGraphSearchApi", "ScrapeGraphSmartScraperApi"] diff --git a/src/backend/base/langflow/components/scrapegraph/scrapegraph_markdownify_api.py b/src/backend/base/langflow/components/scrapegraph/scrapegraph_markdownify_api.py index d945b97d1..79ffff8bd 100644 --- a/src/backend/base/langflow/components/scrapegraph/scrapegraph_markdownify_api.py +++ b/src/backend/base/langflow/components/scrapegraph/scrapegraph_markdownify_api.py @@ -1,8 +1,8 @@ from langflow.custom import Component from langflow.io import ( + MessageTextInput, Output, SecretStrInput, - StrInput, ) from langflow.schema import Data @@ -25,10 +25,10 @@ class ScrapeGraphMarkdownifyApi(Component): password=True, info="The API key to use ScrapeGraph API.", ), - StrInput( + MessageTextInput( name="url", display_name="URL", - required=True, + tool_mode=True, info="The URL to markdownify.", ), ] diff --git a/src/backend/base/langflow/components/scrapegraph/scrapegraph_search_api.py b/src/backend/base/langflow/components/scrapegraph/scrapegraph_search_api.py new file mode 100644 index 000000000..22778ce68 --- 
/dev/null
+++ b/src/backend/base/langflow/components/scrapegraph/scrapegraph_search_api.py
@@ -0,0 +1,71 @@
+from langflow.custom import Component
+from langflow.io import (
+    MessageTextInput,
+    Output,
+    SecretStrInput,
+)
+from langflow.schema import Data
+
+
+class ScrapeGraphSearchApi(Component):
+    display_name: str = "ScrapeGraphSearchApi"
+    description: str = """ScrapeGraph Search API.
+    Given a search prompt, it will return search results using ScrapeGraph's search functionality.
+    More info at https://docs.scrapegraphai.com/services/searchscraper"""
+    name = "ScrapeGraphSearchApi"
+
+    documentation: str = "https://docs.scrapegraphai.com/introduction"
+    icon = "ScrapeGraph"
+
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="ScrapeGraph API Key",
+            required=True,
+            password=True,
+            info="The API key to use ScrapeGraph API.",
+        ),
+        MessageTextInput(
+            name="user_prompt",
+            display_name="Search Prompt",
+            tool_mode=True,
+            info="The search prompt to use.",
+        ),
+    ]
+
+    outputs = [
+        Output(display_name="Data", name="data", method="search"),
+    ]
+
+    def search(self) -> Data:
+        """Run a SearchScraper request with the configured prompt.
+
+        Returns:
+            Data: The raw SearchScraper response wrapped in a ``Data`` object.
+
+        Raises:
+            ImportError: If the ``scrapegraph-py`` package is not installed.
+        """
+        try:
+            from scrapegraph_py import Client
+            from scrapegraph_py.logger import sgai_logger
+        except ImportError as e:
+            msg = "Could not import scrapegraph-py package. Please install it with `pip install scrapegraph-py`."
+            raise ImportError(msg) from e
+
+        # Set logging level
+        sgai_logger.set_logging(level="INFO")
+
+        # Initialize the client with API key
+        sgai_client = Client(api_key=self.api_key)
+
+        try:
+            # SearchScraper request
+            response = sgai_client.searchscraper(
+                user_prompt=self.user_prompt,
+            )
+        finally:
+            # Close the client exactly once, on success or failure; request errors propagate to the caller
+            sgai_client.close()
+
+        return Data(data=response)
diff --git a/src/backend/base/langflow/components/scrapegraph/scrapegraph_smart_scraper_api.py b/src/backend/base/langflow/components/scrapegraph/scrapegraph_smart_scraper_api.py
index 65e91c0c6..8a33f34b4 100644
--- a/src/backend/base/langflow/components/scrapegraph/scrapegraph_smart_scraper_api.py
+++ b/src/backend/base/langflow/components/scrapegraph/scrapegraph_smart_scraper_api.py
@@ -1,8 +1,8 @@
 from langflow.custom import Component
 from langflow.io import (
+    MessageTextInput,
     Output,
     SecretStrInput,
-    StrInput,
 )
 from langflow.schema import Data
 
@@ -25,12 +25,18 @@ class ScrapeGraphSmartScraperApi(Component):
             password=True,
             info="The API key to use ScrapeGraph API.",
         ),
-        StrInput(
+        MessageTextInput(
             name="url",
             display_name="URL",
-            required=True,
+            tool_mode=True,
             info="The URL to scrape.",
         ),
+        MessageTextInput(
+            name="prompt",
+            display_name="Prompt",
+            tool_mode=True,
+            info="The prompt to use for the smart scraper.",
+        ),
     ]
 
     outputs = [
@@ -55,6 +61,7 @@ class ScrapeGraphSmartScraperApi(Component):
             # SmartScraper request
             response = sgai_client.smartscraper(
                 website_url=self.url,
+                user_prompt=self.prompt,
             )
 
             # Close the client
diff --git a/uv.lock b/uv.lock
index 6c9eadcbc..39e26a22c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -512,7 +512,7 @@ name = "blessed"
 version = "1.20.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "jinxed", marker = "sys_platform == 'win32'" },
+    { name = "jinxed", marker = "platform_system == 'Windows'" },
     { name = "six" },
     { name = "wcwidth" },
 ]
@@ -906,7 +906,7 @@ name = "click"
version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ @@ -3005,7 +3005,7 @@ name = "ipykernel" version = "6.29.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "appnope", marker = "sys_platform == 'darwin'" }, + { name = "appnope", marker = "platform_system == 'Darwin'" }, { name = "comm" }, { name = "debugpy" }, { name = "ipython" }, @@ -3096,7 +3096,7 @@ name = "jinxed" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ansicon", marker = "sys_platform == 'win32'" }, + { name = "ansicon", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/20/d0/59b2b80e7a52d255f9e0ad040d2e826342d05580c4b1d7d7747cfb8db731/jinxed-1.3.0.tar.gz", hash = "sha256:1593124b18a41b7a3da3b078471442e51dbad3d77b4d4f2b0c26ab6f7d660dbf", size = 80981 } wheels = [ @@ -4048,7 +4048,7 @@ requires-dist = [ { name = "qianfan", specifier = "==0.3.5" }, { name = "ragstack-ai-knowledge-store", specifier = "==0.2.1" }, { name = "redis", specifier = "==5.2.1" }, - { name = "scrapegraph-py", specifier = ">=1.10.2" }, + { name = "scrapegraph-py", specifier = ">=1.12.0" }, { name = "sentence-transformers", marker = "extra == 'local'", specifier = ">=2.3.1" }, { name = "spider-client", specifier = "==0.1.24" }, { name = "sqlalchemy", extras = ["aiosqlite", "postgresql-psycopg2binary", "postgresql-psycopgbinary"], specifier = ">=2.0.38,<3.0.0" }, @@ -5933,7 +5933,7 @@ name = "portalocker" version = "2.10.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { 
name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "pywin32", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 } wheels = [ @@ -7645,7 +7645,7 @@ wheels = [ [[package]] name = "scrapegraph-py" -version = "1.10.2" +version = "1.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -7654,9 +7654,9 @@ dependencies = [ { name = "python-dotenv" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/07/25/e0a55d0b5bced151f30203bf9c187dd52725b2cd4dd0b0c195fff97f8a47/scrapegraph_py-1.10.2.tar.gz", hash = "sha256:42eb27de0da25b9b912a5b4da9851398b79dec147af749fea4a36f54511a5e8b", size = 110397 } +sdist = { url = "https://files.pythonhosted.org/packages/42/36/10546d18157cd2efb2de09098fac8dd6f689b98842a4cc71bb7fc29ba4b9/scrapegraph_py-1.12.0.tar.gz", hash = "sha256:82d27e8ea325975f768f80d4edf403b6294518dae6a1e3ae63e27b8934a5dacb", size = 113290 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/7e/7c31a844c2f944fd7851bc4d2c2924fa21e30cd0a274a21a8460da0b4350/scrapegraph_py-1.10.2-py3-none-any.whl", hash = "sha256:c69baa8d4f1b21f4f728705b4798246f728080e9d6eaf6a90c203991df872dfc", size = 14523 }, + { url = "https://files.pythonhosted.org/packages/11/a8/8610143e9ebad9596e402260f63cbb6168f99719f07e13847b1df5a28f4d/scrapegraph_py-1.12.0-py3-none-any.whl", hash = "sha256:fd74d091529d3f8f5ba057950333e15a48ac5c0be7e2a56a8f2bad04cebdac30", size = 15458 }, ] [[package]] @@ -8292,19 +8292,19 @@ dependencies = [ { name = "fsspec" }, { name = "jinja2" }, { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 
'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nccl-cu12", 
marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, { name = "typing-extensions" }, ] wheels = [ @@ -8345,7 +8345,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } wheels = [