feat: ScrapeGraph AI component updates and add new component ScrapeGraphSearch API (#6305)
* feat: add search * Update __init__.py * Update pyproject.toml * feat: update scrapegraph components * Update scrapegraph_smart_scraper_api.py * Update scrapegraph_smart_scraper_api.py * removed required * Update scrapegraph_smart_scraper_api.py * formatting
This commit is contained in:
parent
ec445ce7df
commit
898775c36d
6 changed files with 104 additions and 30 deletions
|
|
@ -109,7 +109,7 @@ dependencies = [
|
|||
"mcp>=0.9.1",
|
||||
"uv>=0.5.7",
|
||||
"ag2>=0.1.0",
|
||||
"scrapegraph-py>=1.10.2",
|
||||
"scrapegraph-py>=1.12.0",
|
||||
"pydantic-ai>=0.0.19",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from .scrapegraph_markdownify_api import ScrapeGraphMarkdownifyApi
|
||||
from .scrapegraph_search_api import ScrapeGraphSearchApi
|
||||
from .scrapegraph_smart_scraper_api import ScrapeGraphSmartScraperApi
|
||||
|
||||
__all__ = ["ScrapeGraphMarkdownifyApi", "ScrapeGraphSmartScraperApi"]
|
||||
__all__ = ["ScrapeGraphMarkdownifyApi", "ScrapeGraphSearchApi", "ScrapeGraphSmartScraperApi"]
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
from langflow.custom import Component
|
||||
from langflow.io import (
|
||||
MessageTextInput,
|
||||
Output,
|
||||
SecretStrInput,
|
||||
StrInput,
|
||||
)
|
||||
from langflow.schema import Data
|
||||
|
||||
|
|
@ -25,10 +25,10 @@ class ScrapeGraphMarkdownifyApi(Component):
|
|||
password=True,
|
||||
info="The API key to use ScrapeGraph API.",
|
||||
),
|
||||
StrInput(
|
||||
MessageTextInput(
|
||||
name="url",
|
||||
display_name="URL",
|
||||
required=True,
|
||||
tool_mode=True,
|
||||
info="The URL to markdownify.",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,66 @@
|
|||
from langflow.custom import Component
|
||||
from langflow.io import (
|
||||
MessageTextInput,
|
||||
Output,
|
||||
SecretStrInput,
|
||||
)
|
||||
from langflow.schema import Data
|
||||
|
||||
|
||||
class ScrapeGraphSearchApi(Component):
    """Langflow component that sends a search prompt to ScrapeGraph's SearchScraper.

    The component takes an API key and a free-text search prompt, calls
    ``Client.searchscraper`` from the ``scrapegraph-py`` package, and exposes
    the response through a single ``Data`` output.
    """

    display_name: str = "ScrapeGraphSearchApi"
    # Same three-line description text as before, spelled with explicit
    # newlines so the literal does not depend on source indentation.
    description: str = (
        "ScrapeGraph Search API.\n"
        "Given a search prompt, it will return search results using ScrapeGraph's search functionality.\n"
        "More info at https://docs.scrapegraphai.com/services/searchscraper"
    )
    name = "ScrapeGraphSearchApi"

    documentation: str = "https://docs.scrapegraphai.com/introduction"
    icon = "ScrapeGraph"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="ScrapeGraph API Key",
            required=True,
            password=True,
            info="The API key to use ScrapeGraph API.",
        ),
        MessageTextInput(
            name="user_prompt",
            display_name="Search Prompt",
            tool_mode=True,
            info="The search prompt to use.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="search"),
    ]

    def search(self) -> Data:
        """Run a SearchScraper request for ``self.user_prompt``.

        Returns:
            A single ``Data`` object wrapping the raw SearchScraper response.

        Raises:
            ImportError: If the ``scrapegraph-py`` package is not installed.
            Exception: Any error raised by the ScrapeGraph client is propagated
                unchanged after the client has been closed.
        """
        # Imported lazily so the component can be loaded without the optional
        # scrapegraph-py dependency; the failure only surfaces on execution.
        try:
            from scrapegraph_py import Client
            from scrapegraph_py.logger import sgai_logger
        except ImportError as e:
            msg = "Could not import scrapegraph-py package. Please install it with `pip install scrapegraph-py`."
            raise ImportError(msg) from e

        # Set logging level
        sgai_logger.set_logging(level="INFO")

        # Initialize the client with API key
        sgai_client = Client(api_key=self.api_key)

        try:
            # SearchScraper request
            response = sgai_client.searchscraper(
                user_prompt=self.user_prompt,
            )
        finally:
            # Close the client exactly once, whether the request succeeded,
            # raised, or was interrupted.
            sgai_client.close()

        return Data(data=response)
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
from langflow.custom import Component
|
||||
from langflow.io import (
|
||||
MessageTextInput,
|
||||
Output,
|
||||
SecretStrInput,
|
||||
StrInput,
|
||||
)
|
||||
from langflow.schema import Data
|
||||
|
||||
|
|
@ -25,12 +25,18 @@ class ScrapeGraphSmartScraperApi(Component):
|
|||
password=True,
|
||||
info="The API key to use ScrapeGraph API.",
|
||||
),
|
||||
StrInput(
|
||||
MessageTextInput(
|
||||
name="url",
|
||||
display_name="URL",
|
||||
required=True,
|
||||
tool_mode=True,
|
||||
info="The URL to scrape.",
|
||||
),
|
||||
MessageTextInput(
|
||||
name="prompt",
|
||||
display_name="Prompt",
|
||||
tool_mode=True,
|
||||
info="The prompt to use for the smart scraper.",
|
||||
),
|
||||
]
|
||||
|
||||
outputs = [
|
||||
|
|
@ -55,6 +61,7 @@ class ScrapeGraphSmartScraperApi(Component):
|
|||
# SmartScraper request
|
||||
response = sgai_client.smartscraper(
|
||||
website_url=self.url,
|
||||
user_prompt=self.prompt,
|
||||
)
|
||||
|
||||
# Close the client
|
||||
|
|
|
|||
44
uv.lock
generated
44
uv.lock
generated
|
|
@ -512,7 +512,7 @@ name = "blessed"
|
|||
version = "1.20.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "jinxed", marker = "sys_platform == 'win32'" },
|
||||
{ name = "jinxed", marker = "platform_system == 'Windows'" },
|
||||
{ name = "six" },
|
||||
{ name = "wcwidth" },
|
||||
]
|
||||
|
|
@ -906,7 +906,7 @@ name = "click"
|
|||
version = "8.1.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
|
||||
wheels = [
|
||||
|
|
@ -3005,7 +3005,7 @@ name = "ipykernel"
|
|||
version = "6.29.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "appnope", marker = "sys_platform == 'darwin'" },
|
||||
{ name = "appnope", marker = "platform_system == 'Darwin'" },
|
||||
{ name = "comm" },
|
||||
{ name = "debugpy" },
|
||||
{ name = "ipython" },
|
||||
|
|
@ -3096,7 +3096,7 @@ name = "jinxed"
|
|||
version = "1.3.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "ansicon", marker = "sys_platform == 'win32'" },
|
||||
{ name = "ansicon", marker = "platform_system == 'Windows'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/20/d0/59b2b80e7a52d255f9e0ad040d2e826342d05580c4b1d7d7747cfb8db731/jinxed-1.3.0.tar.gz", hash = "sha256:1593124b18a41b7a3da3b078471442e51dbad3d77b4d4f2b0c26ab6f7d660dbf", size = 80981 }
|
||||
wheels = [
|
||||
|
|
@ -4048,7 +4048,7 @@ requires-dist = [
|
|||
{ name = "qianfan", specifier = "==0.3.5" },
|
||||
{ name = "ragstack-ai-knowledge-store", specifier = "==0.2.1" },
|
||||
{ name = "redis", specifier = "==5.2.1" },
|
||||
{ name = "scrapegraph-py", specifier = ">=1.10.2" },
|
||||
{ name = "scrapegraph-py", specifier = ">=1.12.0" },
|
||||
{ name = "sentence-transformers", marker = "extra == 'local'", specifier = ">=2.3.1" },
|
||||
{ name = "spider-client", specifier = "==0.1.24" },
|
||||
{ name = "sqlalchemy", extras = ["aiosqlite", "postgresql-psycopg2binary", "postgresql-psycopgbinary"], specifier = ">=2.0.38,<3.0.0" },
|
||||
|
|
@ -5933,7 +5933,7 @@ name = "portalocker"
|
|||
version = "2.10.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pywin32", marker = "sys_platform == 'win32'" },
|
||||
{ name = "pywin32", marker = "platform_system == 'Windows'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 }
|
||||
wheels = [
|
||||
|
|
@ -7645,7 +7645,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "scrapegraph-py"
|
||||
version = "1.10.2"
|
||||
version = "1.12.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
|
|
@ -7654,9 +7654,9 @@ dependencies = [
|
|||
{ name = "python-dotenv" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/07/25/e0a55d0b5bced151f30203bf9c187dd52725b2cd4dd0b0c195fff97f8a47/scrapegraph_py-1.10.2.tar.gz", hash = "sha256:42eb27de0da25b9b912a5b4da9851398b79dec147af749fea4a36f54511a5e8b", size = 110397 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/42/36/10546d18157cd2efb2de09098fac8dd6f689b98842a4cc71bb7fc29ba4b9/scrapegraph_py-1.12.0.tar.gz", hash = "sha256:82d27e8ea325975f768f80d4edf403b6294518dae6a1e3ae63e27b8934a5dacb", size = 113290 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c0/7e/7c31a844c2f944fd7851bc4d2c2924fa21e30cd0a274a21a8460da0b4350/scrapegraph_py-1.10.2-py3-none-any.whl", hash = "sha256:c69baa8d4f1b21f4f728705b4798246f728080e9d6eaf6a90c203991df872dfc", size = 14523 },
|
||||
{ url = "https://files.pythonhosted.org/packages/11/a8/8610143e9ebad9596e402260f63cbb6168f99719f07e13847b1df5a28f4d/scrapegraph_py-1.12.0-py3-none-any.whl", hash = "sha256:fd74d091529d3f8f5ba057950333e15a48ac5c0be7e2a56a8f2bad04cebdac30", size = 15458 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -8292,19 +8292,19 @@ dependencies = [
|
|||
{ name = "fsspec" },
|
||||
{ name = "jinja2" },
|
||||
{ name = "networkx" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "sympy" },
|
||||
{ name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "triton", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
wheels = [
|
||||
|
|
@ -8345,7 +8345,7 @@ name = "tqdm"
|
|||
version = "4.67.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
|
||||
wheels = [
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue