feat: integration of scrapegraph apis (#5551)

* feat: integration of scrapegraph apis

* feat: refactoring of descriptions

* update uv lock

* [autofix.ci] apply automated fixes

* pyproject update

* did make format_backend

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Marco Vinciguerra 2025-01-22 21:16:58 +01:00 committed by GitHub
commit dc8e40cb2d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 324 additions and 83 deletions

View file

@ -0,0 +1,4 @@
from .scrapegraph_markdownify_api import ScrapeGraphMarkdownifyApi
from .scrapegraph_smart_scraper_api import ScrapeGraphSmartScraperApi
# Public API of this package: the two ScrapeGraph components imported above.
__all__ = ["ScrapeGraphMarkdownifyApi", "ScrapeGraphSmartScraperApi"]

View file

@ -0,0 +1,66 @@
from langflow.custom import Component
from langflow.io import (
Output,
SecretStrInput,
StrInput,
)
from langflow.schema import Data
class ScrapeGraphMarkdownifyApi(Component):
    """Langflow component that calls the ScrapeGraph ``markdownify`` endpoint.

    The component takes a ScrapeGraph API key and a URL, sends the URL to the
    ``scrapegraph-py`` client's ``markdownify`` method, and wraps the response
    in a ``Data`` object.
    """

    display_name: str = "ScrapeGraphMarkdownifyApi"
    description: str = (
        "ScrapeGraph Markdownify API.\n"
        "Given a URL, it will return the markdownified content of the website.\n"
        "More info at https://docs.scrapegraphai.com/services/markdownify"
    )
    name = "ScrapeGraphMarkdownifyApi"

    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.scrapegraphai.com/introduction"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="ScrapeGraph API Key",
            required=True,
            password=True,
            info="The API key to use ScrapeGraph API.",
        ),
        StrInput(
            name="url",
            display_name="URL",
            required=True,
            info="The URL to markdownify.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="scrape"),
    ]

    def scrape(self) -> Data:
        """Markdownify ``self.url`` and return the API response as ``Data``.

        Returns:
            A single ``Data`` object whose ``data`` is the response returned
            by ``Client.markdownify``.

        Raises:
            ImportError: If the ``scrapegraph-py`` package is not installed.
            Exception: Any error raised by the ScrapeGraph client is
                propagated unchanged after the client has been closed.
        """
        # Imported lazily so the component can be loaded even when the
        # optional scrapegraph-py dependency is not installed.
        try:
            from scrapegraph_py import Client
            from scrapegraph_py.logger import sgai_logger
        except ImportError as e:
            msg = "Could not import scrapegraph-py package. Please install it with `pip install scrapegraph-py`."
            raise ImportError(msg) from e

        # Set logging level
        sgai_logger.set_logging(level="INFO")

        # Initialize the client with API key
        sgai_client = Client(api_key=self.api_key)

        try:
            # Markdownify request
            response = sgai_client.markdownify(
                website_url=self.url,
            )
            return Data(data=response)
        finally:
            # Close the client exactly once, on success and on failure alike.
            sgai_client.close()

View file

@ -0,0 +1,66 @@
from langflow.custom import Component
from langflow.io import (
Output,
SecretStrInput,
StrInput,
)
from langflow.schema import Data
class ScrapeGraphSmartScraperApi(Component):
    """Langflow component that calls the ScrapeGraph ``smartscraper`` endpoint.

    The component takes a ScrapeGraph API key, a URL and an extraction prompt,
    sends them to the ``scrapegraph-py`` client's ``smartscraper`` method, and
    wraps the response in a ``Data`` object.
    """

    display_name: str = "ScrapeGraphSmartScraperApi"
    description: str = (
        "ScrapeGraph Smart Scraper API.\n"
        "Given a URL, it will return the structured data of the website.\n"
        "More info at https://docs.scrapegraphai.com/services/smartscraper"
    )
    name = "ScrapeGraphSmartScraperApi"

    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.scrapegraphai.com/introduction"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="ScrapeGraph API Key",
            required=True,
            password=True,
            info="The API key to use ScrapeGraph API.",
        ),
        StrInput(
            name="url",
            display_name="URL",
            required=True,
            info="The URL to scrape.",
        ),
        # The SmartScraper endpoint needs a natural-language instruction in
        # addition to the URL; without it the client call cannot be made.
        StrInput(
            name="prompt",
            display_name="Prompt",
            required=True,
            info="The prompt to use for the smart scraper.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="scrape"),
    ]

    def scrape(self) -> Data:
        """Run SmartScraper on ``self.url`` and return the response as ``Data``.

        Returns:
            A single ``Data`` object whose ``data`` is the response returned
            by ``Client.smartscraper``.

        Raises:
            ImportError: If the ``scrapegraph-py`` package is not installed.
            Exception: Any error raised by the ScrapeGraph client is
                propagated unchanged after the client has been closed.
        """
        # Imported lazily so the component can be loaded even when the
        # optional scrapegraph-py dependency is not installed.
        try:
            from scrapegraph_py import Client
            from scrapegraph_py.logger import sgai_logger
        except ImportError as e:
            msg = "Could not import scrapegraph-py package. Please install it with `pip install scrapegraph-py`."
            raise ImportError(msg) from e

        # Set logging level
        sgai_logger.set_logging(level="INFO")

        # Initialize the client with API key
        sgai_client = Client(api_key=self.api_key)

        try:
            # SmartScraper request
            response = sgai_client.smartscraper(
                website_url=self.url,
                user_prompt=self.prompt,
            )
            return Data(data=response)
        finally:
            # Close the client exactly once, on success and on failure alike.
            sgai_client.close()