Added Firecrawl integration (#2359)

Added `FirecrawlScrapeApi` and `FirecrawlCrawlApi`.

Also updated the example on
https://github.com/langflow-ai/langflow_examples/pull/21
This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-06-25 14:33:20 -07:00 committed by GitHub
commit 201de54eb9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 291 additions and 7 deletions

26
poetry.lock generated
View file

@ -2112,6 +2112,20 @@ files = [
{file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"},
]
[[package]]
name = "firecrawl-py"
version = "0.0.16"
description = "Python SDK for Firecrawl API"
optional = false
python-versions = ">=3.8"
files = [
{file = "firecrawl_py-0.0.16-py3-none-any.whl", hash = "sha256:9024f483b501852a6b9c4e6cdfc9e8dde452d922afac357080bb278a0c9c2a26"},
{file = "firecrawl_py-0.0.16.tar.gz", hash = "sha256:6c662fa0a549bc7f5c0acb704baba6731869ca0451094034264dfc1b4eb086e4"},
]
[package.dependencies]
requests = "*"
[[package]]
name = "flaml"
version = "2.1.2"
@ -2432,8 +2446,8 @@ files = [
[package.dependencies]
cffi = {version = ">=1.12.2", markers = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
greenlet = [
{version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.11\""},
{version = ">=3.0rc3", markers = "platform_python_implementation == \"CPython\" and python_version >= \"3.11\""},
{version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.11\""},
]
"zope.event" = "*"
"zope.interface" = "*"
@ -2592,12 +2606,12 @@ files = [
google-auth = ">=2.14.1,<3.0.dev0"
googleapis-common-protos = ">=1.56.2,<2.0.dev0"
grpcio = [
{version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
{version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
{version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
]
grpcio-status = [
{version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
{version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
{version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
]
proto-plus = ">=1.22.3,<2.0.0dev"
protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0"
@ -4614,8 +4628,8 @@ psutil = ">=5.9.1"
pywin32 = {version = "*", markers = "platform_system == \"Windows\""}
pyzmq = ">=25.0.0"
requests = [
{version = ">=2.26.0", markers = "python_version <= \"3.11\""},
{version = ">=2.32.2", markers = "python_version > \"3.11\""},
{version = ">=2.26.0", markers = "python_version <= \"3.11\""},
]
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.11\""}
@ -6080,9 +6094,9 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""},
{version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
{version = ">=1.23.2,<2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@ -10552,4 +10566,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "f7377e3a997651cbcec2b9227b0bcde2507afc7d6236b708f4dc62857f150578"
content-hash = "3e72b6faa1c674615a7e5dec3e7d962349e736bf6675c08a49080b7f336cc75b"

View file

@ -92,6 +92,7 @@ unstructured = {extras = ["docx", "md", "pptx"], version = "^0.14.4"}
langchain-aws = "^0.1.6"
langchain-mongodb = "^0.1.6"
kubernetes = "^30.1.0"
firecrawl-py = "^0.0.16"
[tool.poetry.group.dev.dependencies]

View file

@ -0,0 +1,81 @@
from typing import Optional
from firecrawl.firecrawl import FirecrawlApp
from langflow.custom import CustomComponent
from langflow.schema import Data
import uuid
class FirecrawlCrawlApi(CustomComponent):
    """Langflow custom component that starts a Firecrawl crawl from a URL.

    ``build`` creates a ``FirecrawlApp`` with the supplied API key, calls
    ``crawl_url`` and wraps whatever it returns in a ``Data`` object under
    the ``"results"`` key.
    """

    display_name: str = "FirecrawlCrawlApi"
    description: str = "Firecrawl Crawl API."
    # NOTE(review): declared output type is "Document" while build() returns a
    # Data object - confirm this is what the UI expects.
    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl"

    # UI metadata for each build() argument.
    field_config = {
        "api_key": {
            "display_name": "API Key",
            "field_type": "str",
            "required": True,
            "password": True,
            "info": "The API key to use Firecrawl API.",
        },
        "url": {
            "display_name": "URL",
            "field_type": "str",
            "required": True,
            "info": "The base URL to start crawling from.",
        },
        "timeout": {
            "display_name": "Timeout",
            "field_type": "int",
            "info": "The timeout in milliseconds.",
        },
        "crawlerOptions": {
            "display_name": "Crawler Options",
            "info": "Options for the crawler behavior.",
        },
        "pageOptions": {
            "display_name": "Page Options",
            "info": "The page options to send with the request.",
        },
        "idempotency_key": {
            "display_name": "Idempotency Key",
            "field_type": "str",
            "info": "Optional idempotency key to ensure unique requests.",
        },
    }

    def build(
        self,
        api_key: str,
        url: str,
        timeout: Optional[int] = 30000,
        crawlerOptions: Optional[Data] = None,
        pageOptions: Optional[Data] = None,
        idempotency_key: Optional[str] = None,
    ) -> Data:
        """Run a crawl and return its result wrapped in ``Data``.

        Args:
            api_key: Firecrawl API key handed to ``FirecrawlApp``.
            url: URL passed to ``crawl_url`` as the crawl starting point.
            timeout: Value in milliseconds; converted to whole seconds before
                being passed on. ``None`` falls back to the default of 30000.
            crawlerOptions: Optional ``Data`` whose ``"text"`` entry is sent
                as ``crawlerOptions``; an empty dict is sent when it is
                missing or empty.
            pageOptions: Optional ``Data`` whose ``"text"`` entry is sent as
                ``pageOptions``; an empty dict is sent when it is missing or
                empty.
            idempotency_key: Key forwarded to ``crawl_url``; a random UUID4
                string is generated when none is given.

        Returns:
            ``Data`` with the crawl result stored under ``"results"``.
        """
        # NOTE(review): the options are read through __dict__ as in the
        # original code; a public accessor on Data may be preferable - confirm.
        # .get() avoids a KeyError when the Data object has no "text" entry.
        if crawlerOptions:
            crawler_options_dict = crawlerOptions.__dict__["data"].get("text") or {}
        else:
            crawler_options_dict = {}

        if pageOptions:
            page_options_dict = pageOptions.__dict__["data"].get("text") or {}
        else:
            page_options_dict = {}

        if not idempotency_key:
            idempotency_key = str(uuid.uuid4())

        # The field is optional, so an explicit None can arrive here; dividing
        # None would raise TypeError, so use the declared default instead.
        timeout_ms = 30000 if timeout is None else timeout

        app = FirecrawlApp(api_key=api_key)
        # Arguments after the params dict are kept positional exactly as
        # before. NOTE(review): presumably "wait until done" and a value in
        # seconds - confirm against the installed firecrawl-py signature.
        crawl_result = app.crawl_url(
            url,
            {
                "crawlerOptions": crawler_options_dict,
                "pageOptions": page_options_dict,
            },
            True,
            int(timeout_ms / 1000),
            idempotency_key,
        )

        records = Data(data={"results": crawl_result})
        return records

View file

@ -0,0 +1,72 @@
from typing import Optional
from firecrawl.firecrawl import FirecrawlApp
from langflow.custom import CustomComponent
from langflow.schema import Data
from langflow.services.database.models.base import orjson_dumps
import json
class FirecrawlScrapeApi(CustomComponent):
    """Langflow custom component that scrapes a single URL through Firecrawl.

    ``build`` creates a ``FirecrawlApp`` with the supplied API key, calls
    ``scrape_url`` and wraps whatever it returns in a ``Data`` object.
    """

    display_name: str = "FirecrawlScrapeApi"
    description: str = "Firecrawl Scrape API."
    # NOTE(review): declared output type is "Document" while build() returns a
    # Data object - confirm this is what the UI expects.
    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"

    # UI metadata for each build() argument.
    field_config = {
        "api_key": {
            "display_name": "API Key",
            "field_type": "str",
            "required": True,
            "password": True,
            "info": "The API key to use Firecrawl API.",
        },
        "url": {
            "display_name": "URL",
            "field_type": "str",
            "required": True,
            "info": "The URL to scrape.",
        },
        "timeout": {
            "display_name": "Timeout",
            "info": "Timeout in milliseconds for the request.",
            "field_type": "int",
            "default_value": 10000,
        },
        "pageOptions": {
            "display_name": "Page Options",
            "info": "The page options to send with the request.",
        },
        "extractorOptions": {
            "display_name": "Extractor Options",
            "info": "The extractor options to send with the request.",
        },
    }

    def build(
        self,
        api_key: str,
        url: str,
        timeout: Optional[int] = 10000,
        pageOptions: Optional[Data] = None,
        extractorOptions: Optional[Data] = None,
    ) -> Data:
        """Scrape ``url`` and return the result wrapped in ``Data``.

        Args:
            api_key: Firecrawl API key handed to ``FirecrawlApp``.
            url: URL passed to ``scrape_url``.
            timeout: Value in milliseconds, sent in the request params as a
                string. ``None`` falls back to the default of 10000.
            pageOptions: Optional ``Data`` whose ``"text"`` entry is sent as
                ``pageOptions``; an empty dict is sent when it is missing or
                empty.
            extractorOptions: Optional ``Data`` whose ``"text"`` entry is sent
                as ``extractorOptions``; an empty dict is sent when it is
                missing or empty.

        Returns:
            ``Data`` built directly from the value ``scrape_url`` returns.
        """
        # NOTE(review): the options are read through __dict__ as in the
        # original code; a public accessor on Data may be preferable - confirm.
        # .get() avoids a KeyError when the Data object has no "text" entry.
        if extractorOptions:
            extractor_options_dict = extractorOptions.__dict__["data"].get("text") or {}
        else:
            extractor_options_dict = {}

        if pageOptions:
            page_options_dict = pageOptions.__dict__["data"].get("text") or {}
        else:
            page_options_dict = {}

        # The field is optional, so an explicit None can arrive here; without
        # this fallback the request would carry the literal string "None".
        timeout_ms = 10000 if timeout is None else timeout

        app = FirecrawlApp(api_key=api_key)
        results = app.scrape_url(
            url,
            {
                # Sent as a string, as before. NOTE(review): confirm whether
                # the API expects an integer here.
                "timeout": str(timeout_ms),
                "extractorOptions": extractor_options_dict,
                "pageOptions": page_options_dict,
            },
        )

        record = Data(data=results)
        return record

View file

@ -739,6 +739,20 @@ typer = ">=0.12.3"
[package.extras]
standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"]
[[package]]
name = "firecrawl-py"
version = "0.0.16"
description = "Python SDK for Firecrawl API"
optional = false
python-versions = ">=3.8"
files = [
{file = "firecrawl_py-0.0.16-py3-none-any.whl", hash = "sha256:9024f483b501852a6b9c4e6cdfc9e8dde452d922afac357080bb278a0c9c2a26"},
{file = "firecrawl_py-0.0.16.tar.gz", hash = "sha256:6c662fa0a549bc7f5c0acb704baba6731869ca0451094034264dfc1b4eb086e4"},
]
[package.dependencies]
requests = "*"
[[package]]
name = "frozenlist"
version = "1.4.1"
@ -3235,4 +3249,4 @@ local = []
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "4f566531a8539ddc81cb91a7e7f9b723c84679f0af5bb8619f7b02f9ffc6cfaa"
content-hash = "7e46144d27c633214f00e73e496c0e4d56db1fb47032a21861677ec275b79d86"

View file

@ -64,6 +64,7 @@ pyperclip = "^1.8.2"
uncurl = "^0.0.11"
sentry-sdk = {extras = ["fastapi", "loguru"], version = "^2.5.1"}
chardet = "^5.2.0"
firecrawl-py = "^0.0.16"
[tool.poetry.extras]

View file

@ -0,0 +1,61 @@
const SvgFirecrawlLogo = (props) => (
<svg
viewBox="-33 0 255 255"
width="24"
height="24"
xmlns="http://www.w3.org/2000/svg"
xmlnsXlink="http://www.w3.org/1999/xlink"
preserveAspectRatio="xMidYMid"
>
<defs>
<style>
{`
.cls-3 {
fill: url(#linear-gradient-1);
}
.cls-4 {
fill: #fc9502;
}
.cls-5 {
fill: #fce202;
}
`}
</style>
<linearGradient
id="linear-gradient-1"
gradientUnits="userSpaceOnUse"
x1="94.141"
y1="255"
x2="94.141"
y2="0.188"
>
<stop offset="0" stopColor="#ff4c0d" />
<stop offset="1" stopColor="#fc9502" />
</linearGradient>
</defs>
<g id="fire">
<path
d="M187.899,164.809 C185.803,214.868 144.574,254.812 94.000,254.812 C42.085,254.812 -0.000,211.312 -0.000,160.812 C-0.000,154.062 -0.121,140.572 10.000,117.812 C16.057,104.191 19.856,95.634 22.000,87.812 C23.178,83.513 25.469,76.683 32.000,87.812 C35.851,94.374 36.000,103.812 36.000,103.812 C36.000,103.812 50.328,92.817 60.000,71.812 C74.179,41.019 62.866,22.612 59.000,9.812 C57.662,5.384 56.822,-2.574 66.000,0.812 C75.352,4.263 100.076,21.570 113.000,39.812 C131.445,65.847 138.000,90.812 138.000,90.812 C138.000,90.812 143.906,83.482 146.000,75.812 C148.365,67.151 148.400,58.573 155.999,67.813 C163.226,76.600 173.959,93.113 180.000,108.812 C190.969,137.321 187.899,164.809 187.899,164.809 Z"
id="path-1"
className="cls-3"
fillRule="evenodd"
/>
<path
d="M94.000,254.812 C58.101,254.812 29.000,225.711 29.000,189.812 C29.000,168.151 37.729,155.000 55.896,137.166 C67.528,125.747 78.415,111.722 83.042,102.172 C83.953,100.292 86.026,90.495 94.019,101.966 C98.212,107.982 104.785,118.681 109.000,127.812 C116.266,143.555 118.000,158.812 118.000,158.812 C118.000,158.812 125.121,154.616 130.000,143.812 C131.573,140.330 134.753,127.148 143.643,140.328 C150.166,150.000 159.127,167.390 159.000,189.812 C159.000,225.711 129.898,254.812 94.000,254.812 Z"
id="path-2"
className="cls-4"
fillRule="evenodd"
/>
<path
d="M95.000,183.812 C104.250,183.812 104.250,200.941 116.000,223.812 C123.824,239.041 112.121,254.812 95.000,254.812 C77.879,254.812 69.000,240.933 69.000,223.812 C69.000,206.692 85.750,183.812 95.000,183.812 Z"
id="path-3"
className="cls-5"
fillRule="evenodd"
/>
</g>
</svg>
);
export default SvgFirecrawlLogo;

View file

@ -0,0 +1,28 @@
<svg width="800px" height="800px" viewBox="-33 0 255 255" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" preserveAspectRatio="xMidYMid">
<defs>
<style>
.cls-3 {
fill: url(#linear-gradient-1);
}
.cls-4 {
fill: #fc9502;
}
.cls-5 {
fill: #fce202;
}
</style>
<linearGradient id="linear-gradient-1" gradientUnits="userSpaceOnUse" x1="94.141" y1="255" x2="94.141" y2="0.188">
<stop offset="0" stop-color="#ff4c0d"/>
<stop offset="1" stop-color="#fc9502"/>
</linearGradient>
</defs>
<g id="fire">
<path d="M187.899,164.809 C185.803,214.868 144.574,254.812 94.000,254.812 C42.085,254.812 -0.000,211.312 -0.000,160.812 C-0.000,154.062 -0.121,140.572 10.000,117.812 C16.057,104.191 19.856,95.634 22.000,87.812 C23.178,83.513 25.469,76.683 32.000,87.812 C35.851,94.374 36.000,103.812 36.000,103.812 C36.000,103.812 50.328,92.817 60.000,71.812 C74.179,41.019 62.866,22.612 59.000,9.812 C57.662,5.384 56.822,-2.574 66.000,0.812 C75.352,4.263 100.076,21.570 113.000,39.812 C131.445,65.847 138.000,90.812 138.000,90.812 C138.000,90.812 143.906,83.482 146.000,75.812 C148.365,67.151 148.400,58.573 155.999,67.813 C163.226,76.600 173.959,93.113 180.000,108.812 C190.969,137.321 187.899,164.809 187.899,164.809 Z" id="path-1" class="cls-3" fill-rule="evenodd"/>
<path d="M94.000,254.812 C58.101,254.812 29.000,225.711 29.000,189.812 C29.000,168.151 37.729,155.000 55.896,137.166 C67.528,125.747 78.415,111.722 83.042,102.172 C83.953,100.292 86.026,90.495 94.019,101.966 C98.212,107.982 104.785,118.681 109.000,127.812 C116.266,143.555 118.000,158.812 118.000,158.812 C118.000,158.812 125.121,154.616 130.000,143.812 C131.573,140.330 134.753,127.148 143.643,140.328 C150.166,150.000 159.127,167.390 159.000,189.812 C159.000,225.711 129.898,254.812 94.000,254.812 Z" id="path-2" class="cls-4" fill-rule="evenodd"/>
<path d="M95.000,183.812 C104.250,183.812 104.250,200.941 116.000,223.812 C123.824,239.041 112.121,254.812 95.000,254.812 C77.879,254.812 69.000,240.933 69.000,223.812 C69.000,206.692 85.750,183.812 95.000,183.812 Z" id="path-3" class="cls-5" fill-rule="evenodd"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 2.2 KiB

View file

@ -0,0 +1,9 @@
import React, { forwardRef } from "react";
import SvgFirecrawlLogo from "./FirecrawlLogo";
/**
 * Icon wrapper around `SvgFirecrawlLogo`.
 *
 * Passes the forwarded ref and every received prop straight through to
 * `SvgFirecrawlLogo`.
 */
export const FirecrawlIcon = forwardRef<
  SVGSVGElement,
  React.PropsWithChildren<{}>
>((iconProps, svgRef) => <SvgFirecrawlLogo ref={svgRef} {...iconProps} />);

View file

@ -171,6 +171,7 @@ import { CouchbaseIcon } from "../icons/Couchbase";
import { ElasticsearchIcon } from "../icons/ElasticsearchStore";
import { EvernoteIcon } from "../icons/Evernote";
import { FBIcon } from "../icons/FacebookMessenger";
import { FirecrawlIcon } from "../icons/Firecrawl";
import { GitBookIcon } from "../icons/GitBook";
import { GoogleIcon } from "../icons/Google";
import { GoogleGenerativeAIIcon } from "../icons/GoogleGenerativeAI";
@ -363,6 +364,8 @@ export const nodeIconsLucide: iconsType = {
CohereEmbeddings: CohereIcon,
EverNoteLoader: EvernoteIcon,
FacebookChatLoader: FBIcon,
FirecrawlCrawlApi: FirecrawlIcon,
FirecrawlScrapeApi: FirecrawlIcon,
GitbookLoader: GitBookIcon,
GoogleSearchAPIWrapper: GoogleIcon,
GoogleSearchResults: GoogleIcon,