Added Firecrawl integration (#2359)
Added the `FirecrawlScrapeApi` and `FirecrawlCrawlApi` components. Also updated the example in https://github.com/langflow-ai/langflow_examples/pull/21
This commit is contained in:
commit
201de54eb9
10 changed files with 291 additions and 7 deletions
26
poetry.lock
generated
26
poetry.lock
generated
|
|
@ -2112,6 +2112,20 @@ files = [
|
|||
{file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "firecrawl-py"
|
||||
version = "0.0.16"
|
||||
description = "Python SDK for Firecrawl API"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "firecrawl_py-0.0.16-py3-none-any.whl", hash = "sha256:9024f483b501852a6b9c4e6cdfc9e8dde452d922afac357080bb278a0c9c2a26"},
|
||||
{file = "firecrawl_py-0.0.16.tar.gz", hash = "sha256:6c662fa0a549bc7f5c0acb704baba6731869ca0451094034264dfc1b4eb086e4"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
requests = "*"
|
||||
|
||||
[[package]]
|
||||
name = "flaml"
|
||||
version = "2.1.2"
|
||||
|
|
@ -2432,8 +2446,8 @@ files = [
|
|||
[package.dependencies]
|
||||
cffi = {version = ">=1.12.2", markers = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
|
||||
greenlet = [
|
||||
{version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.11\""},
|
||||
{version = ">=3.0rc3", markers = "platform_python_implementation == \"CPython\" and python_version >= \"3.11\""},
|
||||
{version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.11\""},
|
||||
]
|
||||
"zope.event" = "*"
|
||||
"zope.interface" = "*"
|
||||
|
|
@ -2592,12 +2606,12 @@ files = [
|
|||
google-auth = ">=2.14.1,<3.0.dev0"
|
||||
googleapis-common-protos = ">=1.56.2,<2.0.dev0"
|
||||
grpcio = [
|
||||
{version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
|
||||
{version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
|
||||
{version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
|
||||
]
|
||||
grpcio-status = [
|
||||
{version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
|
||||
{version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
|
||||
{version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
|
||||
]
|
||||
proto-plus = ">=1.22.3,<2.0.0dev"
|
||||
protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0"
|
||||
|
|
@ -4614,8 +4628,8 @@ psutil = ">=5.9.1"
|
|||
pywin32 = {version = "*", markers = "platform_system == \"Windows\""}
|
||||
pyzmq = ">=25.0.0"
|
||||
requests = [
|
||||
{version = ">=2.26.0", markers = "python_version <= \"3.11\""},
|
||||
{version = ">=2.32.2", markers = "python_version > \"3.11\""},
|
||||
{version = ">=2.26.0", markers = "python_version <= \"3.11\""},
|
||||
]
|
||||
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
|
||||
typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.11\""}
|
||||
|
|
@ -6080,9 +6094,9 @@ files = [
|
|||
|
||||
[package.dependencies]
|
||||
numpy = [
|
||||
{version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""},
|
||||
{version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
|
||||
{version = ">=1.23.2,<2", markers = "python_version == \"3.11\""},
|
||||
{version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""},
|
||||
]
|
||||
python-dateutil = ">=2.8.2"
|
||||
pytz = ">=2020.1"
|
||||
|
|
@ -10552,4 +10566,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "f7377e3a997651cbcec2b9227b0bcde2507afc7d6236b708f4dc62857f150578"
|
||||
content-hash = "3e72b6faa1c674615a7e5dec3e7d962349e736bf6675c08a49080b7f336cc75b"
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ unstructured = {extras = ["docx", "md", "pptx"], version = "^0.14.4"}
|
|||
langchain-aws = "^0.1.6"
|
||||
langchain-mongodb = "^0.1.6"
|
||||
kubernetes = "^30.1.0"
|
||||
firecrawl-py = "^0.0.16"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
from typing import Optional
|
||||
from firecrawl.firecrawl import FirecrawlApp
|
||||
from langflow.custom import CustomComponent
|
||||
from langflow.schema import Data
|
||||
import uuid
|
||||
|
||||
class FirecrawlCrawlApi(CustomComponent):
    """Langflow component that starts a crawl through the Firecrawl SDK.

    ``build`` creates a ``FirecrawlApp`` with the supplied API key, calls
    ``crawl_url`` for the given URL and wraps whatever that call returns in a
    ``Data`` object under the ``"results"`` key.
    """

    display_name: str = "FirecrawlCrawlApi"
    description: str = "Firecrawl Crawl API."
    # NOTE(review): declared as "Document" while ``build`` returns ``Data`` — confirm
    # this is the intended output type label.
    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl"
    field_config = {
        "api_key": {
            "display_name": "API Key",
            "field_type": "str",
            "required": True,
            "password": True,
            "info": "The API key to use Firecrawl API.",
        },
        "url": {
            "display_name": "URL",
            "field_type": "str",
            "required": True,
            "info": "The base URL to start crawling from.",
        },
        "timeout": {
            "display_name": "Timeout",
            "field_type": "int",
            "info": "The timeout in milliseconds.",
        },
        "crawlerOptions": {
            "display_name": "Crawler Options",
            "info": "Options for the crawler behavior.",
        },
        "pageOptions": {
            "display_name": "Page Options",
            "info": "The page options to send with the request.",
        },
        "idempotency_key": {
            "display_name": "Idempotency Key",
            "field_type": "str",
            "info": "Optional idempotency key to ensure unique requests.",
        },
    }

    def build(
        self,
        api_key: str,
        url: str,
        timeout: Optional[int] = 30000,
        crawlerOptions: Optional[Data] = None,
        pageOptions: Optional[Data] = None,
        idempotency_key: Optional[str] = None,
    ) -> Data:
        """Run a Firecrawl crawl and return its result wrapped in ``Data``.

        Args:
            api_key: Firecrawl API key passed to ``FirecrawlApp``.
            url: URL handed to ``crawl_url`` as the crawl starting point.
            timeout: Timeout in milliseconds; divided by 1000 before being
                passed on. ``None`` falls back to the default of 30000.
            crawlerOptions: Optional ``Data`` whose ``"text"`` entry is sent as
                the ``crawlerOptions`` request parameter.
            pageOptions: Optional ``Data`` whose ``"text"`` entry is sent as
                the ``pageOptions`` request parameter.
            idempotency_key: Key forwarded to ``crawl_url``; a random UUID4
                string is generated when it is missing or empty.

        Returns:
            ``Data`` with the raw ``crawl_url`` result under ``"results"``.
        """
        # NOTE(review): the "text" entries are forwarded unchanged as request
        # parameters, so they are presumably expected to be dicts — confirm
        # against how upstream components populate these Data objects.
        if crawlerOptions:
            crawler_options_dict = crawlerOptions.__dict__['data']['text']
        else:
            crawler_options_dict = {}

        if pageOptions:
            page_options_dict = pageOptions.__dict__['data']['text']
        else:
            page_options_dict = {}

        if not idempotency_key:
            idempotency_key = str(uuid.uuid4())

        # ``timeout`` is declared Optional; an explicit None would raise a
        # TypeError in the division below, so fall back to the declared default.
        if timeout is None:
            timeout = 30000

        app = FirecrawlApp(api_key=api_key)
        # Positional arguments after the params dict are presumably
        # wait_until_done, the timeout in seconds and the idempotency key —
        # TODO confirm against the installed firecrawl-py ``crawl_url`` signature.
        crawl_result = app.crawl_url(
            url,
            {
                "crawlerOptions": crawler_options_dict,
                "pageOptions": page_options_dict,
            },
            True,
            int(timeout / 1000),
            idempotency_key,
        )

        records = Data(data={"results": crawl_result})
        return records
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
from typing import Optional
|
||||
from firecrawl.firecrawl import FirecrawlApp
|
||||
from langflow.custom import CustomComponent
|
||||
from langflow.schema import Data
|
||||
from langflow.services.database.models.base import orjson_dumps
|
||||
import json
|
||||
|
||||
class FirecrawlScrapeApi(CustomComponent):
    """Langflow component that scrapes a single URL through the Firecrawl SDK.

    ``build`` creates a ``FirecrawlApp`` with the supplied API key, calls
    ``scrape_url`` and wraps the returned value in a ``Data`` object.
    """

    display_name: str = "FirecrawlScrapeApi"
    description: str = "Firecrawl Scrape API."
    # NOTE(review): declared as "Document" while ``build`` returns ``Data`` — confirm
    # this is the intended output type label.
    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"
    field_config = {
        "api_key": {
            "display_name": "API Key",
            "field_type": "str",
            "required": True,
            "password": True,
            "info": "The API key to use Firecrawl API.",
        },
        "url": {
            "display_name": "URL",
            "field_type": "str",
            "required": True,
            "info": "The URL to scrape.",
        },
        "timeout": {
            "display_name": "Timeout",
            "info": "Timeout in milliseconds for the request.",
            "field_type": "int",
            "default_value": 10000,
        },
        "pageOptions": {
            "display_name": "Page Options",
            "info": "The page options to send with the request.",
        },
        "extractorOptions": {
            "display_name": "Extractor Options",
            "info": "The extractor options to send with the request.",
        },
    }

    def build(
        self,
        api_key: str,
        url: str,
        timeout: Optional[int] = 10000,
        pageOptions: Optional[Data] = None,
        extractorOptions: Optional[Data] = None,
    ) -> Data:
        """Scrape ``url`` with Firecrawl and return the result wrapped in ``Data``.

        Args:
            api_key: Firecrawl API key passed to ``FirecrawlApp``.
            url: URL handed to ``scrape_url``.
            timeout: Timeout in milliseconds, sent as a string in the request
                parameters. ``None`` falls back to the default of 10000.
            pageOptions: Optional ``Data`` whose ``"text"`` entry is sent as
                the ``pageOptions`` request parameter.
            extractorOptions: Optional ``Data`` whose ``"text"`` entry is sent
                as the ``extractorOptions`` request parameter.

        Returns:
            ``Data`` constructed directly from the ``scrape_url`` result.
        """
        # NOTE(review): the "text" entries are forwarded unchanged as request
        # parameters, so they are presumably expected to be dicts — confirm
        # against how upstream components populate these Data objects.
        if extractorOptions:
            extractor_options_dict = extractorOptions.__dict__['data']['text']
        else:
            extractor_options_dict = {}

        if pageOptions:
            page_options_dict = pageOptions.__dict__['data']['text']
        else:
            page_options_dict = {}

        # ``timeout`` is declared Optional; without this guard an explicit None
        # would be sent to the API as the literal string "None".
        if timeout is None:
            timeout = 10000

        app = FirecrawlApp(api_key=api_key)
        results = app.scrape_url(
            url,
            {
                # NOTE(review): the value is stringified before sending —
                # confirm the endpoint accepts a string rather than an integer.
                "timeout": str(timeout),
                "extractorOptions": extractor_options_dict,
                "pageOptions": page_options_dict,
            },
        )

        record = Data(data=results)
        return record
|
||||
16
src/backend/base/poetry.lock
generated
16
src/backend/base/poetry.lock
generated
|
|
@ -739,6 +739,20 @@ typer = ">=0.12.3"
|
|||
[package.extras]
|
||||
standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "firecrawl-py"
|
||||
version = "0.0.16"
|
||||
description = "Python SDK for Firecrawl API"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "firecrawl_py-0.0.16-py3-none-any.whl", hash = "sha256:9024f483b501852a6b9c4e6cdfc9e8dde452d922afac357080bb278a0c9c2a26"},
|
||||
{file = "firecrawl_py-0.0.16.tar.gz", hash = "sha256:6c662fa0a549bc7f5c0acb704baba6731869ca0451094034264dfc1b4eb086e4"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
requests = "*"
|
||||
|
||||
[[package]]
|
||||
name = "frozenlist"
|
||||
version = "1.4.1"
|
||||
|
|
@ -3235,4 +3249,4 @@ local = []
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "4f566531a8539ddc81cb91a7e7f9b723c84679f0af5bb8619f7b02f9ffc6cfaa"
|
||||
content-hash = "7e46144d27c633214f00e73e496c0e4d56db1fb47032a21861677ec275b79d86"
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ pyperclip = "^1.8.2"
|
|||
uncurl = "^0.0.11"
|
||||
sentry-sdk = {extras = ["fastapi", "loguru"], version = "^2.5.1"}
|
||||
chardet = "^5.2.0"
|
||||
firecrawl-py = "^0.0.16"
|
||||
|
||||
|
||||
[tool.poetry.extras]
|
||||
|
|
|
|||
61
src/frontend/src/icons/Firecrawl/FirecrawlLogo.jsx
Normal file
61
src/frontend/src/icons/Firecrawl/FirecrawlLogo.jsx
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
const SvgFirecrawlLogo = (props) => (
|
||||
<svg
|
||||
viewBox="-33 0 255 255"
|
||||
width="24"
|
||||
height="24"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlnsXlink="http://www.w3.org/1999/xlink"
|
||||
preserveAspectRatio="xMidYMid"
|
||||
>
|
||||
<defs>
|
||||
<style>
|
||||
{`
|
||||
.cls-3 {
|
||||
fill: url(#linear-gradient-1);
|
||||
}
|
||||
|
||||
.cls-4 {
|
||||
fill: #fc9502;
|
||||
}
|
||||
|
||||
.cls-5 {
|
||||
fill: #fce202;
|
||||
}
|
||||
`}
|
||||
</style>
|
||||
|
||||
<linearGradient
|
||||
id="linear-gradient-1"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
x1="94.141"
|
||||
y1="255"
|
||||
x2="94.141"
|
||||
y2="0.188"
|
||||
>
|
||||
<stop offset="0" stopColor="#ff4c0d" />
|
||||
<stop offset="1" stopColor="#fc9502" />
|
||||
</linearGradient>
|
||||
</defs>
|
||||
<g id="fire">
|
||||
<path
|
||||
d="M187.899,164.809 C185.803,214.868 144.574,254.812 94.000,254.812 C42.085,254.812 -0.000,211.312 -0.000,160.812 C-0.000,154.062 -0.121,140.572 10.000,117.812 C16.057,104.191 19.856,95.634 22.000,87.812 C23.178,83.513 25.469,76.683 32.000,87.812 C35.851,94.374 36.000,103.812 36.000,103.812 C36.000,103.812 50.328,92.817 60.000,71.812 C74.179,41.019 62.866,22.612 59.000,9.812 C57.662,5.384 56.822,-2.574 66.000,0.812 C75.352,4.263 100.076,21.570 113.000,39.812 C131.445,65.847 138.000,90.812 138.000,90.812 C138.000,90.812 143.906,83.482 146.000,75.812 C148.365,67.151 148.400,58.573 155.999,67.813 C163.226,76.600 173.959,93.113 180.000,108.812 C190.969,137.321 187.899,164.809 187.899,164.809 Z"
|
||||
id="path-1"
|
||||
className="cls-3"
|
||||
fillRule="evenodd"
|
||||
/>
|
||||
<path
|
||||
d="M94.000,254.812 C58.101,254.812 29.000,225.711 29.000,189.812 C29.000,168.151 37.729,155.000 55.896,137.166 C67.528,125.747 78.415,111.722 83.042,102.172 C83.953,100.292 86.026,90.495 94.019,101.966 C98.212,107.982 104.785,118.681 109.000,127.812 C116.266,143.555 118.000,158.812 118.000,158.812 C118.000,158.812 125.121,154.616 130.000,143.812 C131.573,140.330 134.753,127.148 143.643,140.328 C150.166,150.000 159.127,167.390 159.000,189.812 C159.000,225.711 129.898,254.812 94.000,254.812 Z"
|
||||
id="path-2"
|
||||
className="cls-4"
|
||||
fillRule="evenodd"
|
||||
/>
|
||||
<path
|
||||
d="M95.000,183.812 C104.250,183.812 104.250,200.941 116.000,223.812 C123.824,239.041 112.121,254.812 95.000,254.812 C77.879,254.812 69.000,240.933 69.000,223.812 C69.000,206.692 85.750,183.812 95.000,183.812 Z"
|
||||
id="path-3"
|
||||
className="cls-5"
|
||||
fillRule="evenodd"
|
||||
/>
|
||||
</g>
|
||||
</svg>
|
||||
);
|
||||
export default SvgFirecrawlLogo;
|
||||
28
src/frontend/src/icons/Firecrawl/firecraw-logo.svg
Normal file
28
src/frontend/src/icons/Firecrawl/firecraw-logo.svg
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
<svg width="800px" height="800px" viewBox="-33 0 255 255" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" preserveAspectRatio="xMidYMid">
|
||||
<defs>
|
||||
<style>
|
||||
|
||||
.cls-3 {
|
||||
fill: url(#linear-gradient-1);
|
||||
}
|
||||
|
||||
.cls-4 {
|
||||
fill: #fc9502;
|
||||
}
|
||||
|
||||
.cls-5 {
|
||||
fill: #fce202;
|
||||
}
|
||||
</style>
|
||||
|
||||
<linearGradient id="linear-gradient-1" gradientUnits="userSpaceOnUse" x1="94.141" y1="255" x2="94.141" y2="0.188">
|
||||
<stop offset="0" stop-color="#ff4c0d"/>
|
||||
<stop offset="1" stop-color="#fc9502"/>
|
||||
</linearGradient>
|
||||
</defs>
|
||||
<g id="fire">
|
||||
<path d="M187.899,164.809 C185.803,214.868 144.574,254.812 94.000,254.812 C42.085,254.812 -0.000,211.312 -0.000,160.812 C-0.000,154.062 -0.121,140.572 10.000,117.812 C16.057,104.191 19.856,95.634 22.000,87.812 C23.178,83.513 25.469,76.683 32.000,87.812 C35.851,94.374 36.000,103.812 36.000,103.812 C36.000,103.812 50.328,92.817 60.000,71.812 C74.179,41.019 62.866,22.612 59.000,9.812 C57.662,5.384 56.822,-2.574 66.000,0.812 C75.352,4.263 100.076,21.570 113.000,39.812 C131.445,65.847 138.000,90.812 138.000,90.812 C138.000,90.812 143.906,83.482 146.000,75.812 C148.365,67.151 148.400,58.573 155.999,67.813 C163.226,76.600 173.959,93.113 180.000,108.812 C190.969,137.321 187.899,164.809 187.899,164.809 Z" id="path-1" class="cls-3" fill-rule="evenodd"/>
|
||||
<path d="M94.000,254.812 C58.101,254.812 29.000,225.711 29.000,189.812 C29.000,168.151 37.729,155.000 55.896,137.166 C67.528,125.747 78.415,111.722 83.042,102.172 C83.953,100.292 86.026,90.495 94.019,101.966 C98.212,107.982 104.785,118.681 109.000,127.812 C116.266,143.555 118.000,158.812 118.000,158.812 C118.000,158.812 125.121,154.616 130.000,143.812 C131.573,140.330 134.753,127.148 143.643,140.328 C150.166,150.000 159.127,167.390 159.000,189.812 C159.000,225.711 129.898,254.812 94.000,254.812 Z" id="path-2" class="cls-4" fill-rule="evenodd"/>
|
||||
<path d="M95.000,183.812 C104.250,183.812 104.250,200.941 116.000,223.812 C123.824,239.041 112.121,254.812 95.000,254.812 C77.879,254.812 69.000,240.933 69.000,223.812 C69.000,206.692 85.750,183.812 95.000,183.812 Z" id="path-3" class="cls-5" fill-rule="evenodd"/>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 2.2 KiB |
9
src/frontend/src/icons/Firecrawl/index.tsx
Normal file
9
src/frontend/src/icons/Firecrawl/index.tsx
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
import React, { forwardRef } from "react";
|
||||
import SvgFirecrawlLogo from "./FirecrawlLogo";
|
||||
|
||||
/**
 * Ref-forwarding wrapper around the Firecrawl logo.
 *
 * Passes the received ref and every prop straight through to
 * `SvgFirecrawlLogo`.
 */
export const FirecrawlIcon = forwardRef<
  SVGSVGElement,
  React.PropsWithChildren<{}>
>((iconProps, svgRef) => <SvgFirecrawlLogo ref={svgRef} {...iconProps} />);
|
||||
|
|
@ -171,6 +171,7 @@ import { CouchbaseIcon } from "../icons/Couchbase";
|
|||
import { ElasticsearchIcon } from "../icons/ElasticsearchStore";
|
||||
import { EvernoteIcon } from "../icons/Evernote";
|
||||
import { FBIcon } from "../icons/FacebookMessenger";
|
||||
import { FirecrawlIcon } from "../icons/Firecrawl";
|
||||
import { GitBookIcon } from "../icons/GitBook";
|
||||
import { GoogleIcon } from "../icons/Google";
|
||||
import { GoogleGenerativeAIIcon } from "../icons/GoogleGenerativeAI";
|
||||
|
|
@ -363,6 +364,8 @@ export const nodeIconsLucide: iconsType = {
|
|||
CohereEmbeddings: CohereIcon,
|
||||
EverNoteLoader: EvernoteIcon,
|
||||
FacebookChatLoader: FBIcon,
|
||||
FirecrawlCrawlApi: FirecrawlIcon,
|
||||
FirecrawlScrapeApi: FirecrawlIcon,
|
||||
GitbookLoader: GitBookIcon,
|
||||
GoogleSearchAPIWrapper: GoogleIcon,
|
||||
GoogleSearchResults: GoogleIcon,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue