diff --git a/poetry.lock b/poetry.lock index 6773edcab..c8653b9e4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2112,6 +2112,20 @@ files = [ {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, ] +[[package]] +name = "firecrawl-py" +version = "0.0.16" +description = "Python SDK for Firecrawl API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "firecrawl_py-0.0.16-py3-none-any.whl", hash = "sha256:9024f483b501852a6b9c4e6cdfc9e8dde452d922afac357080bb278a0c9c2a26"}, + {file = "firecrawl_py-0.0.16.tar.gz", hash = "sha256:6c662fa0a549bc7f5c0acb704baba6731869ca0451094034264dfc1b4eb086e4"}, +] + +[package.dependencies] +requests = "*" + [[package]] name = "flaml" version = "2.1.2" @@ -2432,8 +2446,8 @@ files = [ [package.dependencies] cffi = {version = ">=1.12.2", markers = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} greenlet = [ - {version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.11\""}, {version = ">=3.0rc3", markers = "platform_python_implementation == \"CPython\" and python_version >= \"3.11\""}, + {version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.11\""}, ] "zope.event" = "*" "zope.interface" = "*" @@ -2592,12 +2606,12 @@ files = [ google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0.dev0", optional = true, markers = 
"python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" @@ -4614,8 +4628,8 @@ psutil = ">=5.9.1" pywin32 = {version = "*", markers = "platform_system == \"Windows\""} pyzmq = ">=25.0.0" requests = [ - {version = ">=2.26.0", markers = "python_version <= \"3.11\""}, {version = ">=2.32.2", markers = "python_version > \"3.11\""}, + {version = ">=2.26.0", markers = "python_version <= \"3.11\""}, ] tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.11\""} @@ -6080,9 +6094,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -10552,4 +10566,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "f7377e3a997651cbcec2b9227b0bcde2507afc7d6236b708f4dc62857f150578" +content-hash = "3e72b6faa1c674615a7e5dec3e7d962349e736bf6675c08a49080b7f336cc75b" diff --git a/pyproject.toml b/pyproject.toml index 1bd744a36..159c81fbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,6 +92,7 @@ unstructured = {extras = ["docx", "md", "pptx"], version = "^0.14.4"} langchain-aws = "^0.1.6" langchain-mongodb = "^0.1.6" kubernetes = "^30.1.0" +firecrawl-py = "^0.0.16" [tool.poetry.group.dev.dependencies] diff --git 
a/src/backend/base/langflow/components/langchain_utilities/FirecrawlCrawlApi.py b/src/backend/base/langflow/components/langchain_utilities/FirecrawlCrawlApi.py
new file mode 100644
index 000000000..54355285d
--- /dev/null
+++ b/src/backend/base/langflow/components/langchain_utilities/FirecrawlCrawlApi.py
@@ -0,0 +1,95 @@
+import uuid
+from typing import Optional
+
+from firecrawl.firecrawl import FirecrawlApp
+from langflow.custom import CustomComponent
+from langflow.schema import Data
+
+
+class FirecrawlCrawlApi(CustomComponent):
+    """Langflow component that starts a Firecrawl crawl and returns its result."""
+
+    display_name: str = "FirecrawlCrawlApi"
+    description: str = "Firecrawl Crawl API."
+    output_types: list[str] = ["Document"]
+    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl"
+    field_config = {
+        "api_key": {
+            "display_name": "API Key",
+            "field_type": "str",
+            "required": True,
+            "password": True,
+            "info": "The API key to use Firecrawl API.",
+        },
+        "url": {
+            "display_name": "URL",
+            "field_type": "str",
+            "required": True,
+            "info": "The base URL to start crawling from.",
+        },
+        "timeout": {
+            "display_name": "Timeout",
+            "field_type": "int",
+            "info": "The timeout in milliseconds.",
+        },
+        "crawlerOptions": {
+            "display_name": "Crawler Options",
+            "info": "Options for the crawler behavior.",
+        },
+        "pageOptions": {
+            "display_name": "Page Options",
+            "info": "The page options to send with the request.",
+        },
+        "idempotency_key": {
+            "display_name": "Idempotency Key",
+            "field_type": "str",
+            "info": "Optional idempotency key to ensure unique requests.",
+        },
+    }
+
+    def build(
+        self,
+        api_key: str,
+        url: str,
+        timeout: Optional[int] = 30000,
+        crawlerOptions: Optional[Data] = None,
+        pageOptions: Optional[Data] = None,
+        idempotency_key: Optional[str] = None,
+    ) -> Data:
+        """Crawl ``url`` with Firecrawl and wrap the result in a ``Data`` record.
+
+        Args:
+            api_key: Firecrawl API key.
+            url: Base URL to start crawling from.
+            timeout: Timeout in milliseconds; ``None`` falls back to 30000.
+            crawlerOptions: Record whose ``"text"`` entry is sent as ``crawlerOptions``.
+            pageOptions: Record whose ``"text"`` entry is sent as ``pageOptions``.
+            idempotency_key: Key for the request; a UUID4 is generated when empty.
+
+        Returns:
+            A ``Data`` record holding the crawl result under ``"results"``.
+        """
+        crawler_options_dict = crawlerOptions.__dict__["data"]["text"] if crawlerOptions else {}
+        page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}
+
+        # The signature allows None, and the division below would raise
+        # TypeError on it.
+        if timeout is None:
+            timeout = 30000
+
+        if not idempotency_key:
+            idempotency_key = str(uuid.uuid4())
+
+        app = FirecrawlApp(api_key=api_key)
+        crawl_result = app.crawl_url(
+            url,
+            {
+                "crawlerOptions": crawler_options_dict,
+                "pageOptions": page_options_dict,
+            },
+            True,
+            int(timeout / 1000),  # the field is in milliseconds; pass seconds
+            idempotency_key,
+        )
+
+        return Data(data={"results": crawl_result})
diff --git a/src/backend/base/langflow/components/langchain_utilities/FirecrawlScrapeApi.py b/src/backend/base/langflow/components/langchain_utilities/FirecrawlScrapeApi.py
new file mode 100644
index 000000000..5cdfc82ba
--- /dev/null
+++ b/src/backend/base/langflow/components/langchain_utilities/FirecrawlScrapeApi.py
@@ -0,0 +1,86 @@
+import json
+from typing import Optional
+
+from firecrawl.firecrawl import FirecrawlApp
+from langflow.custom import CustomComponent
+from langflow.schema import Data
+from langflow.services.database.models.base import orjson_dumps
+
+
+class FirecrawlScrapeApi(CustomComponent):
+    """Langflow component that scrapes a single URL through Firecrawl."""
+
+    display_name: str = "FirecrawlScrapeApi"
+    description: str = "Firecrawl Scrape API."
+    output_types: list[str] = ["Document"]
+    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"
+    field_config = {
+        "api_key": {
+            "display_name": "API Key",
+            "field_type": "str",
+            "required": True,
+            "password": True,
+            "info": "The API key to use Firecrawl API.",
+        },
+        "url": {
+            "display_name": "URL",
+            "field_type": "str",
+            "required": True,
+            "info": "The URL to scrape.",
+        },
+        "timeout": {
+            "display_name": "Timeout",
+            "info": "Timeout in milliseconds for the request.",
+            "field_type": "int",
+            "default_value": 10000,
+        },
+        "pageOptions": {
+            "display_name": "Page Options",
+            "info": "The page options to send with the request.",
+        },
+        "extractorOptions": {
+            "display_name": "Extractor Options",
+            "info": "The extractor options to send with the request.",
+        },
+    }
+
+    def build(
+        self,
+        api_key: str,
+        url: str,
+        timeout: Optional[int] = 10000,
+        pageOptions: Optional[Data] = None,
+        extractorOptions: Optional[Data] = None,
+    ) -> Data:
+        """Scrape ``url`` with Firecrawl and wrap the response in a ``Data`` record.
+
+        Args:
+            api_key: Firecrawl API key.
+            url: URL to scrape.
+            timeout: Timeout in milliseconds; ``None`` falls back to 10000.
+            pageOptions: Record whose ``"text"`` entry is sent as ``pageOptions``.
+            extractorOptions: Record whose ``"text"`` entry is sent as ``extractorOptions``.
+
+        Returns:
+            A ``Data`` record built from the value returned by ``scrape_url``.
+        """
+        extractor_options_dict = extractorOptions.__dict__["data"]["text"] if extractorOptions else {}
+        page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}
+
+        # The signature allows None, and str(None) would send the literal
+        # "None" as the timeout.
+        if timeout is None:
+            timeout = 10000
+
+        app = FirecrawlApp(api_key=api_key)
+        results = app.scrape_url(
+            url,
+            {
+                # NOTE(review): sent as a string, as before — confirm the API accepts it.
+                "timeout": str(timeout),
+                "extractorOptions": extractor_options_dict,
+                "pageOptions": page_options_dict,
+            },
+        )
+
+        return Data(data=results)
diff --git a/src/backend/base/poetry.lock b/src/backend/base/poetry.lock
index 07d7472cf..fd3571889 100644
--- a/src/backend/base/poetry.lock
+++ b/src/backend/base/poetry.lock
@@ -739,6 +739,20 @@ typer = ">=0.12.3"
 [package.extras]
 standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"]
 
+[[package]]
+name = "firecrawl-py"
+version = "0.0.16"
+description = "Python SDK for Firecrawl API"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "firecrawl_py-0.0.16-py3-none-any.whl", hash =
"sha256:9024f483b501852a6b9c4e6cdfc9e8dde452d922afac357080bb278a0c9c2a26"}, + {file = "firecrawl_py-0.0.16.tar.gz", hash = "sha256:6c662fa0a549bc7f5c0acb704baba6731869ca0451094034264dfc1b4eb086e4"}, +] + +[package.dependencies] +requests = "*" + [[package]] name = "frozenlist" version = "1.4.1" @@ -3235,4 +3249,4 @@ local = [] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "4f566531a8539ddc81cb91a7e7f9b723c84679f0af5bb8619f7b02f9ffc6cfaa" +content-hash = "7e46144d27c633214f00e73e496c0e4d56db1fb47032a21861677ec275b79d86" diff --git a/src/backend/base/pyproject.toml b/src/backend/base/pyproject.toml index 6f305ea85..3a07f47a5 100644 --- a/src/backend/base/pyproject.toml +++ b/src/backend/base/pyproject.toml @@ -64,6 +64,7 @@ pyperclip = "^1.8.2" uncurl = "^0.0.11" sentry-sdk = {extras = ["fastapi", "loguru"], version = "^2.5.1"} chardet = "^5.2.0" +firecrawl-py = "^0.0.16" [tool.poetry.extras] diff --git a/src/frontend/src/icons/Firecrawl/FirecrawlLogo.jsx b/src/frontend/src/icons/Firecrawl/FirecrawlLogo.jsx new file mode 100644 index 000000000..f79173d13 --- /dev/null +++ b/src/frontend/src/icons/Firecrawl/FirecrawlLogo.jsx @@ -0,0 +1,61 @@ +const SvgFirecrawlLogo = (props) => ( + + + + + + + + + + + + + + + +); +export default SvgFirecrawlLogo; diff --git a/src/frontend/src/icons/Firecrawl/firecraw-logo.svg b/src/frontend/src/icons/Firecrawl/firecraw-logo.svg new file mode 100644 index 000000000..ea8d2c7f1 --- /dev/null +++ b/src/frontend/src/icons/Firecrawl/firecraw-logo.svg @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/frontend/src/icons/Firecrawl/index.tsx b/src/frontend/src/icons/Firecrawl/index.tsx new file mode 100644 index 000000000..060d053bb --- /dev/null +++ b/src/frontend/src/icons/Firecrawl/index.tsx @@ -0,0 +1,9 @@ +import React, { forwardRef } from "react"; +import SvgFirecrawlLogo from "./FirecrawlLogo"; + +export const FirecrawlIcon = forwardRef< + SVGSVGElement, + 
React.PropsWithChildren<{}> +>((props, ref) => { + return ; +}); diff --git a/src/frontend/src/utils/styleUtils.ts b/src/frontend/src/utils/styleUtils.ts index 30a22fd60..b344eaaec 100644 --- a/src/frontend/src/utils/styleUtils.ts +++ b/src/frontend/src/utils/styleUtils.ts @@ -171,6 +171,7 @@ import { CouchbaseIcon } from "../icons/Couchbase"; import { ElasticsearchIcon } from "../icons/ElasticsearchStore"; import { EvernoteIcon } from "../icons/Evernote"; import { FBIcon } from "../icons/FacebookMessenger"; +import { FirecrawlIcon } from "../icons/Firecrawl"; import { GitBookIcon } from "../icons/GitBook"; import { GoogleIcon } from "../icons/Google"; import { GoogleGenerativeAIIcon } from "../icons/GoogleGenerativeAI"; @@ -363,6 +364,8 @@ export const nodeIconsLucide: iconsType = { CohereEmbeddings: CohereIcon, EverNoteLoader: EvernoteIcon, FacebookChatLoader: FBIcon, + FirecrawlCrawlApi: FirecrawlIcon, + FirecrawlScrapeApi: FirecrawlIcon, GitbookLoader: GitBookIcon, GoogleSearchAPIWrapper: GoogleIcon, GoogleSearchResults: GoogleIcon,