feat: add Spider Web Scraper & Crawler (#2439)
* spider files * rebuild required * add spider-client here * Feat: Spider Web Crawler & Scraper * Feat: spider integration * new input not working * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * fix: add outputs and configure build method * style: run ruff * Refactor SpiderTool to use 'crawl' instead of 'build' for generating Markdown content * chore: add type ignore * chore: new lock * chore: Update mem0ai dependency to version 0.0.5 --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
parent
a2c98b90c1
commit
7a36cc9ebf
12 changed files with 2580 additions and 68 deletions
143
poetry.lock
generated
143
poetry.lock
generated
|
|
@ -2124,13 +2124,13 @@ idna = ">=2.0.0"
|
|||
|
||||
[[package]]
|
||||
name = "embedchain"
|
||||
version = "0.1.119"
|
||||
version = "0.1.118"
|
||||
description = "Simplest open source retrieval (RAG) framework"
|
||||
optional = false
|
||||
python-versions = "<=3.13,>=3.9"
|
||||
files = [
|
||||
{file = "embedchain-0.1.119-py3-none-any.whl", hash = "sha256:8ec3e7f139939fa1dc8fda898f8d8d9d31a5abfe08e184b607e38733d863d606"},
|
||||
{file = "embedchain-0.1.119.tar.gz", hash = "sha256:0f4f45e092b7f3192ea6fe82575726532573b1231d7af6c22edc695b701b4223"},
|
||||
{file = "embedchain-0.1.118-py3-none-any.whl", hash = "sha256:38ead471df9d9234bf42e6f7a32cab26431d50d6f2f894f18a6cabc0b02bf31a"},
|
||||
{file = "embedchain-0.1.118.tar.gz", hash = "sha256:1fa1e799882a1dc4e63af344595b043f1c1f30fbd59461b6660b1934b85a1e4b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -2144,7 +2144,7 @@ langchain = ">0.2,<=0.3"
|
|||
langchain-cohere = ">=0.1.4,<0.2.0"
|
||||
langchain-community = ">=0.2.6,<0.3.0"
|
||||
langchain-openai = ">=0.1.7,<0.2.0"
|
||||
mem0ai = ">=0.0.9,<0.0.10"
|
||||
mem0ai = ">=0.0.5,<0.0.6"
|
||||
openai = ">=1.1.1"
|
||||
posthog = ">=3.0.2,<4.0.0"
|
||||
pypdf = ">=4.0.1,<5.0.0"
|
||||
|
|
@ -2211,6 +2211,20 @@ django = ["dj-database-url", "dj-email-url", "django-cache-url"]
|
|||
lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"]
|
||||
tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"]
|
||||
|
||||
[[package]]
|
||||
name = "eval-type-backport"
|
||||
version = "0.2.0"
|
||||
description = "Like `typing._eval_type`, but lets older Python versions use newer typing features."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "eval_type_backport-0.2.0-py3-none-any.whl", hash = "sha256:ac2f73d30d40c5a30a80b8739a789d6bb5e49fdffa66d7912667e2015d9c9933"},
|
||||
{file = "eval_type_backport-0.2.0.tar.gz", hash = "sha256:68796cfbc7371ebf923f03bdf7bef415f3ec098aeced24e054b253a0e78f7b37"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
tests = ["pytest"]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.2.2"
|
||||
|
|
@ -4735,19 +4749,19 @@ tests = ["aiohttp", "duckdb", "pandas (>=1.4)", "polars (>=0.19)", "pytest", "py
|
|||
|
||||
[[package]]
|
||||
name = "langchain"
|
||||
version = "0.2.11"
|
||||
version = "0.2.10"
|
||||
description = "Building applications with LLMs through composability"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8.1"
|
||||
files = [
|
||||
{file = "langchain-0.2.11-py3-none-any.whl", hash = "sha256:5a7a8b4918f3d3bebce9b4f23b92d050699e6f7fb97591e8941177cf07a260a2"},
|
||||
{file = "langchain-0.2.11.tar.gz", hash = "sha256:d7a9e4165f02dca0bd78addbc2319d5b9286b5d37c51d784124102b57e9fd297"},
|
||||
{file = "langchain-0.2.10-py3-none-any.whl", hash = "sha256:b4fb58c7faf4f4999cfe3325474979a7121a1737dd101655a723a1d957ef0617"},
|
||||
{file = "langchain-0.2.10.tar.gz", hash = "sha256:1f861c1b59ac9c91b02bb0fa58d3adad1c1d0686636872b5b357bbce3ce41d06"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = ">=3.8.3,<4.0.0"
|
||||
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
|
||||
langchain-core = ">=0.2.23,<0.3.0"
|
||||
langchain-core = ">=0.2.22,<0.3.0"
|
||||
langchain-text-splitters = ">=0.2.0,<0.3.0"
|
||||
langsmith = ">=0.1.17,<0.2.0"
|
||||
numpy = [
|
||||
|
|
@ -4851,20 +4865,20 @@ langchain-community = ["langchain-community (>=0.2.4)"]
|
|||
|
||||
[[package]]
|
||||
name = "langchain-community"
|
||||
version = "0.2.10"
|
||||
version = "0.2.9"
|
||||
description = "Community contributed LangChain integrations."
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8.1"
|
||||
files = [
|
||||
{file = "langchain_community-0.2.10-py3-none-any.whl", hash = "sha256:9f4d1b5ab7f0b0a704f538e26e50fce45a461da6d2bf6b7b636d24f22fbc088a"},
|
||||
{file = "langchain_community-0.2.10.tar.gz", hash = "sha256:3a0404bad4bd07d6f86affdb62fb3d080a456c66191754d586a409d9d6024d62"},
|
||||
{file = "langchain_community-0.2.9-py3-none-any.whl", hash = "sha256:b51d3adf9346a1161c1098917585b9e303cf24e2f5c71f5d232a0504edada5f2"},
|
||||
{file = "langchain_community-0.2.9.tar.gz", hash = "sha256:1e7c180232916cbe35fe00509680dd1f805e32d7c87b5e80b3a9ec8754ecae37"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = ">=3.8.3,<4.0.0"
|
||||
dataclasses-json = ">=0.5.7,<0.7"
|
||||
langchain = ">=0.2.9,<0.3.0"
|
||||
langchain-core = ">=0.2.23,<0.3.0"
|
||||
langchain-core = ">=0.2.22,<0.3.0"
|
||||
langsmith = ">=0.1.0,<0.2.0"
|
||||
numpy = [
|
||||
{version = ">=1,<2", markers = "python_version < \"3.12\""},
|
||||
|
|
@ -4877,13 +4891,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
|
|||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.2.24"
|
||||
version = "0.2.29"
|
||||
description = "Building applications with LLMs through composability"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8.1"
|
||||
files = [
|
||||
{file = "langchain_core-0.2.24-py3-none-any.whl", hash = "sha256:9444fc082d21ef075d925590a684a73fe1f9688a3d90087580ec929751be55e7"},
|
||||
{file = "langchain_core-0.2.24.tar.gz", hash = "sha256:f2e3fa200b124e8c45d270da9bf836bed9c09532612c96ff3225e59b9a232f5a"},
|
||||
{file = "langchain_core-0.2.29-py3-none-any.whl", hash = "sha256:846c04a3bb72e409a9b928e0eb3ea1762e1473f2c4fb6df2596fbd7b3ab75973"},
|
||||
{file = "langchain_core-0.2.29.tar.gz", hash = "sha256:491324745a7afee5a7b285c3904edd9dd0c6efa7daf26b92fec6e84a2d2f5d10"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -4896,6 +4910,7 @@ pydantic = [
|
|||
]
|
||||
PyYAML = ">=5.3"
|
||||
tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
|
||||
typing-extensions = ">=4.7"
|
||||
|
||||
[[package]]
|
||||
name = "langchain-experimental"
|
||||
|
|
@ -5181,6 +5196,7 @@ asyncer = "^0.0.5"
|
|||
bcrypt = "4.0.1"
|
||||
cachetools = "^5.3.1"
|
||||
chardet = "^5.2.0"
|
||||
crewai = "^0.36.0"
|
||||
cryptography = "^42.0.5"
|
||||
docstring-parser = "^0.16"
|
||||
duckdb = "^1.0.0"
|
||||
|
|
@ -5220,6 +5236,7 @@ python-multipart = "^0.0.7"
|
|||
rich = "^13.7.0"
|
||||
sentry-sdk = {version = "^2.5.1", extras = ["fastapi", "loguru"]}
|
||||
setuptools = ">=70"
|
||||
spider-client = "^0.0.27"
|
||||
sqlmodel = "^0.0.18"
|
||||
typer = "^0.12.0"
|
||||
uncurl = "^0.0.11"
|
||||
|
|
@ -5281,13 +5298,13 @@ requests = ">=2,<3"
|
|||
|
||||
[[package]]
|
||||
name = "langwatch"
|
||||
version = "0.1.16"
|
||||
version = "0.1.18"
|
||||
description = "Python SDK for LangWatch for monitoring your LLMs"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.9"
|
||||
python-versions = "<3.13,>=3.9"
|
||||
files = [
|
||||
{file = "langwatch-0.1.16-py3-none-any.whl", hash = "sha256:61ccb1f1efbffc1b2e8bbd3b9c7ed53440d3a66b9fd741f3d1a30d31d0b936f7"},
|
||||
{file = "langwatch-0.1.16.tar.gz", hash = "sha256:d8c453a4dcdb500bb55df19ef5fa2c43d450236d84e47fd72348fb3184cc3f6a"},
|
||||
{file = "langwatch-0.1.18-py3-none-any.whl", hash = "sha256:73e469fee96d1bebfc54f27b8413d2f108124139ca2df7510063e3a1ea2dc3c1"},
|
||||
{file = "langwatch-0.1.18.tar.gz", hash = "sha256:a0e6dbfedca02502bf5abafd7cf9ef8d1bffd8c0d6bac0d932d1209c97752c0e"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -5309,13 +5326,13 @@ openai = ["openai (>=1.3.7,<2.0.0)"]
|
|||
|
||||
[[package]]
|
||||
name = "litellm"
|
||||
version = "1.42.5"
|
||||
version = "1.41.25"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
optional = false
|
||||
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
|
||||
files = [
|
||||
{file = "litellm-1.42.5-py3-none-any.whl", hash = "sha256:c8c2f9e40b5aa1c2dcfcac9adb854b8ac22ce2112825d742d8fce516d26e9a65"},
|
||||
{file = "litellm-1.42.5.tar.gz", hash = "sha256:64ea24040751009e70e816e9340c5c82717d9a309f4480e5ece9f3f67328e04e"},
|
||||
{file = "litellm-1.41.25-py3-none-any.whl", hash = "sha256:80ef35f141402be4ef106a9c720169f6f613ff47df717ab3d1b8ba845c2a5b38"},
|
||||
{file = "litellm-1.41.25.tar.gz", hash = "sha256:f6f000b8e666b51914436c26659d4a91d67b350bcc44e47b3837d3b8f0e1640d"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -5736,20 +5753,23 @@ files = [
|
|||
|
||||
[[package]]
|
||||
name = "mem0ai"
|
||||
version = "0.0.9"
|
||||
version = "0.0.5"
|
||||
description = "Long-term memory for AI Agents"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8"
|
||||
files = [
|
||||
{file = "mem0ai-0.0.9-py3-none-any.whl", hash = "sha256:d4de435729af4fd3d597d022ffb2af89a0630d6c3b4769792bbe27d2ce816858"},
|
||||
{file = "mem0ai-0.0.9.tar.gz", hash = "sha256:e4374d5d04aa3f543cd3325f700e4b62f5358ae1c6fa5c44b2ff790c10c4e5f1"},
|
||||
{file = "mem0ai-0.0.5-py3-none-any.whl", hash = "sha256:6f6e5356fd522adf0510322cd581476ea456fd7ccefca11b5ac050e9a6f00f36"},
|
||||
{file = "mem0ai-0.0.5.tar.gz", hash = "sha256:f2ac35d15e4e620becb8d06b8ebeb1ffa85fac0b7cb2d3138056babec48dd5dd"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
boto3 = ">=1.34.144,<2.0.0"
|
||||
groq = ">=0.9.0,<0.10.0"
|
||||
openai = ">=1.33.0,<2.0.0"
|
||||
posthog = ">=3.5.0,<4.0.0"
|
||||
pydantic = ">=2.7.3,<3.0.0"
|
||||
qdrant-client = ">=1.9.1,<2.0.0"
|
||||
together = ">=1.2.1,<2.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "metal-sdk"
|
||||
|
|
@ -6566,13 +6586,13 @@ sympy = "*"
|
|||
|
||||
[[package]]
|
||||
name = "openai"
|
||||
version = "1.37.1"
|
||||
version = "1.37.0"
|
||||
description = "The official Python library for the openai API"
|
||||
optional = false
|
||||
python-versions = ">=3.7.1"
|
||||
files = [
|
||||
{file = "openai-1.37.1-py3-none-any.whl", hash = "sha256:9a6adda0d6ae8fce02d235c5671c399cfa40d6a281b3628914c7ebf244888ee3"},
|
||||
{file = "openai-1.37.1.tar.gz", hash = "sha256:faf87206785a6b5d9e34555d6a3242482a6852bc802e453e2a891f68ee04ce55"},
|
||||
{file = "openai-1.37.0-py3-none-any.whl", hash = "sha256:a903245c0ecf622f2830024acdaa78683c70abb8e9d37a497b851670864c9f73"},
|
||||
{file = "openai-1.37.0.tar.gz", hash = "sha256:dc8197fc40ab9d431777b6620d962cc49f4544ffc3011f03ce0a805e6eb54adb"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -8316,13 +8336,13 @@ files = [
|
|||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "8.3.2"
|
||||
version = "8.3.1"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pytest-8.3.2-py3-none-any.whl", hash = "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5"},
|
||||
{file = "pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce"},
|
||||
{file = "pytest-8.3.1-py3-none-any.whl", hash = "sha256:e9600ccf4f563976e2c99fa02c7624ab938296551f280835ee6516df8bc4ae8c"},
|
||||
{file = "pytest-8.3.1.tar.gz", hash = "sha256:7e8e5c5abd6e93cb1cc151f23e57adc31fcf8cfd2a3ff2da63e23f732de35db6"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -9729,13 +9749,13 @@ tornado = ["tornado (>=6)"]
|
|||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "72.1.0"
|
||||
version = "71.1.0"
|
||||
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "setuptools-72.1.0-py3-none-any.whl", hash = "sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1"},
|
||||
{file = "setuptools-72.1.0.tar.gz", hash = "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec"},
|
||||
{file = "setuptools-71.1.0-py3-none-any.whl", hash = "sha256:33874fdc59b3188304b2e7c80d9029097ea31627180896fb549c578ceb8a0855"},
|
||||
{file = "setuptools-71.1.0.tar.gz", hash = "sha256:032d42ee9fb536e33087fb66cac5f840eb9391ed05637b3f2a76a7c8fb477936"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
|
|
@ -9861,6 +9881,19 @@ files = [
|
|||
{file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spider-client"
|
||||
version = "0.0.27"
|
||||
description = "Python SDK for Spider Cloud API"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "spider-client-0.0.27.tar.gz", hash = "sha256:c3feaf5c491bd9a6c509efa0c8789452497073d9f68e70fc90e7626a6a8365aa"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
requests = "*"
|
||||
|
||||
[[package]]
|
||||
name = "sqlalchemy"
|
||||
version = "2.0.31"
|
||||
|
|
@ -10204,6 +10237,34 @@ requests = ">=2.26.0"
|
|||
[package.extras]
|
||||
blobfile = ["blobfile (>=2)"]
|
||||
|
||||
[[package]]
|
||||
name = "together"
|
||||
version = "1.2.6"
|
||||
description = "Python client for Together's Cloud Platform!"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8"
|
||||
files = [
|
||||
{file = "together-1.2.6-py3-none-any.whl", hash = "sha256:b3ccf467919edcf3a3927dcf7aad6dee95c4a276ced7bff523a2b361fc766d56"},
|
||||
{file = "together-1.2.6.tar.gz", hash = "sha256:f79f383d258fc964809ebe60870c94f2104c15b34451c5b4808bd11d956a1702"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = ">=3.9.3,<4.0.0"
|
||||
click = ">=8.1.7,<9.0.0"
|
||||
eval-type-backport = ">=0.1.3,<0.3.0"
|
||||
filelock = ">=3.13.1,<4.0.0"
|
||||
numpy = [
|
||||
{version = ">=1.23.5", markers = "python_version < \"3.12\""},
|
||||
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
||||
]
|
||||
pillow = ">=10.3.0,<11.0.0"
|
||||
pyarrow = ">=10.0.1"
|
||||
pydantic = ">=2.6.3,<3.0.0"
|
||||
requests = ">=2.31.0,<3.0.0"
|
||||
tabulate = ">=0.9.0,<0.10.0"
|
||||
tqdm = ">=4.66.2,<5.0.0"
|
||||
typer = ">=0.9,<0.13"
|
||||
|
||||
[[package]]
|
||||
name = "tokenizers"
|
||||
version = "0.19.1"
|
||||
|
|
@ -10761,13 +10822,13 @@ urllib3 = ">=2"
|
|||
|
||||
[[package]]
|
||||
name = "types-setuptools"
|
||||
version = "71.1.0.20240726"
|
||||
version = "71.0.0.20240722"
|
||||
description = "Typing stubs for setuptools"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "types-setuptools-71.1.0.20240726.tar.gz", hash = "sha256:85ba28e9461bb1be86ebba4db0f1c2408f2b11115b1966334ea9dc464e29303e"},
|
||||
{file = "types_setuptools-71.1.0.20240726-py3-none-any.whl", hash = "sha256:a7775376f36e0ff09bcad236bf265777590a66b11623e48c20bfc30f1444ea36"},
|
||||
{file = "types-setuptools-71.0.0.20240722.tar.gz", hash = "sha256:8f1fd5281945ed8f5a896f05dd50bc31917d6e2487ff9508f4bac522d13ad395"},
|
||||
{file = "types_setuptools-71.0.0.20240722-py3-none-any.whl", hash = "sha256:04a383bd1a2dcdb6a85397516ce2d7b46617d89f1d758f686d0d9069943d9811"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -11331,13 +11392,13 @@ files = [
|
|||
|
||||
[[package]]
|
||||
name = "weaviate-client"
|
||||
version = "4.7.1"
|
||||
version = "4.6.7"
|
||||
description = "A python native Weaviate client"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "weaviate_client-4.7.1-py3-none-any.whl", hash = "sha256:342f5c67b126cee4dc3a60467ad1ae74971cd5614e27af6fb13d687a345352c4"},
|
||||
{file = "weaviate_client-4.7.1.tar.gz", hash = "sha256:af99ac4e53613d2ff5b797372e95d004d0c8a1dd10a7f592068bcb423a30af30"},
|
||||
{file = "weaviate_client-4.6.7-py3-none-any.whl", hash = "sha256:8793de35264cab33a84fe8cb8c422a257fe4d8334657aaddd8ead853da3fb34a"},
|
||||
{file = "weaviate_client-4.6.7.tar.gz", hash = "sha256:202b32e160536f5f44e4a635d30c3d3a0790b1a7ff997f5e243919d1ac5b68a1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -12012,4 +12073,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "b0b58a9883d3eacc262701a7938ff839365386ba5e155dd2582986501d2b5d7c"
|
||||
content-hash = "6554abddd0322e28c531774986c897c560099a974f985aa48b21b36b12e2be52"
|
||||
|
|
|
|||
|
|
@ -106,6 +106,7 @@ langchain-google-community = "1.0.7"
|
|||
wolframalpha = "^5.1.3"
|
||||
astra-assistants = "^2.0.15"
|
||||
composio-langchain = "^0.3.28"
|
||||
spider-client = "^0.0.27"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
# Operating modes offered by the Spider component's "Mode" dropdown; the first entry is used as its default value.
MODES = ["scrape", "crawl"]
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
from typing import Any
|
||||
|
||||
from langflow.custom import Component
|
||||
from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput, DropdownInput
|
||||
from langflow.inputs.inputs import DictInput, DropdownInput, MessageTextInput, SecretStrInput
|
||||
from langflow.template.field.base import Output
|
||||
|
||||
|
||||
|
|
@ -60,13 +61,19 @@ class AstraVectorizeComponent(Component):
|
|||
name="model_name",
|
||||
display_name="Model Name",
|
||||
info=f"The embedding model to use for the selected provider. Each provider has a different set of models "
|
||||
f"available (https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
|
||||
f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
|
||||
required=True,
|
||||
),
|
||||
MessageTextInput(
|
||||
name="api_key_name",
|
||||
display_name="Provider API Key Name",
|
||||
info="The name of the embeddings provider API key stored on Astra.",
|
||||
display_name="API Key name",
|
||||
info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.",
|
||||
),
|
||||
DictInput(
|
||||
name="authentication",
|
||||
display_name="Authentication parameters",
|
||||
is_list=True,
|
||||
advanced=True,
|
||||
),
|
||||
SecretStrInput(
|
||||
name="provider_api_key",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,121 @@
|
|||
from spider.spider import Spider # type: ignore
|
||||
|
||||
from langflow.base.langchain_utilities.spider_constants import MODES
|
||||
from langflow.custom import Component
|
||||
from langflow.io import BoolInput, DictInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput
|
||||
from langflow.schema import Data
|
||||
|
||||
|
||||
class SpiderTool(Component):
    """Langflow component that scrapes a single page or crawls a site via the Spider API.

    The component exposes one output, ``content``, produced by :meth:`crawl`. It is a
    list of ``Data`` records, each carrying the ``content`` and ``url`` of one page
    returned by the API.
    """

    display_name: str = "Spider Web Crawler & Scraper"
    description: str = "Spider API for web crawling and scraping."
    output_types: list[str] = ["Document"]
    documentation: str = "https://spider.cloud/docs/api"

    inputs = [
        SecretStrInput(
            name="spider_api_key",
            display_name="Spider API Key",
            required=True,
            password=True,
            info="The Spider API Key, get it from https://spider.cloud",
        ),
        StrInput(
            name="url",
            display_name="URL",
            required=True,
            info="The URL to scrape or crawl",
        ),
        DropdownInput(
            name="mode",
            display_name="Mode",
            required=True,
            options=MODES,
            value=MODES[0],
            info="The mode of operation: scrape or crawl",
        ),
        IntInput(
            name="limit",
            display_name="Limit",
            info="The maximum amount of pages allowed to crawl per website. Set to 0 to crawl all pages.",
            advanced=True,
        ),
        IntInput(
            name="depth",
            display_name="Depth",
            info="The crawl limit for maximum depth. If 0, no limit will be applied.",
            advanced=True,
        ),
        StrInput(
            name="blacklist",
            display_name="Blacklist",
            info="Blacklist paths that you do not want to crawl. Use Regex patterns.",
            advanced=True,
        ),
        StrInput(
            name="whitelist",
            display_name="Whitelist",
            info="Whitelist paths that you want to crawl, ignoring all other routes. Use Regex patterns.",
            advanced=True,
        ),
        BoolInput(
            name="use_readability",
            display_name="Use Readability",
            info="Use readability to pre-process the content for reading.",
            advanced=True,
        ),
        IntInput(
            name="request_timeout",
            display_name="Request Timeout",
            info="Timeout for the request in seconds.",
            advanced=True,
        ),
        BoolInput(
            name="metadata",
            display_name="Metadata",
            info="Include metadata in the response.",
            advanced=True,
        ),
        DictInput(
            name="params",
            display_name="Additional Parameters",
            info="Additional parameters to pass to the API. If provided, other inputs will be ignored.",
        ),
    ]

    outputs = [
        Output(display_name="Markdown", name="content", method="crawl"),
    ]

    def crawl(self) -> list[Data]:
        """Run the selected Spider operation and wrap every returned page in a ``Data`` record.

        When ``params`` is set it is used as the request parameters and the individual
        inputs are ignored. Otherwise the parameters are assembled from the component
        inputs with ``return_format`` fixed to ``"markdown"``. In ``scrape`` mode
        ``limit`` is always forced to 1.

        Returns:
            One ``Data`` per record in the API result, holding its ``content`` and ``url``.

        Raises:
            Exception: If the mode is not ``scrape``/``crawl`` or the Spider call fails.
                The message is ``"Error: <original message>"`` and the original
                exception is attached as ``__cause__``.
        """
        if self.params:
            # NOTE(review): the original read ``self.params.data`` unconditionally. The
            # fallback keeps that behaviour when the attribute exists and otherwise uses
            # the value itself (e.g. if DictInput delivers a plain dict) - confirm which
            # shape the framework actually provides.
            supplied = getattr(self.params, "data", self.params)
            # Copy so that forcing ``limit`` in scrape mode never mutates the user's input.
            parameters = dict(supplied)
        else:
            parameters = {
                "limit": self.limit,
                "depth": self.depth,
                "blacklist": self.blacklist,
                "whitelist": self.whitelist,
                "use_readability": self.use_readability,
                "request_timeout": self.request_timeout,
                "metadata": self.metadata,
                "return_format": "markdown",
            }

        app = Spider(api_key=self.spider_api_key)
        try:
            if self.mode == "scrape":
                # Scraping targets exactly one page, regardless of the configured limit.
                parameters["limit"] = 1
                result = app.scrape_url(self.url, parameters)
            elif self.mode == "crawl":
                result = app.crawl_url(self.url, parameters)
            else:
                raise ValueError(f"Invalid mode: {self.mode}. Must be 'scrape' or 'crawl'.")
        except Exception as e:
            # Same message and exception type as before; ``from e`` keeps the root cause
            # explicitly linked in the traceback.
            raise Exception(f"Error: {str(e)}") from e

        return [Data(data={"content": record["content"], "url": record["url"]}) for record in result]
|
||||
|
|
@ -157,7 +157,7 @@ def create_app():
|
|||
raise ValueError(f"Invalid port number {prome_port_str}")
|
||||
|
||||
if settings.prometheus_enabled:
|
||||
from prometheus_client import start_http_server
|
||||
from prometheus_client import start_http_server # type: ignore
|
||||
|
||||
start_http_server(settings.prometheus_port)
|
||||
|
||||
|
|
|
|||
2331
src/backend/base/poetry.lock
generated
2331
src/backend/base/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -77,6 +77,10 @@ setuptools = ">=70"
|
|||
nanoid = "^2.0.0"
|
||||
filelock = "^3.15.4"
|
||||
grandalf = "^0.8.0"
|
||||
crewai = "^0.36.0"
|
||||
spider-client = "^0.0.27"
|
||||
|
||||
|
||||
[tool.poetry.extras]
|
||||
deploy = ["celery", "redis", "flower"]
|
||||
local = ["llama-cpp-python", "sentence-transformers", "ctransformers"]
|
||||
|
|
|
|||
18
src/frontend/src/icons/Spider/SpiderIcon.jsx
Normal file
18
src/frontend/src/icons/Spider/SpiderIcon.jsx
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
const SvgSpiderIcon = (props) => (
|
||||
<svg
|
||||
height="30"
|
||||
width="30"
|
||||
viewBox="0 0 36 34"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
className="fill-accent-foreground transition-all group-hover:scale-110"
|
||||
{...props}
|
||||
>
|
||||
<title>Spider v0 Logo</title>
|
||||
<path
|
||||
fillRule="evenodd"
|
||||
clipRule="evenodd"
|
||||
d="M9.13883 7.06589V0.164429L13.0938 0.164429V6.175L14.5178 7.4346C15.577 6.68656 16.7337 6.27495 17.945 6.27495C19.1731 6.27495 20.3451 6.69807 21.4163 7.46593L22.8757 6.175V0.164429L26.8307 0.164429V7.06589V7.95679L26.1634 8.54706L24.0775 10.3922C24.3436 10.8108 24.5958 11.2563 24.8327 11.7262L26.0467 11.4215L28.6971 8.08749L31.793 10.5487L28.7257 14.407L28.3089 14.9313L27.6592 15.0944L26.2418 15.4502C26.3124 15.7082 26.3793 15.9701 26.4422 16.2355L28.653 16.6566L29.092 16.7402L29.4524 17.0045L35.3849 21.355L33.0461 24.5444L27.474 20.4581L27.0719 20.3816C27.1214 21.0613 27.147 21.7543 27.147 22.4577C27.147 22.5398 27.1466 22.6214 27.1459 22.7024L29.5889 23.7911L30.3219 24.1177L30.62 24.8629L33.6873 32.5312L30.0152 34L27.246 27.0769L26.7298 26.8469C25.5612 32.2432 22.0701 33.8808 17.945 33.8808C13.8382 33.8808 10.3598 32.2577 9.17593 26.9185L8.82034 27.0769L6.05109 34L2.37897 32.5312L5.44629 24.8629L5.74435 24.1177L6.47743 23.7911L8.74487 22.7806C8.74366 22.6739 8.74305 22.5663 8.74305 22.4577C8.74305 21.7616 8.76804 21.0758 8.81654 20.4028L8.52606 20.4581L2.95395 24.5444L0.615112 21.355L6.54761 17.0045L6.908 16.7402L7.34701 16.6566L9.44264 16.2575C9.50917 15.9756 9.5801 15.6978 9.65528 15.4242L8.34123 15.0944L7.69155 14.9313L7.27471 14.407L4.20739 10.5487L7.30328 8.08749L9.95376 11.4215L11.0697 11.7016C11.3115 11.2239 11.5692 10.7716 11.8412 10.3473L9.80612 8.54706L9.13883 7.95679V7.06589Z"
|
||||
></path>
|
||||
</svg>
|
||||
);
|
||||
export default SvgSpiderIcon;
|
||||
9
src/frontend/src/icons/Spider/index.tsx
Normal file
9
src/frontend/src/icons/Spider/index.tsx
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
import React, { forwardRef } from "react";
|
||||
import SvgSpiderIcon from "./SpiderIcon";
|
||||
|
||||
/**
 * Spider brand icon for use in icon registries.
 *
 * Accepts the standard `<svg>` attributes (e.g. `className`, `width`,
 * `height`) and passes them, together with the ref, to `SvgSpiderIcon`.
 *
 * NOTE(review): the ref only reaches the underlying `<svg>` element if
 * `SvgSpiderIcon` (imported from `./SpiderIcon`) itself forwards refs — confirm.
 */
export const SpiderIcon = forwardRef<
  SVGSVGElement,
  React.ComponentPropsWithoutRef<"svg">
>((props, ref) => {
  return <SvgSpiderIcon ref={ref} {...props} />;
});

// Explicit name so the component is not reported as an anonymous "ForwardRef".
SpiderIcon.displayName = "SpiderIcon";
|
||||
1
src/frontend/src/icons/Spider/spider_logo.svg
Normal file
1
src/frontend/src/icons/Spider/spider_logo.svg
Normal file
|
|
@ -0,0 +1 @@
|
|||
<svg height="30" width="30" viewBox="0 0 36 34" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" class="fill-accent-foreground transition-all group-hover:scale-110"><title>Spider v1 Logo</title><path fill-rule="evenodd" clip-rule="evenodd" d="M9.13883 7.06589V0.164429L13.0938 0.164429V6.175L14.5178 7.4346C15.577 6.68656 16.7337 6.27495 17.945 6.27495C19.1731 6.27495 20.3451 6.69807 21.4163 7.46593L22.8757 6.175V0.164429L26.8307 0.164429V7.06589V7.95679L26.1634 8.54706L24.0775 10.3922C24.3436 10.8108 24.5958 11.2563 24.8327 11.7262L26.0467 11.4215L28.6971 8.08749L31.793 10.5487L28.7257 14.407L28.3089 14.9313L27.6592 15.0944L26.2418 15.4502C26.3124 15.7082 26.3793 15.9701 26.4422 16.2355L28.653 16.6566L29.092 16.7402L29.4524 17.0045L35.3849 21.355L33.0461 24.5444L27.474 20.4581L27.0719 20.3816C27.1214 21.0613 27.147 21.7543 27.147 22.4577C27.147 22.5398 27.1466 22.6214 27.1459 22.7024L29.5889 23.7911L30.3219 24.1177L30.62 24.8629L33.6873 32.5312L30.0152 34L27.246 27.0769L26.7298 26.8469C25.5612 32.2432 22.0701 33.8808 17.945 33.8808C13.8382 33.8808 10.3598 32.2577 9.17593 26.9185L8.82034 27.0769L6.05109 34L2.37897 32.5312L5.44629 24.8629L5.74435 24.1177L6.47743 23.7911L8.74487 22.7806C8.74366 22.6739 8.74305 22.5663 8.74305 22.4577C8.74305 21.7616 8.76804 21.0758 8.81654 20.4028L8.52606 20.4581L2.95395 24.5444L0.615112 21.355L6.54761 17.0045L6.908 16.7402L7.34701 16.6566L9.44264 16.2575C9.50917 15.9756 9.5801 15.6978 9.65528 15.4242L8.34123 15.0944L7.69155 14.9313L7.27471 14.407L4.20739 10.5487L7.30328 8.08749L9.95376 11.4215L11.0697 11.7016C11.3115 11.2239 11.5692 10.7716 11.8412 10.3473L9.80612 8.54706L9.13883 7.95679V7.06589Z"></path></svg>
|
||||
|
After Width: | Height: | Size: 1.6 KiB |
|
|
@ -211,6 +211,7 @@ import { SearxIcon } from "../icons/Searx";
|
|||
import { ShareIcon } from "../icons/Share";
|
||||
import { Share2Icon } from "../icons/Share2";
|
||||
import SvgSlackIcon from "../icons/Slack/SlackIcon";
|
||||
import { SpiderIcon } from "../icons/Spider";
|
||||
import { Streamlit } from "../icons/Streamlit";
|
||||
import { UpstashSvgIcon } from "../icons/Upstash";
|
||||
import { VectaraIcon } from "../icons/VectaraIcon";
|
||||
|
|
@ -412,6 +413,7 @@ export const nodeIconsLucide: iconsType = {
|
|||
Weaviate: WeaviateIcon,
|
||||
Searx: SearxIcon,
|
||||
SlackDirectoryLoader: SvgSlackIcon,
|
||||
SpiderTool: SpiderIcon,
|
||||
SupabaseVectorStore: SupabaseIcon,
|
||||
Supabase: SupabaseIcon,
|
||||
VertexAI: VertexAIIcon,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue