Merge branch 'main' into dev

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-06-26 16:28:45 -03:00
commit 68c4485770
17 changed files with 467 additions and 48 deletions

View file

@ -1,6 +1,6 @@
from typing import Any
from langflow.custom import Component
from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput
from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput, DropdownInput
from langflow.template.field.base import Output
@ -10,32 +10,58 @@ class AstraVectorize(Component):
documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
icon = "AstraDB"
# Maps each provider label to a two-item list:
# [provider identifier, list of model names offered by that provider].
VECTORIZE_PROVIDERS_MAPPING = {
    "Azure OpenAI": [
        "azureOpenAI",
        ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"],
    ],
    "Hugging Face - Dedicated": [
        "huggingfaceDedicated",
        ["endpoint-defined-model"],
    ],
    "Hugging Face - Serverless": [
        "huggingface",
        [
            "sentence-transformers/all-MiniLM-L6-v2",
            "intfloat/multilingual-e5-large",
            "intfloat/multilingual-e5-large-instruct",
            "BAAI/bge-small-en-v1.5",
            "BAAI/bge-base-en-v1.5",
            "BAAI/bge-large-en-v1.5",
        ],
    ],
    "Jina AI": [
        "jinaAI",
        [
            "jina-embeddings-v2-base-en",
            "jina-embeddings-v2-base-de",
            "jina-embeddings-v2-base-es",
            "jina-embeddings-v2-base-code",
            "jina-embeddings-v2-base-zh",
        ],
    ],
    "Mistral AI": ["mistral", ["mistral-embed"]],
    "NVIDIA": ["nvidia", ["NV-Embed-QA"]],
    "OpenAI": [
        "openai",
        ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"],
    ],
    "Upstage": ["upstageAI", ["solar-embedding-1-large"]],
    "Voyage AI": [
        "voyageAI",
        ["voyage-large-2-instruct", "voyage-law-2", "voyage-code-2", "voyage-large-2", "voyage-2"],
    ],
}
# Human-readable summary: one "<label>: <model>, <model>, ..." paragraph per
# provider, paragraphs separated by a blank line. Only the outermost iterable
# refers to the mapping, so this also evaluates correctly inside a class body.
VECTORIZE_MODELS_STR = "\n\n".join(
    f"{label}: {', '.join(entry[1])}" for label, entry in VECTORIZE_PROVIDERS_MAPPING.items()
)
inputs = [
MessageTextInput(
DropdownInput(
name="provider",
display_name="Provider name",
info="The embedding provider to use.",
options=VECTORIZE_PROVIDERS_MAPPING.keys(),
value="",
),
MessageTextInput(
name="model_name",
display_name="Model name",
info="The embedding model to use.",
info=f"The embedding model to use for the selected provider. Each provider has a different set of models "
f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
required=True
),
MessageTextInput(
name="api_key_name",
display_name="API Key name",
info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters."
),
DictInput(
name="authentication",
display_name="Authentication",
info="Authentication parameters. Use the Astra Portal to add the embedding provider integration to your Astra organization.",
display_name="Authentication parameters",
is_list=True,
advanced=True,
),
SecretStrInput(
name="provider_api_key",
display_name="Provider API Key",
info="An alternative to the Astra Authentication that let you use directly the API key of the provider.",
advanced=True,
),
DictInput(
name="model_parameters",
display_name="Model parameters",
info="Additional model parameters.",
advanced=True,
is_list=True,
),
@ -45,12 +71,17 @@ class AstraVectorize(Component):
]
def build_options(self) -> dict[str, Any]:
provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]
authentication = {**self.authentication}
api_key_name = self.api_key_name
if api_key_name:
authentication["providerKey"] = api_key_name
return {
# must match exactly astra CollectionVectorServiceOptions
"collection_vector_service_options": {
"provider": self.provider,
"provider": provider_value,
"modelName": self.model_name,
"authentication": self.authentication,
"authentication": authentication,
"parameters": self.model_parameters,
},
"collection_embedding_api_key": self.provider_api_key,

View file

@ -0,0 +1,81 @@
from typing import Optional
from firecrawl.firecrawl import FirecrawlApp
from langflow.custom import CustomComponent
from langflow.schema import Data
import uuid
class FirecrawlCrawlApi(CustomComponent):
    """Custom component that crawls a site through Firecrawl and returns the result as ``Data``."""

    display_name: str = "FirecrawlCrawlApi"
    description: str = "Firecrawl Crawl API."
    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/crawl"
    field_config = {
        "api_key": {
            "display_name": "API Key",
            "field_type": "str",
            "required": True,
            "password": True,
            "info": "The API key to use Firecrawl API.",
        },
        "url": {
            "display_name": "URL",
            "field_type": "str",
            "required": True,
            "info": "The base URL to start crawling from.",
        },
        "timeout": {
            "display_name": "Timeout",
            "field_type": "int",
            "info": "The timeout in milliseconds.",
        },
        "crawlerOptions": {
            "display_name": "Crawler Options",
            "info": "Options for the crawler behavior.",
        },
        "pageOptions": {
            "display_name": "Page Options",
            "info": "The page options to send with the request.",
        },
        "idempotency_key": {
            "display_name": "Idempotency Key",
            "field_type": "str",
            "info": "Optional idempotency key to ensure unique requests.",
        },
    }

    def build(
        self,
        api_key: str,
        url: str,
        timeout: Optional[int] = 30000,
        crawlerOptions: Optional[Data] = None,
        pageOptions: Optional[Data] = None,
        idempotency_key: Optional[str] = None,
    ) -> Data:
        """Crawl ``url`` with Firecrawl and wrap the outcome in a ``Data`` object.

        Args:
            api_key: Firecrawl API key used to create the client.
            url: Base URL handed to ``FirecrawlApp.crawl_url``.
            timeout: Timeout in milliseconds; converted to whole seconds before
                being passed on. ``None`` falls back to 30000.
            crawlerOptions: Optional ``Data`` whose ``data["text"]`` entry is sent
                as the ``crawlerOptions`` request parameter.
            pageOptions: Optional ``Data`` whose ``data["text"]`` entry is sent as
                the ``pageOptions`` request parameter.
            idempotency_key: Key forwarded to Firecrawl; a random UUID4 string is
                generated when it is empty or ``None``.

        Returns:
            ``Data`` holding the crawl result under the ``"results"`` key.
        """
        # The parameter is Optional: an explicit None would make the division
        # below raise TypeError, so fall back to the declared default instead.
        if timeout is None:
            timeout = 30000

        # NOTE(review): the option payload is read from data["text"]; this
        # assumes the upstream component stores a dict there - confirm.
        crawler_options_dict = crawlerOptions.__dict__["data"]["text"] if crawlerOptions else {}
        page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}

        if not idempotency_key:
            idempotency_key = str(uuid.uuid4())

        app = FirecrawlApp(api_key=api_key)
        # Arguments after the params dict are positional on purpose.
        # NOTE(review): confirm the meaning of the third and fourth arguments
        # against the installed firecrawl-py version's crawl_url signature.
        crawl_result = app.crawl_url(
            url,
            {
                "crawlerOptions": crawler_options_dict,
                "pageOptions": page_options_dict,
            },
            True,
            int(timeout / 1000),  # milliseconds -> whole seconds
            idempotency_key,
        )

        return Data(data={"results": crawl_result})

View file

@ -0,0 +1,70 @@
from typing import Optional
from firecrawl.firecrawl import FirecrawlApp
from langflow.custom import CustomComponent
from langflow.schema import Data
class FirecrawlScrapeApi(CustomComponent):
    """Custom component that scrapes a single URL through Firecrawl and returns the result as ``Data``."""

    display_name: str = "FirecrawlScrapeApi"
    description: str = "Firecrawl Scrape API."
    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"
    field_config = {
        "api_key": {
            "display_name": "API Key",
            "field_type": "str",
            "required": True,
            "password": True,
            "info": "The API key to use Firecrawl API.",
        },
        "url": {
            "display_name": "URL",
            "field_type": "str",
            "required": True,
            "info": "The URL to scrape.",
        },
        "timeout": {
            "display_name": "Timeout",
            "info": "Timeout in milliseconds for the request.",
            "field_type": "int",
            "default_value": 10000,
        },
        "pageOptions": {
            "display_name": "Page Options",
            "info": "The page options to send with the request.",
        },
        "extractorOptions": {
            "display_name": "Extractor Options",
            "info": "The extractor options to send with the request.",
        },
    }

    def build(
        self,
        api_key: str,
        url: str,
        timeout: Optional[int] = 10000,
        pageOptions: Optional[Data] = None,
        extractorOptions: Optional[Data] = None,
    ) -> Data:
        """Scrape ``url`` with Firecrawl and wrap the response in a ``Data`` object.

        Args:
            api_key: Firecrawl API key used to create the client.
            url: URL handed to ``FirecrawlApp.scrape_url``.
            timeout: Timeout in milliseconds, sent as a string in the request
                parameters. ``None`` falls back to 10000.
            pageOptions: Optional ``Data`` whose ``data["text"]`` entry is sent as
                the ``pageOptions`` request parameter.
            extractorOptions: Optional ``Data`` whose ``data["text"]`` entry is
                sent as the ``extractorOptions`` request parameter.

        Returns:
            ``Data`` built directly from the value returned by ``scrape_url``.
        """
        # The parameter is Optional: without this guard an explicit None would
        # be sent to the API as the literal string "None".
        if timeout is None:
            timeout = 10000

        # NOTE(review): the option payload is read from data["text"]; this
        # assumes the upstream component stores a dict there - confirm.
        extractor_options_dict = extractorOptions.__dict__["data"]["text"] if extractorOptions else {}
        page_options_dict = pageOptions.__dict__["data"]["text"] if pageOptions else {}

        app = FirecrawlApp(api_key=api_key)
        results = app.scrape_url(
            url,
            {
                "timeout": str(timeout),
                "extractorOptions": extractor_options_dict,
                "pageOptions": page_options_dict,
            },
        )

        return Data(data=results)

View file

@ -4,6 +4,7 @@ from langchain_community.vectorstores import Cassandra
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.helpers.data import docs_to_data
from langflow.inputs import DictInput
from langflow.io import (
DataInput,
DropdownInput,
@ -23,24 +24,30 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent):
icon = "Cassandra"
inputs = [
MessageTextInput(name="database_ref",
display_name="Contact Points / Astra Database ID",
info="Contact points for the database (or AstraDB database ID)",
required=True),
MessageTextInput(name="username",
display_name="Username",
info="Username for the database (leave empty for AstraDB)."),
SecretStrInput(
name="token",
display_name="Token",
info="Authentication token for accessing Cassandra on Astra DB.",
required=True,
),
MessageTextInput(name="database_id", display_name="Database ID", info="The Astra database ID.", required=True),
MessageTextInput(
name="table_name",
display_name="Table Name",
info="The name of the table where vectors will be stored.",
required=True,
display_name="Password / AstraDB Token",
info="User password for the database (or AstraDB token).",
required=True
),
MessageTextInput(
name="keyspace",
display_name="Keyspace",
info="Optional key space within Astra DB. The keyspace should already be created.",
advanced=False,
info="Table Keyspace (or AstraDB namespace).",
required=True,
),
MessageTextInput(
name="table_name",
display_name="Table Name",
info="The name of the table (or AstraDB collection) where vectors will be stored.",
required=True,
),
IntInput(
name="ttl_seconds",
@ -69,6 +76,13 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent):
value="Sync",
advanced=True,
),
DictInput(
name="cluster_kwargs",
display_name="Cluster arguments",
info="Optional dictionary of additional keyword arguments for the Cassandra cluster.",
advanced=True,
is_list=True
),
MultilineInput(name="search_query", display_name="Search Query"),
DataInput(
name="ingest_data",
@ -96,10 +110,35 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent):
"Could not import cassio integration package. " "Please install it with `pip install cassio`."
)
cassio.init(
database_id=self.database_id,
token=self.token,
)
from uuid import UUID
database_ref = self.database_ref
try:
UUID(self.database_ref)
is_astra = True
except ValueError:
is_astra = False
if "," in self.database_ref:
# use a copy because we can't change the type of the parameter
database_ref = self.database_ref.split(",")
if is_astra:
cassio.init(
database_id=database_ref,
token=self.token,
cluster_kwargs=self.cluster_kwargs,
)
else:
cassio.init(
contact_points=database_ref,
username=self.username,
password=self.token,
cluster_kwargs=self.cluster_kwargs,
)
if not self.ttl_seconds:
self.ttl_seconds = None
documents = []

View file

@ -739,6 +739,20 @@ typer = ">=0.12.3"
[package.extras]
standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"]
[[package]]
name = "firecrawl-py"
version = "0.0.16"
description = "Python SDK for Firecrawl API"
optional = false
python-versions = ">=3.8"
files = [
{file = "firecrawl_py-0.0.16-py3-none-any.whl", hash = "sha256:9024f483b501852a6b9c4e6cdfc9e8dde452d922afac357080bb278a0c9c2a26"},
{file = "firecrawl_py-0.0.16.tar.gz", hash = "sha256:6c662fa0a549bc7f5c0acb704baba6731869ca0451094034264dfc1b4eb086e4"},
]
[package.dependencies]
requests = "*"
[[package]]
name = "frozenlist"
version = "1.4.1"
@ -3235,4 +3249,4 @@ local = []
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "4f566531a8539ddc81cb91a7e7f9b723c84679f0af5bb8619f7b02f9ffc6cfaa"
content-hash = "7e46144d27c633214f00e73e496c0e4d56db1fb47032a21861677ec275b79d86"

View file

@ -64,6 +64,7 @@ pyperclip = "^1.8.2"
uncurl = "^0.0.11"
sentry-sdk = {extras = ["fastapi", "loguru"], version = "^2.5.1"}
chardet = "^5.2.0"
firecrawl-py = "^0.0.16"
[tool.poetry.extras]

View file

@ -0,0 +1,61 @@
const SvgFirecrawlLogo = (props) => (
<svg
viewBox="-33 0 255 255"
width="24"
height="24"
xmlns="http://www.w3.org/2000/svg"
xmlnsXlink="http://www.w3.org/1999/xlink"
preserveAspectRatio="xMidYMid"
>
<defs>
<style>
{`
.cls-3 {
fill: url(#linear-gradient-1);
}
.cls-4 {
fill: #fc9502;
}
.cls-5 {
fill: #fce202;
}
`}
</style>
<linearGradient
id="linear-gradient-1"
gradientUnits="userSpaceOnUse"
x1="94.141"
y1="255"
x2="94.141"
y2="0.188"
>
<stop offset="0" stopColor="#ff4c0d" />
<stop offset="1" stopColor="#fc9502" />
</linearGradient>
</defs>
<g id="fire">
<path
d="M187.899,164.809 C185.803,214.868 144.574,254.812 94.000,254.812 C42.085,254.812 -0.000,211.312 -0.000,160.812 C-0.000,154.062 -0.121,140.572 10.000,117.812 C16.057,104.191 19.856,95.634 22.000,87.812 C23.178,83.513 25.469,76.683 32.000,87.812 C35.851,94.374 36.000,103.812 36.000,103.812 C36.000,103.812 50.328,92.817 60.000,71.812 C74.179,41.019 62.866,22.612 59.000,9.812 C57.662,5.384 56.822,-2.574 66.000,0.812 C75.352,4.263 100.076,21.570 113.000,39.812 C131.445,65.847 138.000,90.812 138.000,90.812 C138.000,90.812 143.906,83.482 146.000,75.812 C148.365,67.151 148.400,58.573 155.999,67.813 C163.226,76.600 173.959,93.113 180.000,108.812 C190.969,137.321 187.899,164.809 187.899,164.809 Z"
id="path-1"
className="cls-3"
fillRule="evenodd"
/>
<path
d="M94.000,254.812 C58.101,254.812 29.000,225.711 29.000,189.812 C29.000,168.151 37.729,155.000 55.896,137.166 C67.528,125.747 78.415,111.722 83.042,102.172 C83.953,100.292 86.026,90.495 94.019,101.966 C98.212,107.982 104.785,118.681 109.000,127.812 C116.266,143.555 118.000,158.812 118.000,158.812 C118.000,158.812 125.121,154.616 130.000,143.812 C131.573,140.330 134.753,127.148 143.643,140.328 C150.166,150.000 159.127,167.390 159.000,189.812 C159.000,225.711 129.898,254.812 94.000,254.812 Z"
id="path-2"
className="cls-4"
fillRule="evenodd"
/>
<path
d="M95.000,183.812 C104.250,183.812 104.250,200.941 116.000,223.812 C123.824,239.041 112.121,254.812 95.000,254.812 C77.879,254.812 69.000,240.933 69.000,223.812 C69.000,206.692 85.750,183.812 95.000,183.812 Z"
id="path-3"
className="cls-5"
fillRule="evenodd"
/>
</g>
</svg>
);
export default SvgFirecrawlLogo;

View file

@ -0,0 +1,28 @@
<svg width="800px" height="800px" viewBox="-33 0 255 255" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" preserveAspectRatio="xMidYMid">
<defs>
<style>
.cls-3 {
fill: url(#linear-gradient-1);
}
.cls-4 {
fill: #fc9502;
}
.cls-5 {
fill: #fce202;
}
</style>
<linearGradient id="linear-gradient-1" gradientUnits="userSpaceOnUse" x1="94.141" y1="255" x2="94.141" y2="0.188">
<stop offset="0" stop-color="#ff4c0d"/>
<stop offset="1" stop-color="#fc9502"/>
</linearGradient>
</defs>
<g id="fire">
<path d="M187.899,164.809 C185.803,214.868 144.574,254.812 94.000,254.812 C42.085,254.812 -0.000,211.312 -0.000,160.812 C-0.000,154.062 -0.121,140.572 10.000,117.812 C16.057,104.191 19.856,95.634 22.000,87.812 C23.178,83.513 25.469,76.683 32.000,87.812 C35.851,94.374 36.000,103.812 36.000,103.812 C36.000,103.812 50.328,92.817 60.000,71.812 C74.179,41.019 62.866,22.612 59.000,9.812 C57.662,5.384 56.822,-2.574 66.000,0.812 C75.352,4.263 100.076,21.570 113.000,39.812 C131.445,65.847 138.000,90.812 138.000,90.812 C138.000,90.812 143.906,83.482 146.000,75.812 C148.365,67.151 148.400,58.573 155.999,67.813 C163.226,76.600 173.959,93.113 180.000,108.812 C190.969,137.321 187.899,164.809 187.899,164.809 Z" id="path-1" class="cls-3" fill-rule="evenodd"/>
<path d="M94.000,254.812 C58.101,254.812 29.000,225.711 29.000,189.812 C29.000,168.151 37.729,155.000 55.896,137.166 C67.528,125.747 78.415,111.722 83.042,102.172 C83.953,100.292 86.026,90.495 94.019,101.966 C98.212,107.982 104.785,118.681 109.000,127.812 C116.266,143.555 118.000,158.812 118.000,158.812 C118.000,158.812 125.121,154.616 130.000,143.812 C131.573,140.330 134.753,127.148 143.643,140.328 C150.166,150.000 159.127,167.390 159.000,189.812 C159.000,225.711 129.898,254.812 94.000,254.812 Z" id="path-2" class="cls-4" fill-rule="evenodd"/>
<path d="M95.000,183.812 C104.250,183.812 104.250,200.941 116.000,223.812 C123.824,239.041 112.121,254.812 95.000,254.812 C77.879,254.812 69.000,240.933 69.000,223.812 C69.000,206.692 85.750,183.812 95.000,183.812 Z" id="path-3" class="cls-5" fill-rule="evenodd"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 2.2 KiB

View file

@ -0,0 +1,9 @@
import React, { forwardRef } from "react";
import SvgFirecrawlLogo from "./FirecrawlLogo";
// Ref-forwarding wrapper: hands the received ref and all props to SvgFirecrawlLogo.
export const FirecrawlIcon = forwardRef<
  SVGSVGElement,
  React.PropsWithChildren<{}>
>((iconProps, svgRef) => <SvgFirecrawlLogo ref={svgRef} {...iconProps} />);

View file

@ -171,6 +171,7 @@ import { CouchbaseIcon } from "../icons/Couchbase";
import { ElasticsearchIcon } from "../icons/ElasticsearchStore";
import { EvernoteIcon } from "../icons/Evernote";
import { FBIcon } from "../icons/FacebookMessenger";
import { FirecrawlIcon } from "../icons/Firecrawl";
import { GitBookIcon } from "../icons/GitBook";
import { GoogleIcon } from "../icons/Google";
import { GoogleGenerativeAIIcon } from "../icons/GoogleGenerativeAI";
@ -363,6 +364,8 @@ export const nodeIconsLucide: iconsType = {
CohereEmbeddings: CohereIcon,
EverNoteLoader: EvernoteIcon,
FacebookChatLoader: FBIcon,
FirecrawlCrawlApi: FirecrawlIcon,
FirecrawlScrapeApi: FirecrawlIcon,
GitbookLoader: GitBookIcon,
GoogleSearchAPIWrapper: GoogleIcon,
GoogleSearchResults: GoogleIcon,