Merge branch 'main' into fix_ref_main

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-06-26 08:12:02 -07:00 committed by GitHub
commit 8456d58d9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 95 additions and 33 deletions

View file

@ -1,11 +1,5 @@
import Admonition from "@theme/Admonition";
# Kubernetes
<Admonition type="warning" title="warning">
This page may contain outdated information. It will be updated as soon as possible.
</Admonition>
This guide will help you get LangFlow up and running in a Kubernetes cluster, including the following steps:
- Install [LangFlow as IDE](#langflow-ide) in a Kubernetes cluster (for development)

View file

@ -1,6 +1,6 @@
from typing import Any
from langflow.custom import Component
from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput
from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput, DropdownInput
from langflow.template.field.base import Output
@ -10,32 +10,58 @@ class AstraVectorize(Component):
documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
icon = "AstraDB"
# Keyed by the provider's display label. Each value is a two-item list:
# [provider identifier, list of model names offered by that provider].
VECTORIZE_PROVIDERS_MAPPING = {
    "Azure OpenAI": [
        "azureOpenAI",
        ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"],
    ],
    "Hugging Face - Dedicated": [
        "huggingfaceDedicated",
        ["endpoint-defined-model"],
    ],
    "Hugging Face - Serverless": [
        "huggingface",
        [
            "sentence-transformers/all-MiniLM-L6-v2",
            "intfloat/multilingual-e5-large",
            "intfloat/multilingual-e5-large-instruct",
            "BAAI/bge-small-en-v1.5",
            "BAAI/bge-base-en-v1.5",
            "BAAI/bge-large-en-v1.5",
        ],
    ],
    "Jina AI": [
        "jinaAI",
        [
            "jina-embeddings-v2-base-en",
            "jina-embeddings-v2-base-de",
            "jina-embeddings-v2-base-es",
            "jina-embeddings-v2-base-code",
            "jina-embeddings-v2-base-zh",
        ],
    ],
    "Mistral AI": [
        "mistral",
        ["mistral-embed"],
    ],
    "NVIDIA": [
        "nvidia",
        ["NV-Embed-QA"],
    ],
    "OpenAI": [
        "openai",
        ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"],
    ],
    "Upstage": [
        "upstageAI",
        ["solar-embedding-1-large"],
    ],
    "Voyage AI": [
        "voyageAI",
        [
            "voyage-large-2-instruct",
            "voyage-law-2",
            "voyage-code-2",
            "voyage-large-2",
            "voyage-2",
        ],
    ],
}
# Human-readable summary: one "<label>: <model>, <model>, ..." entry per
# provider, in mapping order, with a blank line between entries.
VECTORIZE_MODELS_STR = "\n\n".join(
    f"{label}: {', '.join(entry[1])}" for label, entry in VECTORIZE_PROVIDERS_MAPPING.items()
)
inputs = [
MessageTextInput(
DropdownInput(
name="provider",
display_name="Provider name",
info="The embedding provider to use.",
options=VECTORIZE_PROVIDERS_MAPPING.keys(),
value="",
),
MessageTextInput(
name="model_name",
display_name="Model name",
info="The embedding model to use.",
info=f"The embedding model to use for the selected provider. Each provider has a different set of models "
f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
required=True
),
MessageTextInput(
name="api_key_name",
display_name="API Key name",
info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters."
),
DictInput(
name="authentication",
display_name="Authentication",
info="Authentication parameters. Use the Astra Portal to add the embedding provider integration to your Astra organization.",
display_name="Authentication parameters",
is_list=True,
advanced=True,
),
SecretStrInput(
name="provider_api_key",
display_name="Provider API Key",
info="An alternative to the Astra Authentication that let you use directly the API key of the provider.",
advanced=True,
),
DictInput(
name="model_parameters",
display_name="Model parameters",
info="Additional model parameters.",
advanced=True,
is_list=True,
),
@ -45,12 +71,17 @@ class AstraVectorize(Component):
]
def build_options(self) -> dict[str, Any]:
provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]
authentication = {**self.authentication}
api_key_name = self.api_key_name
if api_key_name:
authentication["providerKey"] = api_key_name
return {
# must match exactly astra CollectionVectorServiceOptions
"collection_vector_service_options": {
"provider": self.provider,
"provider": provider_value,
"modelName": self.model_name,
"authentication": self.authentication,
"authentication": authentication,
"parameters": self.model_parameters,
},
"collection_embedding_api_key": self.provider_api_key,

View file

@ -2,8 +2,6 @@ from typing import Optional
from firecrawl.firecrawl import FirecrawlApp
from langflow.custom import CustomComponent
from langflow.schema import Data
from langflow.services.database.models.base import orjson_dumps
import json
class FirecrawlScrapeApi(CustomComponent):
display_name: str = "FirecrawlScrapeApi"

View file

@ -4,6 +4,7 @@ from langchain_community.vectorstores import Cassandra
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.helpers.data import docs_to_data
from langflow.inputs import DictInput
from langflow.io import (
DataInput,
DropdownInput,
@ -23,24 +24,30 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent):
icon = "Cassandra"
inputs = [
MessageTextInput(name="database_ref",
display_name="Contact Points / Astra Database ID",
info="Contact points for the database (or AstraDB database ID)",
required=True),
MessageTextInput(name="username",
display_name="Username",
info="Username for the database (leave empty for AstraDB)."),
SecretStrInput(
name="token",
display_name="Token",
info="Authentication token for accessing Cassandra on Astra DB.",
required=True,
),
MessageTextInput(name="database_id", display_name="Database ID", info="The Astra database ID.", required=True),
MessageTextInput(
name="table_name",
display_name="Table Name",
info="The name of the table where vectors will be stored.",
required=True,
display_name="Password / AstraDB Token",
info="User password for the database (or AstraDB token).",
required=True
),
MessageTextInput(
name="keyspace",
display_name="Keyspace",
info="Optional key space within Astra DB. The keyspace should already be created.",
advanced=False,
info="Table Keyspace (or AstraDB namespace).",
required=True,
),
MessageTextInput(
name="table_name",
display_name="Table Name",
info="The name of the table (or AstraDB collection) where vectors will be stored.",
required=True,
),
IntInput(
name="ttl_seconds",
@ -69,6 +76,13 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent):
value="Sync",
advanced=True,
),
DictInput(
name="cluster_kwargs",
display_name="Cluster arguments",
info="Optional dictionary of additional keyword arguments for the Cassandra cluster.",
advanced=True,
is_list=True
),
MultilineInput(name="search_query", display_name="Search Query"),
DataInput(
name="ingest_data",
@ -96,10 +110,35 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent):
"Could not import cassio integration package. " "Please install it with `pip install cassio`."
)
cassio.init(
database_id=self.database_id,
token=self.token,
)
from uuid import UUID
database_ref = self.database_ref
try:
UUID(self.database_ref)
is_astra = True
except ValueError:
is_astra = False
if "," in self.database_ref:
# use a copy because we can't change the type of the parameter
database_ref = self.database_ref.split(",")
if is_astra:
cassio.init(
database_id=database_ref,
token=self.token,
cluster_kwargs=self.cluster_kwargs,
)
else:
cassio.init(
contact_points=database_ref,
username=self.username,
password=self.token,
cluster_kwargs=self.cluster_kwargs,
)
if not self.ttl_seconds:
self.ttl_seconds = None
documents = []