diff --git a/docs/docs/deployment/kubernetes.mdx b/docs/docs/deployment/kubernetes.md similarity index 98% rename from docs/docs/deployment/kubernetes.mdx rename to docs/docs/deployment/kubernetes.md index 8896ab875..8648354e2 100644 --- a/docs/docs/deployment/kubernetes.mdx +++ b/docs/docs/deployment/kubernetes.md @@ -1,11 +1,5 @@ -import Admonition from "@theme/Admonition"; - # Kubernetes - -This page may contain outdated information. It will be updated as soon as possible. - - This guide will help you get LangFlow up and running in Kubernetes cluster, including the following steps: - Install [LangFlow as IDE](#langflow-ide) in a Kubernetes cluster (for development) diff --git a/src/backend/base/langflow/components/embeddings/AstraVectorize.py b/src/backend/base/langflow/components/embeddings/AstraVectorize.py index 8c9e6d974..00b1a9a63 100644 --- a/src/backend/base/langflow/components/embeddings/AstraVectorize.py +++ b/src/backend/base/langflow/components/embeddings/AstraVectorize.py @@ -1,6 +1,6 @@ from typing import Any from langflow.custom import Component -from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput +from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput, DropdownInput from langflow.template.field.base import Output @@ -10,32 +10,58 @@ class AstraVectorize(Component): documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html" icon = "AstraDB" + VECTORIZE_PROVIDERS_MAPPING = { + "Azure OpenAI": ["azureOpenAI", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]], + "Hugging Face - Dedicated": ["huggingfaceDedicated", ["endpoint-defined-model"]], + "Hugging Face - Serverless": ["huggingface", + ["sentence-transformers/all-MiniLM-L6-v2", "intfloat/multilingual-e5-large", + "intfloat/multilingual-e5-large-instruct", "BAAI/bge-small-en-v1.5", + "BAAI/bge-base-en-v1.5", "BAAI/bge-large-en-v1.5"]], + "Jina AI": ["jinaAI", ["jina-embeddings-v2-base-en", "jina-embeddings-v2-base-de", "jina-embeddings-v2-base-es", + "jina-embeddings-v2-base-code", "jina-embeddings-v2-base-zh"]], + "Mistral AI": ["mistral", ["mistral-embed"]], + "NVIDIA": ["nvidia", ["NV-Embed-QA"]], + "OpenAI": ["openai", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]], + "Upstage": ["upstageAI", ["solar-embedding-1-large"]], + "Voyage AI": ["voyageAI", + ["voyage-large-2-instruct", "voyage-law-2", "voyage-code-2", "voyage-large-2", "voyage-2"]] + } + VECTORIZE_MODELS_STR = "\n\n".join([provider + ": " + (', '.join(models[1])) for provider, models in VECTORIZE_PROVIDERS_MAPPING.items()]) + inputs = [ - MessageTextInput( + DropdownInput( name="provider", display_name="Provider name", - info="The embedding provider to use.", + options=VECTORIZE_PROVIDERS_MAPPING.keys(), + value="", ), MessageTextInput( name="model_name", display_name="Model name", - info="The embedding model to use.", + info=f"The embedding model to use for the selected provider. Each provider has a different set of models " + f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}", + required=True + ), + MessageTextInput( + name="api_key_name", + display_name="API Key name", + info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters." ), DictInput( name="authentication", - display_name="Authentication", - info="Authentication parameters. Use the Astra Portal to add the embedding provider integration to your Astra organization.", + display_name="Authentication parameters", is_list=True, + advanced=True, ), SecretStrInput( name="provider_api_key", display_name="Provider API Key", info="An alternative to the Astra Authentication that let you use directly the API key of the provider.", + advanced=True, ), DictInput( name="model_parameters", display_name="Model parameters", - info="Additional model parameters.", advanced=True, is_list=True, ), @@ -45,12 +71,17 @@ class AstraVectorize(Component): ] def build_options(self) -> dict[str, Any]: + provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0] + authentication = {**self.authentication} + api_key_name = self.api_key_name + if api_key_name: + authentication["providerKey"] = api_key_name return { # must match exactly astra CollectionVectorServiceOptions "collection_vector_service_options": { - "provider": self.provider, + "provider": provider_value, "modelName": self.model_name, - "authentication": self.authentication, + "authentication": authentication, "parameters": self.model_parameters, }, "collection_embedding_api_key": self.provider_api_key, diff --git a/src/backend/base/langflow/components/langchain_utilities/FirecrawlScrapeApi.py b/src/backend/base/langflow/components/langchain_utilities/FirecrawlScrapeApi.py index 5cdfc82ba..277cbfb93 100644 --- a/src/backend/base/langflow/components/langchain_utilities/FirecrawlScrapeApi.py +++ b/src/backend/base/langflow/components/langchain_utilities/FirecrawlScrapeApi.py @@ -2,8 +2,6 @@ from typing import Optional from firecrawl.firecrawl import FirecrawlApp from langflow.custom import CustomComponent from langflow.schema import Data -from langflow.services.database.models.base import orjson_dumps -import json class FirecrawlScrapeApi(CustomComponent): display_name: str = "FirecrawlScrapeApi" diff --git a/src/backend/base/langflow/components/vectorstores/Cassandra.py b/src/backend/base/langflow/components/vectorstores/Cassandra.py index 0e4e9eb84..14ca572f4 100644 --- a/src/backend/base/langflow/components/vectorstores/Cassandra.py +++ b/src/backend/base/langflow/components/vectorstores/Cassandra.py @@ -4,6 +4,7 @@ from langchain_community.vectorstores import Cassandra from langflow.base.vectorstores.model import LCVectorStoreComponent from langflow.helpers.data import docs_to_data +from langflow.inputs import DictInput from langflow.io import ( DataInput, DropdownInput, @@ -23,24 +24,30 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): icon = "Cassandra" inputs = [ + MessageTextInput(name="database_ref", + display_name="Contact Points / Astra Database ID", + info="Contact points for the database (or AstraDB database ID)", + required=True), + MessageTextInput(name="username", + display_name="Username", + info="Username for the database (leave empty for AstraDB)."), SecretStrInput( name="token", - display_name="Token", - info="Authentication token for accessing Cassandra on Astra DB.", - required=True, - ), - MessageTextInput(name="database_id", display_name="Database ID", info="The Astra database ID.", required=True), - MessageTextInput( - name="table_name", - display_name="Table Name", - info="The name of the table where vectors will be stored.", - required=True, + display_name="Password / AstraDB Token", + info="User password for the database (or AstraDB token).", + required=True ), MessageTextInput( name="keyspace", display_name="Keyspace", - info="Optional key space within Astra DB. The keyspace should already be created.", - advanced=False, + info="Table Keyspace (or AstraDB namespace).", + required=True, + ), + MessageTextInput( + name="table_name", + display_name="Table Name", + info="The name of the table (or AstraDB collection) where vectors will be stored.", + required=True, ), IntInput( name="ttl_seconds", @@ -69,6 +76,13 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): value="Sync", advanced=True, ), + DictInput( + name="cluster_kwargs", + display_name="Cluster arguments", + info="Optional dictionary of additional keyword arguments for the Cassandra cluster.", + advanced=True, + is_list=True + ), MultilineInput(name="search_query", display_name="Search Query"), DataInput( name="ingest_data", @@ -96,10 +110,35 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): "Could not import cassio integration package. " "Please install it with `pip install cassio`." ) - cassio.init( - database_id=self.database_id, - token=self.token, - ) + from uuid import UUID + + database_ref = self.database_ref + + try: + UUID(self.database_ref) + is_astra = True + except ValueError: + is_astra = False + if "," in self.database_ref: + # use a copy because we can't change the type of the parameter + database_ref = self.database_ref.split(",") + + if is_astra: + cassio.init( + database_id=database_ref, + token=self.token, + cluster_kwargs=self.cluster_kwargs, + ) + else: + cassio.init( + contact_points=database_ref, + username=self.username, + password=self.token, + cluster_kwargs=self.cluster_kwargs, + ) + + if not self.ttl_seconds: + self.ttl_seconds = None documents = []