components: simplify astra vectorize (#2370)
* Add ProviderKey as a text field, since it's the only auth entry available today
* Hide authentication and external key as advanced fields
* Make provider name and model required
* Make provider a dropdown
* Add the complete list of models to the help text (better not to hardcode models for now)
This commit is contained in:
commit
1935a7b2d6
2 changed files with 40 additions and 11 deletions
|
|
@ -1,6 +1,6 @@
|
|||
from typing import Any
|
||||
from langflow.custom import Component
|
||||
from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput
|
||||
from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput, DropdownInput
|
||||
from langflow.template.field.base import Output
|
||||
|
||||
|
||||
|
|
@ -10,32 +10,58 @@ class AstraVectorize(Component):
|
|||
documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
|
||||
icon = "AstraDB"
|
||||
|
||||
VECTORIZE_PROVIDERS_MAPPING = {
|
||||
"Azure OpenAI": ["azureOpenAI", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
|
||||
"Hugging Face - Dedicated": ["huggingfaceDedicated", ["endpoint-defined-model"]],
|
||||
"Hugging Face - Serverless": ["huggingface",
|
||||
["sentence-transformers/all-MiniLM-L6-v2", "intfloat/multilingual-e5-large",
|
||||
"intfloat/multilingual-e5-large-instruct", "BAAI/bge-small-en-v1.5",
|
||||
"BAAI/bge-base-en-v1.5", "BAAI/bge-large-en-v1.5"]],
|
||||
"Jina AI": ["jinaAI", ["jina-embeddings-v2-base-en", "jina-embeddings-v2-base-de", "jina-embeddings-v2-base-es",
|
||||
"jina-embeddings-v2-base-code", "jina-embeddings-v2-base-zh"]],
|
||||
"Mistral AI": ["mistral", ["mistral-embed"]],
|
||||
"NVIDIA": ["nvidia", ["NV-Embed-QA"]],
|
||||
"OpenAI": ["openai", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
|
||||
"Upstage": ["upstageAI", ["solar-embedding-1-large"]],
|
||||
"Voyage AI": ["voyageAI",
|
||||
["voyage-large-2-instruct", "voyage-law-2", "voyage-code-2", "voyage-large-2", "voyage-2"]]
|
||||
}
|
||||
VECTORIZE_MODELS_STR = "\n\n".join([provider + ": " + (', '.join(models[1])) for provider, models in VECTORIZE_PROVIDERS_MAPPING.items()])
|
||||
|
||||
inputs = [
|
||||
MessageTextInput(
|
||||
DropdownInput(
|
||||
name="provider",
|
||||
display_name="Provider name",
|
||||
info="The embedding provider to use.",
|
||||
options=VECTORIZE_PROVIDERS_MAPPING.keys(),
|
||||
value="",
|
||||
),
|
||||
MessageTextInput(
|
||||
name="model_name",
|
||||
display_name="Model name",
|
||||
info="The embedding model to use.",
|
||||
info=f"The embedding model to use for the selected provider. Each provider has a different set of models "
|
||||
f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
|
||||
required=True
|
||||
),
|
||||
MessageTextInput(
|
||||
name="api_key_name",
|
||||
display_name="API Key name",
|
||||
info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters."
|
||||
),
|
||||
DictInput(
|
||||
name="authentication",
|
||||
display_name="Authentication",
|
||||
info="Authentication parameters. Use the Astra Portal to add the embedding provider integration to your Astra organization.",
|
||||
display_name="Authentication parameters",
|
||||
is_list=True,
|
||||
advanced=True,
|
||||
),
|
||||
SecretStrInput(
|
||||
name="provider_api_key",
|
||||
display_name="Provider API Key",
|
||||
info="An alternative to the Astra Authentication that let you use directly the API key of the provider.",
|
||||
advanced=True,
|
||||
),
|
||||
DictInput(
|
||||
name="model_parameters",
|
||||
display_name="Model parameters",
|
||||
info="Additional model parameters.",
|
||||
advanced=True,
|
||||
is_list=True,
|
||||
),
|
||||
|
|
@ -45,12 +71,17 @@ class AstraVectorize(Component):
|
|||
]
|
||||
|
||||
def build_options(self) -> dict[str, Any]:
|
||||
provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]
|
||||
authentication = {**self.authentication}
|
||||
api_key_name = self.api_key_name
|
||||
if api_key_name:
|
||||
authentication["providerKey"] = api_key_name
|
||||
return {
|
||||
# must match exactly astra CollectionVectorServiceOptions
|
||||
"collection_vector_service_options": {
|
||||
"provider": self.provider,
|
||||
"provider": provider_value,
|
||||
"modelName": self.model_name,
|
||||
"authentication": self.authentication,
|
||||
"authentication": authentication,
|
||||
"parameters": self.model_parameters,
|
||||
},
|
||||
"collection_embedding_api_key": self.provider_api_key,
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ from typing import Optional
|
|||
from firecrawl.firecrawl import FirecrawlApp
|
||||
from langflow.custom import CustomComponent
|
||||
from langflow.schema import Data
|
||||
from langflow.services.database.models.base import orjson_dumps
|
||||
import json
|
||||
|
||||
class FirecrawlScrapeApi(CustomComponent):
|
||||
display_name: str = "FirecrawlScrapeApi"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue