diff --git a/src/backend/base/langflow/components/embeddings/AstraVectorize.py b/src/backend/base/langflow/components/embeddings/AstraVectorize.py index 7b3feab31..4b2fe3af6 100644 --- a/src/backend/base/langflow/components/embeddings/AstraVectorize.py +++ b/src/backend/base/langflow/components/embeddings/AstraVectorize.py @@ -51,37 +51,38 @@ class AstraVectorize(Component): inputs = [ DropdownInput( name="provider", - display_name="Provider name", + display_name="Provider", options=VECTORIZE_PROVIDERS_MAPPING.keys(), value="", - ), - MessageTextInput( - name="model_name", - display_name="Model name", - info=f"The embedding model to use for the selected provider. Each provider has a different set of models " - f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}", required=True, ), MessageTextInput( - name="api_key_name", - display_name="API Key name", - info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.", - ), - DictInput( - name="authentication", - display_name="Authentication parameters", - is_list=True, - advanced=True, + name="model_name", + display_name="Model Name", + info=f"The embedding model to use for the selected provider. Each provider has a different set of models " + f"available (https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}", + required=True, ), SecretStrInput( name="provider_api_key", display_name="Provider API Key", - info="An alternative to the Astra Authentication that let you use directly the API key of the provider.", + info="An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.", + ), + MessageTextInput( + name="api_key_name", + display_name="Provider API Key Name", + info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.", + advanced=True, + ), + DictInput( + name="authentication", + display_name="Authentication Parameters", + is_list=True, advanced=True, ), DictInput( name="model_parameters", - display_name="Model parameters", + display_name="Model Parameters", advanced=True, is_list=True, ), @@ -97,7 +98,7 @@ class AstraVectorize(Component): if api_key_name: authentication["providerKey"] = api_key_name return { - # must match exactly astra CollectionVectorServiceOptions + # must match astrapy.info.CollectionVectorServiceOptions "collection_vector_service_options": { "provider": provider_value, "modelName": self.model_name, diff --git a/src/backend/base/langflow/components/vectorstores/AstraDB.py b/src/backend/base/langflow/components/vectorstores/AstraDB.py index 6ed4ec7ab..40b578932 100644 --- a/src/backend/base/langflow/components/vectorstores/AstraDB.py +++ b/src/backend/base/langflow/components/vectorstores/AstraDB.py @@ -117,6 +117,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): name="embedding", display_name="Embedding or Astra Vectorize", input_types=["Embeddings", "dict"], + info="Allows either an embedding model or an Astra Vectorize configuration. If Astra Vectorize is already configured for the collection, this field is not required.", ), StrInput( name="metadata_indexing_exclude", @@ -164,6 +165,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): def _build_vector_store_no_ingest(self): if self._cached_vectorstore: return self._cached_vectorstore + try: from langchain_astradb import AstraDBVectorStore from langchain_astradb.utils.astradb import SetupMode @@ -226,11 +228,6 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): return vector_store - def build_vector_store(self): - vector_store = self._build_vector_store_no_ingest() - self._add_documents_to_vector_store(vector_store) - return vector_store - def _add_documents_to_vector_store(self, vector_store): documents = [] for _input in self.ingest_data or []: @@ -256,6 +253,18 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): else: return "similarity" + def _build_search_args(self): + args = { + "k": self.number_of_results, + "score_threshold": self.search_score_threshold, + } + + if self.search_filter: + clean_filter = {k: v for k, v in self.search_filter.items() if k and v} + if len(clean_filter) > 0: + args["filter"] = clean_filter + return args + def search_documents(self) -> list[Data]: vector_store = self._build_vector_store_no_ingest() self._add_documents_to_vector_store(vector_store) @@ -283,21 +292,14 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): logger.debug("No search input provided. Skipping search.") return [] - def _build_search_args(self): - args = { - "k": self.number_of_results, - "score_threshold": self.search_score_threshold, - } - - if self.search_filter: - clean_filter = {k: v for k, v in self.search_filter.items() if k and v} - if len(clean_filter) > 0: - args["filter"] = clean_filter - return args - def get_retriever_kwargs(self): search_args = self._build_search_args() return { "search_type": self._map_search_type(), "search_kwargs": search_args, } + + def build_vector_store(self): + vector_store = self._build_vector_store_no_ingest() + self._add_documents_to_vector_store(vector_store) + return vector_store