docs: small updates to astra vectorize docs (#2497)

small updates to vectorize docs

Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Jordan Frazier 2024-07-04 11:25:58 -07:00 committed by GitHub
commit a933139927
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 39 additions and 36 deletions

View file

@ -51,37 +51,38 @@ class AstraVectorize(Component):
inputs = [
DropdownInput(
name="provider",
display_name="Provider name",
display_name="Provider",
options=VECTORIZE_PROVIDERS_MAPPING.keys(),
value="",
),
MessageTextInput(
name="model_name",
display_name="Model name",
info=f"The embedding model to use for the selected provider. Each provider has a different set of models "
f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
required=True,
),
MessageTextInput(
name="api_key_name",
display_name="API Key name",
info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.",
),
DictInput(
name="authentication",
display_name="Authentication parameters",
is_list=True,
advanced=True,
name="model_name",
display_name="Model Name",
info=f"The embedding model to use for the selected provider. Each provider has a different set of models "
f"available (https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
required=True,
),
SecretStrInput(
name="provider_api_key",
display_name="Provider API Key",
info="An alternative to the Astra Authentication that let you use directly the API key of the provider.",
info="An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.",
),
MessageTextInput(
name="api_key_name",
display_name="Provider API Key Name",
info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.",
advanced=True,
),
DictInput(
name="authentication",
display_name="Authentication Parameters",
is_list=True,
advanced=True,
),
DictInput(
name="model_parameters",
display_name="Model parameters",
display_name="Model Parameters",
advanced=True,
is_list=True,
),
@ -97,7 +98,7 @@ class AstraVectorize(Component):
if api_key_name:
authentication["providerKey"] = api_key_name
return {
# must match exactly astra CollectionVectorServiceOptions
# must match astrapy.info.CollectionVectorServiceOptions
"collection_vector_service_options": {
"provider": provider_value,
"modelName": self.model_name,

View file

@ -117,6 +117,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent):
name="embedding",
display_name="Embedding or Astra Vectorize",
input_types=["Embeddings", "dict"],
info="Allows either an embedding model or an Astra Vectorize configuration. If Astra Vectorize is already configured for the collection, this field is not required.",
),
StrInput(
name="metadata_indexing_exclude",
@ -164,6 +165,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent):
def _build_vector_store_no_ingest(self):
if self._cached_vectorstore:
return self._cached_vectorstore
try:
from langchain_astradb import AstraDBVectorStore
from langchain_astradb.utils.astradb import SetupMode
@ -226,11 +228,6 @@ class AstraVectorStoreComponent(LCVectorStoreComponent):
return vector_store
def build_vector_store(self):
    """Build the vector store, ingest documents into it, and return it.

    The store comes from ``_build_vector_store_no_ingest``; it is then handed
    to ``_add_documents_to_vector_store`` before being returned to the caller.
    """
    store = self._build_vector_store_no_ingest()
    # Ingestion happens on the same store object that is returned.
    self._add_documents_to_vector_store(store)
    return store
def _add_documents_to_vector_store(self, vector_store):
documents = []
for _input in self.ingest_data or []:
@ -256,6 +253,18 @@ class AstraVectorStoreComponent(LCVectorStoreComponent):
else:
return "similarity"
def _build_search_args(self):
    """Assemble the keyword arguments used for a search.

    Returns:
        dict: Always contains ``"k"`` (from ``self.number_of_results``) and
        ``"score_threshold"`` (from ``self.search_score_threshold``). A
        ``"filter"`` entry is added only when ``self.search_filter`` still
        has entries after dropping every pair whose key or value is falsy.
    """
    args = {
        "k": self.number_of_results,
        "score_threshold": self.search_score_threshold,
    }
    if self.search_filter:
        # Drop pairs with an empty key or value. Note that this also drops
        # other falsy values such as 0 or False.
        clean_filter = {k: v for k, v in self.search_filter.items() if k and v}
        if clean_filter:
            args["filter"] = clean_filter
    return args
def search_documents(self) -> list[Data]:
vector_store = self._build_vector_store_no_ingest()
self._add_documents_to_vector_store(vector_store)
@ -283,21 +292,14 @@ class AstraVectorStoreComponent(LCVectorStoreComponent):
logger.debug("No search input provided. Skipping search.")
return []
def _build_search_args(self):
    """Assemble the keyword arguments used for a search.

    Returns:
        dict: Always contains ``"k"`` (from ``self.number_of_results``) and
        ``"score_threshold"`` (from ``self.search_score_threshold``). A
        ``"filter"`` entry is added only when ``self.search_filter`` still
        has entries after dropping every pair whose key or value is falsy.
    """
    args = {
        "k": self.number_of_results,
        "score_threshold": self.search_score_threshold,
    }
    if self.search_filter:
        # Drop pairs with an empty key or value. Note that this also drops
        # other falsy values such as 0 or False.
        clean_filter = {k: v for k, v in self.search_filter.items() if k and v}
        if clean_filter:
            args["filter"] = clean_filter
    return args
def get_retriever_kwargs(self):
    """Return the retriever configuration as a dict.

    Returns:
        dict: ``"search_type"`` holds the result of ``_map_search_type`` and
        ``"search_kwargs"`` holds the result of ``_build_search_args``.
    """
    # The search arguments are built before the search type is mapped.
    kwargs = self._build_search_args()
    search_type = self._map_search_type()
    return {"search_type": search_type, "search_kwargs": kwargs}
def build_vector_store(self):
    """Build the vector store, ingest documents into it, and return it.

    The store comes from ``_build_vector_store_no_ingest``; it is then handed
    to ``_add_documents_to_vector_store`` before being returned to the caller.
    """
    store = self._build_vector_store_no_ingest()
    # Ingestion happens on the same store object that is returned.
    self._add_documents_to_vector_store(store)
    return store