Refactor AstraDBSearchComponent parameters and add informative descriptions

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-03-29 15:11:42 -03:00
commit 84fb678b2f
2 changed files with 81 additions and 34 deletions

View file

@@ -23,51 +23,73 @@ class AstraDBSearchComponent(LCVectorStoreComponent):
"info": "Input value to search",
},
"embedding": {"display_name": "Embedding", "info": "Embedding to use"},
"collection_name": {"display_name": "Collection Name", "info": "Collection name"},
"token": {"display_name": "Token", "info": "Token to use", "password": True},
"api_endpoint": {"display_name": "API Endpoint", "info": "API Endpoint to use"},
"namespace": {"display_name": "Namespace", "info": "Namespace to use", "advanced": True},
"metric": {"display_name": "Metric", "info": "Metric to use", "advanced": True},
"batch_size": {"display_name": "Batch Size", "info": "Batch size to use", "advanced": True},
"collection_name": {
"display_name": "Collection Name",
"info": "The name of the collection within AstraDB where the vectors will be stored.",
},
"token": {
"display_name": "Token",
"info": "Authentication token for accessing AstraDB.",
"password": True,
},
"api_endpoint": {
"display_name": "API Endpoint",
"info": "API endpoint URL for the AstraDB service.",
},
"namespace": {
"display_name": "Namespace",
"info": "Optional namespace within AstraDB to use for the collection.",
"advanced": True,
},
"metric": {
"display_name": "Metric",
"info": "Optional distance metric for vector comparisons in the vector store.",
"advanced": True,
},
"batch_size": {
"display_name": "Batch Size",
"info": "Optional number of records to process in a single batch.",
"advanced": True,
},
"bulk_insert_batch_concurrency": {
"display_name": "Bulk Insert Batch Concurrency",
"info": "Bulk Insert Batch Concurrency to use",
"info": "Optional concurrency level for bulk insert operations.",
"advanced": True,
},
"bulk_insert_overwrite_concurrency": {
"display_name": "Bulk Insert Overwrite Concurrency",
"info": "Bulk Insert Overwrite Concurrency to use",
"info": "Optional concurrency level for bulk insert operations that overwrite existing records.",
"advanced": True,
},
"bulk_delete_concurrency": {
"display_name": "Bulk Delete Concurrency",
"info": "Bulk Delete Concurrency to use",
"info": "Optional concurrency level for bulk delete operations.",
"advanced": True,
},
"setup_mode": {
"display_name": "Setup Mode",
"info": "Setup mode for the vector store",
"info": "Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.",
"options": ["Sync", "Async", "Off"],
"advanced": True,
},
"pre_delete_collection": {
"display_name": "Pre Delete Collection",
"info": "Pre delete collection",
"info": "Boolean flag to determine whether to delete the collection before creating a new one.",
"advanced": True,
},
"metadata_indexing_include": {
"display_name": "Metadata Indexing Include",
"info": "Metadata Indexing Include",
"info": "Optional list of metadata fields to include in the indexing.",
"advanced": True,
},
"metadata_indexing_exclude": {
"display_name": "Metadata Indexing Exclude",
"info": "Metadata Indexing Exclude",
"info": "Optional list of metadata fields to exclude from the indexing.",
"advanced": True,
},
"collection_indexing_policy": {
"display_name": "Collection Indexing Policy",
"info": "Collection Indexing Policy",
"info": "Optional dictionary defining the indexing policy for the collection.",
"advanced": True,
},
}
@@ -77,9 +99,9 @@ class AstraDBSearchComponent(LCVectorStoreComponent):
embedding: Embeddings,
collection_name: str,
input_value: Text,
token: str,
api_endpoint: str,
search_type: str = "Similarity",
token: Optional[str] = None,
api_endpoint: Optional[str] = None,
namespace: Optional[str] = None,
metric: Optional[str] = None,
batch_size: Optional[int] = None,

View file

@@ -16,53 +16,78 @@ class AstraDBVectorStoreComponent(CustomComponent):
def build_config(self):
return {
"inputs": {"display_name": "Inputs", "info": "Inputs to AstraDB"},
"inputs": {
"display_name": "Inputs",
"info": "Optional list of records to be processed and stored in the vector store.",
},
"embedding": {"display_name": "Embedding", "info": "Embedding to use"},
"collection_name": {"display_name": "Collection Name", "info": "Collection name"},
"token": {"display_name": "Token", "info": "Token to use", "password": True},
"api_endpoint": {"display_name": "API Endpoint", "info": "API Endpoint to use"},
"namespace": {"display_name": "Namespace", "info": "Namespace to use", "advanced": True},
"metric": {"display_name": "Metric", "info": "Metric to use", "advanced": True},
"batch_size": {"display_name": "Batch Size", "info": "Batch size to use", "advanced": True},
"collection_name": {
"display_name": "Collection Name",
"info": "The name of the collection within AstraDB where the vectors will be stored.",
},
"token": {
"display_name": "Token",
"info": "Authentication token for accessing AstraDB.",
"password": True,
},
"api_endpoint": {
"display_name": "API Endpoint",
"info": "API endpoint URL for the AstraDB service.",
},
"namespace": {
"display_name": "Namespace",
"info": "Optional namespace within AstraDB to use for the collection.",
"advanced": True,
},
"metric": {
"display_name": "Metric",
"info": "Optional distance metric for vector comparisons in the vector store.",
"advanced": True,
},
"batch_size": {
"display_name": "Batch Size",
"info": "Optional number of records to process in a single batch.",
"advanced": True,
},
"bulk_insert_batch_concurrency": {
"display_name": "Bulk Insert Batch Concurrency",
"info": "Bulk Insert Batch Concurrency to use",
"info": "Optional concurrency level for bulk insert operations.",
"advanced": True,
},
"bulk_insert_overwrite_concurrency": {
"display_name": "Bulk Insert Overwrite Concurrency",
"info": "Bulk Insert Overwrite Concurrency to use",
"info": "Optional concurrency level for bulk insert operations that overwrite existing records.",
"advanced": True,
},
"bulk_delete_concurrency": {
"display_name": "Bulk Delete Concurrency",
"info": "Bulk Delete Concurrency to use",
"info": "Optional concurrency level for bulk delete operations.",
"advanced": True,
},
"setup_mode": {
"display_name": "Setup Mode",
"info": "Setup mode for the vector store",
"info": "Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.",
"options": ["Sync", "Async", "Off"],
"advanced": True,
},
"pre_delete_collection": {
"display_name": "Pre Delete Collection",
"info": "Pre delete collection",
"info": "Boolean flag to determine whether to delete the collection before creating a new one.",
"advanced": True,
},
"metadata_indexing_include": {
"display_name": "Metadata Indexing Include",
"info": "Metadata Indexing Include",
"info": "Optional list of metadata fields to include in the indexing.",
"advanced": True,
},
"metadata_indexing_exclude": {
"display_name": "Metadata Indexing Exclude",
"info": "Metadata Indexing Exclude",
"info": "Optional list of metadata fields to exclude from the indexing.",
"advanced": True,
},
"collection_indexing_policy": {
"display_name": "Collection Indexing Policy",
"info": "Collection Indexing Policy",
"info": "Optional dictionary defining the indexing policy for the collection.",
"advanced": True,
},
}
@@ -70,17 +95,17 @@ class AstraDBVectorStoreComponent(CustomComponent):
def build(
self,
embedding: Embeddings,
token: str,
api_endpoint: str,
collection_name: str,
inputs: Optional[List[Record]] = None,
token: Optional[str] = None,
api_endpoint: Optional[str] = None,
namespace: Optional[str] = None,
metric: Optional[str] = None,
batch_size: Optional[int] = None,
bulk_insert_batch_concurrency: Optional[int] = None,
bulk_insert_overwrite_concurrency: Optional[int] = None,
bulk_delete_concurrency: Optional[int] = None,
setup_mode: str = "Sync",
setup_mode: str = "Async",
pre_delete_collection: bool = False,
metadata_indexing_include: Optional[List[str]] = None,
metadata_indexing_exclude: Optional[List[str]] = None,