From c902fb9e1113c16b30e0e58fd35ca05db81275e6 Mon Sep 17 00:00:00 2001 From: Deon Sanchez <69873175+deon-sanchez@users.noreply.github.com> Date: Fri, 14 Feb 2025 19:10:12 -0700 Subject: [PATCH] feat: Generic Callback Dialog Input for Custom Component (#6236) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * force dialog * Reimplement backend dialog * Update astradb.py * Clean up dropdown options * Remove unused import * [autofix.ci] apply automated fixes * Update astradb.py * Ruff fixes * Update Vector Store RAG.json * [autofix.ci] apply automated fixes * fix: Conditionally render custom option dialog in dropdown * ✨ (NodeDialogComponent/index.tsx): Add support for passing 'name' prop to NodeDialog component to improve customization and flexibility 📝 (NodeDialogComponent/index.tsx): Update comments and remove unused import to improve code readability and maintainability 🔧 (dropdownComponent/index.tsx): Pass 'name' prop to Dropdown component to enhance customization and flexibility * ✨ Refactor NodeDialog component to improve state management and payload handling * Update astradb.py * [autofix.ci] apply automated fixes * ✨ Enhance NodeDialog and Dropdown components with improved payload handling and type safety * Add DB creation functionality * First version of create * Update astradb.py * Fix ruff errors * Update Vector Store RAG.json * [autofix.ci] apply automated fixes * Update astradb.py * [autofix.ci] apply automated fixes * Update astradb.py * [autofix.ci] apply automated fixes * Update astradb.py * Update astradb.py * Update astradb.py * Update Vector Store RAG.json * [autofix.ci] apply automated fixes * Update astradb.py * [autofix.ci] apply automated fixes * feat: Enhance dropdown and node dialog with loading states and improved UX * refactor: Improve error handling in NodeDialog component * refactor: Update default excluded keys in dropdown metadata filter * [autofix.ci] apply automated fixes * refactor: Update 
Vector Store RAG starter project JSON with formatting and connection ID corrections * Hide fields that aren't relevant yet * [autofix.ci] apply automated fixes * Update Vector Store RAG.json * [autofix.ci] apply automated fixes * Update astradb.py * feat: Improve dropdown component with loading states and enhanced UX * Update astradb.py * [autofix.ci] apply automated fixes * Update astradb.py * Simon feedback * [autofix.ci] apply automated fixes * feat: Enhance dropdown and UI components with status indicators and loading states * refactor: Update dropdown metadata filtering to exclude 'icon' key * fix: Conditionally render dropdown icon when available * fix: Improve dropdown icon rendering with null checks * chore: Remove debug console log in dropdown component * Add support for icons in the dropdowns * Update astradb.py * Update Vector Store RAG.json * [autofix.ci] apply automated fixes * feat: Enhance dropdown status display and color handling * feat: Add auto-close functionality to node dialog and expand status color handling * feat: Add real-time template refresh for node dialog fields * refactor: Improve node dialog component state management and naming * Async for create collection * [autofix.ci] apply automated fixes * Dynamic provider list generation * Update astradb.py * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Update astradb.py * [autofix.ci] apply automated fixes --------- Co-authored-by: Eric Hare Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: cristhianzl --- .../components/vectorstores/astradb.py | 460 +++++++---- .../starter_projects/Vector Store RAG.json | 739 +++++++++++++----- .../components/NodeDialogComponent/index.tsx | 205 +++-- .../common/fetchIconComponent/index.tsx | 21 + .../common/loadingTextComponent/index.tsx | 23 + .../core/dropdownComponent/index.tsx | 374 +++++---- .../core/parameterRenderComponent/types.ts | 1 + 
src/frontend/src/style/index.css | 2 + src/frontend/src/utils/stringManipulation.ts | 21 + src/frontend/src/utils/utils.ts | 2 +- src/frontend/tailwind.config.mjs | 4 + 11 files changed, 1314 insertions(+), 538 deletions(-) create mode 100644 src/frontend/src/components/common/fetchIconComponent/index.tsx create mode 100644 src/frontend/src/components/common/loadingTextComponent/index.tsx diff --git a/src/backend/base/langflow/components/vectorstores/astradb.py b/src/backend/base/langflow/components/vectorstores/astradb.py index 83a4d46f6..2f19e9461 100644 --- a/src/backend/base/langflow/components/vectorstores/astradb.py +++ b/src/backend/base/langflow/components/vectorstores/astradb.py @@ -1,8 +1,8 @@ -import os from collections import defaultdict -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from astrapy import AstraDBAdmin, DataAPIClient, Database +from astrapy.info import CollectionDescriptor from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store @@ -36,22 +36,24 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): default_factory=lambda: { "data": { "node": { - "description": "Create a new database in Astra DB.", - "display_name": "Create New Database", + "name": "create_database", + "description": "", + "display_name": "Create new database", "field_order": ["new_database_name", "cloud_provider", "region"], "template": { "new_database_name": StrInput( name="new_database_name", - display_name="New Database Name", + display_name="Name", info="Name of the new database to create in Astra DB.", required=True, ), "cloud_provider": DropdownInput( name="cloud_provider", - display_name="Cloud Provider", + display_name="Cloud provider", info="Cloud provider for the new database.", options=["Amazon Web Services", "Google Cloud Platform", "Microsoft Azure"], required=True, + real_time_refresh=True, 
), "region": DropdownInput( name="region", @@ -73,8 +75,9 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): default_factory=lambda: { "data": { "node": { - "description": "Create a new collection in Astra DB.", - "display_name": "Create New Collection", + "name": "create_collection", + "description": "", + "display_name": "Create new collection", "field_order": [ "new_collection_name", "embedding_generation_provider", @@ -83,23 +86,31 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): "template": { "new_collection_name": StrInput( name="new_collection_name", - display_name="New Collection Name", + display_name="Name", info="Name of the new collection to create in Astra DB.", required=True, ), "embedding_generation_provider": DropdownInput( name="embedding_generation_provider", - display_name="Embedding Generation Provider", + display_name="Embedding generation method", info="Provider to use for generating embeddings.", - options=[], + real_time_refresh=True, required=True, + options=["Bring your own", "Nvidia"], ), "embedding_generation_model": DropdownInput( name="embedding_generation_model", - display_name="Embedding Generation Model", + display_name="Embedding model", info="Model to use for generating embeddings.", - options=[], required=True, + options=[], + ), + "dimension": IntInput( + name="dimension", + display_name="Dimensions (Required only for `Bring your own`)", + info="Dimensions of the embeddings to generate.", + required=False, + value=1024, ), }, }, @@ -125,17 +136,18 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): real_time_refresh=True, ), DropdownInput( - name="api_endpoint", + name="database_name", display_name="Database", - info="The Database / API Endpoint for the Astra DB instance.", + info="The Database name for the Astra DB instance.", required=True, refresh_button=True, real_time_refresh=True, + dialog_inputs=asdict(NewDatabaseInput()), combobox=True, ), StrInput( - name="d_api_endpoint", - 
display_name="Database API Endpoint", + name="api_endpoint", + display_name="Astra DB API Endpoint", info="The API Endpoint for the Astra DB instance. Supercedes database selection.", advanced=True, ), @@ -146,8 +158,9 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): required=True, refresh_button=True, real_time_refresh=True, - # dialog_inputs=asdict(NewCollectionInput()), + dialog_inputs=asdict(NewCollectionInput()), combobox=True, + advanced=True, ), StrInput( name="keyspace", @@ -238,6 +251,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): @classmethod def map_cloud_providers(cls): + # TODO: Programmatically fetch the regions for each cloud provider return { "Amazon Web Services": { "id": "aws", @@ -254,54 +268,87 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): } @classmethod - def create_database_api( + def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None): + try: + # Get the admin object + admin = AstraDBAdmin(token=token, environment=environment) + db_admin = admin.get_database_admin(api_endpoint=api_endpoint) + + # Get the list of embedding providers + embedding_providers = db_admin.find_embedding_providers().as_dict() + + vectorize_providers_mapping = {} + # Map the provider display name to the provider key and models + for provider_key, provider_data in embedding_providers["embeddingProviders"].items(): + # Get the provider display name and models + display_name = provider_data["displayName"] + models = [model["name"] for model in provider_data["models"]] + + # Build our mapping + vectorize_providers_mapping[display_name] = [provider_key, models] + + # Sort the resulting dictionary + return defaultdict(list, dict(sorted(vectorize_providers_mapping.items()))) + except Exception as e: + msg = f"Error fetching vectorize providers: {e}" + raise ValueError(msg) from e + + @classmethod + async def create_database_api( cls, - token: str, new_database_name: str, 
cloud_provider: str, region: str, + token: str, + environment: str | None = None, + keyspace: str | None = None, ): - client = DataAPIClient(token=token) + client = DataAPIClient(token=token, environment=environment) # Get the admin object admin_client = client.get_admin(token=token) # Call the create database function - return admin_client.create_database( + return await admin_client.async_create_database( name=new_database_name, - cloud_provider=cloud_provider, + cloud_provider=cls.map_cloud_providers()[cloud_provider]["id"], region=region, + keyspace=keyspace, + wait_until_active=False, ) @classmethod - def create_collection_api( + async def create_collection_api( cls, - token: str, - database_name: str, new_collection_name: str, + token: str, + api_endpoint: str, + environment: str | None = None, + keyspace: str | None = None, dimension: int | None = None, embedding_generation_provider: str | None = None, embedding_generation_model: str | None = None, ): + # Create the data API client client = DataAPIClient(token=token) - api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name) # Get the database object - database = client.get_database(api_endpoint=api_endpoint, token=token) + database = client.get_async_database(api_endpoint=api_endpoint, token=token) # Build vectorize options, if needed vectorize_options = None if not dimension: vectorize_options = CollectionVectorServiceOptions( - provider=embedding_generation_provider, + provider=cls.get_vectorize_providers( + token=token, environment=environment, api_endpoint=api_endpoint + ).get(embedding_generation_provider, [None, []])[0], model_name=embedding_generation_model, - authentication=None, - parameters=None, ) # Create the collection - return database.create_collection( + return await database.create_collection( name=new_collection_name, + keyspace=keyspace, dimension=dimension, service=vectorize_options, ) @@ -325,16 +372,28 @@ class 
AstraDBVectorStoreComponent(LCVectorStoreComponent): db_info_dict = {} for db in db_list: try: + # Get the API endpoint for the database api_endpoint = f"https://{db.info.id}-{db.info.region}.apps.astra{env_string}.datastax.com" - db_info_dict[db.info.name] = { - "api_endpoint": api_endpoint, - "collections": len( + + # Get the number of collections + try: + num_collections = len( list( client.get_database( api_endpoint=api_endpoint, token=token, keyspace=db.info.keyspace ).list_collection_names(keyspace=db.info.keyspace) ) - ), + ) + except Exception: # noqa: BLE001 + num_collections = 0 + if db.status != "PENDING": + continue + + # Add the database to the dictionary + db_info_dict[db.info.name] = { + "api_endpoint": api_endpoint, + "collections": num_collections, + "status": db.status if db.status != "ACTIVE" else None, } except Exception: # noqa: BLE001, S110 pass @@ -364,15 +423,20 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): if not database_name: return None - # Otherwise, get the URL from the database list - return cls.get_database_list_static(token=token, environment=environment).get(database_name).get("api_endpoint") + # Grab the database object + db = cls.get_database_list_static(token=token, environment=environment).get(database_name) + if not db: + return None - def get_api_endpoint(self, *, api_endpoint: str | None = None): + # Otherwise, get the URL from the database list + return db.get("api_endpoint") + + def get_api_endpoint(self): return self.get_api_endpoint_static( token=self.token, environment=self.environment, - api_endpoint=api_endpoint or self.d_api_endpoint, - database_name=self.api_endpoint, + api_endpoint=self.api_endpoint, + database_name=self.database_name, ) def get_keyspace(self): @@ -388,7 +452,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): client = DataAPIClient(token=self.token, environment=self.environment) return client.get_database( - api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint), 
+ api_endpoint=api_endpoint or self.get_api_endpoint(), token=self.token, keyspace=self.get_keyspace(), ) @@ -415,40 +479,15 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): return None - def get_vectorize_providers(self): - try: - self.log("Dynamically updating list of Vectorize providers.") - - # Get the admin object - admin = AstraDBAdmin(token=self.token) - db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint()) - - # Get the list of embedding providers - embedding_providers = db_admin.find_embedding_providers().as_dict() - - vectorize_providers_mapping = {} - # Map the provider display name to the provider key and models - for provider_key, provider_data in embedding_providers["embeddingProviders"].items(): - display_name = provider_data["displayName"] - models = [model["name"] for model in provider_data["models"]] - - # TODO: https://astra.datastax.com/api/v2/graphql - vectorize_providers_mapping[display_name] = [provider_key, models] - - # Sort the resulting dictionary - return defaultdict(list, dict(sorted(vectorize_providers_mapping.items()))) - except Exception as e: # noqa: BLE001 - self.log(f"Error fetching Vectorize providers: {e}") - - return {} - def _initialize_database_options(self): try: return [ { "name": name, + "status": info["status"], "collections": info["collections"], "api_endpoint": info["api_endpoint"], + "icon": "data", } for name, info in self.get_database_list().items() ] @@ -456,7 +495,35 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): msg = f"Error fetching database options: {e}" raise ValueError(msg) from e + @classmethod + def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str: + # Get the provider name from the collection + provider_name = provider_name or ( + collection.options.vector.service.provider + if collection and collection.options and collection.options.vector and collection.options.vector.service + else None + ) + + 
# If there is no provider, use the vector store icon + if not provider_name or provider_name == "bring your own": + return "vectorstores" + + # Special case for certain models + # TODO: Add more icons + if provider_name == "nvidia": + return "NVIDIA" + if provider_name == "openai": + return "OpenAI" + + # Title case on the provider for the icon if no special case + return provider_name.title() + def _initialize_collection_options(self, api_endpoint: str | None = None): + # Nothing to generate if we don't have an API endpoint yet + api_endpoint = api_endpoint or self.get_api_endpoint() + if not api_endpoint: + return [] + # Retrieve the database object database = self.get_database_object(api_endpoint=api_endpoint) @@ -471,7 +538,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): "provider": ( col.options.vector.service.provider if col.options.vector and col.options.vector.service else None ), - "icon": "", + "icon": self.get_provider_icon(collection=col), "model": ( col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None ), @@ -479,9 +546,53 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): for col in collection_list ] + def reset_provider_options(self, build_config: dict): + # Get the list of vectorize providers + vectorize_providers = self.get_vectorize_providers( + token=self.token, + environment=self.environment, + api_endpoint=build_config["api_endpoint"]["value"], + ) + + # If the collection is set, allow user to see embedding options + build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ + "embedding_generation_provider" + ]["options"] = ["Bring your own", "Nvidia", *[key for key in vectorize_providers if key != "Nvidia"]] + + # For all not Bring your own or Nvidia providers, add metadata saying configure in Astra DB Portal + provider_options = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ + 
"embedding_generation_provider" + ]["options"] + + # Go over each possible provider and add metadata to configure in Astra DB Portal + for provider in provider_options: + # Skip Bring your own and Nvidia, automatically configured + if provider in ["Bring your own", "Nvidia"]: + build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ + "embedding_generation_provider" + ]["options_metadata"].append({"icon": self.get_provider_icon(provider_name=provider.lower())}) + continue + + # Add metadata to configure in Astra DB Portal + build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ + "embedding_generation_provider" + ]["options_metadata"].append({" ": "Configure in Astra DB Portal"}) + + # And allow the user to see the models based on a selected provider + embedding_provider = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ + "embedding_generation_provider" + ]["value"] + + # Set the options for the embedding model based on the provider + build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ + "embedding_generation_model" + ]["options"] = vectorize_providers.get(embedding_provider, [[], []])[1] + + return build_config + def reset_collection_list(self, build_config: dict): # Get the list of options we have based on the token provided - collection_options = self._initialize_collection_options() + collection_options = self._initialize_collection_options(api_endpoint=build_config["api_endpoint"]["value"]) # If we retrieved options based on the token, show the dropdown build_config["collection_name"]["options"] = [col["name"] for col in collection_options] @@ -490,7 +601,11 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): ] # Reset the selected collection - build_config["collection_name"]["value"] = "" + if build_config["collection_name"]["value"] not in build_config["collection_name"]["options"]: + 
build_config["collection_name"]["value"] = "" + + # If we have a database, collection name should not be advanced + build_config["collection_name"]["advanced"] = not build_config["database_name"]["value"] return build_config @@ -499,84 +614,171 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): database_options = self._initialize_database_options() # If we retrieved options based on the token, show the dropdown - build_config["api_endpoint"]["options"] = [db["name"] for db in database_options] - build_config["api_endpoint"]["options_metadata"] = [ + build_config["database_name"]["options"] = [db["name"] for db in database_options] + build_config["database_name"]["options_metadata"] = [ {k: v for k, v in db.items() if k not in ["name"]} for db in database_options ] # Reset the selected database - build_config["api_endpoint"]["value"] = "" + if build_config["database_name"]["value"] not in build_config["database_name"]["options"]: + build_config["database_name"]["value"] = "" + build_config["api_endpoint"]["value"] = "" + build_config["collection_name"]["advanced"] = True + + # If we have a token, database name should not be advanced + build_config["database_name"]["advanced"] = not build_config["token"]["value"] return build_config def reset_build_config(self, build_config: dict): # Reset the list of databases we have based on the token provided - build_config["api_endpoint"]["options"] = [] - build_config["api_endpoint"]["options_metadata"] = [] + build_config["database_name"]["options"] = [] + build_config["database_name"]["options_metadata"] = [] + build_config["database_name"]["value"] = "" + build_config["database_name"]["advanced"] = True build_config["api_endpoint"]["value"] = "" - build_config["api_endpoint"]["name"] = "Database" # Reset the list of collections and metadata associated build_config["collection_name"]["options"] = [] build_config["collection_name"]["options_metadata"] = [] build_config["collection_name"]["value"] = "" + 
build_config["collection_name"]["advanced"] = True return build_config - def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None): - # When the component first executes, this is the update refresh call - first_run = field_name == "collection_name" and not field_value and not build_config["api_endpoint"]["options"] + async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None): + # Callback for database creation + if field_name == "database_name" and isinstance(field_value, dict) and "new_database_name" in field_value: + try: + await self.create_database_api( + new_database_name=field_value["new_database_name"], + token=self.token, + keyspace=self.get_keyspace(), + environment=self.environment, + cloud_provider=field_value["cloud_provider"], + region=field_value["region"], + ) + except Exception as e: + msg = f"Error creating database: {e}" + raise ValueError(msg) from e - # If the token has not been provided, simply return + # Add the new database to the list of options + build_config["database_name"]["options"] = build_config["database_name"]["options"] + [ + field_value["new_database_name"] + ] + build_config["database_name"]["options_metadata"] = build_config["database_name"]["options_metadata"] + [ + {"status": "PENDING"} + ] + + return self.reset_collection_list(build_config) + + # This is the callback required to update the list of regions for a cloud provider + if field_name == "database_name" and isinstance(field_value, dict) and "new_database_name" not in field_value: + cloud_provider = field_value["cloud_provider"] + build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]["region"][ + "options" + ] = self.map_cloud_providers()[cloud_provider]["regions"] + + return build_config + + # Callback for the creation of collections + if field_name == "collection_name" and isinstance(field_value, dict) and "new_collection_name" in field_value: + try: + 
# Get the dimension if its a BYO provider + dimension = ( + field_value["dimension"] + if field_value["embedding_generation_provider"] == "Bring your own" + else None + ) + + # Create the collection + await self.create_collection_api( + new_collection_name=field_value["new_collection_name"], + token=self.token, + api_endpoint=build_config["api_endpoint"]["value"], + environment=self.environment, + keyspace=self.get_keyspace(), + dimension=dimension, + embedding_generation_provider=field_value["embedding_generation_provider"], + embedding_generation_model=field_value["embedding_generation_model"], + ) + except Exception as e: + msg = f"Error creating collection: {e}" + raise ValueError(msg) from e + + # Add the new collection to the list of options + build_config["collection_name"]["value"] = field_value["new_collection_name"] + build_config["collection_name"]["options"].append(field_value["new_collection_name"]) + + # Get the provider and model for the new collection + generation_provider = field_value["embedding_generation_provider"] + provider = generation_provider if generation_provider != "Bring your own" else None + generation_model = field_value["embedding_generation_model"] + model = generation_model if generation_model else None + + # Add the new collection to the list of options + icon = "NVIDIA" if provider == "Nvidia" else "vectorstores" + build_config["collection_name"]["options_metadata"] = build_config["collection_name"][ + "options_metadata" + ] + [{"records": 0, "provider": provider, "icon": icon, "model": model}] + + return build_config + + # Callback to update the model list based on the embedding provider + if ( + field_name == "collection_name" + and isinstance(field_value, dict) + and "new_collection_name" not in field_value + ): + return self.reset_provider_options(build_config) + + # When the component first executes, this is the update refresh call + first_run = field_name == "collection_name" and not field_value and not 
build_config["database_name"]["options"] + + # If the token has not been provided, simply return the empty build config if not self.token: return self.reset_build_config(build_config) # If this is the first execution of the component, reset and build database list if first_run or field_name in ["token", "environment"]: - # Reset the build config to ensure we are starting fresh - build_config = self.reset_build_config(build_config) - build_config = self.reset_database_list(build_config) - - # Get list of regions for a given cloud provider - """ - cloud_provider = ( - build_config["api_endpoint"]["dialog_inputs"]["fields"]["data"]["node"]["template"]["cloud_provider"][ - "value" - ] - or "Amazon Web Services" - ) - build_config["api_endpoint"]["dialog_inputs"]["fields"]["data"]["node"]["template"]["region"][ - "options" - ] = self.map_cloud_providers()[cloud_provider]["regions"] - """ - - return build_config + return self.reset_database_list(build_config) # Refresh the collection name options - if field_name == "api_endpoint": + if field_name == "database_name" and not isinstance(field_value, dict): # If missing, refresh the database options - if not build_config["api_endpoint"]["options"] or not field_value: - return self.update_build_config(build_config, field_value=self.token, field_name="token") + if field_value not in build_config["database_name"]["options"]: + build_config = await self.update_build_config(build_config, field_value=self.token, field_name="token") + build_config["database_name"]["value"] = "" + else: + # Find the position of the selected database to align with metadata + index_of_name = build_config["database_name"]["options"].index(field_value) - # Set the underlying api endpoint value of the database - if field_value in build_config["api_endpoint"]["options"]: - index_of_name = build_config["api_endpoint"]["options"].index(field_value) - build_config["d_api_endpoint"]["value"] = build_config["api_endpoint"]["options_metadata"][ + # Initializing 
database condition + pending = build_config["database_name"]["options_metadata"][index_of_name]["status"] == "PENDING" + if pending: + return self.update_build_config(build_config, field_value=self.token, field_name="token") + + # Set the API endpoint based on the selected database + build_config["api_endpoint"]["value"] = build_config["database_name"]["options_metadata"][ index_of_name ]["api_endpoint"] - else: - build_config["d_api_endpoint"]["value"] = "" + + # Reset the provider options + build_config = self.reset_provider_options(build_config) # Reset the list of collections we have based on the token provided return self.reset_collection_list(build_config) # Hide embedding model option if opriona_metadata provider is not null - if field_name == "collection_name" and field_value: + if field_name == "collection_name" and not isinstance(field_value, dict): # Assume we will be autodetecting the collection: build_config["autodetect_collection"]["value"] = True + # Reload the collection list + build_config = self.reset_collection_list(build_config) + # Set the options for collection name to be the field value if its a new collection - if field_value not in build_config["collection_name"]["options"]: + if field_value and field_value not in build_config["collection_name"]["options"]: # Add the new collection to the list of options build_config["collection_name"]["options"].append(field_value) build_config["collection_name"]["options_metadata"].append( @@ -598,36 +800,8 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): build_config["embedding_model"]["advanced"] = False build_config["embedding_choice"]["value"] = "Embedding Model" - # For the final step, get the list of vectorize providers - """ - vectorize_providers = self.get_vectorize_providers() - if not vectorize_providers: return build_config - # Allow the user to see the embedding provider options - provider_options = 
build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ - "embedding_generation_provider" - ]["options"] - if not provider_options: - # If the collection is set, allow user to see embedding options - build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ - "embedding_generation_provider" - ]["options"] = ["Bring your own", "Nvidia", *[key for key in vectorize_providers if key != "Nvidia"]] - - # And allow the user to see the models based on a selected provider - model_options = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ - "embedding_generation_model" - ]["options"] - if not model_options: - embedding_provider = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ - "embedding_generation_provider" - ]["value"] - - build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"][ - "embedding_generation_model" - ]["options"] = vectorize_providers.get(embedding_provider, [[], []])[1] - """ - return build_config @check_cached_vector_store @@ -654,11 +828,11 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): # Get Langflow version and platform information __version__ = get_version_info()["version"] langflow_prefix = "" - if os.getenv("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE": # TODO: More precise way of detecting - langflow_prefix = "ds-" + # if os.getenv("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE": # TODO: More precise way of detecting + # langflow_prefix = "ds-" # Get the database object - database = self.get_database_object(api_endpoint=self.d_api_endpoint) + database = self.get_database_object() autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection # Bundle up the auto-detect parameters @@ -714,7 +888,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): if documents and self.deletion_field: self.log(f"Deleting documents where 
{self.deletion_field}") try: - database = self.get_database_object(api_endpoint=self.d_api_endpoint) + database = self.get_database_object() collection = database.get_collection(self.collection_name, keyspace=database.keyspace) delete_values = list({doc.metadata[self.deletion_field] for doc in documents}) self.log(f"Deleting documents where {self.deletion_field} matches {delete_values}.") diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 2e999c096..7228b82a6 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "ParseData", - "id": "ParseData-9zsFp", + "id": "ParseData-cwmU0", "name": "text", "output_types": [ "Message" @@ -15,7 +15,7 @@ }, "targetHandle": { "fieldName": "context", - "id": "Prompt-mqa6n", + "id": "Prompt-wBjYe", "inputTypes": [ "Message", "Text" @@ -23,11 +23,11 @@ "type": "str" } }, - "id": "reactflow__edge-ParseData-9zsFp{œdataTypeœ:œParseDataœ,œidœ:œParseData-9zsFpœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-mqa6n{œfieldNameœ:œcontextœ,œidœ:œPrompt-mqa6nœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", - "source": "ParseData-9zsFp", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-9zsFpœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-mqa6n", - "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-mqa6nœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-ParseData-cwmU0{œdataTypeœ:œParseDataœ,œidœ:œParseData-cwmU0œ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-wBjYe{œfieldNameœ:œcontextœ,œidœ:œPrompt-wBjYeœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "source": "ParseData-cwmU0", + "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-cwmU0œ, œnameœ: œtextœ, 
œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-wBjYe", + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-wBjYeœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -35,7 +35,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-Jy5aI", + "id": "ChatInput-IRziS", "name": "message", "output_types": [ "Message" @@ -43,7 +43,7 @@ }, "targetHandle": { "fieldName": "question", - "id": "Prompt-mqa6n", + "id": "Prompt-wBjYe", "inputTypes": [ "Message", "Text" @@ -51,11 +51,11 @@ "type": "str" } }, - "id": "reactflow__edge-ChatInput-Jy5aI{œdataTypeœ:œChatInputœ,œidœ:œChatInput-Jy5aIœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-mqa6n{œfieldNameœ:œquestionœ,œidœ:œPrompt-mqa6nœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", - "source": "ChatInput-Jy5aI", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-Jy5aIœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-mqa6n", - "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-mqa6nœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-ChatInput-IRziS{œdataTypeœ:œChatInputœ,œidœ:œChatInput-IRziSœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-wBjYe{œfieldNameœ:œquestionœ,œidœ:œPrompt-wBjYeœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "source": "ChatInput-IRziS", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-IRziSœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-wBjYe", + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-wBjYeœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -63,7 +63,7 @@ "data": { "sourceHandle": { "dataType": "File", - "id": "File-i8StI", + "id": "File-4yyks", "name": "data", "output_types": [ "Data" @@ -71,25 +71,25 @@ }, "targetHandle": { "fieldName": "data_inputs", - "id": "SplitText-DakpR", + "id": "SplitText-HWKil", "inputTypes": [ "Data" ], "type": "other" } }, - "id": 
"reactflow__edge-File-i8StI{œdataTypeœ:œFileœ,œidœ:œFile-i8StIœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-DakpR{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-DakpRœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "File-i8StI", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-i8StIœ, œnameœ: œdataœ, œoutput_typesœ: [œDataœ]}", - "target": "SplitText-DakpR", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-DakpRœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-File-4yyks{œdataTypeœ:œFileœ,œidœ:œFile-4yyksœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-HWKil{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-HWKilœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "File-4yyks", + "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-4yyksœ, œnameœ: œdataœ, œoutput_typesœ: [œDataœ]}", + "target": "SplitText-HWKil", + "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-HWKilœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" }, { "className": "", "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-mqa6n", + "id": "Prompt-wBjYe", "name": "prompt", "output_types": [ "Message" @@ -97,25 +97,25 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "OpenAIModel-VVLPR", + "id": "OpenAIModel-XJ1BC", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-Prompt-mqa6n{œdataTypeœ:œPromptœ,œidœ:œPrompt-mqa6nœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-VVLPR{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-VVLPRœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "Prompt-mqa6n", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-mqa6nœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "OpenAIModel-VVLPR", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-VVLPRœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": 
"reactflow__edge-Prompt-wBjYe{œdataTypeœ:œPromptœ,œidœ:œPrompt-wBjYeœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-XJ1BC{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-XJ1BCœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "Prompt-wBjYe", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-wBjYeœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "OpenAIModel-XJ1BC", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-XJ1BCœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "className": "", "data": { "sourceHandle": { "dataType": "OpenAIModel", - "id": "OpenAIModel-VVLPR", + "id": "OpenAIModel-XJ1BC", "name": "text_output", "output_types": [ "Message" @@ -123,25 +123,24 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "ChatOutput-EujCa", + "id": "ChatOutput-D2eyW", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-OpenAIModel-VVLPR{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-VVLPRœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-EujCa{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-EujCaœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "OpenAIModel-VVLPR", - "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-VVLPRœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", - "target": "ChatOutput-EujCa", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-EujCaœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-OpenAIModel-XJ1BC{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-XJ1BCœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-D2eyW{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-D2eyWœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "OpenAIModel-XJ1BC", + "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-XJ1BCœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", + "target": "ChatOutput-D2eyW", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-D2eyWœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, 
{ - "className": "", "data": { "sourceHandle": { "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-BF7iH", + "id": "OpenAIEmbeddings-xoSJQ", "name": "embeddings", "output_types": [ "Embeddings" @@ -149,25 +148,24 @@ }, "targetHandle": { "fieldName": "embedding_model", - "id": "AstraDB-Qdaes", + "id": "AstraDB-HXAXh", "inputTypes": [ "Embeddings" ], "type": "other" } }, - "id": "reactflow__edge-OpenAIEmbeddings-BF7iH{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-BF7iHœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-Qdaes{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-Qdaesœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", - "source": "OpenAIEmbeddings-BF7iH", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-BF7iHœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-Qdaes", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-Qdaesœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "id": "xy-edge__OpenAIEmbeddings-xoSJQ{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-xoSJQœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-HXAXh{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-HXAXhœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "source": "OpenAIEmbeddings-xoSJQ", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-xoSJQœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-HXAXh", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-HXAXhœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" }, { - "className": "", "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-Jy5aI", + "id": "ChatInput-IRziS", "name": "message", "output_types": [ "Message" @@ -175,25 +173,24 @@ }, "targetHandle": { "fieldName": "search_query", - "id": "AstraDB-Qdaes", + "id": "AstraDB-HXAXh", "inputTypes": [ "Message" ], "type": "str" } }, - "id": 
"reactflow__edge-ChatInput-Jy5aI{œdataTypeœ:œChatInputœ,œidœ:œChatInput-Jy5aIœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-Qdaes{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-Qdaesœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "ChatInput-Jy5aI", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-Jy5aIœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "AstraDB-Qdaes", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-Qdaesœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "xy-edge__ChatInput-IRziS{œdataTypeœ:œChatInputœ,œidœ:œChatInput-IRziSœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-HXAXh{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-HXAXhœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "ChatInput-IRziS", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-IRziSœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "AstraDB-HXAXh", + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-HXAXhœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { - "className": "", "data": { "sourceHandle": { "dataType": "AstraDB", - "id": "AstraDB-Qdaes", + "id": "AstraDB-HXAXh", "name": "search_results", "output_types": [ "Data" @@ -201,25 +198,24 @@ }, "targetHandle": { "fieldName": "data", - "id": "ParseData-9zsFp", + "id": "ParseData-cwmU0", "inputTypes": [ "Data" ], "type": "other" } }, - "id": "reactflow__edge-AstraDB-Qdaes{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-Qdaesœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParseData-9zsFp{œfieldNameœ:œdataœ,œidœ:œParseData-9zsFpœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "AstraDB-Qdaes", - "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-Qdaesœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-9zsFp", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-9zsFpœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": 
"xy-edge__AstraDB-HXAXh{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-HXAXhœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParseData-cwmU0{œfieldNameœ:œdataœ,œidœ:œParseData-cwmU0œ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "AstraDB-HXAXh", + "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-HXAXhœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", + "target": "ParseData-cwmU0", + "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-cwmU0œ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" }, { - "className": "", "data": { "sourceHandle": { "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-KNVHv", + "id": "OpenAIEmbeddings-d7EtR", "name": "embeddings", "output_types": [ "Embeddings" @@ -227,25 +223,24 @@ }, "targetHandle": { "fieldName": "embedding_model", - "id": "AstraDB-sPWXd", + "id": "AstraDB-nMlxo", "inputTypes": [ "Embeddings" ], "type": "other" } }, - "id": "reactflow__edge-OpenAIEmbeddings-KNVHv{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-KNVHvœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-sPWXd{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-sPWXdœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", - "source": "OpenAIEmbeddings-KNVHv", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-KNVHvœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-sPWXd", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-sPWXdœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "id": "xy-edge__OpenAIEmbeddings-d7EtR{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-d7EtRœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-nMlxo{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-nMlxoœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "source": "OpenAIEmbeddings-d7EtR", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-d7EtRœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-nMlxo", + "targetHandle": "{œfieldNameœ: 
œembedding_modelœ, œidœ: œAstraDB-nMlxoœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" }, { - "className": "", "data": { "sourceHandle": { "dataType": "SplitText", - "id": "SplitText-DakpR", + "id": "SplitText-HWKil", "name": "chunks", "output_types": [ "Data" @@ -253,18 +248,18 @@ }, "targetHandle": { "fieldName": "ingest_data", - "id": "AstraDB-sPWXd", + "id": "AstraDB-nMlxo", "inputTypes": [ "Data" ], "type": "other" } }, - "id": "reactflow__edge-SplitText-DakpR{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-DakpRœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-sPWXd{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-sPWXdœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "SplitText-DakpR", - "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-DakpRœ, œnameœ: œchunksœ, œoutput_typesœ: [œDataœ]}", - "target": "AstraDB-sPWXd", - "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-sPWXdœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "xy-edge__SplitText-HWKil{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-HWKilœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-nMlxo{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-nMlxoœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "SplitText-HWKil", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-HWKilœ, œnameœ: œchunksœ, œoutput_typesœ: [œDataœ]}", + "target": "AstraDB-nMlxo", + "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-nMlxoœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" } ], "nodes": [ @@ -272,7 +267,7 @@ "data": { "description": "Get chat inputs from the Playground.", "display_name": "Chat Input", - "id": "ChatInput-Jy5aI", + "id": "ChatInput-IRziS", "node": { "base_classes": [ "Message" @@ -536,7 +531,7 @@ }, "dragging": false, "height": 234, - "id": "ChatInput-Jy5aI", + "id": "ChatInput-IRziS", "measured": { "height": 234, "width": 320 @@ -557,7 +552,7 @@ "data": { "description": "Convert Data into plain text following a specified template.", "display_name": "Parse Data", - "id": 
"ParseData-9zsFp", + "id": "ParseData-cwmU0", "node": { "base_classes": [ "Message" @@ -691,7 +686,7 @@ }, "dragging": false, "height": 350, - "id": "ParseData-9zsFp", + "id": "ParseData-cwmU0", "measured": { "height": 350, "width": 320 @@ -712,7 +707,7 @@ "data": { "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", - "id": "Prompt-mqa6n", + "id": "Prompt-wBjYe", "node": { "base_classes": [ "Message" @@ -871,7 +866,7 @@ }, "dragging": false, "height": 433, - "id": "Prompt-mqa6n", + "id": "Prompt-wBjYe", "measured": { "height": 433, "width": 320 @@ -892,7 +887,7 @@ "data": { "description": "Split text into chunks based on specified criteria.", "display_name": "Split Text", - "id": "SplitText-DakpR", + "id": "SplitText-HWKil", "node": { "base_classes": [ "Data" @@ -1039,7 +1034,7 @@ }, "dragging": false, "height": 475, - "id": "SplitText-DakpR", + "id": "SplitText-HWKil", "measured": { "height": 475, "width": 320 @@ -1058,7 +1053,7 @@ }, { "data": { - "id": "note-Z3QTX", + "id": "note-fi4dw", "node": { "description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n", "display_name": "", @@ -1071,7 +1066,7 @@ }, "dragging": false, "height": 324, - "id": "note-Z3QTX", + "id": "note-fi4dw", "measured": { "height": 324, "width": 325 @@ -1095,7 +1090,7 @@ }, { "data": { - "id": "note-o6eiV", + "id": "note-KK8E2", "node": { "description": "## 📖 README\n\nLoad your data into a vector database with the 📚 **Load Data** flow, and then use your data as chat context with the 🐕 **Retriever** flow.\n\n**🚨 Add your OpenAI API key as a global variable to easily add it to all of the OpenAI components in this flow.** \n\n**Quick start**\n1. Run the 📚 **Load Data** flow.\n2. 
Run the 🐕 **Retriever** flow.\n\n**Next steps** \n\n- Experiment by changing the prompt and the loaded data to see how the bot's responses change. \n\nFor more info, see the [Langflow docs](https://docs.langflow.org/starter-projects-vector-store-rag).", "display_name": "Read Me", @@ -1108,10 +1103,10 @@ }, "dragging": false, "height": 324, - "id": "note-o6eiV", + "id": "note-KK8E2", "measured": { "height": 324, - "width": 324 + "width": 325 }, "position": { "x": 94.28986613312418, @@ -1134,7 +1129,7 @@ "data": { "description": "Display a chat message in the Playground.", "display_name": "Chat Output", - "id": "ChatOutput-EujCa", + "id": "ChatOutput-D2eyW", "node": { "base_classes": [ "Message" @@ -1396,7 +1391,7 @@ }, "dragging": false, "height": 234, - "id": "ChatOutput-EujCa", + "id": "ChatOutput-D2eyW", "measured": { "height": 234, "width": 320 @@ -1415,7 +1410,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-BF7iH", + "id": "OpenAIEmbeddings-xoSJQ", "node": { "base_classes": [ "Embeddings" @@ -1712,7 +1707,7 @@ "show": true, "title_case": false, "type": "str", - "value": "" + "value": "OPENAI_API_KEY" }, "openai_api_type": { "_input_type": "MessageTextInput", @@ -1895,7 +1890,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-BF7iH", + "id": "OpenAIEmbeddings-xoSJQ", "measured": { "height": 320, "width": 320 @@ -1914,7 +1909,7 @@ }, { "data": { - "id": "note-7sR5R", + "id": "note-NTe8U", "node": { "description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. 
\n\nClick ▶️ **Run component** on the **Astra DB** component to load your data.\n\n* If you're using OSS Langflow, add your Astra DB Application Token to the Astra DB component.\n\n#### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.", "display_name": "", @@ -1927,10 +1922,10 @@ }, "dragging": false, "height": 324, - "id": "note-7sR5R", + "id": "note-NTe8U", "measured": { "height": 324, - "width": 324 + "width": 325 }, "position": { "x": 955.3277857006676, @@ -1941,7 +1936,7 @@ "y": 1552.171191793604 }, "resizing": false, - "selected": true, + "selected": false, "style": { "height": 324, "width": 324 @@ -1951,7 +1946,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-KNVHv", + "id": "OpenAIEmbeddings-d7EtR", "node": { "base_classes": [ "Embeddings" @@ -2248,7 +2243,7 @@ "show": true, "title_case": false, "type": "str", - "value": "" + "value": "OPENAI_API_KEY" }, "openai_api_type": { "_input_type": "MessageTextInput", @@ -2431,7 +2426,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-KNVHv", + "id": "OpenAIEmbeddings-d7EtR", "measured": { "height": 320, "width": 320 @@ -2450,7 +2445,7 @@ }, { "data": { - "id": "File-i8StI", + "id": "File-4yyks", "node": { "base_classes": [ "Data" @@ -2676,7 +2671,7 @@ }, "dragging": false, "height": 367, - "id": "File-i8StI", + "id": "File-4yyks", "measured": { "height": 367, "width": 320 @@ -2695,7 +2690,7 @@ }, { "data": { - "id": "note-LxvwE", + "id": "note-KLxcd", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2708,7 +2703,7 @@ }, "dragging": false, "height": 324, - "id": "note-LxvwE", + "id": "note-KLxcd", "measured": { "height": 324, "width": 324 @@ -2727,7 +2722,7 @@ }, { "data": { - "id": "note-PkcXs", + "id": "note-rKy2s", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2740,7 +2735,7 @@ }, "dragging": false, "height": 324, - "id": "note-PkcXs", + "id": 
"note-rKy2s", "measured": { "height": 324, "width": 324 @@ -2759,7 +2754,7 @@ }, { "data": { - "id": "note-vhWhj", + "id": "note-cjRCD", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2772,7 +2767,7 @@ }, "dragging": false, "height": 324, - "id": "note-vhWhj", + "id": "note-cjRCD", "measured": { "height": 324, "width": 324 @@ -2791,7 +2786,7 @@ }, { "data": { - "id": "OpenAIModel-VVLPR", + "id": "OpenAIModel-XJ1BC", "node": { "base_classes": [ "LanguageModel", @@ -2878,7 +2873,7 @@ "show": true, "title_case": false, "type": "str", - "value": "" + "value": "OPENAI_API_KEY" }, "code": { "advanced": true, @@ -3159,9 +3154,9 @@ "type": "OpenAIModel" }, "dragging": false, - "id": "OpenAIModel-VVLPR", + "id": "OpenAIModel-XJ1BC", "measured": { - "height": 653, + "height": 656, "width": 320 }, "position": { @@ -3173,7 +3168,7 @@ }, { "data": { - "id": "AstraDB-Qdaes", + "id": "AstraDB-HXAXh", "node": { "base_classes": [ "Data", @@ -3189,6 +3184,7 @@ "field_order": [ "token", "environment", + "database_name", "api_endpoint", "collection_name", "keyspace", @@ -3200,6 +3196,7 @@ "search_type", "search_score_threshold", "advanced_search_filter", + "autodetect_collection", "content_field", "deletion_field", "ignore_invalid_documents", @@ -3219,8 +3216,8 @@ "method": "search_documents", "name": "search_results", "required_inputs": [ - "api_endpoint", "collection_name", + "database_name", "token" ], "selected": "Data", @@ -3268,20 +3265,17 @@ "value": {} }, "api_endpoint": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": true, - "dialog_inputs": {}, - "display_name": "Database", + "_input_type": "StrInput", + "advanced": true, + "display_name": "Astra DB API Endpoint", "dynamic": false, - "info": "The Database / API Endpoint for the Astra DB instance.", - "name": "Database", - "options": [], - "options_metadata": [], + "info": "The API Endpoint for the Astra DB instance. 
Supercedes database selection.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "api_endpoint", "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": true, + "required": false, "show": true, "title_case": false, "tool_mode": false, @@ -3342,13 +3336,115 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"description\": \"Create a new database in Astra DB.\",\n \"display_name\": \"Create New Database\",\n \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n \"template\": {\n \"new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"New Database Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n 
display_name=\"Cloud Provider\",\n info=\"Cloud provider for the new database.\",\n options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n required=True,\n ),\n \"region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"description\": \"Create a new collection in Astra DB.\",\n \"display_name\": \"Create New Collection\",\n \"field_order\": [\n \"new_collection_name\",\n \"embedding_generation_provider\",\n \"embedding_generation_model\",\n ],\n \"template\": {\n \"new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"New Collection Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding Generation Provider\",\n info=\"Provider to use for generating embeddings.\",\n options=[],\n required=True,\n ),\n \"embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding Generation Model\",\n info=\"Model to use for generating embeddings.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n StrInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Database\",\n info=\"The Database / API Endpoint 
for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n combobox=True,\n ),\n StrInput(\n name=\"d_api_endpoint\",\n display_name=\"Database API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n # dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Choose an embedding model or use Astra Vectorize.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n value=\"Embedding Model\",\n advanced=True,\n real_time_refresh=True,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n ),\n *LCVectorStoreComponent.inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. 
\"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n return {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n }\n\n @classmethod\n def create_database_api(\n cls,\n token: str,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n ):\n client = DataAPIClient(token=token)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Call the create 
database function\n return admin_client.create_database(\n name=new_database_name,\n cloud_provider=cloud_provider,\n region=region,\n )\n\n @classmethod\n def create_collection_api(\n cls,\n token: str,\n database_name: str,\n new_collection_name: str,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n ):\n client = DataAPIClient(token=token)\n api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n # Get the database object\n database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n vectorize_options = CollectionVectorServiceOptions(\n provider=embedding_generation_provider,\n model_name=embedding_generation_model,\n authentication=None,\n parameters=None,\n )\n\n # Create the collection\n return database.create_collection(\n name=new_collection_name,\n dimension=dimension,\n service=vectorize_options,\n )\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(token=token, environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = list(admin_client.list_databases())\n\n # Set the environment properly\n env_string = \"\"\n if environment and environment != \"prod\":\n env_string = f\"-{environment}\"\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n api_endpoint = f\"https://{db.info.id}-{db.info.region}.apps.astra{env_string}.datastax.com\"\n db_info_dict[db.info.name] = {\n \"api_endpoint\": api_endpoint,\n \"collections\": len(\n list(\n client.get_database(\n api_endpoint=api_endpoint, token=token, keyspace=db.info.keyspace\n ).list_collection_names(keyspace=db.info.keyspace)\n )\n ),\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return 
db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(token=self.token, environment=self.environment)\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Otherwise, get the URL from the database list\n return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n def get_api_endpoint(self, *, api_endpoint: str | None = None):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=api_endpoint or self.d_api_endpoint,\n database_name=self.api_endpoint,\n )\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return None\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(token=self.token, environment=self.environment)\n\n return client.get_database(\n api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(token=self.token, environment=self.environment)\n\n database = client.get_database(\n api_endpoint=self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n return 
collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def get_vectorize_providers(self):\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n admin = AstraDBAdmin(token=self.token)\n db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n # TODO: https://astra.datastax.com/api/v2/graphql\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return {}\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"collections\": info[\"collections\"],\n \"api_endpoint\": info[\"api_endpoint\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": 
(\n col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n ),\n \"icon\": \"\",\n \"model\": (\n col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_collection_list(self, build_config: dict):\n # Get the list of options we have based on the token provided\n collection_options = self._initialize_collection_options()\n\n # If we retrieved options based on the token, show the dropdown\n build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n build_config[\"collection_name\"][\"options_metadata\"] = [\n {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n ]\n\n # Reset the selected collection\n build_config[\"collection_name\"][\"value\"] = \"\"\n\n return build_config\n\n def reset_database_list(self, build_config: dict):\n # Get the list of options we have based on the token provided\n database_options = self._initialize_database_options()\n\n # If we retrieved options based on the token, show the dropdown\n build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n build_config[\"api_endpoint\"][\"options_metadata\"] = [\n {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n ]\n\n # Reset the selected database\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n return build_config\n\n def reset_build_config(self, build_config: dict):\n # Reset the list of databases we have based on the token provided\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"options_metadata\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n # Reset the list of collections and metadata associated\n build_config[\"collection_name\"][\"options\"] = []\n 
build_config[\"collection_name\"][\"options_metadata\"] = []\n build_config[\"collection_name\"][\"value\"] = \"\"\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # When the component first executes, this is the update refresh call\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"api_endpoint\"][\"options\"]\n\n # If the token has not been provided, simply return\n if not self.token:\n return self.reset_build_config(build_config)\n\n # If this is the first execution of the component, reset and build database list\n if first_run or field_name in [\"token\", \"environment\"]:\n # Reset the build config to ensure we are starting fresh\n build_config = self.reset_build_config(build_config)\n build_config = self.reset_database_list(build_config)\n\n # Get list of regions for a given cloud provider\n \"\"\"\n cloud_provider = (\n build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n \"value\"\n ]\n or \"Amazon Web Services\"\n )\n build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n \"options\"\n ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n \"\"\"\n\n return build_config\n\n # Refresh the collection name options\n if field_name == \"api_endpoint\":\n # If missing, refresh the database options\n if not build_config[\"api_endpoint\"][\"options\"] or not field_value:\n return self.update_build_config(build_config, field_value=self.token, field_name=\"token\")\n\n # Set the underlying api endpoint value of the database\n if field_value in build_config[\"api_endpoint\"][\"options\"]:\n index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n index_of_name\n ][\"api_endpoint\"]\n else:\n 
build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n # Reset the list of collections we have based on the token provided\n return self.reset_collection_list(build_config)\n\n # Hide embedding model option if opriona_metadata provider is not null\n if field_name == \"collection_name\" and field_value:\n # Assume we will be autodetecting the collection:\n build_config[\"autodetect_collection\"][\"value\"] = True\n\n # Set the options for collection name to be the field value if its a new collection\n if field_value not in build_config[\"collection_name\"][\"options\"]:\n # Add the new collection to the list of options\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n )\n\n # Ensure that autodetect collection is set to False, since its a new collection\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n # Find the position of the selected collection to align with metadata\n index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n # If we were able to determine the Vectorize provider, set it accordingly\n if value_of_provider:\n build_config[\"embedding_model\"][\"advanced\"] = True\n build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n # For the final step, get the list of vectorize providers\n \"\"\"\n vectorize_providers = self.get_vectorize_providers()\n if not vectorize_providers:\n return build_config\n\n # Allow the user to see the embedding provider options\n provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n 
\"embedding_generation_provider\"\n ][\"options\"]\n if not provider_options:\n # If the collection is set, allow user to see embedding options\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n # And allow the user to see the models based on a selected provider\n model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_model\"\n ][\"options\"]\n if not model_options:\n embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"value\"]\n\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_model\"\n ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n \"\"\"\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. 
\"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = (\n {\"embedding\": self.embedding_model}\n if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n else {}\n )\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return 
vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg 
= self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "from collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom astrapy.info import CollectionDescriptor\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n 
StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n \"template\": {\n \"new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n required=True,\n real_time_refresh=True,\n ),\n \"region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"new_collection_name\",\n \"embedding_generation_provider\",\n \"embedding_generation_model\",\n ],\n \"template\": {\n \"new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to 
create in Astra DB.\",\n required=True,\n ),\n \"embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n real_time_refresh=True,\n required=True,\n options=[\"Bring your own\", \"Nvidia\"],\n ),\n \"embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n required=True,\n options=[],\n ),\n \"dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions (Required only for `Bring your own`)\",\n info=\"Dimensions of the embeddings to generate.\",\n required=False,\n value=1024,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n StrInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n StrInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n advanced=True,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Choose an embedding model or use Astra Vectorize.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n value=\"Embedding Model\",\n advanced=True,\n real_time_refresh=True,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n ),\n *LCVectorStoreComponent.inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. 
\"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n admin = AstraDBAdmin(token=token, 
environment=environment)\n db_admin = admin.get_database_admin(api_endpoint=api_endpoint)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n # Get the provider display name and models\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e:\n msg = f\"Error fetching vectorize providers: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(token=token, environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n ):\n # Create the data API client\n client = DataAPIClient(token=token)\n\n # Get the database object\n database = client.get_async_database(api_endpoint=api_endpoint, token=token)\n\n # Build vectorize 
options, if needed\n vectorize_options = None\n if not dimension:\n vectorize_options = CollectionVectorServiceOptions(\n provider=cls.get_vectorize_providers(\n token=token, environment=environment, api_endpoint=api_endpoint\n ).get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Create the collection\n return await database.create_collection(\n name=new_collection_name,\n keyspace=keyspace,\n dimension=dimension,\n service=vectorize_options,\n )\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(token=token, environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = list(admin_client.list_databases())\n\n # Set the environment properly\n env_string = \"\"\n if environment and environment != \"prod\":\n env_string = f\"-{environment}\"\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoint = f\"https://{db.info.id}-{db.info.region}.apps.astra{env_string}.datastax.com\"\n\n # Get the number of collections\n try:\n num_collections = len(\n list(\n client.get_database(\n api_endpoint=api_endpoint, token=token, keyspace=db.info.keyspace\n ).list_collection_names(keyspace=db.info.keyspace)\n )\n )\n except Exception: # noqa: BLE001\n num_collections = 0\n if db.status != \"PENDING\":\n continue\n\n # Add the database to the dictionary\n db_info_dict[db.info.name] = {\n \"api_endpoint\": api_endpoint,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(token=self.token, environment=self.environment)\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n 
environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n return db.get(\"api_endpoint\")\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return None\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(token=self.token, environment=self.environment)\n\n return client.get_database(\n api_endpoint=api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(token=self.token, environment=self.environment)\n\n database = client.get_database(\n api_endpoint=self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n 
\"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoint\": info[\"api_endpoint\"],\n \"icon\": \"data\",\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.options.vector.service.provider\n if collection and collection.options and collection.options.vector and collection.options.vector.service\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name == \"bring your own\":\n return \"vectorstores\"\n\n # Special case for certain models\n # TODO: Add more icons\n if provider_name == \"nvidia\":\n return \"NVIDIA\"\n if provider_name == \"openai\":\n return \"OpenAI\"\n\n # Title case on the provider for the icon if no special case\n return provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.options.vector.service.model_name if 
col.options.vector and col.options.vector.service else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict):\n # Get the list of vectorize providers\n vectorize_providers = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # If the collection is set, allow user to see embedding options\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n # For all not Bring your own or Nvidia providers, add metadata saying configure in Astra DB Portal\n provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options\"]\n\n # Go over each possible provider and add metadata to configure in Astra DB Portal\n for provider in provider_options:\n # Skip Bring your own and Nvidia, automatically configured\n if provider in [\"Bring your own\", \"Nvidia\"]:\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options_metadata\"].append({\"icon\": self.get_provider_icon(provider_name=provider.lower())})\n continue\n\n # Add metadata to configure in Astra DB Portal\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options_metadata\"].append({\" \": \"Configure in Astra DB Portal\"})\n\n # And allow the user to see the models based on a selected provider\n embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"value\"]\n\n # Set the options for the embedding 
model based on the provider\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_model\"\n ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n\n return build_config\n\n def reset_collection_list(self, build_config: dict):\n # Get the list of options we have based on the token provided\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n\n # If we retrieved options based on the token, show the dropdown\n build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n build_config[\"collection_name\"][\"options_metadata\"] = [\n {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n ]\n\n # Reset the selected collection\n if build_config[\"collection_name\"][\"value\"] not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"value\"] = \"\"\n\n # If we have a database, collection name should not be advanced\n build_config[\"collection_name\"][\"advanced\"] = not build_config[\"database_name\"][\"value\"]\n\n return build_config\n\n def reset_database_list(self, build_config: dict):\n # Get the list of options we have based on the token provided\n database_options = self._initialize_database_options()\n\n # If we retrieved options based on the token, show the dropdown\n build_config[\"database_name\"][\"options\"] = [db[\"name\"] for db in database_options]\n build_config[\"database_name\"][\"options_metadata\"] = [\n {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n ]\n\n # Reset the selected database\n if build_config[\"database_name\"][\"value\"] not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"advanced\"] = True\n\n # If we have a 
token, database name should not be advanced\n build_config[\"database_name\"][\"advanced\"] = not build_config[\"token\"][\"value\"]\n\n return build_config\n\n def reset_build_config(self, build_config: dict):\n # Reset the list of databases we have based on the token provided\n build_config[\"database_name\"][\"options\"] = []\n build_config[\"database_name\"][\"options_metadata\"] = []\n build_config[\"database_name\"][\"value\"] = \"\"\n build_config[\"database_name\"][\"advanced\"] = True\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset the list of collections and metadata associated\n build_config[\"collection_name\"][\"options\"] = []\n build_config[\"collection_name\"][\"options_metadata\"] = []\n build_config[\"collection_name\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"advanced\"] = True\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Callback for database creation\n if field_name == \"database_name\" and isinstance(field_value, dict) and \"new_database_name\" in field_value:\n try:\n await self.create_database_api(\n new_database_name=field_value[\"new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"cloud_provider\"],\n region=field_value[\"region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n # Add the new database to the list of options\n build_config[\"database_name\"][\"options\"] = build_config[\"database_name\"][\"options\"] + [\n field_value[\"new_database_name\"]\n ]\n build_config[\"database_name\"][\"options_metadata\"] = build_config[\"database_name\"][\"options_metadata\"] + [\n {\"status\": \"PENDING\"}\n ]\n\n return self.reset_collection_list(build_config)\n\n # This is the callback required to update the list of regions for a cloud provider\n if field_name == \"database_name\" 
and isinstance(field_value, dict) and \"new_database_name\" not in field_value:\n cloud_provider = field_value[\"cloud_provider\"]\n build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n \"options\"\n ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n\n return build_config\n\n # Callback for the creation of collections\n if field_name == \"collection_name\" and isinstance(field_value, dict) and \"new_collection_name\" in field_value:\n try:\n # Get the dimension if its a BYO provider\n dimension = (\n field_value[\"dimension\"]\n if field_value[\"embedding_generation_provider\"] == \"Bring your own\"\n else None\n )\n\n # Create the collection\n await self.create_collection_api(\n new_collection_name=field_value[\"new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=dimension,\n embedding_generation_provider=field_value[\"embedding_generation_provider\"],\n embedding_generation_model=field_value[\"embedding_generation_model\"],\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n # Add the new collection to the list of options\n build_config[\"collection_name\"][\"value\"] = field_value[\"new_collection_name\"]\n build_config[\"collection_name\"][\"options\"].append(field_value[\"new_collection_name\"])\n\n # Get the provider and model for the new collection\n generation_provider = field_value[\"embedding_generation_provider\"]\n provider = generation_provider if generation_provider != \"Bring your own\" else None\n generation_model = field_value[\"embedding_generation_model\"]\n model = generation_model if generation_model else None\n\n # Add the new collection to the list of options\n icon = \"NVIDIA\" if provider == \"Nvidia\" else \"vectorstores\"\n build_config[\"collection_name\"][\"options_metadata\"] = 
build_config[\"collection_name\"][\n \"options_metadata\"\n ] + [{\"records\": 0, \"provider\": provider, \"icon\": icon, \"model\": model}]\n\n return build_config\n\n # Callback to update the model list based on the embedding provider\n if (\n field_name == \"collection_name\"\n and isinstance(field_value, dict)\n and \"new_collection_name\" not in field_value\n ):\n return self.reset_provider_options(build_config)\n\n # When the component first executes, this is the update refresh call\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n\n # If the token has not been provided, simply return the empty build config\n if not self.token:\n return self.reset_build_config(build_config)\n\n # If this is the first execution of the component, reset and build database list\n if first_run or field_name in [\"token\", \"environment\"]:\n return self.reset_database_list(build_config)\n\n # Refresh the collection name options\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n # If missing, refresh the database options\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config = await self.update_build_config(build_config, field_value=self.token, field_name=\"token\")\n build_config[\"database_name\"][\"value\"] = \"\"\n else:\n # Find the position of the selected database to align with metadata\n index_of_name = build_config[\"database_name\"][\"options\"].index(field_value)\n\n # Initializing database condition\n pending = build_config[\"database_name\"][\"options_metadata\"][index_of_name][\"status\"] == \"PENDING\"\n if pending:\n return self.update_build_config(build_config, field_value=self.token, field_name=\"token\")\n\n # Set the API endpoint based on the selected database\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][\n index_of_name\n ][\"api_endpoint\"]\n\n # Reset the provider options\n 
build_config = self.reset_provider_options(build_config)\n\n # Reset the list of collections we have based on the token provided\n return self.reset_collection_list(build_config)\n\n # Hide embedding model option if opriona_metadata provider is not null\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n # Assume we will be autodetecting the collection:\n build_config[\"autodetect_collection\"][\"value\"] = True\n\n # Reload the collection list\n build_config = self.reset_collection_list(build_config)\n\n # Set the options for collection name to be the field value if its a new collection\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n # Add the new collection to the list of options\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n )\n\n # Ensure that autodetect collection is set to False, since its a new collection\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n # Find the position of the selected collection to align with metadata\n index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n # If we were able to determine the Vectorize provider, set it accordingly\n if value_of_provider:\n build_config[\"embedding_model\"][\"advanced\"] = True\n build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n return build_config\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB 
integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = (\n {\"embedding\": self.embedding_model}\n if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n else {}\n )\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return 
vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter 
or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_name": { "_input_type": "DropdownInput", - "advanced": false, + "advanced": true, "combobox": true, - "dialog_inputs": {}, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "", + "display_name": "Create new collection", + "field_order": [ + "new_collection_name", + "embedding_generation_provider", + "embedding_generation_model" + ], + "name": "create_collection", + "template": { + "dimension": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Dimensions", + "dynamic": false, + "info": "Dimension of the embeddings to generate.", + "list": false, + "list_add_label": 
"Add More", + "name": "dimension", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 1024 + }, + "embedding_generation_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Embedding model", + "dynamic": false, + "info": "Model to use for generating embeddings.", + "name": "embedding_generation_model", + "options": [ + "Bring your own", + "NV-Embed-QA" + ], + "options_metadata": [], + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "embedding_generation_provider": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Embedding generation method", + "dynamic": false, + "info": "Provider to use for generating embeddings.", + "name": "embedding_generation_provider", + "options": [ + "Bring your own", + "Nvidia" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "new_collection_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Name", + "dynamic": false, + "info": "Name of the new collection to create in Astra DB.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_collection_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + } + } + } + }, + "functionality": "create" + }, "display_name": "Collection", "dynamic": false, "info": "The name of the collection within Astra DB where the vectors will be stored.", @@ -3385,18 +3481,107 @@ "type": "str", 
"value": "" }, - "d_api_endpoint": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Database API Endpoint", + "database_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": true, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "", + "display_name": "Create new database", + "field_order": [ + "new_database_name", + "cloud_provider", + "region" + ], + "name": "create_database", + "template": { + "cloud_provider": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Cloud provider", + "dynamic": false, + "info": "Cloud provider for the new database.", + "name": "cloud_provider", + "options": [ + "Amazon Web Services", + "Google Cloud Platform", + "Microsoft Azure" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "new_database_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Name", + "dynamic": false, + "info": "Name of the new database to create in Astra DB.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_database_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "region": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Region", + "dynamic": false, + "info": "Region for the new database.", + "name": "region", + "options": [ + "us-east-2", + "ap-south-1", + "eu-west-1" + ], + "options_metadata": [], + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + } + } + } + }, + 
"functionality": "create" + }, + "display_name": "Database", "dynamic": false, - "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "d_api_endpoint", + "info": "The Database name for the Astra DB instance.", + "name": "database_name", + "options": [], + "options_metadata": [], "placeholder": "", - "required": false, + "real_time_refresh": true, + "refresh_button": true, + "required": true, "show": true, "title_case": false, "tool_mode": false, @@ -3478,6 +3663,7 @@ "load_from_db": false, "name": "environment", "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, @@ -3645,7 +3831,7 @@ "show": true, "title_case": false, "type": "str", - "value": "" + "value": "ASTRA_DB_APPLICATION_TOKEN" } }, "tool_mode": false @@ -3654,21 +3840,21 @@ "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-Qdaes", + "id": "AstraDB-HXAXh", "measured": { - "height": 611, + "height": 532, "width": 320 }, "position": { - "x": 1221.7808624943825, - "y": 598.7224891255499 + "x": 1213.4353517134307, + "y": 631.4125346711122 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "AstraDB-sPWXd", + "id": "AstraDB-nMlxo", "node": { "base_classes": [ "Data", @@ -3684,6 +3870,7 @@ "field_order": [ "token", "environment", + "database_name", "api_endpoint", "collection_name", "keyspace", @@ -3695,6 +3882,7 @@ "search_type", "search_score_threshold", "advanced_search_filter", + "autodetect_collection", "content_field", "deletion_field", "ignore_invalid_documents", @@ -3714,8 +3902,8 @@ "method": "search_documents", "name": "search_results", "required_inputs": [ - "api_endpoint", "collection_name", + "database_name", "token" ], "selected": "Data", @@ -3763,20 +3951,17 @@ "value": {} }, "api_endpoint": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": true, - "dialog_inputs": {}, - "display_name": 
"Database", + "_input_type": "StrInput", + "advanced": true, + "display_name": "Astra DB API Endpoint", "dynamic": false, - "info": "The Database / API Endpoint for the Astra DB instance.", - "name": "Database", - "options": [], - "options_metadata": [], + "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "api_endpoint", "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": true, + "required": false, "show": true, "title_case": false, "tool_mode": false, @@ -3837,13 +4022,115 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"description\": \"Create a new database in Astra DB.\",\n \"display_name\": \"Create New Database\",\n \"field_order\": 
[\"new_database_name\", \"cloud_provider\", \"region\"],\n \"template\": {\n \"new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"New Database Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud Provider\",\n info=\"Cloud provider for the new database.\",\n options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n required=True,\n ),\n \"region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"description\": \"Create a new collection in Astra DB.\",\n \"display_name\": \"Create New Collection\",\n \"field_order\": [\n \"new_collection_name\",\n \"embedding_generation_provider\",\n \"embedding_generation_model\",\n ],\n \"template\": {\n \"new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"New Collection Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding Generation Provider\",\n info=\"Provider to use for generating embeddings.\",\n options=[],\n required=True,\n ),\n \"embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding Generation Model\",\n info=\"Model to use for generating embeddings.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n 
real_time_refresh=True,\n input_types=[],\n ),\n StrInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Database\",\n info=\"The Database / API Endpoint for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n combobox=True,\n ),\n StrInput(\n name=\"d_api_endpoint\",\n display_name=\"Database API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n # dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Choose an embedding model or use Astra Vectorize.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n value=\"Embedding Model\",\n advanced=True,\n real_time_refresh=True,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n required=False,\n ),\n *LCVectorStoreComponent.inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional 
parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n return {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n }\n\n @classmethod\n def create_database_api(\n cls,\n token: str,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n ):\n client = DataAPIClient(token=token)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Call the create database function\n return admin_client.create_database(\n name=new_database_name,\n cloud_provider=cloud_provider,\n region=region,\n )\n\n @classmethod\n def create_collection_api(\n cls,\n token: str,\n database_name: str,\n new_collection_name: str,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n ):\n client = DataAPIClient(token=token)\n api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n # Get the database object\n database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n vectorize_options = CollectionVectorServiceOptions(\n provider=embedding_generation_provider,\n model_name=embedding_generation_model,\n authentication=None,\n parameters=None,\n )\n\n # Create the collection\n return database.create_collection(\n name=new_collection_name,\n dimension=dimension,\n service=vectorize_options,\n )\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(token=token, environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list 
= list(admin_client.list_databases())\n\n # Set the environment properly\n env_string = \"\"\n if environment and environment != \"prod\":\n env_string = f\"-{environment}\"\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n api_endpoint = f\"https://{db.info.id}-{db.info.region}.apps.astra{env_string}.datastax.com\"\n db_info_dict[db.info.name] = {\n \"api_endpoint\": api_endpoint,\n \"collections\": len(\n list(\n client.get_database(\n api_endpoint=api_endpoint, token=token, keyspace=db.info.keyspace\n ).list_collection_names(keyspace=db.info.keyspace)\n )\n ),\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(token=self.token, environment=self.environment)\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Otherwise, get the URL from the database list\n return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n def get_api_endpoint(self, *, api_endpoint: str | None = None):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=api_endpoint or self.d_api_endpoint,\n database_name=self.api_endpoint,\n )\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return None\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(token=self.token, environment=self.environment)\n\n return 
client.get_database(\n api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(token=self.token, environment=self.environment)\n\n database = client.get_database(\n api_endpoint=self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def get_vectorize_providers(self):\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n admin = AstraDBAdmin(token=self.token)\n db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n # TODO: https://astra.datastax.com/api/v2/graphql\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return {}\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"collections\": info[\"collections\"],\n \"api_endpoint\": 
info[\"api_endpoint\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n ),\n \"icon\": \"\",\n \"model\": (\n col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_collection_list(self, build_config: dict):\n # Get the list of options we have based on the token provided\n collection_options = self._initialize_collection_options()\n\n # If we retrieved options based on the token, show the dropdown\n build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n build_config[\"collection_name\"][\"options_metadata\"] = [\n {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n ]\n\n # Reset the selected collection\n build_config[\"collection_name\"][\"value\"] = \"\"\n\n return build_config\n\n def reset_database_list(self, build_config: dict):\n # Get the list of options we have based on the token provided\n database_options = self._initialize_database_options()\n\n # If we retrieved options based on the token, show the dropdown\n build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n build_config[\"api_endpoint\"][\"options_metadata\"] = [\n {k: v for k, v in 
db.items() if k not in [\"name\"]} for db in database_options\n ]\n\n # Reset the selected database\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n return build_config\n\n def reset_build_config(self, build_config: dict):\n # Reset the list of databases we have based on the token provided\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"options_metadata\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n # Reset the list of collections and metadata associated\n build_config[\"collection_name\"][\"options\"] = []\n build_config[\"collection_name\"][\"options_metadata\"] = []\n build_config[\"collection_name\"][\"value\"] = \"\"\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # When the component first executes, this is the update refresh call\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"api_endpoint\"][\"options\"]\n\n # If the token has not been provided, simply return\n if not self.token:\n return self.reset_build_config(build_config)\n\n # If this is the first execution of the component, reset and build database list\n if first_run or field_name in [\"token\", \"environment\"]:\n # Reset the build config to ensure we are starting fresh\n build_config = self.reset_build_config(build_config)\n build_config = self.reset_database_list(build_config)\n\n # Get list of regions for a given cloud provider\n \"\"\"\n cloud_provider = (\n build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n \"value\"\n ]\n or \"Amazon Web Services\"\n )\n build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n \"options\"\n ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n \"\"\"\n\n return build_config\n\n # Refresh the 
collection name options\n if field_name == \"api_endpoint\":\n # If missing, refresh the database options\n if not build_config[\"api_endpoint\"][\"options\"] or not field_value:\n return self.update_build_config(build_config, field_value=self.token, field_name=\"token\")\n\n # Set the underlying api endpoint value of the database\n if field_value in build_config[\"api_endpoint\"][\"options\"]:\n index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n index_of_name\n ][\"api_endpoint\"]\n else:\n build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n # Reset the list of collections we have based on the token provided\n return self.reset_collection_list(build_config)\n\n # Hide embedding model option if opriona_metadata provider is not null\n if field_name == \"collection_name\" and field_value:\n # Assume we will be autodetecting the collection:\n build_config[\"autodetect_collection\"][\"value\"] = True\n\n # Set the options for collection name to be the field value if its a new collection\n if field_value not in build_config[\"collection_name\"][\"options\"]:\n # Add the new collection to the list of options\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n )\n\n # Ensure that autodetect collection is set to False, since its a new collection\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n # Find the position of the selected collection to align with metadata\n index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n # If we were able to determine the Vectorize provider, set it accordingly\n if value_of_provider:\n 
build_config[\"embedding_model\"][\"advanced\"] = True\n build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n # For the final step, get the list of vectorize providers\n \"\"\"\n vectorize_providers = self.get_vectorize_providers()\n if not vectorize_providers:\n return build_config\n\n # Allow the user to see the embedding provider options\n provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options\"]\n if not provider_options:\n # If the collection is set, allow user to see embedding options\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n # And allow the user to see the models based on a selected provider\n model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_model\"\n ][\"options\"]\n if not model_options:\n embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"value\"]\n\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_model\"\n ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n \"\"\"\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. 
\"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = (\n {\"embedding\": self.embedding_model}\n if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n else {}\n )\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return 
vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg 
= self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "from collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom astrapy.info import CollectionDescriptor\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n 
StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n \"template\": {\n \"new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n required=True,\n real_time_refresh=True,\n ),\n \"region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"new_collection_name\",\n \"embedding_generation_provider\",\n \"embedding_generation_model\",\n ],\n \"template\": {\n \"new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to 
create in Astra DB.\",\n required=True,\n ),\n \"embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n real_time_refresh=True,\n required=True,\n options=[\"Bring your own\", \"Nvidia\"],\n ),\n \"embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n required=True,\n options=[],\n ),\n \"dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions (Required only for `Bring your own`)\",\n info=\"Dimensions of the embeddings to generate.\",\n required=False,\n value=1024,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n StrInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n StrInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n advanced=True,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Choose an embedding model or use Astra Vectorize.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n value=\"Embedding Model\",\n advanced=True,\n real_time_refresh=True,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n ),\n *LCVectorStoreComponent.inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. 
\"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n admin = AstraDBAdmin(token=token, 
environment=environment)\n db_admin = admin.get_database_admin(api_endpoint=api_endpoint)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n # Get the provider display name and models\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e:\n msg = f\"Error fetching vectorize providers: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(token=token, environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n ):\n # Create the data API client\n client = DataAPIClient(token=token)\n\n # Get the database object\n database = client.get_async_database(api_endpoint=api_endpoint, token=token)\n\n # Build vectorize 
options, if needed\n vectorize_options = None\n if not dimension:\n vectorize_options = CollectionVectorServiceOptions(\n provider=cls.get_vectorize_providers(\n token=token, environment=environment, api_endpoint=api_endpoint\n ).get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Create the collection\n return await database.create_collection(\n name=new_collection_name,\n keyspace=keyspace,\n dimension=dimension,\n service=vectorize_options,\n )\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(token=token, environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = list(admin_client.list_databases())\n\n # Set the environment properly\n env_string = \"\"\n if environment and environment != \"prod\":\n env_string = f\"-{environment}\"\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoint = f\"https://{db.info.id}-{db.info.region}.apps.astra{env_string}.datastax.com\"\n\n # Get the number of collections\n try:\n num_collections = len(\n list(\n client.get_database(\n api_endpoint=api_endpoint, token=token, keyspace=db.info.keyspace\n ).list_collection_names(keyspace=db.info.keyspace)\n )\n )\n except Exception: # noqa: BLE001\n num_collections = 0\n if db.status != \"PENDING\":\n continue\n\n # Add the database to the dictionary\n db_info_dict[db.info.name] = {\n \"api_endpoint\": api_endpoint,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(token=self.token, environment=self.environment)\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n 
environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n return db.get(\"api_endpoint\")\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return None\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(token=self.token, environment=self.environment)\n\n return client.get_database(\n api_endpoint=api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(token=self.token, environment=self.environment)\n\n database = client.get_database(\n api_endpoint=self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n 
\"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoint\": info[\"api_endpoint\"],\n \"icon\": \"data\",\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.options.vector.service.provider\n if collection and collection.options and collection.options.vector and collection.options.vector.service\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name == \"bring your own\":\n return \"vectorstores\"\n\n # Special case for certain models\n # TODO: Add more icons\n if provider_name == \"nvidia\":\n return \"NVIDIA\"\n if provider_name == \"openai\":\n return \"OpenAI\"\n\n # Title case on the provider for the icon if no special case\n return provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.options.vector.service.model_name if 
col.options.vector and col.options.vector.service else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict):\n # Get the list of vectorize providers\n vectorize_providers = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # If the collection is set, allow user to see embedding options\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n # For all not Bring your own or Nvidia providers, add metadata saying configure in Astra DB Portal\n provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options\"]\n\n # Go over each possible provider and add metadata to configure in Astra DB Portal\n for provider in provider_options:\n # Skip Bring your own and Nvidia, automatically configured\n if provider in [\"Bring your own\", \"Nvidia\"]:\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options_metadata\"].append({\"icon\": self.get_provider_icon(provider_name=provider.lower())})\n continue\n\n # Add metadata to configure in Astra DB Portal\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"options_metadata\"].append({\" \": \"Configure in Astra DB Portal\"})\n\n # And allow the user to see the models based on a selected provider\n embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_provider\"\n ][\"value\"]\n\n # Set the options for the embedding 
model based on the provider\n build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n \"embedding_generation_model\"\n ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n\n return build_config\n\n def reset_collection_list(self, build_config: dict):\n # Get the list of options we have based on the token provided\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n\n # If we retrieved options based on the token, show the dropdown\n build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n build_config[\"collection_name\"][\"options_metadata\"] = [\n {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n ]\n\n # Reset the selected collection\n if build_config[\"collection_name\"][\"value\"] not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"value\"] = \"\"\n\n # If we have a database, collection name should not be advanced\n build_config[\"collection_name\"][\"advanced\"] = not build_config[\"database_name\"][\"value\"]\n\n return build_config\n\n def reset_database_list(self, build_config: dict):\n # Get the list of options we have based on the token provided\n database_options = self._initialize_database_options()\n\n # If we retrieved options based on the token, show the dropdown\n build_config[\"database_name\"][\"options\"] = [db[\"name\"] for db in database_options]\n build_config[\"database_name\"][\"options_metadata\"] = [\n {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n ]\n\n # Reset the selected database\n if build_config[\"database_name\"][\"value\"] not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"advanced\"] = True\n\n # If we have a 
token, database name should not be advanced\n build_config[\"database_name\"][\"advanced\"] = not build_config[\"token\"][\"value\"]\n\n return build_config\n\n def reset_build_config(self, build_config: dict):\n # Reset the list of databases we have based on the token provided\n build_config[\"database_name\"][\"options\"] = []\n build_config[\"database_name\"][\"options_metadata\"] = []\n build_config[\"database_name\"][\"value\"] = \"\"\n build_config[\"database_name\"][\"advanced\"] = True\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset the list of collections and metadata associated\n build_config[\"collection_name\"][\"options\"] = []\n build_config[\"collection_name\"][\"options_metadata\"] = []\n build_config[\"collection_name\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"advanced\"] = True\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Callback for database creation\n if field_name == \"database_name\" and isinstance(field_value, dict) and \"new_database_name\" in field_value:\n try:\n await self.create_database_api(\n new_database_name=field_value[\"new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"cloud_provider\"],\n region=field_value[\"region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n # Add the new database to the list of options\n build_config[\"database_name\"][\"options\"] = build_config[\"database_name\"][\"options\"] + [\n field_value[\"new_database_name\"]\n ]\n build_config[\"database_name\"][\"options_metadata\"] = build_config[\"database_name\"][\"options_metadata\"] + [\n {\"status\": \"PENDING\"}\n ]\n\n return self.reset_collection_list(build_config)\n\n # This is the callback required to update the list of regions for a cloud provider\n if field_name == \"database_name\" 
and isinstance(field_value, dict) and \"new_database_name\" not in field_value:\n cloud_provider = field_value[\"cloud_provider\"]\n build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n \"options\"\n ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n\n return build_config\n\n # Callback for the creation of collections\n if field_name == \"collection_name\" and isinstance(field_value, dict) and \"new_collection_name\" in field_value:\n try:\n # Get the dimension if its a BYO provider\n dimension = (\n field_value[\"dimension\"]\n if field_value[\"embedding_generation_provider\"] == \"Bring your own\"\n else None\n )\n\n # Create the collection\n await self.create_collection_api(\n new_collection_name=field_value[\"new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=dimension,\n embedding_generation_provider=field_value[\"embedding_generation_provider\"],\n embedding_generation_model=field_value[\"embedding_generation_model\"],\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n # Add the new collection to the list of options\n build_config[\"collection_name\"][\"value\"] = field_value[\"new_collection_name\"]\n build_config[\"collection_name\"][\"options\"].append(field_value[\"new_collection_name\"])\n\n # Get the provider and model for the new collection\n generation_provider = field_value[\"embedding_generation_provider\"]\n provider = generation_provider if generation_provider != \"Bring your own\" else None\n generation_model = field_value[\"embedding_generation_model\"]\n model = generation_model if generation_model else None\n\n # Add the new collection to the list of options\n icon = \"NVIDIA\" if provider == \"Nvidia\" else \"vectorstores\"\n build_config[\"collection_name\"][\"options_metadata\"] = 
build_config[\"collection_name\"][\n \"options_metadata\"\n ] + [{\"records\": 0, \"provider\": provider, \"icon\": icon, \"model\": model}]\n\n return build_config\n\n # Callback to update the model list based on the embedding provider\n if (\n field_name == \"collection_name\"\n and isinstance(field_value, dict)\n and \"new_collection_name\" not in field_value\n ):\n return self.reset_provider_options(build_config)\n\n # When the component first executes, this is the update refresh call\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n\n # If the token has not been provided, simply return the empty build config\n if not self.token:\n return self.reset_build_config(build_config)\n\n # If this is the first execution of the component, reset and build database list\n if first_run or field_name in [\"token\", \"environment\"]:\n return self.reset_database_list(build_config)\n\n # Refresh the collection name options\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n # If missing, refresh the database options\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config = await self.update_build_config(build_config, field_value=self.token, field_name=\"token\")\n build_config[\"database_name\"][\"value\"] = \"\"\n else:\n # Find the position of the selected database to align with metadata\n index_of_name = build_config[\"database_name\"][\"options\"].index(field_value)\n\n # Initializing database condition\n pending = build_config[\"database_name\"][\"options_metadata\"][index_of_name][\"status\"] == \"PENDING\"\n if pending:\n return self.update_build_config(build_config, field_value=self.token, field_name=\"token\")\n\n # Set the API endpoint based on the selected database\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][\n index_of_name\n ][\"api_endpoint\"]\n\n # Reset the provider options\n 
build_config = self.reset_provider_options(build_config)\n\n # Reset the list of collections we have based on the token provided\n return self.reset_collection_list(build_config)\n\n # Hide embedding model option if opriona_metadata provider is not null\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n # Assume we will be autodetecting the collection:\n build_config[\"autodetect_collection\"][\"value\"] = True\n\n # Reload the collection list\n build_config = self.reset_collection_list(build_config)\n\n # Set the options for collection name to be the field value if its a new collection\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n # Add the new collection to the list of options\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n )\n\n # Ensure that autodetect collection is set to False, since its a new collection\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n # Find the position of the selected collection to align with metadata\n index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n # If we were able to determine the Vectorize provider, set it accordingly\n if value_of_provider:\n build_config[\"embedding_model\"][\"advanced\"] = True\n build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n return build_config\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB 
integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = (\n {\"embedding\": self.embedding_model}\n if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n else {}\n )\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return 
vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter 
or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_name": { "_input_type": "DropdownInput", - "advanced": false, + "advanced": true, "combobox": true, - "dialog_inputs": {}, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "", + "display_name": "Create new collection", + "field_order": [ + "new_collection_name", + "embedding_generation_provider", + "embedding_generation_model" + ], + "name": "create_collection", + "template": { + "dimension": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Dimensions", + "dynamic": false, + "info": "Dimension of the embeddings to generate.", + "list": false, + "list_add_label": 
"Add More", + "name": "dimension", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 1024 + }, + "embedding_generation_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Embedding model", + "dynamic": false, + "info": "Model to use for generating embeddings.", + "name": "embedding_generation_model", + "options": [ + "Bring your own", + "NV-Embed-QA" + ], + "options_metadata": [], + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "embedding_generation_provider": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Embedding generation method", + "dynamic": false, + "info": "Provider to use for generating embeddings.", + "name": "embedding_generation_provider", + "options": [ + "Bring your own", + "Nvidia" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "new_collection_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Name", + "dynamic": false, + "info": "Name of the new collection to create in Astra DB.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_collection_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + } + } + } + }, + "functionality": "create" + }, "display_name": "Collection", "dynamic": false, "info": "The name of the collection within Astra DB where the vectors will be stored.", @@ -3880,18 +4167,107 @@ "type": "str", 
"value": "" }, - "d_api_endpoint": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Database API Endpoint", + "database_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": true, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "", + "display_name": "Create new database", + "field_order": [ + "new_database_name", + "cloud_provider", + "region" + ], + "name": "create_database", + "template": { + "cloud_provider": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Cloud provider", + "dynamic": false, + "info": "Cloud provider for the new database.", + "name": "cloud_provider", + "options": [ + "Amazon Web Services", + "Google Cloud Platform", + "Microsoft Azure" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "new_database_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Name", + "dynamic": false, + "info": "Name of the new database to create in Astra DB.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_database_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "region": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Region", + "dynamic": false, + "info": "Region for the new database.", + "name": "region", + "options": [ + "us-east-2", + "ap-south-1", + "eu-west-1" + ], + "options_metadata": [], + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + } + } + } + }, + 
"functionality": "create" + }, + "display_name": "Database", "dynamic": false, - "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "d_api_endpoint", + "info": "The Database name for the Astra DB instance.", + "name": "database_name", + "options": [], + "options_metadata": [], "placeholder": "", - "required": false, + "real_time_refresh": true, + "refresh_button": true, + "required": true, "show": true, "title_case": false, "tool_mode": false, @@ -3973,6 +4349,7 @@ "load_from_db": false, "name": "environment", "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, @@ -4140,7 +4517,7 @@ "show": true, "title_case": false, "type": "str", - "value": "" + "value": "ASTRA_DB_APPLICATION_TOKEN" } }, "tool_mode": false @@ -4149,30 +4526,30 @@ "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-sPWXd", + "id": "AstraDB-nMlxo", "measured": { - "height": 611, + "height": 532, "width": 320 }, "position": { - "x": 2053.8028711939423, - "y": 1455.7952184640951 + "x": 2065.4581687557493, + "y": 1496.259507100966 }, "selected": false, "type": "genericNode" } ], "viewport": { - "x": 14.338407079834894, - "y": 248.6723683033677, - "zoom": 0.2658894837527901 + "x": 24.946958998386435, + "y": -163.43184624766263, + "zoom": 0.44406917240373706 } }, "description": "Load your data for chat context with Retrieval Augmented Generation.", "endpoint_name": null, - "id": "b57f5ec7-f4f1-42d9-b877-4b2a4fb0650a", + "id": "89b399d4-ddab-44cb-bda6-f8cad1120416", "is_component": false, - "last_tested_version": "1.1.2", + "last_tested_version": "1.1.5", "name": "Vector Store RAG", "tags": [ "openai", diff --git a/src/frontend/src/CustomNodes/GenericNode/components/NodeDialogComponent/index.tsx b/src/frontend/src/CustomNodes/GenericNode/components/NodeDialogComponent/index.tsx index 5b7020b75..c764018b4 100644 --- 
a/src/frontend/src/CustomNodes/GenericNode/components/NodeDialogComponent/index.tsx +++ b/src/frontend/src/CustomNodes/GenericNode/components/NodeDialogComponent/index.tsx @@ -8,105 +8,184 @@ import { DialogHeader, DialogTitle, } from "@/components/ui/dialog"; +import { usePostTemplateValue } from "@/controllers/API/queries/nodes/use-post-template-value"; import { getCustomParameterTitle } from "@/customization/components/custom-parameter"; -import useHandleOnNewValue from "@/CustomNodes/hooks/use-handle-new-value"; +import { mutateTemplate } from "@/CustomNodes/helpers/mutate-template"; +import useAlertStore from "@/stores/alertStore"; import useFlowStore from "@/stores/flowStore"; -import { InputFieldType } from "@/types/api"; -import { cloneDeep } from "lodash"; +import { APIClassType, InputFieldType } from "@/types/api"; +import { useState } from "react"; -export const NodeDialog = ({ - open, - onClose, - dialogInputs, - nodeId, -}: { +interface NodeDialogProps { open: boolean; onClose: () => void; dialogInputs: any; nodeId: string; + name: string; + nodeClass: APIClassType; +} + +interface ValueObject { + value: string; +} + +export const NodeDialog: React.FC = ({ + open, + onClose, + dialogInputs, + nodeId, + name, + nodeClass, }) => { + const [isLoading, setIsLoading] = useState(false); + const [fieldValues, setFieldValues] = useState>({}); + const nodes = useFlowStore((state) => state.nodes); const setNode = useFlowStore((state) => state.setNode); + const setErrorData = useAlertStore((state) => state.setErrorData); - const handleNewValue = (value: string, key: string) => { - let rawValue = value; + const postTemplateValue = usePostTemplateValue({ + parameterId: name, + nodeId: nodeId, + node: nodeClass, + }); - if (typeof value === "object" && value) { - rawValue = (value as { value: string }).value; - } + const { fields, functionality: submitButtonText } = dialogInputs || {}; + const dialogNodeData = fields?.data?.node; + const dialogTemplate = 
dialogNodeData?.template || {}; - const template = cloneDeep(dialogInputs?.fields?.data?.node?.template); - template[key].value = value; + const setNodeClass = (newNode: APIClassType) => { + const targetNode = nodes.find((node) => node.id === nodeId); + if (!targetNode) return; - const newNode = cloneDeep(nodes.find((node) => node.id === nodeId)); - if (newNode) { - const template = newNode.data.node.template; - const databaseFields = template.database_name.dialog_inputs.fields; - const nodeTemplate = databaseFields.data.node.template; - - nodeTemplate[key].value = rawValue; - } - setNode(nodeId, newNode!); + targetNode.data.node = newNode; + setNode(nodeId, targetNode); }; + const handleErrorData = (newState: { + title: string; + list?: Array; + }) => { + setErrorData(newState); + setIsLoading(false); + }; + + const updateFieldValue = (value: string | ValueObject, fieldKey: string) => { + const newValue = typeof value === "object" ? value.value : value; + const targetNode = nodes.find((node) => node.id === nodeId); + if (!targetNode || !name) return; + + targetNode.data.node.template[name].dialog_inputs.fields.data.node.template[ + fieldKey + ].value = newValue; + setNode(nodeId, targetNode); + setFieldValues((prev) => ({ ...prev, [fieldKey]: newValue })); + + if (dialogTemplate[fieldKey].real_time_refresh) { + mutateTemplate( + { [fieldKey]: newValue }, + nodeClass, + setNodeClass, + postTemplateValue, + handleErrorData, + name, + ); + } + }; + + const handleCloseDialog = () => { + setFieldValues({}); + const targetNode = nodes.find((node) => node.id === nodeId); + if (targetNode && name) { + const nodeTemplate = targetNode.data.node.template; + Object.keys(dialogTemplate).forEach((key) => { + nodeTemplate[name].dialog_inputs.fields.data.node.template[key].value = + ""; + }); + setNode(nodeId, targetNode); + } + setIsLoading(false); + onClose(); + }; + + const handleSubmitDialog = async () => { + setIsLoading(true); + + await mutateTemplate( + fieldValues, + 
nodeClass, + setNodeClass, + postTemplateValue, + handleErrorData, + name, + handleCloseDialog, + nodeClass.tool_mode, + ); + + setTimeout(() => { + handleCloseDialog(); + }, 5000); + }; + + // Render return ( - +
- - {dialogInputs.fields?.data?.node?.display_name} - + {dialogNodeData?.display_name}
- {dialogInputs.fields?.data?.node?.description} + {dialogNodeData?.description}
- {Object.entries(dialogInputs?.fields?.data?.node?.template ?? {}).map( - ([key, value]) => ( -
-
- {getCustomParameterTitle({ - title: - dialogInputs?.fields?.data?.node?.template[key] - .display_name ?? "", - nodeId, - isFlexView: false, - })} -
- - handleNewValue(value, key) - } - name={key} - nodeId={nodeId} - templateData={value as Partial} - templateValue={ - dialogInputs?.fields?.data?.node?.template[key].value - } - editNode={false} - handleNodeClass={() => {}} - nodeClass={dialogInputs.fields?.data?.node} - disabled={false} - placeholder="" - isToolMode={false} - /> + {Object.entries(dialogTemplate).map(([fieldKey, fieldValue]) => ( +
+
+ {getCustomParameterTitle({ + title: + (fieldValue as { display_name: string })?.display_name ?? + "", + nodeId, + isFlexView: false, + })}
- ), - )} + + updateFieldValue(value, fieldKey) + } + name={fieldKey} + nodeId={nodeId} + templateData={fieldValue as Partial} + templateValue={fieldValues[fieldKey] || ""} + editNode={false} + handleNodeClass={() => {}} + nodeClass={dialogNodeData} + disabled={false} + placeholder="" + isToolMode={false} + /> +
+ ))}
- - +
diff --git a/src/frontend/src/components/common/fetchIconComponent/index.tsx b/src/frontend/src/components/common/fetchIconComponent/index.tsx new file mode 100644 index 000000000..83431f520 --- /dev/null +++ b/src/frontend/src/components/common/fetchIconComponent/index.tsx @@ -0,0 +1,21 @@ +import ForwardedIconComponent from "../genericIconComponent"; + +const FetchIconComponent = ({ + source, + name, +}: { + source: string; + name: string; +}) => { + return ( +
+ {source ? ( + {name} + ) : ( + + )} +
+ ); +}; + +export default FetchIconComponent; diff --git a/src/frontend/src/components/common/loadingTextComponent/index.tsx b/src/frontend/src/components/common/loadingTextComponent/index.tsx new file mode 100644 index 000000000..ec0262f77 --- /dev/null +++ b/src/frontend/src/components/common/loadingTextComponent/index.tsx @@ -0,0 +1,23 @@ +import { useEffect, useState } from "react"; + +const LoadingTextComponent = ({ text }: { text: string }) => { + const [dots, setDots] = useState("."); + + useEffect(() => { + const interval = setInterval(() => { + setDots((prevDots) => (prevDots === "..." ? "" : `${prevDots}.`)); + }, 300); + + return () => { + clearInterval(interval); + }; + }, []); + + if (!text) { + return null; + } + + return {`${text}${dots}`}; +}; + +export default LoadingTextComponent; diff --git a/src/frontend/src/components/core/dropdownComponent/index.tsx b/src/frontend/src/components/core/dropdownComponent/index.tsx index 20c7887c9..1e240b2a0 100644 --- a/src/frontend/src/components/core/dropdownComponent/index.tsx +++ b/src/frontend/src/components/core/dropdownComponent/index.tsx @@ -1,7 +1,9 @@ +import LoadingTextComponent from "@/components/common/loadingTextComponent"; import { usePostTemplateValue } from "@/controllers/API/queries/nodes/use-post-template-value"; import NodeDialog from "@/CustomNodes/GenericNode/components/NodeDialogComponent"; import { mutateTemplate } from "@/CustomNodes/helpers/mutate-template"; import useAlertStore from "@/stores/alertStore"; +import { getStatusColor } from "@/utils/stringManipulation"; import { PopoverAnchor } from "@radix-ui/react-popover"; import Fuse from "fuse.js"; import { cloneDeep } from "lodash"; @@ -13,7 +15,6 @@ import ShadTooltip from "../../common/shadTooltipComponent"; import { Button } from "../../ui/button"; import { Command, - CommandEmpty, CommandGroup, CommandItem, CommandList, @@ -42,25 +43,41 @@ export default function Dropdown({ dialogInputs, ...baseInputProps }: BaseInputProps & 
DropDownComponent): JSX.Element { - const nodeId = baseInputProps?.nodeId; + // Initialize state and refs + const [open, setOpen] = useState(children ? true : false); + const [openDialog, setOpenDialog] = useState(false); + const [customValue, setCustomValue] = useState(""); + const [filteredOptions, setFilteredOptions] = useState(options); + const [refreshOptions, setRefreshOptions] = useState(false); + const refButton = useRef(null); + // Initialize utilities and constants const placeholderName = name ? formatPlaceholderName(name) : "Choose an option..."; - const { firstWord } = formatName(name); - const [open, setOpen] = useState(children ? true : false); - const [openDialog, setOpenDialog] = useState(false); - - const refButton = useRef(null); - + const fuse = new Fuse(options, { keys: ["name", "value"] }); const PopoverContentDropdown = children || editNode ? PopoverContent : PopoverContentWithoutPortal; + const { nodeClass, nodeId, handleNodeClass, tooltip } = baseInputProps; - const [customValue, setCustomValue] = useState(""); - const [filteredOptions, setFilteredOptions] = useState(options); + // API and store hooks + const postTemplateValue = usePostTemplateValue({ + parameterId: name || "", + nodeId: nodeId || "", + node: nodeClass!, + }); + const setErrorData = useAlertStore((state) => state.setErrorData); - const fuse = new Fuse(options, { keys: ["name", "value"] }); + // Utility functions + const filterMetadataKeys = ( + metadata: Record = {}, + excludeKeys: string[] = ["api_endpoint", "icon", "status"], + ) => { + return Object.fromEntries( + Object.entries(metadata).filter(([key]) => !excludeKeys.includes(key)), + ); + }; const searchRoleByTerm = async (event: ChangeEvent) => { const value = event.target.value; @@ -71,28 +88,43 @@ export default function Dropdown({ setCustomValue(value); }; - const { nodeClass, handleNodeClass } = baseInputProps; + const handleRefreshButtonPress = async () => { + setRefreshOptions(true); + setOpen(false); - const 
postTemplateValue = usePostTemplateValue({ - parameterId: name || "", - nodeId: id, - node: nodeClass!, - }); - - const { isPending } = postTemplateValue; - - const setErrorData = useAlertStore((state) => state.setErrorData); - - const handleRefreshButtonPress = () => { - mutateTemplate( + await mutateTemplate( value, nodeClass!, handleNodeClass, postTemplateValue, setErrorData, - ); + )?.then(() => { + setTimeout(() => { + setRefreshOptions(false); + }, 2000); + }); }; + const formatTooltipContent = (option: string, index: number) => { + if (!optionsMetaData?.[index]) return option; + + const metadata = optionsMetaData[index]; + const metadataEntries = Object.entries(metadata) + .filter(([key, value]) => value !== null && key !== "icon") + .map(([key, value]) => { + const displayValue = + typeof value === "string" && value.length > 20 + ? `${value.substring(0, 30)}...` + : String(value); + return `${key}: ${displayValue}`; + }); + + return metadataEntries.length > 0 + ? `${firstWord}: ${option}\n${metadataEntries.join("\n")}` + : option; + }; + + // Effects useEffect(() => { if (disabled && value !== "") { onSelect("", undefined, true); @@ -109,61 +141,71 @@ export default function Dropdown({ } }, [open]); + // Render helper functions + + const renderLoadingButton = () => ( + + ); + const renderTriggerButton = () => ( - - - + > + + {optionsMetaData?.[ + filteredOptions.findIndex((option) => option === value) + ]?.icon && ( + option === value) + ]?.icon + } + className="h-4 w-4" + /> + )} + {value && filteredOptions.includes(value) ? value : placeholderName}{" "} + + + + + ); const renderSearchInput = () => ( @@ -211,10 +253,7 @@ export default function Dropdown({
Refresh list
@@ -223,87 +262,118 @@ export default function Dropdown({ setOpenDialog(false)} + onClose={() => { + setOpenDialog(false); + setOpen(false); + }} nodeId={nodeId!} + name={name!} + nodeClass={nodeClass!} /> ); const renderOptionsList = () => ( - No values found. - {filteredOptions?.map((option, index) => ( - -
- { - onSelect(currentValue); - setOpen(false); - }} - className="items-center" - data-testid={`${option}-${index}-option`} - > -
- {optionsMetaData?.[index]?.icon ? ( - - ) : null} -
0, - "w-full pl-2": !optionsMetaData?.[index]?.icon, - })} - > -
{option}
- {optionsMetaData && optionsMetaData?.length > 0 ? ( -
- {Object.entries(optionsMetaData?.[index] || {}) - .filter( - ([key, value]) => value !== null && key !== "icon", + {filteredOptions?.length > 0 ? ( + filteredOptions?.map((option, index) => ( + +
+ { + onSelect(currentValue); + setOpen(false); + }} + className="items-center" + data-testid={`${option}-${index}-option`} + > +
+ {optionsMetaData && optionsMetaData.length > 0 && ( + + )} +
0, + "w-full pl-2": !optionsMetaData?.[index]?.icon, + })} + > +
+ {option}{" "} + + + +
+ {optionsMetaData && optionsMetaData?.length > 0 ? ( +
+ {Object.entries( + filterMetadataKeys(optionsMetaData?.[index] || {}), ) - .map(([key, value], i, arr) => ( -
- {i > 0 && ( - - )} + .filter( + ([key, value]) => + value !== null && key !== "icon", + ) + .map(([key, value], i, arr) => (
{`${String(value)} ${key}`}
-
- ))} -
- ) : ( -
- -
- )} + > + {i > 0 && ( + + )} +
{`${String(value)} ${key}`}
+
+ ))} +
+ ) : ( +
+ +
+ )} +
-
- -
- - ))} +
+
+
+ )) + ) : ( + + No options found + + )}
{dialogInputs && dialogInputs?.fields && renderCustomOptionDialog()} @@ -320,12 +390,13 @@ export default function Dropdown({ } > - {renderSearchInput()} + {filteredOptions?.length > 0 && renderSearchInput()} {renderOptionsList()} ); + // Loading state if (Object.keys(options).length === 0 && !combobox && isLoading) { return (
@@ -334,10 +405,13 @@ export default function Dropdown({ ); } + // Main render return ( {} : setOpen}> {children ? ( {children} + ) : refreshOptions || isLoading ? ( + renderLoadingButton() ) : ( renderTriggerButton() )} diff --git a/src/frontend/src/components/core/parameterRenderComponent/types.ts b/src/frontend/src/components/core/parameterRenderComponent/types.ts index 920726de9..fcf71f7b1 100644 --- a/src/frontend/src/components/core/parameterRenderComponent/types.ts +++ b/src/frontend/src/components/core/parameterRenderComponent/types.ts @@ -15,6 +15,7 @@ export type BaseInputProps = { readonly?: boolean; placeholder?: string; isToolMode?: boolean; + tooltip?: string; metadata?: any; nodeId?: string; }; diff --git a/src/frontend/src/style/index.css b/src/frontend/src/style/index.css index 346db6251..900f83aee 100644 --- a/src/frontend/src/style/index.css +++ b/src/frontend/src/style/index.css @@ -196,6 +196,8 @@ --placeholder-foreground: 240 4% 46%; /* hsl(240, 4%, 46%) */ --canvas: 0 0% 0%; /* hsl(0, 0%, 0%) */ --canvas-dot: 240 5.3% 26.1%; /* hsl(240, 5.3%, 26.1%) */ + --accent-amber: 26 90% 37%; /* hsl(26, 90%, 37%) */ + --accent-amber-foreground: 26 90% 37%; /* hsl(26, 90%, 37%) */ --accent-emerald: 164 86% 16%; /* hsl(164, 86%, 16%) */ --accent-emerald-foreground: 158 64% 52%; /* hsl(158, 64%, 52%) */ --accent-emerald-hover: 163.1 88.1% 19.8%; /* hsl(163.1, 88.1%, 19.8%) */ diff --git a/src/frontend/src/utils/stringManipulation.ts b/src/frontend/src/utils/stringManipulation.ts index 246a48712..3ed01a6c1 100644 --- a/src/frontend/src/utils/stringManipulation.ts +++ b/src/frontend/src/utils/stringManipulation.ts @@ -113,3 +113,25 @@ export function parseString( return result; } + +/** Maps a status string (compared case-insensitively) to a Tailwind text-color class: amber for the listed transitional statuses, red for "terminating", "" otherwise. */ +export const getStatusColor = (status: string): string => { + const amberStatuses = [ + "initializing", + "pending", + "hibernating", + "hibernated", + "maintenance", + "parked", + ]; + + if (amberStatuses.includes(status?.toLowerCase())) { + return "text-accent-amber-foreground"; 
+ } + + if (status?.toLowerCase() === "terminating") { + return "text-red-500"; + } + + return ""; +}; diff --git a/src/frontend/src/utils/utils.ts b/src/frontend/src/utils/utils.ts index 9eef7f920..d13d3cda9 100644 --- a/src/frontend/src/utils/utils.ts +++ b/src/frontend/src/utils/utils.ts @@ -741,7 +741,7 @@ export const formatName = (name) => { .join(" "); const firstWord = - formattedName.split(" ")[0].charAt(0).toUpperCase() + + formattedName.split(" ")[0].charAt(0) + formattedName.split(" ")[0].slice(1); return { formattedName, firstWord }; diff --git a/src/frontend/tailwind.config.mjs b/src/frontend/tailwind.config.mjs index dfcd8cb64..28087fefa 100644 --- a/src/frontend/tailwind.config.mjs +++ b/src/frontend/tailwind.config.mjs @@ -175,6 +175,10 @@ const config = { DEFAULT: "hsl(var(--accent))", foreground: "hsl(var(--accent-foreground))", }, + "accent-amber": { + DEFAULT: "hsl(var(--accent-amber))", + foreground: "hsl(var(--accent-amber-foreground))", + }, "accent-emerald": { DEFAULT: "hsl(var(--accent-emerald))", foreground: "hsl(var(--accent-emerald-foreground))",