diff --git a/src/backend/base/langflow/base/vectorstores/model.py b/src/backend/base/langflow/base/vectorstores/model.py index 191e45ad0..ee157c037 100644 --- a/src/backend/base/langflow/base/vectorstores/model.py +++ b/src/backend/base/langflow/base/vectorstores/model.py @@ -2,12 +2,10 @@ from abc import abstractmethod from functools import wraps from typing import TYPE_CHECKING -from loguru import logger - from langflow.custom import Component from langflow.field_typing import Text, VectorStore from langflow.helpers.data import docs_to_data -from langflow.io import Output +from langflow.io import DataInput, MultilineInput, Output from langflow.schema import Data if TYPE_CHECKING: @@ -53,6 +51,19 @@ class LCVectorStoreComponent(Component): raise TypeError(msg) trace_type = "retriever" + + inputs = [ + MultilineInput( + name="search_query", + display_name="Search Query", + tool_mode=True, + ), + DataInput( + name="ingest_data", + display_name="Ingest Data", + ), + ] + outputs = [ Output( display_name="Search Results", @@ -122,9 +133,9 @@ class LCVectorStoreComponent(Component): vector_store = self.build_vector_store() self._cached_vector_store = vector_store - logger.debug(f"Search input: {search_query}") - logger.debug(f"Search type: {self.search_type}") - logger.debug(f"Number of results: {self.number_of_results}") + self.log(f"Search input: {search_query}") + self.log(f"Search type: {self.search_type}") + self.log(f"Number of results: {self.number_of_results}") search_results = self.search_with_vector_store( search_query, self.search_type, vector_store, k=self.number_of_results diff --git a/src/backend/base/langflow/components/vectorstores/astradb.py b/src/backend/base/langflow/components/vectorstores/astradb.py index e922d9660..fb68e3b4c 100644 --- a/src/backend/base/langflow/components/vectorstores/astradb.py +++ b/src/backend/base/langflow/components/vectorstores/astradb.py @@ -1,7 +1,7 @@ import os from collections import defaultdict -from astrapy import DataAPIClient +from astrapy import AstraDBAdmin, DataAPIClient from astrapy.admin import parse_api_endpoint from langchain_astradb import AstraDBVectorStore @@ -14,7 +14,6 @@ from langflow.io import ( DropdownInput, HandleInput, IntInput, - MultilineInput, SecretStrInput, StrInput, ) @@ -24,52 +23,29 @@ from langflow.utils.version import get_version_info class AstraDBVectorStoreComponent(LCVectorStoreComponent): display_name: str = "Astra DB" - description: str = "Implementation of Vector Store using Astra DB with search capabilities" - documentation: str = "https://docs.langflow.org/starter-projects-vector-store-rag" + description: str = "Ingest and search documents in Astra DB" + documentation: str = "https://docs.datastax.com/en/langflow/astra-components.html" name = "AstraDB" icon: str = "AstraDB" _cached_vector_store: AstraDBVectorStore | None = None - VECTORIZE_PROVIDERS_MAPPING = defaultdict( - list, - { - "Azure OpenAI": [ - "azureOpenAI", - ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"], - ], - "Hugging Face - Dedicated": ["huggingfaceDedicated", ["endpoint-defined-model"]], - "Hugging Face - Serverless": [ - "huggingface", - [ - "sentence-transformers/all-MiniLM-L6-v2", - "intfloat/multilingual-e5-large", - "intfloat/multilingual-e5-large-instruct", - "BAAI/bge-small-en-v1.5", - "BAAI/bge-base-en-v1.5", - "BAAI/bge-large-en-v1.5", - ], - ], - "Jina AI": [ - "jinaAI", - [ - "jina-embeddings-v2-base-en", - "jina-embeddings-v2-base-de", - "jina-embeddings-v2-base-es", - "jina-embeddings-v2-base-code", - "jina-embeddings-v2-base-zh", - ], - ], - "Mistral AI": ["mistral", ["mistral-embed"]], - "Nvidia": ["nvidia", ["NV-Embed-QA"]], - "OpenAI": ["openai", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]], - "Upstage": ["upstageAI", ["solar-embedding-1-large"]], - "Voyage AI": [ - "voyageAI", - ["voyage-large-2-instruct", "voyage-law-2", "voyage-code-2", "voyage-large-2", "voyage-2"], - ], - }, - ) + base_inputs = LCVectorStoreComponent.inputs + if "search_query" not in [input_.name for input_ in base_inputs]: + base_inputs.append( + MessageTextInput( + name="search_query", + display_name="Search Query", + tool_mode=True, + ) + ) + if "ingest_data" not in [input_.name for input_ in base_inputs]: + base_inputs.append( + DataInput( + name="ingest_data", + display_name="Ingest Data", + ) + ) inputs = [ SecretStrInput( @@ -81,13 +57,15 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): advanced=os.getenv("ASTRA_ENHANCED", "false").lower() == "true", real_time_refresh=True, ), - SecretStrInput( + DropdownInput( name="api_endpoint", - display_name="Database" if os.getenv("ASTRA_ENHANCED", "false").lower() == "true" else "API Endpoint", - info="API endpoint URL for the Astra DB service.", - value="ASTRA_DB_API_ENDPOINT", + display_name="Database", + info="The Astra DB Database to use.", required=True, + refresh_button=True, real_time_refresh=True, + options=["Default database"], + value="Default database", ), DropdownInput( name="collection_name", @@ -126,15 +104,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): input_types=["Embeddings"], info="Allows an embedding model configuration.", ), - DataInput( - name="ingest_data", - display_name="Ingest Data", - ), - MultilineInput( - name="search_input", - display_name="Search Query", - tool_mode=True, - ), + *base_inputs, IntInput( name="number_of_results", display_name="Number of Search Results", @@ -212,26 +182,18 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): return build_config - def update_providers_mapping(self): - # If we don't have token or api_endpoint, we can't fetch the list of providers - if not self.token or not self.api_endpoint: - self.log("Astra DB token and API endpoint are required to fetch the list of Vectorize providers.") - - return self.VECTORIZE_PROVIDERS_MAPPING - + def get_vectorize_providers(self): try: self.log("Dynamically updating list of Vectorize providers.") # Get the admin object - client = DataAPIClient(token=self.token) - admin = client.get_admin() + admin = AstraDBAdmin(token=self.token) + db_admin = admin.get_database_admin(self.get_api_endpoint()) - # Get the embedding providers - db_admin = admin.get_database_admin(self.api_endpoint) + # Get the list of embedding providers embedding_providers = db_admin.find_embedding_providers().as_dict() vectorize_providers_mapping = {} - # Map the provider display name to the provider key and models for provider_key, provider_data in embedding_providers["embeddingProviders"].items(): display_name = provider_data["displayName"] @@ -244,14 +206,37 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): except Exception as e: # noqa: BLE001 self.log(f"Error fetching Vectorize providers: {e}") - return self.VECTORIZE_PROVIDERS_MAPPING + return {} + + def get_database_list(self): + # Get the admin object + db_admin = AstraDBAdmin(token=self.token) + db_list = list(db_admin.list_databases()) + + # Generate the api endpoint for each database + return {db.info.name: f"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com" for db in db_list} + + def get_api_endpoint(self): + # Get the database name (or endpoint) + database = self.api_endpoint + + # If the database is not set, get the first database in the list + if not database or database == "Default database": + database, _ = next(iter(self.get_database_list().items())) + + # If the database is a URL, return it + if database.startswith("https://"): + return database + + # Otherwise, get the URL from the database list + return self.get_database_list().get(database) def get_database(self): try: client = DataAPIClient(token=self.token) return client.get_database( - self.api_endpoint, + api_endpoint=self.get_api_endpoint(), token=self.token, ) except Exception as e: # noqa: BLE001 @@ -259,13 +244,25 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): return None + def _initialize_database_options(self): + if not self.token: + return ["Default database"] + try: + databases = ["Default database", *list(self.get_database_list().keys())] + except Exception as e: # noqa: BLE001 + self.log(f"Error fetching databases: {e}") + + return ["Default database"] + + return databases + def _initialize_collection_options(self): database = self.get_database() if database is None: return ["+ Create new collection"] try: - collections = [collection.name for collection in database.list_collections()] + collections = [collection.name for collection in database.list_collections(keyspace=self.keyspace or None)] except Exception as e: # noqa: BLE001 self.log(f"Error fetching collections: {e}") @@ -289,7 +286,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): collection_name = self.get_collection_choice() try: - collection = database.get_collection(collection_name) + collection = database.get_collection(collection_name, keyspace=self.keyspace or None) collection_options = collection.options() except Exception as _: # noqa: BLE001 return None @@ -297,8 +294,17 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): return collection_options.vector def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None): - # Refresh the collection name options - build_config["collection_name"]["options"] = self._initialize_collection_options() + # Always attempt to update the database list + if field_name in ["token", "api_endpoint", "collection_name"]: + # Update the database selector + build_config["api_endpoint"]["options"] = self._initialize_database_options() + + # Set the default API endpoint if not set + if build_config["api_endpoint"]["value"] == "Default database": + build_config["api_endpoint"]["value"] = build_config["api_endpoint"]["options"][0] + + # Update the collection selector + build_config["collection_name"]["options"] = self._initialize_collection_options() # Update the choice of embedding model based on collection name if field_name == "collection_name": @@ -368,7 +374,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): build_config["embedding_model"]["advanced"] = True # Update the providers mapping - vectorize_providers = self.update_providers_mapping() + vectorize_providers = self.get_vectorize_providers() new_parameter = DropdownInput( name="embedding_provider", @@ -402,7 +408,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): ) # Update the providers mapping - vectorize_providers = self.update_providers_mapping() + vectorize_providers = self.get_vectorize_providers() model_options = vectorize_providers[field_value][1] new_parameter = DropdownInput( @@ -481,7 +487,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): setattr(self, attribute, None) # Fetch values from kwargs if any self.* attributes are None - provider_mapping = self.update_providers_mapping() + provider_mapping = self.get_vectorize_providers() provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get("embedding_provider") model_name = self.model or kwargs.get("model") authentication = {**(self.z_04_authentication or {}), **kwargs.get("z_04_authentication", {})} @@ -525,7 +531,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): raise ImportError(msg) from e # Initialize parameters based on the collection name - is_new_collection = self.collection_name == "+ Create new collection" + is_new_collection = self.get_collection_options() is None # Get the embedding model embedding_params = {"embedding": self.embedding_model} if self.embedding_choice == "Embedding Model" else {} @@ -553,11 +559,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): } # Get the running environment for Langflow - environment = ( - parse_api_endpoint(getattr(self, "api_endpoint", None)).environment - if getattr(self, "api_endpoint", None) - else None - ) + environment = parse_api_endpoint(self.get_api_endpoint()).environment if self.get_api_endpoint() else None # Get Langflow version and platform information __version__ = get_version_info()["version"] @@ -577,16 +579,16 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): vector_store = AstraDBVectorStore( # Astra DB Authentication Parameters token=self.token, - api_endpoint=self.api_endpoint, + api_endpoint=self.get_api_endpoint(), namespace=self.keyspace or None, collection_name=self.get_collection_choice(), environment=environment, # Astra DB Usage Tracking Parameters ext_callers=[(f"{langflow_prefix}langflow", __version__)], # Astra DB Vector Store Parameters - **autodetect_params, - **embedding_params, - **self.astradb_vectorstore_kwargs, + **autodetect_params or {}, + **embedding_params or {}, + **self.astradb_vectorstore_kwargs or {}, ) except Exception as e: msg = f"Error initializing AstraDBVectorStore: {e}" @@ -623,7 +625,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): return "similarity" def _build_search_args(self): - query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None + query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None if query: args = { @@ -648,7 +650,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): def search_documents(self, vector_store=None) -> list[Data]: vector_store = vector_store or self.build_vector_store() - self.log(f"Search input: {self.search_input}") + self.log(f"Search input: {self.search_query}") self.log(f"Search type: {self.search_type}") self.log(f"Number of results: {self.number_of_results}") diff --git a/src/backend/base/langflow/components/vectorstores/astradb_graph.py b/src/backend/base/langflow/components/vectorstores/astradb_graph.py index 3e5b398d2..7f279a833 100644 --- a/src/backend/base/langflow/components/vectorstores/astradb_graph.py +++ b/src/backend/base/langflow/components/vectorstores/astradb_graph.py @@ -2,19 +2,16 @@ import os import orjson from astrapy.admin import parse_api_endpoint -from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers import docs_to_data from langflow.inputs import ( BoolInput, - DataInput, DictInput, DropdownInput, FloatInput, HandleInput, IntInput, - MultilineInput, SecretStrInput, StrInput, ) @@ -24,7 +21,6 @@ from langflow.schema import Data class AstraDBGraphVectorStoreComponent(LCVectorStoreComponent): display_name: str = "Astra DB Graph" description: str = "Implementation of Graph Vector Store using Astra DB" - documentation: str = "https://python.langchain.com/api_reference/astradb/graph_vectorstores/langchain_astradb.graph_vectorstores.AstraDBGraphVectorStore.html" name = "AstraDBGraph" icon: str = "AstraDB" @@ -56,15 +52,7 @@ class AstraDBGraphVectorStoreComponent(LCVectorStoreComponent): info="Metadata key used for incoming links.", advanced=True, ), - MultilineInput( - name="search_input", - display_name="Search Input", - ), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, StrInput( name="keyspace", display_name="Keyspace", @@ -129,14 +117,14 @@ class AstraDBGraphVectorStoreComponent(LCVectorStoreComponent): display_name="Metadata Indexing Include", info="Optional list of metadata fields to include in the indexing.", advanced=True, - is_list=True, + list=True, ), StrInput( name="metadata_indexing_exclude", display_name="Metadata Indexing Exclude", info="Optional list of metadata fields to exclude from the indexing.", advanced=True, - is_list=True, + list=True, ), StrInput( name="collection_indexing_policy", @@ -205,7 +193,7 @@ class AstraDBGraphVectorStoreComponent(LCVectorStoreComponent): raise ValueError(msg) from e try: - logger.debug(f"Initializing Graph Vector Store {self.collection_name}") + self.log(f"Initializing Graph Vector Store {self.collection_name}") vector_store = AstraDBGraphVectorStore( embedding=self.embedding_model, @@ -232,7 +220,7 @@ class AstraDBGraphVectorStoreComponent(LCVectorStoreComponent): msg = f"Error initializing AstraDBGraphVectorStore: {e}" raise ValueError(msg) from e - logger.debug(f"Vector Store initialized: {vector_store.astra_env.collection_name}") + self.log(f"Vector Store initialized: {vector_store.astra_env.collection_name}") self._add_documents_to_vector_store(vector_store) return vector_store @@ -247,14 +235,14 @@ class AstraDBGraphVectorStoreComponent(LCVectorStoreComponent): raise TypeError(msg) if documents: - logger.debug(f"Adding {len(documents)} documents to the Vector Store.") + self.log(f"Adding {len(documents)} documents to the Vector Store.") try: vector_store.add_documents(documents) except Exception as e: msg = f"Error adding documents to AstraDBGraphVectorStore: {e}" raise ValueError(msg) from e else: - logger.debug("No documents to add to the Vector Store.") + self.log("No documents to add to the Vector Store.") def _map_search_type(self) -> str: match self.search_type: @@ -287,21 +275,21 @@ class AstraDBGraphVectorStoreComponent(LCVectorStoreComponent): if not vector_store: vector_store = self.build_vector_store() - logger.debug("Searching for documents in AstraDBGraphVectorStore.") - logger.debug(f"Search input: {self.search_input}") - logger.debug(f"Search type: {self.search_type}") - logger.debug(f"Number of results: {self.number_of_results}") + self.log("Searching for documents in AstraDBGraphVectorStore.") + self.log(f"Search query: {self.search_query}") + self.log(f"Search type: {self.search_type}") + self.log(f"Number of results: {self.number_of_results}") - if self.search_input and isinstance(self.search_input, str) and self.search_input.strip(): + if self.search_query and isinstance(self.search_query, str) and self.search_query.strip(): try: search_type = self._map_search_type() search_args = self._build_search_args() - docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args) + docs = vector_store.search(query=self.search_query, search_type=search_type, **search_args) # Drop links from the metadata. At this point the links don't add any value for building the # context and haven't been restored to json which causes the conversion to fail. - logger.debug("Removing links from metadata.") + self.log("Removing links from metadata.") for doc in docs: if "links" in doc.metadata: doc.metadata.pop("links") @@ -310,15 +298,15 @@ class AstraDBGraphVectorStoreComponent(LCVectorStoreComponent): msg = f"Error performing search in AstraDBGraphVectorStore: {e}" raise ValueError(msg) from e - logger.debug(f"Retrieved documents: {len(docs)}") + self.log(f"Retrieved documents: {len(docs)}") data = docs_to_data(docs) - logger.debug(f"Converted documents to data: {len(data)}") + self.log(f"Converted documents to data: {len(data)}") self.status = data return data - logger.debug("No search input provided. Skipping search.") + self.log("No search input provided. Skipping search.") return [] def get_retriever_kwargs(self): diff --git a/src/backend/base/langflow/components/vectorstores/cassandra.py b/src/backend/base/langflow/components/vectorstores/cassandra.py index b2db432e9..1f13abdda 100644 --- a/src/backend/base/langflow/components/vectorstores/cassandra.py +++ b/src/backend/base/langflow/components/vectorstores/cassandra.py @@ -1,16 +1,13 @@ from langchain_community.vectorstores import Cassandra -from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data from langflow.inputs import BoolInput, DictInput, FloatInput from langflow.io import ( - DataInput, DropdownInput, HandleInput, IntInput, MessageTextInput, - MultilineInput, SecretStrInput, ) from langflow.schema import Data @@ -77,14 +74,9 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): display_name="Cluster arguments", info="Optional dictionary of additional keyword arguments for the Cassandra cluster.", advanced=True, - is_list=True, - ), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, + list=True, ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", @@ -114,7 +106,7 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): display_name="Search Metadata Filter", info="Optional dictionary of filters to apply to the search query.", advanced=True, - is_list=True, + list=True, ), MessageTextInput( name="body_search", @@ -184,7 +176,7 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): setup_mode = SetupMode.ASYNC if documents: - logger.debug(f"Adding {len(documents)} documents to the Vector Store.") + self.log(f"Adding {len(documents)} documents to the Vector Store.") table = Cassandra.from_documents( documents=documents, embedding=self.embedding, @@ -195,7 +187,7 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): body_index_options=body_index_options, ) else: - logger.debug("No documents to add to the Vector Store.") + self.log("No documents to add to the Vector Store.") table = Cassandra( embedding=self.embedding, table_name=self.table_name, @@ -216,16 +208,16 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): def search_documents(self) -> list[Data]: vector_store = self.build_vector_store() - logger.debug(f"Search input: {self.search_query}") - logger.debug(f"Search type: {self.search_type}") - logger.debug(f"Number of results: {self.number_of_results}") + self.log(f"Search input: {self.search_query}") + self.log(f"Search type: {self.search_type}") + self.log(f"Number of results: {self.number_of_results}") if self.search_query and isinstance(self.search_query, str) and self.search_query.strip(): try: search_type = self._map_search_type() search_args = self._build_search_args() - logger.debug(f"Search args: {search_args}") + self.log(f"Search args: {search_args}") docs = vector_store.search(query=self.search_query, search_type=search_type, **search_args) except KeyError as e: @@ -237,7 +229,7 @@ class CassandraVectorStoreComponent(LCVectorStoreComponent): raise ValueError(msg) from e raise - logger.debug(f"Retrieved documents: {len(docs)}") + self.log(f"Retrieved documents: {len(docs)}") data = docs_to_data(docs) self.status = data diff --git a/src/backend/base/langflow/components/vectorstores/cassandra_graph.py b/src/backend/base/langflow/components/vectorstores/cassandra_graph.py index 740ed003e..1216a90f0 100644 --- a/src/backend/base/langflow/components/vectorstores/cassandra_graph.py +++ b/src/backend/base/langflow/components/vectorstores/cassandra_graph.py @@ -1,18 +1,15 @@ from uuid import UUID from langchain_community.graph_vectorstores import CassandraGraphVectorStore -from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data from langflow.inputs import DictInput, FloatInput from langflow.io import ( - DataInput, DropdownInput, HandleInput, IntInput, MessageTextInput, - MultilineInput, SecretStrInput, ) from langflow.schema import Data @@ -21,7 +18,6 @@ from langflow.schema import Data class CassandraGraphVectorStoreComponent(LCVectorStoreComponent): display_name = "Cassandra Graph" description = "Cassandra Graph Vector Store" - documentation = "https://python.langchain.com/v0.2/api_reference/community/graph_vectorstores.html" name = "CassandraGraph" icon = "Cassandra" @@ -66,14 +62,9 @@ class CassandraGraphVectorStoreComponent(LCVectorStoreComponent): display_name="Cluster arguments", info="Optional dictionary of additional keyword arguments for the Cassandra cluster.", advanced=True, - is_list=True, - ), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, + list=True, ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", @@ -116,7 +107,7 @@ class CassandraGraphVectorStoreComponent(LCVectorStoreComponent): display_name="Search Metadata Filter", info="Optional dictionary of filters to apply to the search query.", advanced=True, - is_list=True, + list=True, ), ] @@ -164,7 +155,7 @@ class CassandraGraphVectorStoreComponent(LCVectorStoreComponent): setup_mode = SetupMode.OFF if self.setup_mode == "Off" else SetupMode.SYNC if documents: - logger.debug(f"Adding {len(documents)} documents to the Vector Store.") + self.log(f"Adding {len(documents)} documents to the Vector Store.") store = CassandraGraphVectorStore.from_documents( documents=documents, embedding=self.embedding, @@ -172,7 +163,7 @@ class CassandraGraphVectorStoreComponent(LCVectorStoreComponent): keyspace=self.keyspace, ) else: - logger.debug("No documents to add to the Vector Store.") + self.log("No documents to add to the Vector Store.") store = CassandraGraphVectorStore( embedding=self.embedding, node_table=self.table_name, @@ -195,16 +186,16 @@ class CassandraGraphVectorStoreComponent(LCVectorStoreComponent): def search_documents(self) -> list[Data]: vector_store = self.build_vector_store() - logger.debug(f"Search input: {self.search_query}") - logger.debug(f"Search type: {self.search_type}") - logger.debug(f"Number of results: {self.number_of_results}") + self.log(f"Search input: {self.search_query}") + self.log(f"Search type: {self.search_type}") + self.log(f"Number of results: {self.number_of_results}") if self.search_query and isinstance(self.search_query, str) and self.search_query.strip(): try: search_type = self._map_search_type() search_args = self._build_search_args() - logger.debug(f"Search args: {search_args}") + self.log(f"Search args: {search_args}") docs = vector_store.search(query=self.search_query, search_type=search_type, **search_args) except KeyError as e: @@ -216,7 +207,7 @@ class CassandraGraphVectorStoreComponent(LCVectorStoreComponent): raise ValueError(msg) from e raise - logger.debug(f"Retrieved documents: {len(docs)}") + self.log(f"Retrieved documents: {len(docs)}") data = docs_to_data(docs) self.status = data diff --git a/src/backend/base/langflow/components/vectorstores/chroma.py b/src/backend/base/langflow/components/vectorstores/chroma.py index 39d4c7070..c009443b0 100644 --- a/src/backend/base/langflow/components/vectorstores/chroma.py +++ b/src/backend/base/langflow/components/vectorstores/chroma.py @@ -2,11 +2,10 @@ from copy import deepcopy from chromadb.config import Settings from langchain_chroma import Chroma -from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.base.vectorstores.utils import chroma_collection_to_data -from langflow.io import BoolInput, DataInput, DropdownInput, HandleInput, IntInput, MultilineInput, StrInput +from langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, StrInput from langflow.schema import Data @@ -15,7 +14,6 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): display_name: str = "Chroma DB" description: str = "Chroma Vector Store with search capabilities" - documentation = "https://python.langchain.com/docs/integrations/vectorstores/chroma" name = "Chroma" icon = "Chroma" @@ -29,15 +27,7 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): name="persist_directory", display_name="Persist Directory", ), - MultilineInput( - name="search_query", - display_name="Search Query", - ), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), StrInput( name="chroma_server_cors_allow_origins", @@ -153,7 +143,7 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): raise TypeError(msg) if documents and self.embedding is not None: - logger.debug(f"Adding {len(documents)} documents to the Vector Store.") + self.log(f"Adding {len(documents)} documents to the Vector Store.") vector_store.add_documents(documents) else: - logger.debug("No documents to add to the Vector Store.") + self.log("No documents to add to the Vector Store.") diff --git a/src/backend/base/langflow/components/vectorstores/clickhouse.py b/src/backend/base/langflow/components/vectorstores/clickhouse.py index cfd81ff77..4532f241c 100644 --- a/src/backend/base/langflow/components/vectorstores/clickhouse.py +++ b/src/backend/base/langflow/components/vectorstores/clickhouse.py @@ -4,12 +4,10 @@ from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cache from langflow.helpers.data import docs_to_data from langflow.inputs import BoolInput, FloatInput from langflow.io import ( - DataInput, DictInput, DropdownInput, HandleInput, IntInput, - MultilineInput, SecretStrInput, StrInput, ) @@ -19,7 +17,6 @@ from langflow.schema import Data class ClickhouseVectorStoreComponent(LCVectorStoreComponent): display_name = "Clickhouse" description = "Clickhouse Vector Store with search capabilities" - documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/clickhouse/" name = "Clickhouse" icon = "Clickhouse" @@ -54,8 +51,7 @@ class ClickhouseVectorStoreComponent(LCVectorStoreComponent): ), StrInput(name="index_param", display_name="Param of the index", value="'L2Distance',100", advanced=True), DictInput(name="index_query_params", display_name="index query params", advanced=True), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput(name="ingest_data", display_name="Ingest Data", is_list=True), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/components/vectorstores/couchbase.py b/src/backend/base/langflow/components/vectorstores/couchbase.py index 622d582a7..fce34743f 100644 --- a/src/backend/base/langflow/components/vectorstores/couchbase.py +++ b/src/backend/base/langflow/components/vectorstores/couchbase.py @@ -4,14 +4,13 @@ from langchain_community.vectorstores import CouchbaseVectorStore from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import DataInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput +from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput from langflow.schema import Data class CouchbaseVectorStoreComponent(LCVectorStoreComponent): display_name = "Couchbase" description = "Couchbase Vector Store with search capabilities" - documentation = "https://python.langchain.com/v0.1/docs/integrations/document_loaders/couchbase/" name = "Couchbase" icon = "Couchbase" @@ -25,12 +24,7 @@ class CouchbaseVectorStoreComponent(LCVectorStoreComponent): StrInput(name="scope_name", display_name="Scope Name", required=True), StrInput(name="collection_name", display_name="Collection Name", required=True), StrInput(name="index_name", display_name="Index Name", required=True), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/components/vectorstores/elasticsearch.py b/src/backend/base/langflow/components/vectorstores/elasticsearch.py index 1558a412a..b4627c117 100644 --- a/src/backend/base/langflow/components/vectorstores/elasticsearch.py +++ b/src/backend/base/langflow/components/vectorstores/elasticsearch.py @@ -2,16 +2,13 @@ from typing import Any from langchain.schema import Document from langchain_elasticsearch import ElasticsearchStore -from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.io import ( - DataInput, DropdownInput, FloatInput, HandleInput, IntInput, - MultilineInput, SecretStrInput, StrInput, ) @@ -23,7 +20,6 @@ class ElasticsearchVectorStoreComponent(LCVectorStoreComponent): display_name: str = "Elasticsearch" description: str = "Elasticsearch Vector Store with with advanced, customizable search capabilities." - documentation = "https://python.langchain.com/docs/integrations/vectorstores/elasticsearch" name = "Elasticsearch" icon = "ElasticsearchStore" @@ -47,11 +43,7 @@ class ElasticsearchVectorStoreComponent(LCVectorStoreComponent): value="langflow", info="The index name where the vectors will be stored in Elasticsearch cluster.", ), - MultilineInput( - name="search_input", - display_name="Search Input", - info="Enter a search query. Leave empty to retrieve all documents.", - ), + *LCVectorStoreComponent.inputs, StrInput( name="username", display_name="Username", @@ -72,11 +64,6 @@ class ElasticsearchVectorStoreComponent(LCVectorStoreComponent): "Required for both local and Elastic Cloud setups unless API keys are used." ), ), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), HandleInput( name="embedding", display_name="Embedding", @@ -155,7 +142,7 @@ class ElasticsearchVectorStoreComponent(LCVectorStoreComponent): documents.append(data.to_lc_document()) else: error_message = "Vector Store Inputs must be Data objects." - logger.error(error_message) + self.log(error_message) raise TypeError(error_message) return documents @@ -163,10 +150,10 @@ class ElasticsearchVectorStoreComponent(LCVectorStoreComponent): """Adds documents to the Vector Store.""" documents = self._prepare_documents() if documents and self.embedding: - logger.debug(f"Adding {len(documents)} documents to the Vector Store.") + self.log(f"Adding {len(documents)} documents to the Vector Store.") vector_store.add_documents(documents) else: - logger.debug("No documents to add to the Vector Store.") + self.log("No documents to add to the Vector Store.") def search(self, query: str | None = None) -> list[dict[str, Any]]: """Search for similar documents in the vector store or retrieve all documents if no query is provided.""" @@ -180,7 +167,7 @@ class ElasticsearchVectorStoreComponent(LCVectorStoreComponent): search_type = self.search_type.lower() if search_type not in {"similarity", "mmr"}: msg = f"Invalid search type: {self.search_type}" - logger.error(msg) + self.log(msg) raise ValueError(msg) try: if search_type == "similarity": @@ -192,7 +179,7 @@ class ElasticsearchVectorStoreComponent(LCVectorStoreComponent): "Error occurred while querying the Elasticsearch VectorStore," " there is no Data into the VectorStore." ) - logger.exception(msg) + self.log(msg) raise ValueError(msg) from e return [ {"page_content": doc.page_content, "metadata": doc.metadata, "score": score} for doc, score in results @@ -228,7 +215,7 @@ class ElasticsearchVectorStoreComponent(LCVectorStoreComponent): If no search input is provided, retrieve all documents. """ - results = self.search(self.search_input) + results = self.search(self.search_query) retrieved_data = [ Data( text=result["page_content"], diff --git a/src/backend/base/langflow/components/vectorstores/faiss.py b/src/backend/base/langflow/components/vectorstores/faiss.py index 871ffb7ef..a002121aa 100644 --- a/src/backend/base/langflow/components/vectorstores/faiss.py +++ b/src/backend/base/langflow/components/vectorstores/faiss.py @@ -1,9 +1,8 @@ from langchain_community.vectorstores import FAISS -from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import BoolInput, DataInput, HandleInput, IntInput, MultilineInput, StrInput +from langflow.io import BoolInput, HandleInput, IntInput, StrInput from langflow.schema import Data @@ -12,7 +11,6 @@ class FaissVectorStoreComponent(LCVectorStoreComponent): display_name: str = "FAISS" description: str = "FAISS Vector Store with search capabilities" - documentation = "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/faiss" name = "FAISS" icon = "FAISS" @@ -27,15 +25,7 @@ class FaissVectorStoreComponent(LCVectorStoreComponent): display_name="Persist Directory", info="Path to save the FAISS index. It will be relative to where Langflow is running.", ), - MultilineInput( - name="search_query", - display_name="Search Query", - ), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, BoolInput( name="allow_dangerous_deserialization", display_name="Allow Dangerous Deserialization", @@ -93,8 +83,8 @@ class FaissVectorStoreComponent(LCVectorStoreComponent): msg = "Failed to load the FAISS index." raise ValueError(msg) - logger.debug(f"Search input: {self.search_query}") - logger.debug(f"Number of results: {self.number_of_results}") + self.log(f"Search input: {self.search_query}") + self.log(f"Number of results: {self.number_of_results}") if self.search_query and isinstance(self.search_query, str) and self.search_query.strip(): docs = vector_store.similarity_search( @@ -102,11 +92,11 @@ class FaissVectorStoreComponent(LCVectorStoreComponent): k=self.number_of_results, ) - logger.debug(f"Retrieved documents: {len(docs)}") + self.log(f"Retrieved documents: {len(docs)}") data = docs_to_data(docs) - logger.debug(f"Converted documents to data: {len(data)}") - logger.debug(data) + self.log(f"Converted documents to data: {len(data)}") + self.log(data) return data # Return the search results data - logger.debug("No search input provided. Skipping search.") + self.log("No search input provided. Skipping search.") return [] diff --git a/src/backend/base/langflow/components/vectorstores/hcd.py b/src/backend/base/langflow/components/vectorstores/hcd.py index 488a586d9..8b12b9424 100644 --- a/src/backend/base/langflow/components/vectorstores/hcd.py +++ b/src/backend/base/langflow/components/vectorstores/hcd.py @@ -1,11 +1,8 @@ -from loguru import logger - from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers import docs_to_data from langflow.inputs import DictInput, FloatInput from langflow.io import ( BoolInput, - DataInput, DropdownInput, HandleInput, IntInput, @@ -19,7 +16,6 @@ from langflow.schema import Data class HCDVectorStoreComponent(LCVectorStoreComponent): display_name: str = "Hyper-Converged Database" description: str = "Implementation of Vector Store using Hyper-Converged Database (HCD) with search capabilities" - documentation: str = "https://python.langchain.com/docs/integrations/vectorstores/astradb" name = "HCD" icon: str = "HCD" @@ -51,15 +47,7 @@ class HCDVectorStoreComponent(LCVectorStoreComponent): value="HCD_API_ENDPOINT", required=True, ), - MultilineInput( - name="search_input", - display_name="Search Input", - ), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, StrInput( name="namespace", display_name="Namespace", @@ -263,14 +251,14 @@ class HCDVectorStoreComponent(LCVectorStoreComponent): raise TypeError(msg) if documents: - logger.debug(f"Adding {len(documents)} documents to the Vector Store.") + self.log(f"Adding {len(documents)} documents to the Vector Store.") try: vector_store.add_documents(documents) except Exception as e: msg = f"Error adding documents to AstraDBVectorStore: {e}" raise ValueError(msg) from e else: - logger.debug("No documents to add to the Vector Store.") + self.log("No documents to add to the Vector Store.") def _map_search_type(self) -> str: if self.search_type == "Similarity with score threshold": @@ -294,27 +282,27 @@ class HCDVectorStoreComponent(LCVectorStoreComponent): def search_documents(self) -> list[Data]: vector_store = self.build_vector_store() - logger.debug(f"Search input: {self.search_input}") - logger.debug(f"Search type: {self.search_type}") - logger.debug(f"Number of results: {self.number_of_results}") + self.log(f"Search query: {self.search_query}") + self.log(f"Search type: {self.search_type}") + self.log(f"Number of results: {self.number_of_results}") - if self.search_input and isinstance(self.search_input, str) and self.search_input.strip(): + if self.search_query and isinstance(self.search_query, str) and self.search_query.strip(): try: search_type = self._map_search_type() search_args = self._build_search_args() - docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args) + docs = vector_store.search(query=self.search_query, search_type=search_type, **search_args) except Exception as e: msg = f"Error performing search in AstraDBVectorStore: {e}" raise ValueError(msg) from e - logger.debug(f"Retrieved documents: {len(docs)}") + self.log(f"Retrieved documents: {len(docs)}") data = docs_to_data(docs) - logger.debug(f"Converted documents to data: {len(data)}") + self.log(f"Converted documents to data: {len(data)}") self.status = data return data - logger.debug("No search input provided. Skipping search.") + self.log("No search input provided. Skipping search.") return [] def get_retriever_kwargs(self): diff --git a/src/backend/base/langflow/components/vectorstores/milvus.py b/src/backend/base/langflow/components/vectorstores/milvus.py index 994604f2f..4300ddf9a 100644 --- a/src/backend/base/langflow/components/vectorstores/milvus.py +++ b/src/backend/base/langflow/components/vectorstores/milvus.py @@ -2,13 +2,11 @@ from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cache from langflow.helpers.data import docs_to_data from langflow.io import ( BoolInput, - DataInput, DictInput, DropdownInput, FloatInput, HandleInput, IntInput, - MultilineInput, SecretStrInput, StrInput, ) @@ -20,7 +18,6 @@ class MilvusVectorStoreComponent(LCVectorStoreComponent): display_name: str = "Milvus" description: str = "Milvus vector store with search capabilities" - documentation = "https://python.langchain.com/docs/integrations/vectorstores/milvus" name = "Milvus" icon = "Milvus" @@ -53,12 +50,7 @@ class MilvusVectorStoreComponent(LCVectorStoreComponent): DictInput(name="search_params", display_name="Search Parameters", advanced=True), BoolInput(name="drop_old", display_name="Drop Old Collection", value=False, advanced=True), FloatInput(name="timeout", display_name="Timeout", advanced=True), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/components/vectorstores/mongodb_atlas.py b/src/backend/base/langflow/components/vectorstores/mongodb_atlas.py index fde29063e..f0a9d236d 100644 --- a/src/backend/base/langflow/components/vectorstores/mongodb_atlas.py +++ b/src/backend/base/langflow/components/vectorstores/mongodb_atlas.py @@ -5,14 +5,13 @@ from langchain_community.vectorstores import MongoDBAtlasVectorSearch from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import BoolInput, DataInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput +from langflow.io import BoolInput, HandleInput, IntInput, SecretStrInput, StrInput from langflow.schema import Data class MongoVectorStoreComponent(LCVectorStoreComponent): display_name = "MongoDB Atlas" description = "MongoDB Atlas Vector Store with search capabilities" - documentation = "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/mongodb_atlas" name = "MongoDBAtlasVector" icon = "MongoDB" @@ -30,12 +29,7 @@ class MongoVectorStoreComponent(LCVectorStoreComponent): StrInput(name="db_name", display_name="Database Name", required=True), StrInput(name="collection_name", display_name="Collection Name", required=True), StrInput(name="index_name", display_name="Index Name", required=True), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/components/vectorstores/opensearch.py b/src/backend/base/langflow/components/vectorstores/opensearch.py index 856baaaa8..1de82bf44 100644 --- a/src/backend/base/langflow/components/vectorstores/opensearch.py +++ b/src/backend/base/langflow/components/vectorstores/opensearch.py @@ -2,12 +2,10 @@ import json from typing import Any from langchain_community.vectorstores import OpenSearchVectorSearch -from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.io import ( BoolInput, - DataInput, DropdownInput, FloatInput, HandleInput, @@ -24,7 +22,6 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): display_name: str = "OpenSearch" description: str = "OpenSearch Vector Store with advanced, customizable search capabilities." - documentation = "https://python.langchain.com/docs/integrations/vectorstores/opensearch" name = "OpenSearch" icon = "OpenSearch" @@ -41,20 +38,7 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): value="langflow", info="The index name where the vectors will be stored in OpenSearch cluster.", ), - MultilineInput( - name="search_input", - display_name="Search Input", - info=( - "Enter a search query. Leave empty to retrieve all documents. " - "If you need a more advanced search consider using Hybrid Search Query instead." - ), - value="", - ), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), DropdownInput( name="search_type", @@ -120,7 +104,7 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): from langchain_community.vectorstores import OpenSearchVectorSearch except ImportError as e: error_message = f"Failed to import required modules: {e}" - logger.exception(error_message) + self.log(error_message) raise ImportError(error_message) from e try: @@ -136,7 +120,7 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): ) except Exception as e: error_message = f"Failed to create OpenSearchVectorSearch instance: {e}" - logger.exception(error_message) + self.log(error_message) raise RuntimeError(error_message) from e if self.ingest_data: @@ -152,19 +136,19 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): documents.append(_input.to_lc_document()) else: error_message = f"Expected Data object, got {type(_input)}" - logger.error(error_message) + self.log(error_message) raise TypeError(error_message) if documents and self.embedding is not None: - logger.debug(f"Adding {len(documents)} documents to the Vector Store.") + self.log(f"Adding {len(documents)} documents to the Vector Store.") try: vector_store.add_documents(documents) except Exception as e: error_message = f"Error adding documents to Vector Store: {e}" - logger.exception(error_message) + self.log(error_message) raise RuntimeError(error_message) from e else: - logger.debug("No documents to add to the Vector Store.") + self.log("No documents to add to the Vector Store.") def search(self, query: str | None = None) -> list[dict[str, Any]]: """Search for similar documents in the vector store or retrieve all documents if no query is provided.""" @@ -178,7 +162,7 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): hybrid_query = json.loads(self.hybrid_search_query) except json.JSONDecodeError as e: error_message = f"Invalid hybrid search query JSON: {e}" - logger.exception(error_message) + self.log(error_message) raise ValueError(error_message) from e results = vector_store.client.search(index=self.index_name, body=hybrid_query) @@ -223,11 +207,11 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): except Exception as e: error_message = f"Error during search: {e}" - logger.exception(error_message) + self.log(error_message) raise RuntimeError(error_message) from e error_message = f"Error during search. Invalid search type: {self.search_type}" - logger.error(error_message) + self.log(error_message) raise ValueError(error_message) def search_documents(self) -> list[Data]: @@ -236,7 +220,7 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): If no search input is provided, retrieve all documents. """ try: - query = self.search_input.strip() if self.search_input else None + query = self.search_query.strip() if self.search_query else None results = self.search(query) retrieved_data = [ Data( @@ -247,7 +231,7 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent): ] except Exception as e: error_message = f"Error during document search: {e}" - logger.exception(error_message) + self.log(error_message) raise RuntimeError(error_message) from e self.status = retrieved_data diff --git a/src/backend/base/langflow/components/vectorstores/pgvector.py b/src/backend/base/langflow/components/vectorstores/pgvector.py index 4ff171388..bef0b995e 100644 --- a/src/backend/base/langflow/components/vectorstores/pgvector.py +++ b/src/backend/base/langflow/components/vectorstores/pgvector.py @@ -2,7 +2,7 @@ from langchain_community.vectorstores import PGVector from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import DataInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput +from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput from langflow.schema import Data from langflow.utils.connection_string_parser import transform_connection_string @@ -10,19 +10,13 @@ from langflow.utils.connection_string_parser import transform_connection_string class PGVectorStoreComponent(LCVectorStoreComponent): display_name = "PGVector" description = "PGVector Vector Store with search capabilities" - documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/pgvector/" name = "pgvector" icon = "cpu" inputs = [ SecretStrInput(name="pg_server_url", display_name="PostgreSQL Server Connection String", required=True), StrInput(name="collection_name", display_name="Table", required=True), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingestion Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/components/vectorstores/pinecone.py b/src/backend/base/langflow/components/vectorstores/pinecone.py index 17eafefe5..8ab592c32 100644 --- a/src/backend/base/langflow/components/vectorstores/pinecone.py +++ b/src/backend/base/langflow/components/vectorstores/pinecone.py @@ -3,14 +3,13 @@ from langchain_pinecone import Pinecone from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import DataInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput +from langflow.io import DropdownInput, HandleInput, IntInput, SecretStrInput, StrInput from langflow.schema import Data class PineconeVectorStoreComponent(LCVectorStoreComponent): display_name = "Pinecone" description = "Pinecone Vector Store with search capabilities" - documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/" name = "Pinecone" icon = "Pinecone" inputs = [ @@ -31,12 +30,7 @@ class PineconeVectorStoreComponent(LCVectorStoreComponent): value="text", advanced=True, ), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/components/vectorstores/qdrant.py b/src/backend/base/langflow/components/vectorstores/qdrant.py index bc025f704..7a37b243c 100644 --- a/src/backend/base/langflow/components/vectorstores/qdrant.py +++ b/src/backend/base/langflow/components/vectorstores/qdrant.py @@ -4,11 +4,9 @@ from langchain_community.vectorstores import Qdrant from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data from langflow.io import ( - DataInput, DropdownInput, HandleInput, IntInput, - MultilineInput, SecretStrInput, StrInput, ) @@ -18,7 +16,6 @@ from langflow.schema import Data class QdrantVectorStoreComponent(LCVectorStoreComponent): display_name = "Qdrant" description = "Qdrant Vector Store with search capabilities" - documentation = "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/qdrant" icon = "Qdrant" inputs = [ @@ -40,12 +37,7 @@ class QdrantVectorStoreComponent(LCVectorStoreComponent): ), StrInput(name="content_payload_key", display_name="Content Payload Key", value="page_content", advanced=True), StrInput(name="metadata_payload_key", display_name="Metadata Payload Key", value="metadata", advanced=True), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/components/vectorstores/redis.py b/src/backend/base/langflow/components/vectorstores/redis.py index d4d5ce07a..f628e75b8 100644 --- a/src/backend/base/langflow/components/vectorstores/redis.py +++ b/src/backend/base/langflow/components/vectorstores/redis.py @@ -5,7 +5,7 @@ from langchain_community.vectorstores.redis import Redis from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import DataInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput +from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput from langflow.schema import Data @@ -14,7 +14,6 @@ class RedisVectorStoreComponent(LCVectorStoreComponent): display_name: str = "Redis" description: str = "Implementation of Vector Store using Redis" - documentation = "https://python.langchain.com/docs/integrations/vectorstores/redis" name = "Redis" icon = "Redis" @@ -29,12 +28,7 @@ class RedisVectorStoreComponent(LCVectorStoreComponent): name="schema", display_name="Schema", ), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, IntInput( name="number_of_results", display_name="Number of Results", diff --git a/src/backend/base/langflow/components/vectorstores/supabase.py b/src/backend/base/langflow/components/vectorstores/supabase.py index b5da646a6..2dc8d8c8a 100644 --- a/src/backend/base/langflow/components/vectorstores/supabase.py +++ b/src/backend/base/langflow/components/vectorstores/supabase.py @@ -3,14 +3,13 @@ from supabase.client import Client, create_client from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import DataInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput +from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput from langflow.schema import Data class SupabaseVectorStoreComponent(LCVectorStoreComponent): display_name = "Supabase" description = "Supabase Vector Store with search capabilities" - documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/supabase/" name = "SupabaseVectorStore" icon = "Supabase" @@ -19,12 +18,7 @@ class SupabaseVectorStoreComponent(LCVectorStoreComponent): SecretStrInput(name="supabase_service_key", display_name="Supabase Service Key", required=True), StrInput(name="table_name", display_name="Table Name", advanced=True), StrInput(name="query_name", display_name="Query Name"), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/components/vectorstores/upstash.py b/src/backend/base/langflow/components/vectorstores/upstash.py index 49001a808..b1f94f1f7 100644 --- a/src/backend/base/langflow/components/vectorstores/upstash.py +++ b/src/backend/base/langflow/components/vectorstores/upstash.py @@ -3,7 +3,6 @@ from langchain_community.vectorstores import UpstashVectorStore from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data from langflow.io import ( - DataInput, HandleInput, IntInput, MultilineInput, @@ -16,7 +15,6 @@ from langflow.schema import Data class UpstashVectorStoreComponent(LCVectorStoreComponent): display_name = "Upstash" description = "Upstash Vector Store with search capabilities" - documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/upstash/" name = "Upstash" icon = "Upstash" @@ -45,17 +43,12 @@ class UpstashVectorStoreComponent(LCVectorStoreComponent): display_name="Namespace", info="Leave empty for default namespace.", ), - MultilineInput(name="search_query", display_name="Search Query"), + *LCVectorStoreComponent.inputs, MultilineInput( name="metadata_filter", display_name="Metadata Filter", info="Filters documents by metadata. Look at the documentation for more information.", ), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), HandleInput( name="embedding", display_name="Embedding", diff --git a/src/backend/base/langflow/components/vectorstores/vectara.py b/src/backend/base/langflow/components/vectorstores/vectara.py index 697bd4471..82f146aef 100644 --- a/src/backend/base/langflow/components/vectorstores/vectara.py +++ b/src/backend/base/langflow/components/vectorstores/vectara.py @@ -1,11 +1,10 @@ from typing import TYPE_CHECKING from langchain_community.vectorstores import Vectara -from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import HandleInput, IntInput, MessageTextInput, SecretStrInput, StrInput +from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput from langflow.schema import Data if TYPE_CHECKING: @@ -17,7 +16,6 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent): display_name: str = "Vectara" description: str = "Vectara Vector Store with search capabilities" - documentation = "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/vectara" name = "Vectara" icon = "Vectara" @@ -30,16 +28,7 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent): display_name="Embedding", input_types=["Embeddings"], ), - HandleInput( - name="ingest_data", - display_name="Ingest Data", - input_types=["Document", "Data"], - is_list=True, - ), - MessageTextInput( - name="search_query", - display_name="Search Query", - ), + *LCVectorStoreComponent.inputs, IntInput( name="number_of_results", display_name="Number of Results", @@ -81,11 +70,11 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent): documents.append(_input) if documents: - logger.debug(f"Adding {len(documents)} documents to Vectara.") + self.log(f"Adding {len(documents)} documents to Vectara.") vector_store.add_documents(documents) self.status = f"Added {len(documents)} documents to Vectara" else: - logger.debug("No documents to add to Vectara.") + self.log("No documents to add to Vectara.") self.status = "No valid documents to add to Vectara" def search_documents(self) -> list[Data]: diff --git a/src/backend/base/langflow/components/vectorstores/vectara_rag.py b/src/backend/base/langflow/components/vectorstores/vectara_rag.py index 3ec48e98b..fda11a209 100644 --- a/src/backend/base/langflow/components/vectorstores/vectara_rag.py +++ b/src/backend/base/langflow/components/vectorstores/vectara_rag.py @@ -58,7 +58,12 @@ class VectaraRagComponent(Component): StrInput(name="vectara_customer_id", display_name="Vectara Customer ID", required=True), StrInput(name="vectara_corpus_id", display_name="Vectara Corpus ID", required=True), SecretStrInput(name="vectara_api_key", display_name="Vectara API Key", required=True), - MessageTextInput(name="search_query", display_name="Search Query", info="The query to receive an answer on."), + MessageTextInput( + name="search_query", + display_name="Search Query", + info="The query to receive an answer on.", + tool_mode=True, + ), FloatInput( name="lexical_interpolation", display_name="Hybrid Search Factor", diff --git a/src/backend/base/langflow/components/vectorstores/vectara_self_query.py b/src/backend/base/langflow/components/vectorstores/vectara_self_query.py index bff0514d3..62d2cfcf7 100644 --- a/src/backend/base/langflow/components/vectorstores/vectara_self_query.py +++ b/src/backend/base/langflow/components/vectorstores/vectara_self_query.py @@ -15,7 +15,6 @@ class VectaraSelfQueryRetriverComponent(CustomComponent): display_name: str = "Vectara Self Query Retriever for Vectara Vector Store" description: str = "Implementation of Vectara Self Query Retriever" - documentation = "https://python.langchain.com/docs/integrations/retrievers/self_query/vectara_self_query" name = "VectaraSelfQueryRetriver" icon = "Vectara" legacy = True diff --git a/src/backend/base/langflow/components/vectorstores/weaviate.py b/src/backend/base/langflow/components/vectorstores/weaviate.py index ef01bc444..feaabdbcb 100644 --- a/src/backend/base/langflow/components/vectorstores/weaviate.py +++ b/src/backend/base/langflow/components/vectorstores/weaviate.py @@ -3,14 +3,13 @@ from langchain_community.vectorstores import Weaviate from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import BoolInput, DataInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput +from langflow.io import BoolInput, HandleInput, IntInput, SecretStrInput, StrInput from langflow.schema import Data class WeaviateVectorStoreComponent(LCVectorStoreComponent): display_name = "Weaviate" description = "Weaviate Vector Store with search capabilities" - documentation = "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/weaviate" name = "Weaviate" icon = "Weaviate" @@ -24,12 +23,7 @@ class WeaviateVectorStoreComponent(LCVectorStoreComponent): info="Requires capitalized index name.", ), StrInput(name="text_key", display_name="Text Key", value="text", advanced=True), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( - name="ingest_data", - display_name="Ingest Data", - is_list=True, - ), + *LCVectorStoreComponent.inputs, HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 414543816..25709d1e5 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "ParseData", - "id": "ParseData-z1UYg", + "id": "ParseData-1lOsZ", "name": "text", "output_types": [ "Message" @@ -15,7 +15,7 @@ }, "targetHandle": { "fieldName": "context", - "id": "Prompt-EkVe9", + "id": "Prompt-o2SL4", "inputTypes": [ "Message", "Text" @@ -23,11 +23,11 @@ "type": "str" } }, - "id": "reactflow__edge-ParseData-z1UYg{œdataTypeœ:œParseDataœ,œidœ:œParseData-z1UYgœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-EkVe9{œfieldNameœ:œcontextœ,œidœ:œPrompt-EkVe9œ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", - "source": "ParseData-z1UYg", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-z1UYgœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-EkVe9", - "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-EkVe9œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-ParseData-1lOsZ{œdataTypeœ:œParseDataœ,œidœ:œParseData-1lOsZœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-o2SL4{œfieldNameœ:œcontextœ,œidœ:œPrompt-o2SL4œ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "source": "ParseData-1lOsZ", + "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-1lOsZœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-o2SL4", + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-o2SL4œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -35,7 +35,7 @@ "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-EkVe9", + "id": "Prompt-o2SL4", "name": "prompt", "output_types": [ "Message" @@ -43,18 +43,18 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "OpenAIModel-Hafw2", + "id": "OpenAIModel-yjcwf", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-Prompt-EkVe9{œdataTypeœ:œPromptœ,œidœ:œPrompt-EkVe9œ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-Hafw2{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-Hafw2œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "Prompt-EkVe9", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-EkVe9œ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "OpenAIModel-Hafw2", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-Hafw2œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-Prompt-o2SL4{œdataTypeœ:œPromptœ,œidœ:œPrompt-o2SL4œ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-yjcwf{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-yjcwfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "Prompt-o2SL4", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-o2SL4œ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "OpenAIModel-yjcwf", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-yjcwfœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -62,7 +62,7 @@ "data": { "sourceHandle": { "dataType": "OpenAIModel", - "id": "OpenAIModel-Hafw2", + "id": "OpenAIModel-yjcwf", "name": "text_output", "output_types": [ "Message" @@ -70,18 +70,18 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "ChatOutput-L7Gy2", + "id": "ChatOutput-p7okC", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-OpenAIModel-Hafw2{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-Hafw2œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-L7Gy2{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-L7Gy2œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "OpenAIModel-Hafw2", - "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-Hafw2œ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", - "target": "ChatOutput-L7Gy2", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-L7Gy2œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-OpenAIModel-yjcwf{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-yjcwfœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-p7okC{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-p7okCœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "OpenAIModel-yjcwf", + "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-yjcwfœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", + "target": "ChatOutput-p7okC", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-p7okCœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -89,7 +89,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-nFs38", + "id": "ChatInput-uBsVA", "name": "message", "output_types": [ "Message" @@ -97,7 +97,7 @@ }, "targetHandle": { "fieldName": "question", - "id": "Prompt-EkVe9", + "id": "Prompt-o2SL4", "inputTypes": [ "Message", "Text" @@ -105,11 +105,11 @@ "type": "str" } }, - "id": "reactflow__edge-ChatInput-nFs38{œdataTypeœ:œChatInputœ,œidœ:œChatInput-nFs38œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-EkVe9{œfieldNameœ:œquestionœ,œidœ:œPrompt-EkVe9œ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", - "source": "ChatInput-nFs38", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-nFs38œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-EkVe9", - "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-EkVe9œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-ChatInput-uBsVA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-uBsVAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-o2SL4{œfieldNameœ:œquestionœ,œidœ:œPrompt-o2SL4œ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "source": "ChatInput-uBsVA", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-uBsVAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-o2SL4", + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-o2SL4œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -117,7 +117,7 @@ "data": { "sourceHandle": { "dataType": "File", - "id": "File-BlhWT", + "id": "File-jTMwG", "name": "data", "output_types": [ "Data" @@ -125,26 +125,24 @@ }, "targetHandle": { "fieldName": "data_inputs", - "id": "SplitText-CmZlY", + "id": "SplitText-bByhd", "inputTypes": [ "Data" ], "type": "other" } }, - "id": "reactflow__edge-File-BlhWT{œdataTypeœ:œFileœ,œidœ:œFile-BlhWTœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-CmZlY{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-CmZlYœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "File-BlhWT", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-BlhWTœ, œnameœ: œdataœ, œoutput_typesœ: [œDataœ]}", - "target": "SplitText-CmZlY", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-CmZlYœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-File-jTMwG{œdataTypeœ:œFileœ,œidœ:œFile-jTMwGœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-bByhd{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-bByhdœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "File-jTMwG", + "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-jTMwGœ, œnameœ: œdataœ, œoutput_typesœ: [œDataœ]}", + "target": "SplitText-bByhd", + "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-bByhdœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" }, { - "animated": false, - "className": "", "data": { "sourceHandle": { "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-QPlR3", + "id": "OpenAIEmbeddings-b229k", "name": "embeddings", "output_types": [ "Embeddings" @@ -152,80 +150,24 @@ }, "targetHandle": { "fieldName": "embedding_model", - "id": "AstraDB-KrRFj", + "id": "AstraDB-0959q", "inputTypes": [ "Embeddings" ], "type": "other" } }, - "id": "reactflow__edge-OpenAIEmbeddings-QPlR3{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-QPlR3œ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-KrRFj{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-KrRFjœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", - "source": "OpenAIEmbeddings-QPlR3", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-QPlR3œ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-KrRFj", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-KrRFjœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-OpenAIEmbeddings-b229k{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-b229kœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-0959q{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-0959qœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "source": "OpenAIEmbeddings-b229k", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-b229kœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-0959q", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-0959qœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" }, { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-Jp4uj", - "name": "embeddings", - "output_types": [ - "Embeddings" - ] - }, - "targetHandle": { - "fieldName": "embedding_model", - "id": "AstraDB-ONDn2", - "inputTypes": [ - "Embeddings" - ], - "type": "other" - } - }, - "id": "reactflow__edge-OpenAIEmbeddings-Jp4uj{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-Jp4ujœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-ONDn2{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-ONDn2œ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", - "source": "OpenAIEmbeddings-Jp4uj", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-Jp4ujœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-ONDn2", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-ONDn2œ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "AstraDB", - "id": "AstraDB-KrRFj", - "name": "search_results", - "output_types": [ - "Data" - ] - }, - "targetHandle": { - "fieldName": "data", - "id": "ParseData-z1UYg", - "inputTypes": [ - "Data" - ], - "type": "other" - } - }, - "id": "reactflow__edge-AstraDB-KrRFj{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-KrRFjœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParseData-z1UYg{œfieldNameœ:œdataœ,œidœ:œParseData-z1UYgœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "AstraDB-KrRFj", - "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-KrRFjœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-z1UYg", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-z1UYgœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" - }, - { - "animated": false, - "className": "", "data": { "sourceHandle": { "dataType": "SplitText", - "id": "SplitText-CmZlY", + "id": "SplitText-bByhd", "name": "chunks", "output_types": [ "Data" @@ -233,43 +175,93 @@ }, "targetHandle": { "fieldName": "ingest_data", - "id": "AstraDB-ONDn2", + "id": "AstraDB-0959q", "inputTypes": [ "Data" ], "type": "other" } }, - "id": "reactflow__edge-SplitText-CmZlY{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-CmZlYœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-ONDn2{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-ONDn2œ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "SplitText-CmZlY", - "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-CmZlYœ, œnameœ: œchunksœ, œoutput_typesœ: [œDataœ]}", - "target": "AstraDB-ONDn2", - "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-ONDn2œ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-SplitText-bByhd{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-bByhdœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-0959q{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-0959qœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "SplitText-bByhd", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-bByhdœ, œnameœ: œchunksœ, œoutput_typesœ: [œDataœ]}", + "target": "AstraDB-0959q", + "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-0959qœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + }, + { + "data": { + "sourceHandle": { + "dataType": "OpenAIEmbeddings", + "id": "OpenAIEmbeddings-qVkNT", + "name": "embeddings", + "output_types": [ + "Embeddings" + ] + }, + "targetHandle": { + "fieldName": "embedding_model", + "id": "AstraDB-3Vxgl", + "inputTypes": [ + "Embeddings" + ], + "type": "other" + } + }, + "id": "reactflow__edge-OpenAIEmbeddings-qVkNT{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-qVkNTœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-3Vxgl{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-3Vxglœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "source": "OpenAIEmbeddings-qVkNT", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-qVkNTœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-3Vxgl", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-3Vxglœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" }, { "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-nFs38", + "id": "ChatInput-uBsVA", "name": "message", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "search_input", - "id": "AstraDB-KrRFj", + "fieldName": "search_query", + "id": "AstraDB-3Vxgl", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-ChatInput-nFs38{œdataTypeœ:œChatInputœ,œidœ:œChatInput-nFs38œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-KrRFj{œfieldNameœ:œsearch_inputœ,œidœ:œAstraDB-KrRFjœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "ChatInput-nFs38", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-nFs38œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "AstraDB-KrRFj", - "targetHandle": "{œfieldNameœ: œsearch_inputœ, œidœ: œAstraDB-KrRFjœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-ChatInput-uBsVA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-uBsVAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-3Vxgl{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-3Vxglœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "ChatInput-uBsVA", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-uBsVAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "AstraDB-3Vxgl", + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-3Vxglœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + }, + { + "data": { + "sourceHandle": { + "dataType": "AstraDB", + "id": "AstraDB-3Vxgl", + "name": "search_results", + "output_types": [ + "Data" + ] + }, + "targetHandle": { + "fieldName": "data", + "id": "ParseData-1lOsZ", + "inputTypes": [ + "Data" + ], + "type": "other" + } + }, + "id": "reactflow__edge-AstraDB-3Vxgl{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-3Vxglœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParseData-1lOsZ{œfieldNameœ:œdataœ,œidœ:œParseData-1lOsZœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "AstraDB-3Vxgl", + "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-3Vxglœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", + "target": "ParseData-1lOsZ", + "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-1lOsZœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" } ], "nodes": [ @@ -277,7 +269,7 @@ "data": { "description": "Get chat inputs from the Playground.", "display_name": "Chat Input", - "id": "ChatInput-nFs38", + "id": "ChatInput-uBsVA", "node": { "base_classes": [ "Message" @@ -541,7 +533,7 @@ }, "dragging": false, "height": 234, - "id": "ChatInput-nFs38", + "id": "ChatInput-uBsVA", "position": { "x": 743.9745420290319, "y": 463.6977510207854 @@ -558,7 +550,7 @@ "data": { "description": "Convert Data into plain text following a specified template.", "display_name": "Parse Data", - "id": "ParseData-z1UYg", + "id": "ParseData-1lOsZ", "node": { "base_classes": [ "Message" @@ -688,7 +680,7 @@ }, "dragging": false, "height": 350, - "id": "ParseData-z1UYg", + "id": "ParseData-1lOsZ", "position": { "x": 1606.0595305373527, "y": 751.4473696960695 @@ -705,7 +697,7 @@ "data": { "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", - "id": "Prompt-EkVe9", + "id": "Prompt-o2SL4", "node": { "base_classes": [ "Message" @@ -862,7 +854,7 @@ }, "dragging": false, "height": 433, - "id": "Prompt-EkVe9", + "id": "Prompt-o2SL4", "position": { "x": 1977.9097981422992, "y": 640.5656416923846 @@ -879,7 +871,7 @@ "data": { "description": "Split text into chunks based on specified criteria.", "display_name": "Split Text", - "id": "SplitText-CmZlY", + "id": "SplitText-bByhd", "node": { "base_classes": [ "Data" @@ -1011,7 +1003,7 @@ }, "dragging": false, "height": 475, - "id": "SplitText-CmZlY", + "id": "SplitText-bByhd", "position": { "x": 1683.4543896546102, "y": 1350.7871623588553 @@ -1026,7 +1018,7 @@ }, { "data": { - "id": "note-3HOSg", + "id": "note-7AuKD", "node": { "description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n", "display_name": "", @@ -1039,7 +1031,7 @@ }, "dragging": false, "height": 324, - "id": "note-3HOSg", + "id": "note-7AuKD", "position": { "x": 374.388314931542, "y": 486.18094072679895 @@ -1059,7 +1051,7 @@ }, { "data": { - "id": "note-kP3Eg", + "id": "note-5KK6I", "node": { "description": "## 📖 README\n\nLoad your data into a vector database with the 📚 **Load Data** flow, and then use your data as chat context with the 🐕 **Retriever** flow.\n\n**🚨 Add your OpenAI API key as a global variable to easily add it to all of the OpenAI components in this flow.** \n\n**Quick start**\n1. Run the 📚 **Load Data** flow.\n2. Run the 🐕 **Retriever** flow.\n\n**Next steps** \n\n- Experiment by changing the prompt and the loaded data to see how the bot's responses change. \n\nFor more info, see the [Langflow docs](https://docs.langflow.org/starter-projects-vector-store-rag).", "display_name": "Read Me", @@ -1072,7 +1064,7 @@ }, "dragging": false, "height": 527, - "id": "note-kP3Eg", + "id": "note-5KK6I", "position": { "x": 94.28986613312418, "y": 907.6428043837066 @@ -1092,7 +1084,7 @@ }, { "data": { - "id": "OpenAIModel-Hafw2", + "id": "OpenAIModel-yjcwf", "node": { "base_classes": [ "LanguageModel", @@ -1386,8 +1378,8 @@ "type": "OpenAIModel" }, "dragging": false, - "height": 672, - "id": "OpenAIModel-Hafw2", + "height": 674, + "id": "OpenAIModel-yjcwf", "position": { "x": 2360.1432368563187, "y": 571.6712358167248 @@ -1404,7 +1396,7 @@ "data": { "description": "Display a chat message in the Playground.", "display_name": "Chat Output", - "id": "ChatOutput-L7Gy2", + "id": "ChatOutput-p7okC", "node": { "base_classes": [ "Message" @@ -1664,7 +1656,7 @@ }, "dragging": false, "height": 234, - "id": "ChatOutput-L7Gy2", + "id": "ChatOutput-p7okC", "position": { "x": 2734.385670401691, "y": 810.6079786425926 @@ -1673,13 +1665,13 @@ "x": 2734.385670401691, "y": 810.6079786425926 }, - "selected": true, + "selected": false, "type": "genericNode", "width": 320 }, { "data": { - "id": "OpenAIEmbeddings-QPlR3", + "id": "OpenAIEmbeddings-qVkNT", "node": { "base_classes": [ "Embeddings" @@ -2155,7 +2147,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-QPlR3", + "id": "OpenAIEmbeddings-qVkNT", "position": { "x": 825.435626932521, "y": 739.6327999745448 @@ -2170,7 +2162,7 @@ }, { "data": { - "id": "note-GbFKO", + "id": "note-dI9D3", "node": { "description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. \n\nClick ▶️ **Run component** on the **Astra DB** component to load your data.\n\n* If you're using OSS Langflow, add your Astra DB Application Token to the Astra DB component.\n\n#### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.", "display_name": "", @@ -2183,7 +2175,7 @@ }, "dragging": false, "height": 50, - "id": "note-GbFKO", + "id": "note-dI9D3", "position": { "x": 955.3277857006676, "y": 1552.171191793604 @@ -2202,7 +2194,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-Jp4uj", + "id": "OpenAIEmbeddings-b229k", "node": { "base_classes": [ "Embeddings" @@ -2678,7 +2670,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-Jp4uj", + "id": "OpenAIEmbeddings-b229k", "position": { "x": 1690.9220896443658, "y": 1866.483269483266 @@ -2693,7 +2685,7 @@ }, { "data": { - "id": "File-BlhWT", + "id": "File-jTMwG", "node": { "base_classes": [ "Data" @@ -2917,7 +2909,7 @@ }, "dragging": false, "height": 367, - "id": "File-BlhWT", + "id": "File-jTMwG", "position": { "x": 1318.9043936921921, "y": 1484.0151419511485 @@ -2932,7 +2924,7 @@ }, { "data": { - "id": "note-febc1", + "id": "note-V6ieF", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2945,7 +2937,7 @@ }, "dragging": false, "height": 324, - "id": "note-febc1", + "id": "note-V6ieF", "position": { "x": 1692.2322233423606, "y": 1821.9077961087607 @@ -2960,7 +2952,7 @@ }, { "data": { - "id": "note-NZNn3", + "id": "note-2HhzZ", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2973,7 +2965,7 @@ }, "dragging": false, "height": 324, - "id": "note-NZNn3", + "id": "note-2HhzZ", "position": { "x": 824.1003268813427, "y": 698.6951695764802 @@ -2988,7 +2980,7 @@ }, { "data": { - "id": "note-0QI9J", + "id": "note-eiHeg", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -3001,7 +2993,7 @@ }, "dragging": false, "height": 324, - "id": "note-0QI9J", + "id": "note-eiHeg", "position": { "x": 2350.297636215281, "y": 525.0687902842766 @@ -3016,7 +3008,7 @@ }, { "data": { - "id": "AstraDB-KrRFj", + "id": "AstraDB-0959q", "node": { "base_classes": [ "Data" @@ -3024,9 +3016,9 @@ "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Implementation of Vector Store using Astra DB with search capabilities", + "description": "Ingest and search documents in Astra DB", "display_name": "Astra DB", - "documentation": "https://docs.langflow.org/starter-projects-vector-store-rag", + "documentation": "https://docs.datastax.com/en/langflow/astra-components.html", "edited": false, "field_order": [ "token", @@ -3036,8 +3028,8 @@ "keyspace", "embedding_choice", "embedding_model", + "search_query", "ingest_data", - "search_input", "number_of_results", "search_type", "search_score_threshold", @@ -3049,7 +3041,6 @@ "frozen": false, "icon": "AstraDB", "legacy": false, - "lf_version": "1.1.1", "metadata": {}, "output_types": [], "outputs": [ @@ -3092,24 +3083,26 @@ "value": {} }, "api_endpoint": { - "_input_type": "SecretStrInput", + "_input_type": "DropdownInput", "advanced": false, - "display_name": "API Endpoint", + "combobox": false, + "display_name": "Database", "dynamic": false, - "info": "API endpoint URL for the Astra DB service.", - "input_types": [ - "Message" - ], - "load_from_db": true, + "info": "The Astra DB Database to use.", "name": "api_endpoint", - "password": true, + "options": [ + "Default database" + ], "placeholder": "", "real_time_refresh": true, + "refresh_button": true, "required": true, "show": true, "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, "type": "str", - "value": "ASTRA_DB_API_ENDPOINT" + "value": "Default database" }, "astradb_vectorstore_kwargs": { "_input_type": "NestedDictInput", @@ -3144,7 +3137,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\nfrom collections import defaultdict\n\nfrom astrapy import DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n VECTORIZE_PROVIDERS_MAPPING = defaultdict(\n list,\n {\n \"Azure OpenAI\": [\n \"azureOpenAI\",\n [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"],\n ],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"Nvidia\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n },\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"+ Create new collection\"],\n value=\"+ Create new collection\",\n ),\n StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n info=\"Name of the new collection to create.\",\n advanced=os.getenv(\"LANGFLOW_HOST\") is not None,\n required=os.getenv(\"LANGFLOW_HOST\") is None,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Query\",\n tool_mode=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_providers_mapping(self):\n # If we don't have token or api_endpoint, we can't fetch the list of providers\n if not self.token or not self.api_endpoint:\n self.log(\"Astra DB token and API endpoint are required to fetch the list of Vectorize providers.\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n client = DataAPIClient(token=self.token)\n admin = client.get_admin()\n\n # Get the embedding providers\n db_admin = admin.get_database_admin(self.api_endpoint)\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n def get_database(self):\n try:\n client = DataAPIClient(token=self.token)\n\n return client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting database: {e}\")\n\n return None\n\n def _initialize_collection_options(self):\n database = self.get_database()\n if database is None:\n return [\"+ Create new collection\"]\n\n try:\n collections = [collection.name for collection in database.list_collections()]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching collections: {e}\")\n\n return [\"+ Create new collection\"]\n\n return [*collections, \"+ Create new collection\"]\n\n def get_collection_choice(self):\n collection_name = self.collection_name\n if collection_name == \"+ Create new collection\":\n return self.collection_name_new\n\n return collection_name\n\n def get_collection_options(self):\n # Only get the options if the collection exists\n database = self.get_database()\n if database is None:\n return None\n\n collection_name = self.get_collection_choice()\n\n try:\n collection = database.get_collection(collection_name)\n collection_options = collection.options()\n except Exception as _: # noqa: BLE001\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Refresh the collection name options\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # Update the choice of embedding model based on collection name\n if field_name == \"collection_name\":\n # Detect if it is a new collection\n is_new_collection = field_value == \"+ Create new collection\"\n\n # Set the advanced and required fields based on the collection choice\n build_config[\"embedding_choice\"].update(\n {\n \"advanced\": not is_new_collection,\n \"value\": \"Embedding Model\" if is_new_collection else build_config[\"embedding_choice\"].get(\"value\"),\n }\n )\n\n # Set the advanced field for the embedding model\n build_config[\"embedding_model\"][\"advanced\"] = not is_new_collection\n\n # Set the advanced and required fields for the new collection name\n build_config[\"collection_name_new\"].update(\n {\n \"advanced\": not is_new_collection,\n \"required\": is_new_collection,\n \"value\": \"\" if not is_new_collection else build_config[\"collection_name_new\"].get(\"value\"),\n }\n )\n\n # Get the collection options for the selected collection\n collection_options = self.get_collection_options()\n\n # If the collection options are available (DB exists), show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n # Remove unnecessary fields when a service is set\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": True},\n \"embedding_choice\": {\"value\": \"Astra Vectorize\"},\n }\n else:\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": False},\n \"embedding_provider\": {\"advanced\": False},\n \"embedding_choice\": {\"value\": \"Embedding Model\"},\n }\n\n # Apply updates to the build_config\n for key, value in updates.items():\n build_config[key].update(value)\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n build_config[\"embedding_model\"][\"advanced\"] = True\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.update_providers_mapping()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\"embedding_provider\")\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n if authentication:\n provider_key = None\n authentication[\"providerKey\"] = authentication[\"providerKey\"].split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Initialize parameters based on the collection name\n is_new_collection = self.collection_name == \"+ Create new collection\"\n\n # Get the embedding model\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_choice == \"Embedding Model\" else {}\n\n # Use the embedding model if the choice is set to \"Embedding Model\"\n if self.embedding_choice == \"Astra Vectorize\" and is_new_collection:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=getattr(self, \"embedding_provider\", None) or None,\n model=getattr(self, \"model\", None) or None,\n z_01_model_parameters=getattr(self, \"z_01_model_parameters\", None) or None,\n z_02_api_key_name=getattr(self, \"z_02_api_key_name\", None) or None,\n z_03_provider_api_key=getattr(self, \"z_03_provider_api_key\", None) or None,\n z_04_authentication=getattr(self, \"z_04_authentication\", {}) or {},\n )\n\n # Set the embedding dictionary\n embedding_params = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n # Get the running environment for Langflow\n environment = (\n parse_api_endpoint(getattr(self, \"api_endpoint\", None)).environment\n if getattr(self, \"api_endpoint\", None)\n else None\n )\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"LANGFLOW_HOST\") is not None:\n langflow_prefix = \"ds-\"\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": not is_new_collection, # TODO: May want to expose this option\n \"content_field\": self.content_field or None,\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.keyspace or None,\n collection_name=self.get_collection_choice(),\n environment=environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **self.astradb_vectorstore_kwargs,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\nfrom collections import defaultdict\n\nfrom astrapy import AstraDBAdmin, DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n base_inputs = LCVectorStoreComponent.inputs\n if \"search_query\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n tool_mode=True,\n )\n )\n if \"ingest_data\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n )\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Database\",\n info=\"The Astra DB Database to use.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"Default database\"],\n value=\"Default database\",\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"+ Create new collection\"],\n value=\"+ Create new collection\",\n ),\n StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n info=\"Name of the new collection to create.\",\n advanced=os.getenv(\"LANGFLOW_HOST\") is not None,\n required=os.getenv(\"LANGFLOW_HOST\") is None,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n *base_inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def get_vectorize_providers(self):\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n admin = AstraDBAdmin(token=self.token)\n db_admin = admin.get_database_admin(self.get_api_endpoint())\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return {}\n\n def get_database_list(self):\n # Get the admin object\n db_admin = AstraDBAdmin(token=self.token)\n db_list = list(db_admin.list_databases())\n\n # Generate the api endpoint for each database\n return {db.info.name: f\"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com\" for db in db_list}\n\n def get_api_endpoint(self):\n # Get the database name (or endpoint)\n database = self.api_endpoint\n\n # If the database is not set, get the first database in the list\n if not database or database == \"Default database\":\n database, _ = next(iter(self.get_database_list().items()))\n\n # If the database is a URL, return it\n if database.startswith(\"https://\"):\n return database\n\n # Otherwise, get the URL from the database list\n return self.get_database_list().get(database)\n\n def get_database(self):\n try:\n client = DataAPIClient(token=self.token)\n\n return client.get_database(\n api_endpoint=self.get_api_endpoint(),\n token=self.token,\n )\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting database: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n if not self.token:\n return [\"Default database\"]\n try:\n databases = [\"Default database\", *list(self.get_database_list().keys())]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching databases: {e}\")\n\n return [\"Default database\"]\n\n return databases\n\n def _initialize_collection_options(self):\n database = self.get_database()\n if database is None:\n return [\"+ Create new collection\"]\n\n try:\n collections = [collection.name for collection in database.list_collections(keyspace=self.keyspace or None)]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching collections: {e}\")\n\n return [\"+ Create new collection\"]\n\n return [*collections, \"+ Create new collection\"]\n\n def get_collection_choice(self):\n collection_name = self.collection_name\n if collection_name == \"+ Create new collection\":\n return self.collection_name_new\n\n return collection_name\n\n def get_collection_options(self):\n # Only get the options if the collection exists\n database = self.get_database()\n if database is None:\n return None\n\n collection_name = self.get_collection_choice()\n\n try:\n collection = database.get_collection(collection_name, keyspace=self.keyspace or None)\n collection_options = collection.options()\n except Exception as _: # noqa: BLE001\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Always attempt to update the database list\n if field_name in [\"token\", \"api_endpoint\", \"collection_name\"]:\n # Update the database selector\n build_config[\"api_endpoint\"][\"options\"] = self._initialize_database_options()\n\n # Set the default API endpoint if not set\n if build_config[\"api_endpoint\"][\"value\"] == \"Default database\":\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options\"][0]\n\n # Update the collection selector\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # Update the choice of embedding model based on collection name\n if field_name == \"collection_name\":\n # Detect if it is a new collection\n is_new_collection = field_value == \"+ Create new collection\"\n\n # Set the advanced and required fields based on the collection choice\n build_config[\"embedding_choice\"].update(\n {\n \"advanced\": not is_new_collection,\n \"value\": \"Embedding Model\" if is_new_collection else build_config[\"embedding_choice\"].get(\"value\"),\n }\n )\n\n # Set the advanced field for the embedding model\n build_config[\"embedding_model\"][\"advanced\"] = not is_new_collection\n\n # Set the advanced and required fields for the new collection name\n build_config[\"collection_name_new\"].update(\n {\n \"advanced\": not is_new_collection,\n \"required\": is_new_collection,\n \"value\": \"\" if not is_new_collection else build_config[\"collection_name_new\"].get(\"value\"),\n }\n )\n\n # Get the collection options for the selected collection\n collection_options = self.get_collection_options()\n\n # If the collection options are available (DB exists), show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n # Remove unnecessary fields when a service is set\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": True},\n \"embedding_choice\": {\"value\": \"Astra Vectorize\"},\n }\n else:\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": False},\n \"embedding_provider\": {\"advanced\": False},\n \"embedding_choice\": {\"value\": \"Embedding Model\"},\n }\n\n # Apply updates to the build_config\n for key, value in updates.items():\n build_config[key].update(value)\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n build_config[\"embedding_model\"][\"advanced\"] = True\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.get_vectorize_providers()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\"embedding_provider\")\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n if authentication:\n provider_key = None\n authentication[\"providerKey\"] = authentication[\"providerKey\"].split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Initialize parameters based on the collection name\n is_new_collection = self.get_collection_options() is None\n\n # Get the embedding model\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_choice == \"Embedding Model\" else {}\n\n # Use the embedding model if the choice is set to \"Embedding Model\"\n if self.embedding_choice == \"Astra Vectorize\" and is_new_collection:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=getattr(self, \"embedding_provider\", None) or None,\n model=getattr(self, \"model\", None) or None,\n z_01_model_parameters=getattr(self, \"z_01_model_parameters\", None) or None,\n z_02_api_key_name=getattr(self, \"z_02_api_key_name\", None) or None,\n z_03_provider_api_key=getattr(self, \"z_03_provider_api_key\", None) or None,\n z_04_authentication=getattr(self, \"z_04_authentication\", {}) or {},\n )\n\n # Set the embedding dictionary\n embedding_params = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n # Get the running environment for Langflow\n environment = parse_api_endpoint(self.get_api_endpoint()).environment if self.get_api_endpoint() else None\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"LANGFLOW_HOST\") is not None:\n langflow_prefix = \"ds-\"\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": not is_new_collection, # TODO: May want to expose this option\n \"content_field\": self.content_field or None,\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=self.get_api_endpoint(),\n namespace=self.keyspace or None,\n collection_name=self.get_collection_choice(),\n environment=environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params or {},\n **embedding_params or {},\n **self.astradb_vectorstore_kwargs or {},\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_name": { "_input_type": "DropdownInput", @@ -3313,7 +3306,7 @@ "type": "int", "value": 4 }, - "search_input": { + "search_query": { "_input_type": "MultilineInput", "advanced": false, "display_name": "Search Query", @@ -3325,7 +3318,7 @@ "list": false, "load_from_db": false, "multiline": true, - "name": "search_input", + "name": "search_query", "placeholder": "", "required": false, "show": true, @@ -3400,15 +3393,15 @@ "type": "AstraDB" }, "dragging": false, - "height": 783, - "id": "AstraDB-KrRFj", + "height": 763, + "id": "AstraDB-0959q", "position": { - "x": 1215.5738831152323, - "y": 479.1449074798619 + "x": 2048.063432724921, + "y": 1382.2469953470875 }, "positionAbsolute": { - "x": 1215.5738831152323, - "y": 479.1449074798619 + "x": 2048.063432724921, + "y": 1382.2469953470875 }, "selected": false, "type": "genericNode", @@ -3416,7 +3409,7 @@ }, { "data": { - "id": "AstraDB-ONDn2", + "id": "AstraDB-3Vxgl", "node": { "base_classes": [ "Data" @@ -3424,9 +3417,9 @@ "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Implementation of Vector Store using Astra DB with search capabilities", + "description": "Ingest and search documents in Astra DB", "display_name": "Astra DB", - "documentation": "https://docs.langflow.org/starter-projects-vector-store-rag", + "documentation": "https://docs.datastax.com/en/langflow/astra-components.html", "edited": false, "field_order": [ "token", @@ -3436,8 +3429,8 @@ "keyspace", "embedding_choice", "embedding_model", + "search_query", "ingest_data", - "search_input", "number_of_results", "search_type", "search_score_threshold", @@ -3449,7 +3442,6 @@ "frozen": false, "icon": "AstraDB", "legacy": false, - "lf_version": "1.1.1", "metadata": {}, "output_types": [], "outputs": [ @@ -3492,24 +3484,26 @@ "value": {} }, "api_endpoint": { - "_input_type": "SecretStrInput", + "_input_type": "DropdownInput", "advanced": false, - "display_name": "API Endpoint", + "combobox": false, + "display_name": "Database", "dynamic": false, - "info": "API endpoint URL for the Astra DB service.", - "input_types": [ - "Message" - ], - "load_from_db": true, + "info": "The Astra DB Database to use.", "name": "api_endpoint", - "password": true, + "options": [ + "Default database" + ], "placeholder": "", "real_time_refresh": true, + "refresh_button": true, "required": true, "show": true, "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, "type": "str", - "value": "ASTRA_DB_API_ENDPOINT" + "value": "Default database" }, "astradb_vectorstore_kwargs": { "_input_type": "NestedDictInput", @@ -3544,7 +3538,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\nfrom collections import defaultdict\n\nfrom astrapy import DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n VECTORIZE_PROVIDERS_MAPPING = defaultdict(\n list,\n {\n \"Azure OpenAI\": [\n \"azureOpenAI\",\n [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"],\n ],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"Nvidia\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n },\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"+ Create new collection\"],\n value=\"+ Create new collection\",\n ),\n StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n info=\"Name of the new collection to create.\",\n advanced=os.getenv(\"LANGFLOW_HOST\") is not None,\n required=os.getenv(\"LANGFLOW_HOST\") is None,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Query\",\n tool_mode=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_providers_mapping(self):\n # If we don't have token or api_endpoint, we can't fetch the list of providers\n if not self.token or not self.api_endpoint:\n self.log(\"Astra DB token and API endpoint are required to fetch the list of Vectorize providers.\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n client = DataAPIClient(token=self.token)\n admin = client.get_admin()\n\n # Get the embedding providers\n db_admin = admin.get_database_admin(self.api_endpoint)\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n def get_database(self):\n try:\n client = DataAPIClient(token=self.token)\n\n return client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting database: {e}\")\n\n return None\n\n def _initialize_collection_options(self):\n database = self.get_database()\n if database is None:\n return [\"+ Create new collection\"]\n\n try:\n collections = [collection.name for collection in database.list_collections()]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching collections: {e}\")\n\n return [\"+ Create new collection\"]\n\n return [*collections, \"+ Create new collection\"]\n\n def get_collection_choice(self):\n collection_name = self.collection_name\n if collection_name == \"+ Create new collection\":\n return self.collection_name_new\n\n return collection_name\n\n def get_collection_options(self):\n # Only get the options if the collection exists\n database = self.get_database()\n if database is None:\n return None\n\n collection_name = self.get_collection_choice()\n\n try:\n collection = database.get_collection(collection_name)\n collection_options = collection.options()\n except Exception as _: # noqa: BLE001\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Refresh the collection name options\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # Update the choice of embedding model based on collection name\n if field_name == \"collection_name\":\n # Detect if it is a new collection\n is_new_collection = field_value == \"+ Create new collection\"\n\n # Set the advanced and required fields based on the collection choice\n build_config[\"embedding_choice\"].update(\n {\n \"advanced\": not is_new_collection,\n \"value\": \"Embedding Model\" if is_new_collection else build_config[\"embedding_choice\"].get(\"value\"),\n }\n )\n\n # Set the advanced field for the embedding model\n build_config[\"embedding_model\"][\"advanced\"] = not is_new_collection\n\n # Set the advanced and required fields for the new collection name\n build_config[\"collection_name_new\"].update(\n {\n \"advanced\": not is_new_collection,\n \"required\": is_new_collection,\n \"value\": \"\" if not is_new_collection else build_config[\"collection_name_new\"].get(\"value\"),\n }\n )\n\n # Get the collection options for the selected collection\n collection_options = self.get_collection_options()\n\n # If the collection options are available (DB exists), show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n # Remove unnecessary fields when a service is set\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": True},\n \"embedding_choice\": {\"value\": \"Astra Vectorize\"},\n }\n else:\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": False},\n \"embedding_provider\": {\"advanced\": False},\n \"embedding_choice\": {\"value\": \"Embedding Model\"},\n }\n\n # Apply updates to the build_config\n for key, value in updates.items():\n build_config[key].update(value)\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n build_config[\"embedding_model\"][\"advanced\"] = True\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.update_providers_mapping()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\"embedding_provider\")\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n if authentication:\n provider_key = None\n authentication[\"providerKey\"] = authentication[\"providerKey\"].split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Initialize parameters based on the collection name\n is_new_collection = self.collection_name == \"+ Create new collection\"\n\n # Get the embedding model\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_choice == \"Embedding Model\" else {}\n\n # Use the embedding model if the choice is set to \"Embedding Model\"\n if self.embedding_choice == \"Astra Vectorize\" and is_new_collection:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=getattr(self, \"embedding_provider\", None) or None,\n model=getattr(self, \"model\", None) or None,\n z_01_model_parameters=getattr(self, \"z_01_model_parameters\", None) or None,\n z_02_api_key_name=getattr(self, \"z_02_api_key_name\", None) or None,\n z_03_provider_api_key=getattr(self, \"z_03_provider_api_key\", None) or None,\n z_04_authentication=getattr(self, \"z_04_authentication\", {}) or {},\n )\n\n # Set the embedding dictionary\n embedding_params = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n # Get the running environment for Langflow\n environment = (\n parse_api_endpoint(getattr(self, \"api_endpoint\", None)).environment\n if getattr(self, \"api_endpoint\", None)\n else None\n )\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"LANGFLOW_HOST\") is not None:\n langflow_prefix = \"ds-\"\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": not is_new_collection, # TODO: May want to expose this option\n \"content_field\": self.content_field or None,\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.keyspace or None,\n collection_name=self.get_collection_choice(),\n environment=environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **self.astradb_vectorstore_kwargs,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\nfrom collections import defaultdict\n\nfrom astrapy import AstraDBAdmin, DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n base_inputs = LCVectorStoreComponent.inputs\n if \"search_query\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n tool_mode=True,\n )\n )\n if \"ingest_data\" not in [input_.name for input_ in base_inputs]:\n base_inputs.append(\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n )\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Database\",\n info=\"The Astra DB Database to use.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"Default database\"],\n value=\"Default database\",\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n options=[\"+ Create new collection\"],\n value=\"+ Create new collection\",\n ),\n StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n info=\"Name of the new collection to create.\",\n advanced=os.getenv(\"LANGFLOW_HOST\") is not None,\n required=os.getenv(\"LANGFLOW_HOST\") is None,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n *base_inputs,\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def get_vectorize_providers(self):\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n admin = AstraDBAdmin(token=self.token)\n db_admin = admin.get_database_admin(self.get_api_endpoint())\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return {}\n\n def get_database_list(self):\n # Get the admin object\n db_admin = AstraDBAdmin(token=self.token)\n db_list = list(db_admin.list_databases())\n\n # Generate the api endpoint for each database\n return {db.info.name: f\"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com\" for db in db_list}\n\n def get_api_endpoint(self):\n # Get the database name (or endpoint)\n database = self.api_endpoint\n\n # If the database is not set, get the first database in the list\n if not database or database == \"Default database\":\n database, _ = next(iter(self.get_database_list().items()))\n\n # If the database is a URL, return it\n if database.startswith(\"https://\"):\n return database\n\n # Otherwise, get the URL from the database list\n return self.get_database_list().get(database)\n\n def get_database(self):\n try:\n client = DataAPIClient(token=self.token)\n\n return client.get_database(\n api_endpoint=self.get_api_endpoint(),\n token=self.token,\n )\n except Exception as e: # noqa: BLE001\n self.log(f\"Error getting database: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n if not self.token:\n return [\"Default database\"]\n try:\n databases = [\"Default database\", *list(self.get_database_list().keys())]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching databases: {e}\")\n\n return [\"Default database\"]\n\n return databases\n\n def _initialize_collection_options(self):\n database = self.get_database()\n if database is None:\n return [\"+ Create new collection\"]\n\n try:\n collections = [collection.name for collection in database.list_collections(keyspace=self.keyspace or None)]\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching collections: {e}\")\n\n return [\"+ Create new collection\"]\n\n return [*collections, \"+ Create new collection\"]\n\n def get_collection_choice(self):\n collection_name = self.collection_name\n if collection_name == \"+ Create new collection\":\n return self.collection_name_new\n\n return collection_name\n\n def get_collection_options(self):\n # Only get the options if the collection exists\n database = self.get_database()\n if database is None:\n return None\n\n collection_name = self.get_collection_choice()\n\n try:\n collection = database.get_collection(collection_name, keyspace=self.keyspace or None)\n collection_options = collection.options()\n except Exception as _: # noqa: BLE001\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Always attempt to update the database list\n if field_name in [\"token\", \"api_endpoint\", \"collection_name\"]:\n # Update the database selector\n build_config[\"api_endpoint\"][\"options\"] = self._initialize_database_options()\n\n # Set the default API endpoint if not set\n if build_config[\"api_endpoint\"][\"value\"] == \"Default database\":\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options\"][0]\n\n # Update the collection selector\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # Update the choice of embedding model based on collection name\n if field_name == \"collection_name\":\n # Detect if it is a new collection\n is_new_collection = field_value == \"+ Create new collection\"\n\n # Set the advanced and required fields based on the collection choice\n build_config[\"embedding_choice\"].update(\n {\n \"advanced\": not is_new_collection,\n \"value\": \"Embedding Model\" if is_new_collection else build_config[\"embedding_choice\"].get(\"value\"),\n }\n )\n\n # Set the advanced field for the embedding model\n build_config[\"embedding_model\"][\"advanced\"] = not is_new_collection\n\n # Set the advanced and required fields for the new collection name\n build_config[\"collection_name_new\"].update(\n {\n \"advanced\": not is_new_collection,\n \"required\": is_new_collection,\n \"value\": \"\" if not is_new_collection else build_config[\"collection_name_new\"].get(\"value\"),\n }\n )\n\n # Get the collection options for the selected collection\n collection_options = self.get_collection_options()\n\n # If the collection options are available (DB exists), show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n # Remove unnecessary fields when a service is set\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": True},\n \"embedding_choice\": {\"value\": \"Astra Vectorize\"},\n }\n else:\n # Update the providers mapping\n updates = {\n \"embedding_model\": {\"advanced\": False},\n \"embedding_provider\": {\"advanced\": False},\n \"embedding_choice\": {\"value\": \"Embedding Model\"},\n }\n\n # Apply updates to the build_config\n for key, value in updates.items():\n build_config[key].update(value)\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n build_config[\"embedding_model\"][\"advanced\"] = True\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n build_config[\"embedding_model\"][\"advanced\"] = False\n\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.get_vectorize_providers()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.get_vectorize_providers()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\"embedding_provider\")\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n if authentication:\n provider_key = None\n authentication[\"providerKey\"] = authentication[\"providerKey\"].split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Initialize parameters based on the collection name\n is_new_collection = self.get_collection_options() is None\n\n # Get the embedding model\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_choice == \"Embedding Model\" else {}\n\n # Use the embedding model if the choice is set to \"Embedding Model\"\n if self.embedding_choice == \"Astra Vectorize\" and is_new_collection:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=getattr(self, \"embedding_provider\", None) or None,\n model=getattr(self, \"model\", None) or None,\n z_01_model_parameters=getattr(self, \"z_01_model_parameters\", None) or None,\n z_02_api_key_name=getattr(self, \"z_02_api_key_name\", None) or None,\n z_03_provider_api_key=getattr(self, \"z_03_provider_api_key\", None) or None,\n z_04_authentication=getattr(self, \"z_04_authentication\", {}) or {},\n )\n\n # Set the embedding dictionary\n embedding_params = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n # Get the running environment for Langflow\n environment = parse_api_endpoint(self.get_api_endpoint()).environment if self.get_api_endpoint() else None\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n if os.getenv(\"LANGFLOW_HOST\") is not None:\n langflow_prefix = \"ds-\"\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": not is_new_collection, # TODO: May want to expose this option\n \"content_field\": self.content_field or None,\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=self.get_api_endpoint(),\n namespace=self.keyspace or None,\n collection_name=self.get_collection_choice(),\n environment=environment,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params or {},\n **embedding_params or {},\n **self.astradb_vectorstore_kwargs or {},\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_name": { "_input_type": "DropdownInput", @@ -3713,7 +3707,7 @@ "type": "int", "value": 4 }, - "search_input": { + "search_query": { "_input_type": "MultilineInput", "advanced": false, "display_name": "Search Query", @@ -3725,7 +3719,7 @@ "list": false, "load_from_db": false, "multiline": true, - "name": "search_input", + "name": "search_query", "placeholder": "", "required": false, "show": true, @@ -3800,15 +3794,15 @@ "type": "AstraDB" }, "dragging": false, - "height": 783, - "id": "AstraDB-ONDn2", + "height": 763, + "id": "AstraDB-3Vxgl", "position": { - "x": 2058.0903617951794, - "y": 1377.2119232156292 + "x": 1212.6260540264493, + "y": 479.1345217261139 }, "positionAbsolute": { - "x": 2058.0903617951794, - "y": 1377.2119232156292 + "x": 1212.6260540264493, + "y": 479.1345217261139 }, "selected": false, "type": "genericNode", @@ -3816,14 +3810,14 @@ } ], "viewport": { - "x": -97.1807844187706, - "y": -151.4043859549497, - "zoom": 0.4326776617754808 + "x": -19.356798387149297, + "y": -230.4171794261714, + "zoom": 0.5951923522724938 } }, "description": "Load your data for chat context with Retrieval Augmented Generation.", "endpoint_name": null, - "id": "7e8f60b6-c649-4dca-80f3-81452b4d211a", + "id": "8f0934bc-f9ed-436a-b5ef-2c90755ab4d7", "is_component": false, "last_tested_version": "1.1.1", "name": "Vector Store RAG", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/vector_store_rag.py b/src/backend/base/langflow/initial_setup/starter_projects/vector_store_rag.py index d91968cbc..1467bb07d 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/vector_store_rag.py +++ b/src/backend/base/langflow/initial_setup/starter_projects/vector_store_rag.py @@ -33,7 +33,7 @@ def rag_graph(): chat_input = ChatInput() rag_vector_store = AstraDBVectorStoreComponent() rag_vector_store.set( - search_input=chat_input.message_response, + search_query=chat_input.message_response, embedding_model=openai_embeddings.build_embeddings, ) diff --git a/src/backend/tests/integration/components/astra/test_astra_component.py b/src/backend/tests/integration/components/astra/test_astra_component.py index a5414c0d3..a7622631a 100644 --- a/src/backend/tests/integration/components/astra/test_astra_component.py +++ b/src/backend/tests/integration/components/astra/test_astra_component.py @@ -56,7 +56,6 @@ async def test_base(astradb_client: AstraDB): }, ) - assert results["vector_store"] is not None assert results["search_results"] == [] assert astradb_client.collection(BASIC_COLLECTION) @@ -73,7 +72,7 @@ async def test_astra_embeds_and_search(): "api_endpoint": api_endpoint, "collection_name": BASIC_COLLECTION, "number_of_results": 1, - "search_input": "test1", + "search_query": "test1", "ingest_data": ComponentInputHandle( clazz=TextToData, inputs={"text_data": ["test1", "test2"]}, output_name="from_text" ), @@ -117,7 +116,7 @@ def test_astra_vectorize(): api_endpoint=api_endpoint, collection_name=VECTORIZE_COLLECTION, ingest_data=records, - search_input="test", + search_query="test", number_of_results=2, pre_delete_collection=True, ) @@ -173,7 +172,7 @@ def test_astra_vectorize_with_provider_api_key(): api_endpoint=api_endpoint, collection_name=VECTORIZE_COLLECTION_OPENAI, ingest_data=records, - search_input="test", + search_query="test", number_of_results=2, pre_delete_collection=True, ) @@ -228,7 +227,7 @@ def test_astra_vectorize_passes_authentication(): api_endpoint=api_endpoint, collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH, ingest_data=records, - search_input="test", + search_query="test", number_of_results=2, pre_delete_collection=True, ) diff --git a/src/backend/tests/unit/initial_setup/starter_projects/test_vector_store_rag.py b/src/backend/tests/unit/initial_setup/starter_projects/test_vector_store_rag.py index 87515a0f2..5c7f42330 100644 --- a/src/backend/tests/unit/initial_setup/starter_projects/test_vector_store_rag.py +++ b/src/backend/tests/unit/initial_setup/starter_projects/test_vector_store_rag.py @@ -49,7 +49,7 @@ def rag_graph(): chat_input.get_output("message").value = "What is the meaning of life?" rag_vector_store = AstraDBVectorStoreComponent(_id="rag-vector-store-123") rag_vector_store.set( - search_input=chat_input.message_response, + search_query=chat_input.message_response, api_endpoint="https://astra.example.com", token="token", # noqa: S106 embedding_model=openai_embeddings.build_embeddings,