diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 7b52c3403..2959d67f1 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -618,6 +618,7 @@ "collection_indexing_policy", "collection_name", "embedding", + "embedding_service", "ingest_data", "metadata_indexing_exclude", "metadata_indexing_include", @@ -748,7 +749,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\n\nimport orjson\nfrom astrapy.admin import parse_api_endpoint\nfrom loguru import logger\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_service\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _value) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_service\":\n if field_value == \"Astra Vectorize\":\n for field in [\"embedding\"]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = DropdownInput(\n name=\"provider\",\n display_name=\"Vectorize Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"provider\": new_parameter})\n else:\n for field in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"embedding\": new_parameter})\n\n elif field_name == \"provider\":\n for field in [\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter_0 = DropdownInput(\n name=\"z_00_model_name\",\n display_name=\"Model Name\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n required=True,\n ).to_dict()\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"provider\",\n {\n \"z_00_model_name\": new_parameter_0,\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.provider, [None])[0] or kwargs.get(\"provider\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": self.z_00_model_name or kwargs.get(\"z_00_model_name\"),\n \"authentication\": authentication,\n \"parameters\": self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {}),\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding:\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n dict_options = vectorize_options or self.build_vectorize_options()\n dict_options[\"authentication\"] = {\n k: v for k, v in dict_options.get(\"authentication\", {}).items() if k and v\n }\n dict_options[\"parameters\"] = {k: v for k, v in dict_options.get(\"parameters\", {}).items() if k and v}\n\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\", {})\n ),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n collection_name=self.collection_name,\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.namespace or None,\n environment=parse_api_endpoint(self.api_endpoint).environment,\n metric=self.metric,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n logger.debug(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n logger.debug(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n logger.debug(f\"Search input: {self.search_input}\")\n logger.debug(f\"Search type: {self.search_type}\")\n logger.debug(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n msg = f\"Error performing search in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n logger.debug(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n logger.debug(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n logger.debug(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\n\nimport orjson\nfrom astrapy.admin import parse_api_endpoint\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_service\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_service\":\n if field_value == \"Astra Vectorize\":\n for field in [\"embedding\"]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = DropdownInput(\n name=\"provider\",\n display_name=\"Vectorize Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"provider\": new_parameter})\n else:\n for field in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"embedding\": new_parameter})\n\n elif field_name == \"provider\":\n for field in [\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter_0 = DropdownInput(\n name=\"z_00_model_name\",\n display_name=\"Model Name\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n required=True,\n ).to_dict()\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"provider\",\n {\n \"z_00_model_name\": new_parameter_0,\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.provider, [None])[0] or kwargs.get(\"provider\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": self.z_00_model_name or kwargs.get(\"z_00_model_name\"),\n \"authentication\": authentication,\n \"parameters\": self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {}),\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding_service == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n dict_options = vectorize_options or self.build_vectorize_options()\n dict_options[\"authentication\"] = {\n k: v for k, v in dict_options.get(\"authentication\", {}).items() if k and v\n }\n dict_options[\"parameters\"] = {k: v for k, v in dict_options.get(\"parameters\", {}).items() if k and v}\n\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\", {})\n ),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n collection_name=self.collection_name,\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.namespace or None,\n environment=parse_api_endpoint(self.api_endpoint).environment if self.api_endpoint else None,\n metric=self.metric or None,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n msg = f\"Error performing search in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n self.log(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_indexing_policy": { "advanced": true, @@ -1951,6 +1952,7 @@ "collection_indexing_policy", "collection_name", "embedding", + "embedding_service", "ingest_data", "metadata_indexing_exclude", "metadata_indexing_include", @@ -2081,7 +2083,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\n\nimport orjson\nfrom astrapy.admin import parse_api_endpoint\nfrom loguru import logger\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_service\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _value) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_service\":\n if field_value == \"Astra Vectorize\":\n for field in [\"embedding\"]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = DropdownInput(\n name=\"provider\",\n display_name=\"Vectorize Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"provider\": new_parameter})\n else:\n for field in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"embedding\": new_parameter})\n\n elif field_name == \"provider\":\n for field in [\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter_0 = DropdownInput(\n name=\"z_00_model_name\",\n display_name=\"Model Name\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n required=True,\n ).to_dict()\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"provider\",\n {\n \"z_00_model_name\": new_parameter_0,\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.provider, [None])[0] or kwargs.get(\"provider\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": self.z_00_model_name or kwargs.get(\"z_00_model_name\"),\n \"authentication\": authentication,\n \"parameters\": self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {}),\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding:\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n dict_options = vectorize_options or self.build_vectorize_options()\n dict_options[\"authentication\"] = {\n k: v for k, v in dict_options.get(\"authentication\", {}).items() if k and v\n }\n dict_options[\"parameters\"] = {k: v for k, v in dict_options.get(\"parameters\", {}).items() if k and v}\n\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\", {})\n ),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n collection_name=self.collection_name,\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.namespace or None,\n environment=parse_api_endpoint(self.api_endpoint).environment,\n metric=self.metric,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n logger.debug(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n logger.debug(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n logger.debug(f\"Search input: {self.search_input}\")\n logger.debug(f\"Search type: {self.search_type}\")\n logger.debug(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n msg = f\"Error performing search in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n logger.debug(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n logger.debug(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n logger.debug(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\n\nimport orjson\nfrom astrapy.admin import parse_api_endpoint\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_service\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_service\":\n if field_value == \"Astra Vectorize\":\n for field in [\"embedding\"]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = DropdownInput(\n name=\"provider\",\n display_name=\"Vectorize Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"provider\": new_parameter})\n else:\n for field in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_service\", {\"embedding\": new_parameter})\n\n elif field_name == \"provider\":\n for field in [\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if field in build_config:\n del build_config[field]\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter_0 = DropdownInput(\n name=\"z_00_model_name\",\n display_name=\"Model Name\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n required=True,\n ).to_dict()\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"provider\",\n {\n \"z_00_model_name\": new_parameter_0,\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"provider\",\n \"z_00_model_name\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.provider, [None])[0] or kwargs.get(\"provider\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": self.z_00_model_name or kwargs.get(\"z_00_model_name\"),\n \"authentication\": authentication,\n \"parameters\": self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {}),\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding_service == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n dict_options = vectorize_options or self.build_vectorize_options()\n dict_options[\"authentication\"] = {\n k: v for k, v in dict_options.get(\"authentication\", {}).items() if k and v\n }\n dict_options[\"parameters\"] = {k: v for k, v in dict_options.get(\"parameters\", {}).items() if k and v}\n\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\", {})\n ),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n collection_name=self.collection_name,\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.namespace or None,\n environment=parse_api_endpoint(self.api_endpoint).environment if self.api_endpoint else None,\n metric=self.metric or None,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n msg = f\"Error performing search in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n self.log(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_indexing_policy": { "advanced": true, @@ -2447,6 +2449,7 @@ "name": "embeddings", "required_inputs": [ "chunk_size", + "client", "default_headers", "default_query", "deployment", @@ -2528,7 +2531,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from langchain_openai.embeddings.base import OpenAIEmbeddings\n\nfrom langflow.base.embeddings.model import LCEmbeddingsModel\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.field_typing import Embeddings\nfrom langflow.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput\n\n\nclass OpenAIEmbeddingsComponent(LCEmbeddingsModel):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n icon = \"OpenAI\"\n name = \"OpenAIEmbeddings\"\n\n inputs = [\n DictInput(\n name=\"default_headers\",\n display_name=\"Default Headers\",\n advanced=True,\n info=\"Default headers to use for the API request.\",\n ),\n DictInput(\n name=\"default_query\",\n display_name=\"Default Query\",\n advanced=True,\n info=\"Default query parameters to use for the API request.\",\n ),\n IntInput(name=\"chunk_size\", display_name=\"Chunk Size\", advanced=True, value=1000),\n MessageTextInput(name=\"client\", display_name=\"Client\", advanced=True),\n MessageTextInput(name=\"deployment\", display_name=\"Deployment\", advanced=True),\n IntInput(name=\"embedding_ctx_length\", display_name=\"Embedding Context Length\", advanced=True, value=1536),\n IntInput(name=\"max_retries\", display_name=\"Max Retries\", value=3, advanced=True),\n DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n advanced=False,\n options=OPENAI_EMBEDDING_MODEL_NAMES,\n value=\"text-embedding-3-small\",\n ),\n DictInput(name=\"model_kwargs\", display_name=\"Model Kwargs\", advanced=True),\n SecretStrInput(name=\"openai_api_base\", display_name=\"OpenAI API Base\", advanced=True),\n SecretStrInput(name=\"openai_api_key\", display_name=\"OpenAI API Key\", value=\"OPENAI_API_KEY\"),\n SecretStrInput(name=\"openai_api_type\", display_name=\"OpenAI API Type\", advanced=True),\n MessageTextInput(name=\"openai_api_version\", display_name=\"OpenAI API Version\", advanced=True),\n MessageTextInput(\n name=\"openai_organization\",\n display_name=\"OpenAI Organization\",\n advanced=True,\n ),\n MessageTextInput(name=\"openai_proxy\", display_name=\"OpenAI Proxy\", advanced=True),\n FloatInput(name=\"request_timeout\", display_name=\"Request Timeout\", advanced=True),\n BoolInput(name=\"show_progress_bar\", display_name=\"Show Progress Bar\", advanced=True),\n BoolInput(name=\"skip_empty\", display_name=\"Skip Empty\", advanced=True),\n MessageTextInput(\n name=\"tiktoken_model_name\",\n display_name=\"TikToken Model Name\",\n advanced=True,\n ),\n BoolInput(\n name=\"tiktoken_enable\",\n display_name=\"TikToken Enable\",\n advanced=True,\n value=True,\n info=\"If False, you must have transformers installed.\",\n ),\n IntInput(\n name=\"dimensions\",\n display_name=\"Dimensions\",\n info=\"The number of dimensions the resulting output embeddings should have. \"\n \"Only supported by certain models.\",\n advanced=True,\n ),\n ]\n\n def build_embeddings(self) -> Embeddings:\n return OpenAIEmbeddings(\n tiktoken_enabled=self.tiktoken_enable,\n default_headers=self.default_headers,\n default_query=self.default_query,\n allowed_special=\"all\",\n disallowed_special=\"all\",\n chunk_size=self.chunk_size,\n deployment=self.deployment,\n embedding_ctx_length=self.embedding_ctx_length,\n max_retries=self.max_retries,\n model=self.model,\n model_kwargs=self.model_kwargs,\n base_url=self.openai_api_base,\n api_key=self.openai_api_key,\n openai_api_type=self.openai_api_type,\n api_version=self.openai_api_version,\n organization=self.openai_organization,\n openai_proxy=self.openai_proxy,\n timeout=self.request_timeout or None,\n show_progress_bar=self.show_progress_bar,\n skip_empty=self.skip_empty,\n tiktoken_model_name=self.tiktoken_model_name,\n dimensions=self.dimensions or None,\n )\n" + "value": "from langchain_openai import OpenAIEmbeddings\n\nfrom langflow.base.embeddings.model import LCEmbeddingsModel\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.field_typing import Embeddings\nfrom langflow.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput\n\n\nclass OpenAIEmbeddingsComponent(LCEmbeddingsModel):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n icon = \"OpenAI\"\n name = \"OpenAIEmbeddings\"\n\n inputs = [\n DictInput(\n name=\"default_headers\",\n display_name=\"Default Headers\",\n advanced=True,\n info=\"Default headers to use for the API request.\",\n ),\n DictInput(\n name=\"default_query\",\n display_name=\"Default Query\",\n advanced=True,\n info=\"Default query parameters to use for the API request.\",\n ),\n IntInput(name=\"chunk_size\", display_name=\"Chunk Size\", advanced=True, value=1000),\n MessageTextInput(name=\"client\", display_name=\"Client\", advanced=True),\n MessageTextInput(name=\"deployment\", display_name=\"Deployment\", advanced=True),\n IntInput(name=\"embedding_ctx_length\", display_name=\"Embedding Context Length\", advanced=True, value=1536),\n IntInput(name=\"max_retries\", display_name=\"Max Retries\", value=3, advanced=True),\n DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n advanced=False,\n options=OPENAI_EMBEDDING_MODEL_NAMES,\n value=\"text-embedding-3-small\",\n ),\n DictInput(name=\"model_kwargs\", display_name=\"Model Kwargs\", advanced=True),\n SecretStrInput(name=\"openai_api_key\", display_name=\"OpenAI API Key\", value=\"OPENAI_API_KEY\"),\n MessageTextInput(name=\"openai_api_base\", display_name=\"OpenAI API Base\", advanced=True),\n MessageTextInput(name=\"openai_api_type\", display_name=\"OpenAI API Type\", advanced=True),\n MessageTextInput(name=\"openai_api_version\", display_name=\"OpenAI API Version\", advanced=True),\n MessageTextInput(\n name=\"openai_organization\",\n display_name=\"OpenAI Organization\",\n advanced=True,\n ),\n MessageTextInput(name=\"openai_proxy\", display_name=\"OpenAI Proxy\", advanced=True),\n FloatInput(name=\"request_timeout\", display_name=\"Request Timeout\", advanced=True),\n BoolInput(name=\"show_progress_bar\", display_name=\"Show Progress Bar\", advanced=True),\n BoolInput(name=\"skip_empty\", display_name=\"Skip Empty\", advanced=True),\n MessageTextInput(\n name=\"tiktoken_model_name\",\n display_name=\"TikToken Model Name\",\n advanced=True,\n ),\n BoolInput(\n name=\"tiktoken_enable\",\n display_name=\"TikToken Enable\",\n advanced=True,\n value=True,\n info=\"If False, you must have transformers installed.\",\n ),\n IntInput(\n name=\"dimensions\",\n display_name=\"Dimensions\",\n info=\"The number of dimensions the resulting output embeddings should have. \"\n \"Only supported by certain models.\",\n advanced=True,\n ),\n ]\n\n def build_embeddings(self) -> Embeddings:\n return OpenAIEmbeddings(\n client=self.client or None,\n model=self.model,\n dimensions=self.dimensions or None,\n deployment=self.deployment or None,\n api_version=self.openai_api_version or None,\n base_url=self.openai_api_base or None,\n openai_api_type=self.openai_api_type or None,\n openai_proxy=self.openai_proxy or None,\n embedding_ctx_length=self.embedding_ctx_length,\n api_key=self.openai_api_key or None,\n organization=self.openai_organization or None,\n allowed_special=\"all\",\n disallowed_special=\"all\",\n chunk_size=self.chunk_size,\n max_retries=self.max_retries,\n timeout=self.request_timeout or None,\n tiktoken_enabled=self.tiktoken_enable,\n tiktoken_model_name=self.tiktoken_model_name or None,\n show_progress_bar=self.show_progress_bar,\n model_kwargs=self.model_kwargs,\n skip_empty=self.skip_empty,\n default_headers=self.default_headers or None,\n default_query=self.default_query or None,\n )\n" }, "default_headers": { "advanced": true, @@ -2667,7 +2670,7 @@ "input_types": [ "Message" ], - "load_from_db": true, + "load_from_db": false, "name": "openai_api_base", "password": true, "placeholder": "", @@ -2703,7 +2706,7 @@ "input_types": [ "Message" ], - "load_from_db": true, + "load_from_db": false, "name": "openai_api_type", "password": true, "placeholder": "", @@ -2924,6 +2927,7 @@ "name": "embeddings", "required_inputs": [ "chunk_size", + "client", "default_headers", "default_query", "deployment", @@ -3005,7 +3009,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from langchain_openai.embeddings.base import OpenAIEmbeddings\n\nfrom langflow.base.embeddings.model import LCEmbeddingsModel\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.field_typing import Embeddings\nfrom langflow.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput\n\n\nclass OpenAIEmbeddingsComponent(LCEmbeddingsModel):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n icon = \"OpenAI\"\n name = \"OpenAIEmbeddings\"\n\n inputs = [\n DictInput(\n name=\"default_headers\",\n display_name=\"Default Headers\",\n advanced=True,\n info=\"Default headers to use for the API request.\",\n ),\n DictInput(\n name=\"default_query\",\n display_name=\"Default Query\",\n advanced=True,\n info=\"Default query parameters to use for the API request.\",\n ),\n IntInput(name=\"chunk_size\", display_name=\"Chunk Size\", advanced=True, value=1000),\n MessageTextInput(name=\"client\", display_name=\"Client\", advanced=True),\n MessageTextInput(name=\"deployment\", display_name=\"Deployment\", advanced=True),\n IntInput(name=\"embedding_ctx_length\", display_name=\"Embedding Context Length\", advanced=True, value=1536),\n IntInput(name=\"max_retries\", display_name=\"Max Retries\", value=3, advanced=True),\n DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n advanced=False,\n options=OPENAI_EMBEDDING_MODEL_NAMES,\n value=\"text-embedding-3-small\",\n ),\n DictInput(name=\"model_kwargs\", display_name=\"Model Kwargs\", advanced=True),\n SecretStrInput(name=\"openai_api_base\", display_name=\"OpenAI API Base\", advanced=True),\n SecretStrInput(name=\"openai_api_key\", display_name=\"OpenAI API Key\", value=\"OPENAI_API_KEY\"),\n SecretStrInput(name=\"openai_api_type\", display_name=\"OpenAI API Type\", advanced=True),\n MessageTextInput(name=\"openai_api_version\", display_name=\"OpenAI API Version\", advanced=True),\n MessageTextInput(\n name=\"openai_organization\",\n display_name=\"OpenAI Organization\",\n advanced=True,\n ),\n MessageTextInput(name=\"openai_proxy\", display_name=\"OpenAI Proxy\", advanced=True),\n FloatInput(name=\"request_timeout\", display_name=\"Request Timeout\", advanced=True),\n BoolInput(name=\"show_progress_bar\", display_name=\"Show Progress Bar\", advanced=True),\n BoolInput(name=\"skip_empty\", display_name=\"Skip Empty\", advanced=True),\n MessageTextInput(\n name=\"tiktoken_model_name\",\n display_name=\"TikToken Model Name\",\n advanced=True,\n ),\n BoolInput(\n name=\"tiktoken_enable\",\n display_name=\"TikToken Enable\",\n advanced=True,\n value=True,\n info=\"If False, you must have transformers installed.\",\n ),\n IntInput(\n name=\"dimensions\",\n display_name=\"Dimensions\",\n info=\"The number of dimensions the resulting output embeddings should have. \"\n \"Only supported by certain models.\",\n advanced=True,\n ),\n ]\n\n def build_embeddings(self) -> Embeddings:\n return OpenAIEmbeddings(\n tiktoken_enabled=self.tiktoken_enable,\n default_headers=self.default_headers,\n default_query=self.default_query,\n allowed_special=\"all\",\n disallowed_special=\"all\",\n chunk_size=self.chunk_size,\n deployment=self.deployment,\n embedding_ctx_length=self.embedding_ctx_length,\n max_retries=self.max_retries,\n model=self.model,\n model_kwargs=self.model_kwargs,\n base_url=self.openai_api_base,\n api_key=self.openai_api_key,\n openai_api_type=self.openai_api_type,\n api_version=self.openai_api_version,\n organization=self.openai_organization,\n openai_proxy=self.openai_proxy,\n timeout=self.request_timeout or None,\n show_progress_bar=self.show_progress_bar,\n skip_empty=self.skip_empty,\n tiktoken_model_name=self.tiktoken_model_name,\n dimensions=self.dimensions or None,\n )\n" + "value": "from langchain_openai import OpenAIEmbeddings\n\nfrom langflow.base.embeddings.model import LCEmbeddingsModel\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.field_typing import Embeddings\nfrom langflow.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput\n\n\nclass OpenAIEmbeddingsComponent(LCEmbeddingsModel):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n icon = \"OpenAI\"\n name = \"OpenAIEmbeddings\"\n\n inputs = [\n DictInput(\n name=\"default_headers\",\n display_name=\"Default Headers\",\n advanced=True,\n info=\"Default headers to use for the API request.\",\n ),\n DictInput(\n name=\"default_query\",\n display_name=\"Default Query\",\n advanced=True,\n info=\"Default query parameters to use for the API request.\",\n ),\n IntInput(name=\"chunk_size\", display_name=\"Chunk Size\", advanced=True, value=1000),\n MessageTextInput(name=\"client\", display_name=\"Client\", advanced=True),\n MessageTextInput(name=\"deployment\", display_name=\"Deployment\", advanced=True),\n IntInput(name=\"embedding_ctx_length\", display_name=\"Embedding Context Length\", advanced=True, value=1536),\n IntInput(name=\"max_retries\", display_name=\"Max Retries\", value=3, advanced=True),\n DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n advanced=False,\n options=OPENAI_EMBEDDING_MODEL_NAMES,\n value=\"text-embedding-3-small\",\n ),\n DictInput(name=\"model_kwargs\", display_name=\"Model Kwargs\", advanced=True),\n SecretStrInput(name=\"openai_api_key\", display_name=\"OpenAI API Key\", value=\"OPENAI_API_KEY\"),\n MessageTextInput(name=\"openai_api_base\", display_name=\"OpenAI API Base\", advanced=True),\n MessageTextInput(name=\"openai_api_type\", display_name=\"OpenAI API Type\", advanced=True),\n MessageTextInput(name=\"openai_api_version\", display_name=\"OpenAI API Version\", advanced=True),\n MessageTextInput(\n name=\"openai_organization\",\n display_name=\"OpenAI Organization\",\n advanced=True,\n ),\n MessageTextInput(name=\"openai_proxy\", display_name=\"OpenAI Proxy\", advanced=True),\n FloatInput(name=\"request_timeout\", display_name=\"Request Timeout\", advanced=True),\n BoolInput(name=\"show_progress_bar\", display_name=\"Show Progress Bar\", advanced=True),\n BoolInput(name=\"skip_empty\", display_name=\"Skip Empty\", advanced=True),\n MessageTextInput(\n name=\"tiktoken_model_name\",\n display_name=\"TikToken Model Name\",\n advanced=True,\n ),\n BoolInput(\n name=\"tiktoken_enable\",\n display_name=\"TikToken Enable\",\n advanced=True,\n value=True,\n info=\"If False, you must have transformers installed.\",\n ),\n IntInput(\n name=\"dimensions\",\n display_name=\"Dimensions\",\n info=\"The number of dimensions the resulting output embeddings should have. \"\n \"Only supported by certain models.\",\n advanced=True,\n ),\n ]\n\n def build_embeddings(self) -> Embeddings:\n return OpenAIEmbeddings(\n client=self.client or None,\n model=self.model,\n dimensions=self.dimensions or None,\n deployment=self.deployment or None,\n api_version=self.openai_api_version or None,\n base_url=self.openai_api_base or None,\n openai_api_type=self.openai_api_type or None,\n openai_proxy=self.openai_proxy or None,\n embedding_ctx_length=self.embedding_ctx_length,\n api_key=self.openai_api_key or None,\n organization=self.openai_organization or None,\n allowed_special=\"all\",\n disallowed_special=\"all\",\n chunk_size=self.chunk_size,\n max_retries=self.max_retries,\n timeout=self.request_timeout or None,\n tiktoken_enabled=self.tiktoken_enable,\n tiktoken_model_name=self.tiktoken_model_name or None,\n show_progress_bar=self.show_progress_bar,\n model_kwargs=self.model_kwargs,\n skip_empty=self.skip_empty,\n default_headers=self.default_headers or None,\n default_query=self.default_query or None,\n )\n" }, "default_headers": { "advanced": true, @@ -3144,7 +3148,7 @@ "input_types": [ "Message" ], - "load_from_db": true, + "load_from_db": false, "name": "openai_api_base", "password": true, "placeholder": "", @@ -3180,7 +3184,7 @@ "input_types": [ "Message" ], - "load_from_db": true, + "load_from_db": false, "name": "openai_api_type", "password": true, "placeholder": "", diff --git a/src/frontend/src/CustomNodes/GenericNode/components/NodeName/index.tsx b/src/frontend/src/CustomNodes/GenericNode/components/NodeName/index.tsx index c2737e148..8c4c48d6e 100644 --- a/src/frontend/src/CustomNodes/GenericNode/components/NodeName/index.tsx +++ b/src/frontend/src/CustomNodes/GenericNode/components/NodeName/index.tsx @@ -68,13 +68,20 @@ export default function NodeName({