diff --git a/docs/docs/starter-projects/rag-with-astradb.mdx b/docs/docs/starter-projects/rag-with-astradb.mdx index baae6288c..3c3868bf2 100644 --- a/docs/docs/starter-projects/rag-with-astradb.mdx +++ b/docs/docs/starter-projects/rag-with-astradb.mdx @@ -3,13 +3,13 @@ import useBaseUrl from "@docusaurus/useBaseUrl"; import ZoomableImage from "/src/theme/ZoomableImage.js"; import Admonition from "@theme/Admonition"; -# 🌟 RAG with AstraDB +# 🌟 RAG with Astra DB -This guide will walk you through how to build a RAG (Retrieval Augmented Generation) application using **AstraDB** and **Langflow**. +This guide will walk you through how to build a RAG (Retrieval Augmented Generation) application using **Astra DB** and **Langflow**. -AstraDB is a cloud-native database built on Apache Cassandra that is optimized for the cloud. It is a fully managed database-as-a-service that simplifies operations and reduces costs. AstraDB is built on the same technology that powers the largest Cassandra deployments in the world. +Astra DB is a cloud-native database built on Apache Cassandra that is optimized for the cloud. It is a fully managed database-as-a-service that simplifies operations and reduces costs. Astra DB is built on the same technology that powers the largest Cassandra deployments in the world. -In this guide, we will use AstraDB as a vector store to store and retrieve the documents that will be used by the RAG application to generate responses. +In this guide, we will use Astra DB as a vector store to store and retrieve the documents that will be used by the RAG application to generate responses. This guide assumes that you have Langflow up and running. If you are new to @@ -23,18 +23,18 @@ TLDR; - Create a new database, get a **Token** and the **API Endpoint** - Click on the **New Project** button and look for Vector Store RAG. This will create a new project with the necessary components - Import the project into Langflow by dropping it on the Canvas or My Collection page -- Update the **Token** and **API Endpoint** in the **AstraDB** components +- Update the **Token** and **API Endpoint** in the **Astra DB** components - Update the OpenAI API key in the **OpenAI** components -- Run the ingestion flow which is the one that uses the **AstraDB** component +- Run the ingestion flow which is the one that uses the **Astra DB** component - Click on the ⚡ _Run_ button and start interacting with your RAG application # First things first -## Create an AstraDB Database +## Create an Astra DB Database -To get started, you will need to create an AstraDB database. Visit the [Astra](https://astra.datastax.com) website and create a free account. +To get started, you will need to create an Astra DB database. Visit the [Astra](https://astra.datastax.com) website and create a free account. -Once you have created an account, you will be taken to the AstraDB dashboard. Click on the **Create Database** button. +Once you have created an account, you will be taken to the Astra DB dashboard. Click on the **Create Database** button. -Now we are all set to start building our RAG application using AstraDB and Langflow. +Now we are all set to start building our RAG application using Astra DB and Langflow. ## (Optional) Duplicate the Langflow 1.0 HuggingFace Space @@ -77,7 +77,7 @@ If you haven't already, now is the time to launch Langflow. To make things easie ## Open the Vector Store RAG Project -To get started, click on the **New Project** button and look for the **Vector Store RAG** project. This will open a starter project with the necessary components to run a RAG application using AstraDB. +To get started, click on the **New Project** button and look for the **Vector Store RAG** project. This will open a starter project with the necessary components to run a RAG application using Astra DB. -This project consists of two flows. The simpler one is the **Ingestion Flow** which is responsible for ingesting the documents into the AstraDB database. +This project consists of two flows. The simpler one is the **Ingestion Flow** which is responsible for ingesting the documents into the Astra DB database. Your first step should be to understand what each flow does and how they interact with each other. @@ -97,7 +97,7 @@ The ingestion flow consists of: - **Files** component that uploads a text file to Langflow - **Recursive Character Text Splitter** component that splits the text into smaller chunks - **OpenAIEmbeddings** component that generates embeddings for the text chunks -- **AstraDB** component that stores the text chunks in the AstraDB database +- **Astra DB** component that stores the text chunks in the Astra DB database -Now, let's update the **AstraDB** and **AstraDB Search** components with the **Token** and **API Endpoint** that we generated earlier, and the OpenAI Embeddings components with your OpenAI API key. +Now, let's update the **Astra DB** and **Astra DB Search** components with the **Token** and **API Endpoint** that we generated earlier, and the OpenAI Embeddings components with your OpenAI API key. -And run it! This will ingest the Text data from your file into the AstraDB database. +And run it! This will ingest the Text data from your file into the Astra DB database. -And that's it! You have successfully ran a RAG application using AstraDB and Langflow. +And that's it! You have successfully ran a RAG application using Astra DB and Langflow. # Conclusion -In this guide, we have learned how to run a RAG application using AstraDB and Langflow. We have seen how to create an AstraDB database, import the AstraDB RAG Flows project into Langflow, and run the ingestion and RAG flows. +In this guide, we have learned how to run a RAG application using Astra DB and Langflow. We have seen how to create an Astra DB database, import the Astra DB RAG Flows project into Langflow, and run the ingestion and RAG flows.import ThemedImage from "@theme/ThemedImage"; +import useBaseUrl from "@docusaurus/useBaseUrl"; +import ZoomableImage from "/src/theme/ZoomableImage.js"; +import Admonition from "@theme/Admonition"; diff --git a/docs/static/data/AstraDB-RAG-Flows.json b/docs/static/data/AstraDB-RAG-Flows.json index cf96c6841..8068ea4a9 100644 --- a/docs/static/data/AstraDB-RAG-Flows.json +++ b/docs/static/data/AstraDB-RAG-Flows.json @@ -20,7 +20,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import Optional, Union\n\nfrom langflow.base.io.chat import ChatComponent\nfrom langflow.field_typing import Text\nfrom langflow.schema import Record\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Interaction Panel.\"\n icon = \"ChatInput\"\n\n def build(\n self,\n sender: Optional[str] = \"User\",\n sender_name: Optional[str] = \"User\",\n input_value: Optional[str] = None,\n session_id: Optional[str] = None,\n return_record: Optional[bool] = False,\n ) -> Union[Text, Record]:\n return super().build(\n sender=sender,\n sender_name=sender_name,\n input_value=input_value,\n session_id=session_id,\n return_record=return_record,\n )\n", + "value": "from typing import Optional, Union\n\nfrom langflow.base.io.chat import ChatComponent\nfrom langflow.field_typing import Text\nfrom langflow.schema import Record\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Interaction Panel.\"\n icon = \"ChatInput\"\n\n def build_config(self):\n build_config = super().build_config()\n build_config[\"input_value\"] = {\n \"input_types\": [],\n \"display_name\": \"Message\",\n \"multiline\": True,\n }\n\n return build_config\n\n def build(\n self,\n sender: Optional[str] = \"User\",\n sender_name: Optional[str] = \"User\",\n input_value: Optional[str] = None,\n session_id: Optional[str] = None,\n return_record: Optional[bool] = False,\n ) -> Union[Text, Record]:\n return super().build(\n sender=sender,\n sender_name=sender_name,\n input_value=input_value,\n session_id=session_id,\n return_record=return_record,\n )\n", "fileTypes": [], "file_path": "", "password": false, @@ -44,9 +44,7 @@ "name": "input_value", "display_name": "Message", "advanced": false, - "input_types": [ - "Text" - ], + "input_types": [], "dynamic": false, "info": "", "load_from_db": false, @@ -1489,7 +1487,7 @@ "list": false, "show": true, "multiline": true, - "value": "from pathlib import Path\nfrom typing import Any, Dict\n\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parse_text_file_to_record\nfrom langflow.interface.custom.custom_component import CustomComponent\nfrom langflow.schema import Record\n\n\nclass FileComponent(CustomComponent):\n display_name = \"File\"\n description = \"A generic file loader.\"\n icon = \"file-text\"\n \n def build_config(self) -> Dict[str, Any]:\n return {\n \"path\": {\n \"display_name\": \"Path\",\n \"field_type\": \"file\",\n \"file_types\": TEXT_FILE_TYPES,\n \"info\": f\"Supported file types: {', '.join(TEXT_FILE_TYPES)}\",\n },\n \"silent_errors\": {\n \"display_name\": \"Silent Errors\",\n \"advanced\": True,\n \"info\": \"If true, errors will not raise an exception.\",\n },\n }\n\n def load_file(self, path: str, silent_errors: bool = False) -> Record:\n resolved_path = self.resolve_path(path)\n path_obj = Path(resolved_path)\n extension = path_obj.suffix[1:].lower()\n if extension == \"doc\":\n raise ValueError(\"doc files are not supported. Please save as .docx\")\n if extension not in TEXT_FILE_TYPES:\n raise ValueError(f\"Unsupported file type: {extension}\")\n record = parse_text_file_to_record(resolved_path, silent_errors)\n self.status = record if record else \"No data\"\n return record or Record()\n\n def build(\n self,\n path: str,\n silent_errors: bool = False,\n ) -> Record:\n record = self.load_file(path, silent_errors)\n self.status = record\n return record\n", + "value": "from pathlib import Path\nfrom typing import Any, Dict\n\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parse_text_file_to_record\nfrom langflow.interface.custom.custom_component import CustomComponent\nfrom langflow.schema import Record\n\n\nclass FileComponent(CustomComponent):\n display_name = \"File\"\n description = \"A generic file loader.\"\n icon = \"file-text\"\n\n def build_config(self) -> Dict[str, Any]:\n return {\n \"path\": {\n \"display_name\": \"Path\",\n \"field_type\": \"file\",\n \"file_types\": TEXT_FILE_TYPES,\n \"info\": f\"Supported file types: {', '.join(TEXT_FILE_TYPES)}\",\n },\n \"silent_errors\": {\n \"display_name\": \"Silent Errors\",\n \"advanced\": True,\n \"info\": \"If true, errors will not raise an exception.\",\n },\n }\n\n def load_file(self, path: str, silent_errors: bool = False) -> Record:\n resolved_path = self.resolve_path(path)\n path_obj = Path(resolved_path)\n extension = path_obj.suffix[1:].lower()\n if extension == \"doc\":\n raise ValueError(\"doc files are not supported. Please save as .docx\")\n if extension not in TEXT_FILE_TYPES:\n raise ValueError(f\"Unsupported file type: {extension}\")\n record = parse_text_file_to_record(resolved_path, silent_errors)\n self.status = record if record else \"No data\"\n return record or Record()\n\n def build(\n self,\n path: str,\n silent_errors: bool = False,\n ) -> Record:\n record = self.load_file(path, silent_errors)\n self.status = record\n return record\n", "fileTypes": [], "file_path": "", "password": false, @@ -1498,8 +1496,7 @@ "dynamic": true, "info": "", "load_from_db": false, - "title_case": false, - "display_name": "code" + "title_case": false }, "silent_errors": { "type": "bool", @@ -1639,8 +1636,7 @@ "dynamic": true, "info": "", "load_from_db": false, - "title_case": false, - "display_name": "code" + "title_case": false }, "separators": { "type": "str", @@ -1763,7 +1759,7 @@ "display_name": "API Endpoint", "advanced": false, "dynamic": false, - "info": "API endpoint URL for the AstraDB service.", + "info": "API endpoint URL for the Astra DB service.", "load_from_db": false, "title_case": false, "input_types": [ @@ -1850,7 +1846,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import List, Optional\n\nfrom langflow.components.vectorstores.AstraDB import AstraDBVectorStoreComponent\nfrom langflow.components.vectorstores.base.model import LCVectorStoreComponent\nfrom langflow.field_typing import Embeddings, Text\nfrom langflow.schema import Record\n\n\nclass AstraDBSearchComponent(LCVectorStoreComponent):\n display_name = \"AstraDB Search\"\n description = \"Searches an existing AstraDB Vector Store.\"\n icon = \"AstraDB\"\n field_order = [\"token\", \"api_endpoint\", \"collection_name\", \"input_value\", \"embedding\"]\n\n def build_config(self):\n return {\n \"search_type\": {\n \"display_name\": \"Search Type\",\n \"options\": [\"Similarity\", \"MMR\"],\n },\n \"input_value\": {\n \"display_name\": \"Input Value\",\n \"info\": \"Input value to search\",\n },\n \"embedding\": {\"display_name\": \"Embedding\", \"info\": \"Embedding to use\"},\n \"collection_name\": {\n \"display_name\": \"Collection Name\",\n \"info\": \"The name of the collection within AstraDB where the vectors will be stored.\",\n },\n \"token\": {\n \"display_name\": \"Token\",\n \"info\": \"Authentication token for accessing AstraDB.\",\n \"password\": True,\n },\n \"api_endpoint\": {\n \"display_name\": \"API Endpoint\",\n \"info\": \"API endpoint URL for the AstraDB service.\",\n },\n \"namespace\": {\n \"display_name\": \"Namespace\",\n \"info\": \"Optional namespace within AstraDB to use for the collection.\",\n \"advanced\": True,\n },\n \"metric\": {\n \"display_name\": \"Metric\",\n \"info\": \"Optional distance metric for vector comparisons in the vector store.\",\n \"advanced\": True,\n },\n \"batch_size\": {\n \"display_name\": \"Batch Size\",\n \"info\": \"Optional number of records to process in a single batch.\",\n \"advanced\": True,\n },\n \"bulk_insert_batch_concurrency\": {\n \"display_name\": \"Bulk Insert Batch Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations.\",\n \"advanced\": True,\n },\n \"bulk_insert_overwrite_concurrency\": {\n \"display_name\": \"Bulk Insert Overwrite Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations that overwrite existing records.\",\n \"advanced\": True,\n },\n \"bulk_delete_concurrency\": {\n \"display_name\": \"Bulk Delete Concurrency\",\n \"info\": \"Optional concurrency level for bulk delete operations.\",\n \"advanced\": True,\n },\n \"setup_mode\": {\n \"display_name\": \"Setup Mode\",\n \"info\": \"Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.\",\n \"options\": [\"Sync\", \"Async\", \"Off\"],\n \"advanced\": True,\n },\n \"pre_delete_collection\": {\n \"display_name\": \"Pre Delete Collection\",\n \"info\": \"Boolean flag to determine whether to delete the collection before creating a new one.\",\n \"advanced\": True,\n },\n \"metadata_indexing_include\": {\n \"display_name\": \"Metadata Indexing Include\",\n \"info\": \"Optional list of metadata fields to include in the indexing.\",\n \"advanced\": True,\n },\n \"metadata_indexing_exclude\": {\n \"display_name\": \"Metadata Indexing Exclude\",\n \"info\": \"Optional list of metadata fields to exclude from the indexing.\",\n \"advanced\": True,\n },\n \"collection_indexing_policy\": {\n \"display_name\": \"Collection Indexing Policy\",\n \"info\": \"Optional dictionary defining the indexing policy for the collection.\",\n \"advanced\": True,\n },\n \"number_of_results\": {\n \"display_name\": \"Number of Results\",\n \"info\": \"Number of results to return.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n embedding: Embeddings,\n collection_name: str,\n input_value: Text,\n token: str,\n api_endpoint: str,\n search_type: str = \"Similarity\",\n number_of_results: int = 4,\n namespace: Optional[str] = None,\n metric: Optional[str] = None,\n batch_size: Optional[int] = None,\n bulk_insert_batch_concurrency: Optional[int] = None,\n bulk_insert_overwrite_concurrency: Optional[int] = None,\n bulk_delete_concurrency: Optional[int] = None,\n setup_mode: str = \"Sync\",\n pre_delete_collection: bool = False,\n metadata_indexing_include: Optional[List[str]] = None,\n metadata_indexing_exclude: Optional[List[str]] = None,\n collection_indexing_policy: Optional[dict] = None,\n ) -> List[Record]:\n vector_store = AstraDBVectorStoreComponent().build(\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n try:\n return self.search_with_vector_store(input_value, search_type, vector_store, k=number_of_results)\n except KeyError as e:\n if \"content\" in str(e):\n raise ValueError(\n \"You should ingest data through Langflow (or LangChain) to query it in Langflow. Your collection does not contain the 'content' field.\"\n )\n else:\n raise e\n", + "value": "from typing import List, Optional\n\nfrom langflow.components.vectorstores.AstraDB import AstraDBVectorStoreComponent\nfrom langflow.components.vectorstores.base.model import LCVectorStoreComponent\nfrom langflow.field_typing import Embeddings, Text\nfrom langflow.schema import Record\n\n\nclass AstraDBSearchComponent(LCVectorStoreComponent):\n display_name = \"Astra DB Search\"\n description = \"Searches an existing Astra DB Vector Store.\"\n icon = \"AstraDB\"\n field_order = [\"token\", \"api_endpoint\", \"collection_name\", \"input_value\", \"embedding\"]\n\n def build_config(self):\n return {\n \"search_type\": {\n \"display_name\": \"Search Type\",\n \"options\": [\"Similarity\", \"MMR\"],\n },\n \"input_value\": {\n \"display_name\": \"Input Value\",\n \"info\": \"Input value to search\",\n },\n \"embedding\": {\"display_name\": \"Embedding\", \"info\": \"Embedding to use\"},\n \"collection_name\": {\n \"display_name\": \"Collection Name\",\n \"info\": \"The name of the collection within Astra DB where the vectors will be stored.\",\n },\n \"token\": {\n \"display_name\": \"Token\",\n \"info\": \"Authentication token for accessing Astra DB.\",\n \"password\": True,\n },\n \"api_endpoint\": {\n \"display_name\": \"API Endpoint\",\n \"info\": \"API endpoint URL for the Astra DB service.\",\n },\n \"namespace\": {\n \"display_name\": \"Namespace\",\n \"info\": \"Optional namespace within Astra DB to use for the collection.\",\n \"advanced\": True,\n },\n \"metric\": {\n \"display_name\": \"Metric\",\n \"info\": \"Optional distance metric for vector comparisons in the vector store.\",\n \"advanced\": True,\n },\n \"batch_size\": {\n \"display_name\": \"Batch Size\",\n \"info\": \"Optional number of records to process in a single batch.\",\n \"advanced\": True,\n },\n \"bulk_insert_batch_concurrency\": {\n \"display_name\": \"Bulk Insert Batch Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations.\",\n \"advanced\": True,\n },\n \"bulk_insert_overwrite_concurrency\": {\n \"display_name\": \"Bulk Insert Overwrite Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations that overwrite existing records.\",\n \"advanced\": True,\n },\n \"bulk_delete_concurrency\": {\n \"display_name\": \"Bulk Delete Concurrency\",\n \"info\": \"Optional concurrency level for bulk delete operations.\",\n \"advanced\": True,\n },\n \"setup_mode\": {\n \"display_name\": \"Setup Mode\",\n \"info\": \"Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.\",\n \"options\": [\"Sync\", \"Async\", \"Off\"],\n \"advanced\": True,\n },\n \"pre_delete_collection\": {\n \"display_name\": \"Pre Delete Collection\",\n \"info\": \"Boolean flag to determine whether to delete the collection before creating a new one.\",\n \"advanced\": True,\n },\n \"metadata_indexing_include\": {\n \"display_name\": \"Metadata Indexing Include\",\n \"info\": \"Optional list of metadata fields to include in the indexing.\",\n \"advanced\": True,\n },\n \"metadata_indexing_exclude\": {\n \"display_name\": \"Metadata Indexing Exclude\",\n \"info\": \"Optional list of metadata fields to exclude from the indexing.\",\n \"advanced\": True,\n },\n \"collection_indexing_policy\": {\n \"display_name\": \"Collection Indexing Policy\",\n \"info\": \"Optional dictionary defining the indexing policy for the collection.\",\n \"advanced\": True,\n },\n \"number_of_results\": {\n \"display_name\": \"Number of Results\",\n \"info\": \"Number of results to return.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n embedding: Embeddings,\n collection_name: str,\n input_value: Text,\n token: str,\n api_endpoint: str,\n search_type: str = \"Similarity\",\n number_of_results: int = 4,\n namespace: Optional[str] = None,\n metric: Optional[str] = None,\n batch_size: Optional[int] = None,\n bulk_insert_batch_concurrency: Optional[int] = None,\n bulk_insert_overwrite_concurrency: Optional[int] = None,\n bulk_delete_concurrency: Optional[int] = None,\n setup_mode: str = \"Sync\",\n pre_delete_collection: bool = False,\n metadata_indexing_include: Optional[List[str]] = None,\n metadata_indexing_exclude: Optional[List[str]] = None,\n collection_indexing_policy: Optional[dict] = None,\n ) -> List[Record]:\n vector_store = AstraDBVectorStoreComponent().build(\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n try:\n return self.search_with_vector_store(input_value, search_type, vector_store, k=number_of_results)\n except KeyError as e:\n if \"content\" in str(e):\n raise ValueError(\n \"You should ingest data through Langflow (or LangChain) to query it in Langflow. Your collection does not contain a field name 'content'.\"\n )\n else:\n raise e\n", "fileTypes": [], "file_path": "", "password": false, @@ -1893,7 +1889,7 @@ "display_name": "Collection Name", "advanced": false, "dynamic": false, - "info": "The name of the collection within AstraDB where the vectors will be stored.", + "info": "The name of the collection within Astra DB where the vectors will be stored.", "load_from_db": false, "title_case": false, "input_types": [ @@ -1978,7 +1974,7 @@ "display_name": "Namespace", "advanced": true, "dynamic": false, - "info": "Optional namespace within AstraDB to use for the collection.", + "info": "Optional namespace within Astra DB to use for the collection.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2090,7 +2086,7 @@ "display_name": "Token", "advanced": false, "dynamic": false, - "info": "Authentication token for accessing AstraDB.", + "info": "Authentication token for accessing Astra DB.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2100,12 +2096,12 @@ }, "_type": "CustomComponent" }, - "description": "Searches an existing AstraDB Vector Store.", + "description": "Searches an existing Astra DB Vector Store.", "icon": "AstraDB", "base_classes": [ "Record" ], - "display_name": "AstraDB Search", + "display_name": "Astra DB Search", "documentation": "", "custom_fields": { "embedding": null, @@ -2213,7 +2209,7 @@ "display_name": "API Endpoint", "advanced": false, "dynamic": false, - "info": "API endpoint URL for the AstraDB service.", + "info": "API endpoint URL for the Astra DB service.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2300,7 +2296,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import List, Optional\n\nfrom langchain_astradb import AstraDBVectorStore\nfrom langchain_astradb.utils.astradb import SetupMode\n\nfrom langflow.custom import CustomComponent\nfrom langflow.field_typing import Embeddings, VectorStore\nfrom langflow.schema import Record\n\n\nclass AstraDBVectorStoreComponent(CustomComponent):\n display_name = \"AstraDB\"\n description = \"Builds or loads an AstraDB Vector Store.\"\n icon = \"AstraDB\"\n field_order = [\"token\", \"api_endpoint\", \"collection_name\", \"inputs\", \"embedding\"]\n\n def build_config(self):\n return {\n \"inputs\": {\n \"display_name\": \"Inputs\",\n \"info\": \"Optional list of records to be processed and stored in the vector store.\",\n },\n \"embedding\": {\"display_name\": \"Embedding\", \"info\": \"Embedding to use\"},\n \"collection_name\": {\n \"display_name\": \"Collection Name\",\n \"info\": \"The name of the collection within AstraDB where the vectors will be stored.\",\n },\n \"token\": {\n \"display_name\": \"Token\",\n \"info\": \"Authentication token for accessing AstraDB.\",\n \"password\": True,\n },\n \"api_endpoint\": {\n \"display_name\": \"API Endpoint\",\n \"info\": \"API endpoint URL for the AstraDB service.\",\n },\n \"namespace\": {\n \"display_name\": \"Namespace\",\n \"info\": \"Optional namespace within AstraDB to use for the collection.\",\n \"advanced\": True,\n },\n \"metric\": {\n \"display_name\": \"Metric\",\n \"info\": \"Optional distance metric for vector comparisons in the vector store.\",\n \"advanced\": True,\n },\n \"batch_size\": {\n \"display_name\": \"Batch Size\",\n \"info\": \"Optional number of records to process in a single batch.\",\n \"advanced\": True,\n },\n \"bulk_insert_batch_concurrency\": {\n \"display_name\": \"Bulk Insert Batch Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations.\",\n \"advanced\": True,\n },\n \"bulk_insert_overwrite_concurrency\": {\n \"display_name\": \"Bulk Insert Overwrite Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations that overwrite existing records.\",\n \"advanced\": True,\n },\n \"bulk_delete_concurrency\": {\n \"display_name\": \"Bulk Delete Concurrency\",\n \"info\": \"Optional concurrency level for bulk delete operations.\",\n \"advanced\": True,\n },\n \"setup_mode\": {\n \"display_name\": \"Setup Mode\",\n \"info\": \"Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.\",\n \"options\": [\"Sync\", \"Async\", \"Off\"],\n \"advanced\": True,\n },\n \"pre_delete_collection\": {\n \"display_name\": \"Pre Delete Collection\",\n \"info\": \"Boolean flag to determine whether to delete the collection before creating a new one.\",\n \"advanced\": True,\n },\n \"metadata_indexing_include\": {\n \"display_name\": \"Metadata Indexing Include\",\n \"info\": \"Optional list of metadata fields to include in the indexing.\",\n \"advanced\": True,\n },\n \"metadata_indexing_exclude\": {\n \"display_name\": \"Metadata Indexing Exclude\",\n \"info\": \"Optional list of metadata fields to exclude from the indexing.\",\n \"advanced\": True,\n },\n \"collection_indexing_policy\": {\n \"display_name\": \"Collection Indexing Policy\",\n \"info\": \"Optional dictionary defining the indexing policy for the collection.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n embedding: Embeddings,\n token: str,\n api_endpoint: str,\n collection_name: str,\n inputs: Optional[List[Record]] = None,\n namespace: Optional[str] = None,\n metric: Optional[str] = None,\n batch_size: Optional[int] = None,\n bulk_insert_batch_concurrency: Optional[int] = None,\n bulk_insert_overwrite_concurrency: Optional[int] = None,\n bulk_delete_concurrency: Optional[int] = None,\n setup_mode: str = \"Async\",\n pre_delete_collection: bool = False,\n metadata_indexing_include: Optional[List[str]] = None,\n metadata_indexing_exclude: Optional[List[str]] = None,\n collection_indexing_policy: Optional[dict] = None,\n ) -> VectorStore:\n try:\n setup_mode_value = SetupMode[setup_mode.upper()]\n except KeyError:\n raise ValueError(f\"Invalid setup mode: {setup_mode}\")\n if inputs:\n documents = [_input.to_lc_document() for _input in inputs]\n\n vector_store = AstraDBVectorStore.from_documents(\n documents=documents,\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode_value,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n else:\n vector_store = AstraDBVectorStore(\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode_value,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n\n return vector_store\n", + "value": "from typing import List, Optional\n\nfrom langchain_astradb import AstraDBVectorStore\nfrom langchain_astradb.utils.astradb import SetupMode\n\nfrom langflow.custom import CustomComponent\nfrom langflow.field_typing import Embeddings, VectorStore\nfrom langflow.schema import Record\n\n\nclass AstraDBVectorStoreComponent(CustomComponent):\n display_name = \"Astra DB\"\n description = \"Builds or loads an Astra DB Vector Store.\"\n icon = \"AstraDB\"\n field_order = [\"token\", \"api_endpoint\", \"collection_name\", \"inputs\", \"embedding\"]\n\n def build_config(self):\n return {\n \"inputs\": {\n \"display_name\": \"Inputs\",\n \"info\": \"Optional list of records to be processed and stored in the vector store.\",\n },\n \"embedding\": {\"display_name\": \"Embedding\", \"info\": \"Embedding to use\"},\n \"collection_name\": {\n \"display_name\": \"Collection Name\",\n \"info\": \"The name of the collection within Astra DB where the vectors will be stored.\",\n },\n \"token\": {\n \"display_name\": \"Token\",\n \"info\": \"Authentication token for accessing Astra DB.\",\n \"password\": True,\n },\n \"api_endpoint\": {\n \"display_name\": \"API Endpoint\",\n \"info\": \"API endpoint URL for the Astra DB service.\",\n },\n \"namespace\": {\n \"display_name\": \"Namespace\",\n \"info\": \"Optional namespace within Astra DB to use for the collection.\",\n \"advanced\": True,\n },\n \"metric\": {\n \"display_name\": \"Metric\",\n \"info\": \"Optional distance metric for vector comparisons in the vector store.\",\n \"advanced\": True,\n },\n \"batch_size\": {\n \"display_name\": \"Batch Size\",\n \"info\": \"Optional number of records to process in a single batch.\",\n \"advanced\": True,\n },\n \"bulk_insert_batch_concurrency\": {\n \"display_name\": \"Bulk Insert Batch Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations.\",\n \"advanced\": True,\n },\n \"bulk_insert_overwrite_concurrency\": {\n \"display_name\": \"Bulk Insert Overwrite Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations that overwrite existing records.\",\n \"advanced\": True,\n },\n \"bulk_delete_concurrency\": {\n \"display_name\": \"Bulk Delete Concurrency\",\n \"info\": \"Optional concurrency level for bulk delete operations.\",\n \"advanced\": True,\n },\n \"setup_mode\": {\n \"display_name\": \"Setup Mode\",\n \"info\": \"Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.\",\n \"options\": [\"Sync\", \"Async\", \"Off\"],\n \"advanced\": True,\n },\n \"pre_delete_collection\": {\n \"display_name\": \"Pre Delete Collection\",\n \"info\": \"Boolean flag to determine whether to delete the collection before creating a new one.\",\n \"advanced\": True,\n },\n \"metadata_indexing_include\": {\n \"display_name\": \"Metadata Indexing Include\",\n \"info\": \"Optional list of metadata fields to include in the indexing.\",\n \"advanced\": True,\n },\n \"metadata_indexing_exclude\": {\n \"display_name\": \"Metadata Indexing Exclude\",\n \"info\": \"Optional list of metadata fields to exclude from the indexing.\",\n \"advanced\": True,\n },\n \"collection_indexing_policy\": {\n \"display_name\": \"Collection Indexing Policy\",\n \"info\": \"Optional dictionary defining the indexing policy for the collection.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n embedding: Embeddings,\n token: str,\n api_endpoint: str,\n collection_name: str,\n inputs: Optional[List[Record]] = None,\n namespace: Optional[str] = None,\n metric: Optional[str] = None,\n batch_size: Optional[int] = None,\n bulk_insert_batch_concurrency: Optional[int] = None,\n bulk_insert_overwrite_concurrency: Optional[int] = None,\n bulk_delete_concurrency: Optional[int] = None,\n setup_mode: str = \"Async\",\n pre_delete_collection: bool = False,\n metadata_indexing_include: Optional[List[str]] = None,\n metadata_indexing_exclude: Optional[List[str]] = None,\n collection_indexing_policy: Optional[dict] = None,\n ) -> VectorStore:\n try:\n setup_mode_value = SetupMode[setup_mode.upper()]\n except KeyError:\n raise ValueError(f\"Invalid setup mode: {setup_mode}\")\n if inputs:\n documents = [_input.to_lc_document() for _input in inputs]\n\n vector_store = AstraDBVectorStore.from_documents(\n documents=documents,\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode_value,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n else:\n vector_store = AstraDBVectorStore(\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode_value,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n\n return vector_store\n", "fileTypes": [], "file_path": "", "password": false, @@ -2343,7 +2339,7 @@ "display_name": "Collection Name", "advanced": false, "dynamic": false, - "info": "The name of the collection within AstraDB where the vectors will be stored.", + "info": "The name of the collection within Astra DB where the vectors will be stored.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2428,7 +2424,7 @@ "display_name": "Namespace", "advanced": true, "dynamic": false, - "info": "Optional namespace within AstraDB to use for the collection.", + "info": "Optional namespace within Astra DB to use for the collection.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2495,7 +2491,7 @@ "display_name": "Token", "advanced": false, "dynamic": false, - "info": "Authentication token for accessing AstraDB.", + "info": "Authentication token for accessing Astra DB.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2505,12 +2501,12 @@ }, "_type": "CustomComponent" }, - "description": "Builds or loads an AstraDB Vector Store.", + "description": "Builds or loads an Astra DB Vector Store.", "icon": "AstraDB", "base_classes": [ "VectorStore" ], - "display_name": "AstraDB", + "display_name": "Astra DB", "documentation": "", "custom_fields": { "embedding": null, @@ -3400,8 +3396,8 @@ "zoom": 0.2687057134854984 } }, - "description": "This project give you both Ingestion and RAG in a single file. You'll need to visit https://astra.datastax.com/ to create an AstraDB instance, your Token and grab an API Endpoint.\nRunning this project requires you to add a file in the Files component, then define a Collection Name and click on the Play icon on the AstraDB component. \n\nAfter the ingestion ends you are ready to click on the Run button at the lower left corner and start asking questions about your data.", - "name": "AstraDB RAG Flows", + "description": "Visit https://pre-release.langflow.org/guides/rag-with-astradb for a detailed guide of this project.\nThis project give you both Ingestion and RAG in a single file. You'll need to visit https://astra.datastax.com/ to create an Astra DB instance, your Token and grab an API Endpoint.\nRunning this project requires you to add a file in the Files component, then define a Collection Name and click on the Play icon on the Astra DB component. \n\nAfter the ingestion ends you are ready to click on the Run button at the lower left corner and start asking questions about your data.", + "name": "Vector Store RAG", "last_tested_version": "1.0.0a0", "is_component": false } \ No newline at end of file diff --git a/src/backend/base/langflow/components/vectorsearch/AstraDBSearch.py b/src/backend/base/langflow/components/vectorsearch/AstraDBSearch.py index e2d8e61e9..83ed42daf 100644 --- a/src/backend/base/langflow/components/vectorsearch/AstraDBSearch.py +++ b/src/backend/base/langflow/components/vectorsearch/AstraDBSearch.py @@ -7,8 +7,8 @@ from langflow.schema import Record class AstraDBSearchComponent(LCVectorStoreComponent): - display_name = "AstraDB Search" - description = "Searches an existing AstraDB Vector Store." + display_name = "Astra DB Search" + description = "Searches an existing Astra DB Vector Store." icon = "AstraDB" field_order = ["token", "api_endpoint", "collection_name", "input_value", "embedding"] @@ -25,20 +25,20 @@ class AstraDBSearchComponent(LCVectorStoreComponent): "embedding": {"display_name": "Embedding", "info": "Embedding to use"}, "collection_name": { "display_name": "Collection Name", - "info": "The name of the collection within AstraDB where the vectors will be stored.", + "info": "The name of the collection within Astra DB where the vectors will be stored.", }, "token": { "display_name": "Token", - "info": "Authentication token for accessing AstraDB.", + "info": "Authentication token for accessing Astra DB.", "password": True, }, "api_endpoint": { "display_name": "API Endpoint", - "info": "API endpoint URL for the AstraDB service.", + "info": "API endpoint URL for the Astra DB service.", }, "namespace": { "display_name": "Namespace", - "info": "Optional namespace within AstraDB to use for the collection.", + "info": "Optional namespace within Astra DB to use for the collection.", "advanced": True, }, "metric": { diff --git a/src/backend/base/langflow/components/vectorstores/AstraDB.py b/src/backend/base/langflow/components/vectorstores/AstraDB.py index 6dcbe3d57..460d1f6db 100644 --- a/src/backend/base/langflow/components/vectorstores/AstraDB.py +++ b/src/backend/base/langflow/components/vectorstores/AstraDB.py @@ -9,8 +9,8 @@ from langflow.schema import Record class AstraDBVectorStoreComponent(CustomComponent): - display_name = "AstraDB" - description = "Builds or loads an AstraDB Vector Store." + display_name = "Astra DB" + description = "Builds or loads an Astra DB Vector Store." icon = "AstraDB" field_order = ["token", "api_endpoint", "collection_name", "inputs", "embedding"] @@ -23,20 +23,20 @@ class AstraDBVectorStoreComponent(CustomComponent): "embedding": {"display_name": "Embedding", "info": "Embedding to use"}, "collection_name": { "display_name": "Collection Name", - "info": "The name of the collection within AstraDB where the vectors will be stored.", + "info": "The name of the collection within Astra DB where the vectors will be stored.", }, "token": { "display_name": "Token", - "info": "Authentication token for accessing AstraDB.", + "info": "Authentication token for accessing Astra DB.", "password": True, }, "api_endpoint": { "display_name": "API Endpoint", - "info": "API endpoint URL for the AstraDB service.", + "info": "API endpoint URL for the Astra DB service.", }, "namespace": { "display_name": "Namespace", - "info": "Optional namespace within AstraDB to use for the collection.", + "info": "Optional namespace within Astra DB to use for the collection.", "advanced": True, }, "metric": { diff --git a/src/backend/base/langflow/initial_setup/starter_projects/AstraDB-RAG-Flows.json b/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json similarity index 93% rename from src/backend/base/langflow/initial_setup/starter_projects/AstraDB-RAG-Flows.json rename to src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json index f80a20d15..8068ea4a9 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/AstraDB-RAG-Flows.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json @@ -1759,7 +1759,7 @@ "display_name": "API Endpoint", "advanced": false, "dynamic": false, - "info": "API endpoint URL for the AstraDB service.", + "info": "API endpoint URL for the Astra DB service.", "load_from_db": false, "title_case": false, "input_types": [ @@ -1846,7 +1846,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import List, Optional\n\nfrom langflow.components.vectorstores.AstraDB import AstraDBVectorStoreComponent\nfrom langflow.components.vectorstores.base.model import LCVectorStoreComponent\nfrom langflow.field_typing import Embeddings, Text\nfrom langflow.schema import Record\n\n\nclass AstraDBSearchComponent(LCVectorStoreComponent):\n display_name = \"AstraDB Search\"\n description = \"Searches an existing AstraDB Vector Store.\"\n icon = \"AstraDB\"\n field_order = [\"token\", \"api_endpoint\", \"collection_name\", \"input_value\", \"embedding\"]\n\n def build_config(self):\n return {\n \"search_type\": {\n \"display_name\": \"Search Type\",\n \"options\": [\"Similarity\", \"MMR\"],\n },\n \"input_value\": {\n \"display_name\": \"Input Value\",\n \"info\": \"Input value to search\",\n },\n \"embedding\": {\"display_name\": \"Embedding\", \"info\": \"Embedding to use\"},\n \"collection_name\": {\n \"display_name\": \"Collection Name\",\n \"info\": \"The name of the collection within AstraDB where the vectors will be stored.\",\n },\n \"token\": {\n \"display_name\": \"Token\",\n \"info\": \"Authentication token for accessing AstraDB.\",\n \"password\": True,\n },\n \"api_endpoint\": {\n \"display_name\": \"API Endpoint\",\n \"info\": \"API endpoint URL for the AstraDB service.\",\n },\n \"namespace\": {\n \"display_name\": \"Namespace\",\n \"info\": \"Optional namespace within AstraDB to use for the collection.\",\n \"advanced\": True,\n },\n \"metric\": {\n \"display_name\": \"Metric\",\n \"info\": \"Optional distance metric for vector comparisons in the vector store.\",\n \"advanced\": True,\n },\n \"batch_size\": {\n \"display_name\": \"Batch Size\",\n \"info\": \"Optional number of records to process in a single batch.\",\n \"advanced\": True,\n },\n \"bulk_insert_batch_concurrency\": {\n \"display_name\": \"Bulk Insert Batch Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations.\",\n \"advanced\": True,\n },\n \"bulk_insert_overwrite_concurrency\": {\n \"display_name\": \"Bulk Insert Overwrite Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations that overwrite existing records.\",\n \"advanced\": True,\n },\n \"bulk_delete_concurrency\": {\n \"display_name\": \"Bulk Delete Concurrency\",\n \"info\": \"Optional concurrency level for bulk delete operations.\",\n \"advanced\": True,\n },\n \"setup_mode\": {\n \"display_name\": \"Setup Mode\",\n \"info\": \"Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.\",\n \"options\": [\"Sync\", \"Async\", \"Off\"],\n \"advanced\": True,\n },\n \"pre_delete_collection\": {\n \"display_name\": \"Pre Delete Collection\",\n \"info\": \"Boolean flag to determine whether to delete the collection before creating a new one.\",\n \"advanced\": True,\n },\n \"metadata_indexing_include\": {\n \"display_name\": \"Metadata Indexing Include\",\n \"info\": \"Optional list of metadata fields to include in the indexing.\",\n \"advanced\": True,\n },\n \"metadata_indexing_exclude\": {\n \"display_name\": \"Metadata Indexing Exclude\",\n \"info\": \"Optional list of metadata fields to exclude from the indexing.\",\n \"advanced\": True,\n },\n \"collection_indexing_policy\": {\n \"display_name\": \"Collection Indexing Policy\",\n \"info\": \"Optional dictionary defining the indexing policy for the collection.\",\n \"advanced\": True,\n },\n \"number_of_results\": {\n \"display_name\": \"Number of Results\",\n \"info\": \"Number of results to return.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n embedding: Embeddings,\n collection_name: str,\n input_value: Text,\n token: str,\n api_endpoint: str,\n search_type: str = \"Similarity\",\n number_of_results: int = 4,\n namespace: Optional[str] = None,\n metric: Optional[str] = None,\n batch_size: Optional[int] = None,\n bulk_insert_batch_concurrency: Optional[int] = None,\n bulk_insert_overwrite_concurrency: Optional[int] = None,\n bulk_delete_concurrency: Optional[int] = None,\n setup_mode: str = \"Sync\",\n pre_delete_collection: bool = False,\n metadata_indexing_include: Optional[List[str]] = None,\n metadata_indexing_exclude: Optional[List[str]] = None,\n collection_indexing_policy: Optional[dict] = None,\n ) -> List[Record]:\n vector_store = AstraDBVectorStoreComponent().build(\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n try:\n return self.search_with_vector_store(input_value, search_type, vector_store, k=number_of_results)\n except KeyError as e:\n if \"content\" in str(e):\n raise ValueError(\n \"You should ingest data through Langflow (or LangChain) to query it in Langflow. Your collection does not contain a field name 'content'.\"\n )\n else:\n raise e\n", + "value": "from typing import List, Optional\n\nfrom langflow.components.vectorstores.AstraDB import AstraDBVectorStoreComponent\nfrom langflow.components.vectorstores.base.model import LCVectorStoreComponent\nfrom langflow.field_typing import Embeddings, Text\nfrom langflow.schema import Record\n\n\nclass AstraDBSearchComponent(LCVectorStoreComponent):\n display_name = \"Astra DB Search\"\n description = \"Searches an existing Astra DB Vector Store.\"\n icon = \"AstraDB\"\n field_order = [\"token\", \"api_endpoint\", \"collection_name\", \"input_value\", \"embedding\"]\n\n def build_config(self):\n return {\n \"search_type\": {\n \"display_name\": \"Search Type\",\n \"options\": [\"Similarity\", \"MMR\"],\n },\n \"input_value\": {\n \"display_name\": \"Input Value\",\n \"info\": \"Input value to search\",\n },\n \"embedding\": {\"display_name\": \"Embedding\", \"info\": \"Embedding to use\"},\n \"collection_name\": {\n \"display_name\": \"Collection Name\",\n \"info\": \"The name of the collection within Astra DB where the vectors will be stored.\",\n },\n \"token\": {\n \"display_name\": \"Token\",\n \"info\": \"Authentication token for accessing Astra DB.\",\n \"password\": True,\n },\n \"api_endpoint\": {\n \"display_name\": \"API Endpoint\",\n \"info\": \"API endpoint URL for the Astra DB service.\",\n },\n \"namespace\": {\n \"display_name\": \"Namespace\",\n \"info\": \"Optional namespace within Astra DB to use for the collection.\",\n \"advanced\": True,\n },\n \"metric\": {\n \"display_name\": \"Metric\",\n \"info\": \"Optional distance metric for vector comparisons in the vector store.\",\n \"advanced\": True,\n },\n \"batch_size\": {\n \"display_name\": \"Batch Size\",\n \"info\": \"Optional number of records to process in a single batch.\",\n \"advanced\": True,\n },\n \"bulk_insert_batch_concurrency\": {\n \"display_name\": \"Bulk Insert Batch Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations.\",\n \"advanced\": True,\n },\n \"bulk_insert_overwrite_concurrency\": {\n \"display_name\": \"Bulk Insert Overwrite Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations that overwrite existing records.\",\n \"advanced\": True,\n },\n \"bulk_delete_concurrency\": {\n \"display_name\": \"Bulk Delete Concurrency\",\n \"info\": \"Optional concurrency level for bulk delete operations.\",\n \"advanced\": True,\n },\n \"setup_mode\": {\n \"display_name\": \"Setup Mode\",\n \"info\": \"Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.\",\n \"options\": [\"Sync\", \"Async\", \"Off\"],\n \"advanced\": True,\n },\n \"pre_delete_collection\": {\n \"display_name\": \"Pre Delete Collection\",\n \"info\": \"Boolean flag to determine whether to delete the collection before creating a new one.\",\n \"advanced\": True,\n },\n \"metadata_indexing_include\": {\n \"display_name\": \"Metadata Indexing Include\",\n \"info\": \"Optional list of metadata fields to include in the indexing.\",\n \"advanced\": True,\n },\n \"metadata_indexing_exclude\": {\n \"display_name\": \"Metadata Indexing Exclude\",\n \"info\": \"Optional list of metadata fields to exclude from the indexing.\",\n \"advanced\": True,\n },\n \"collection_indexing_policy\": {\n \"display_name\": \"Collection Indexing Policy\",\n \"info\": \"Optional dictionary defining the indexing policy for the collection.\",\n \"advanced\": True,\n },\n \"number_of_results\": {\n \"display_name\": \"Number of Results\",\n \"info\": \"Number of results to return.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n embedding: Embeddings,\n collection_name: str,\n input_value: Text,\n token: str,\n api_endpoint: str,\n search_type: str = \"Similarity\",\n number_of_results: int = 4,\n namespace: Optional[str] = None,\n metric: Optional[str] = None,\n batch_size: Optional[int] = None,\n bulk_insert_batch_concurrency: Optional[int] = None,\n bulk_insert_overwrite_concurrency: Optional[int] = None,\n bulk_delete_concurrency: Optional[int] = None,\n setup_mode: str = \"Sync\",\n pre_delete_collection: bool = False,\n metadata_indexing_include: Optional[List[str]] = None,\n metadata_indexing_exclude: Optional[List[str]] = None,\n collection_indexing_policy: Optional[dict] = None,\n ) -> List[Record]:\n vector_store = AstraDBVectorStoreComponent().build(\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n try:\n return self.search_with_vector_store(input_value, search_type, vector_store, k=number_of_results)\n except KeyError as e:\n if \"content\" in str(e):\n raise ValueError(\n \"You should ingest data through Langflow (or LangChain) to query it in Langflow. Your collection does not contain a field name 'content'.\"\n )\n else:\n raise e\n", "fileTypes": [], "file_path": "", "password": false, @@ -1889,7 +1889,7 @@ "display_name": "Collection Name", "advanced": false, "dynamic": false, - "info": "The name of the collection within AstraDB where the vectors will be stored.", + "info": "The name of the collection within Astra DB where the vectors will be stored.", "load_from_db": false, "title_case": false, "input_types": [ @@ -1974,7 +1974,7 @@ "display_name": "Namespace", "advanced": true, "dynamic": false, - "info": "Optional namespace within AstraDB to use for the collection.", + "info": "Optional namespace within Astra DB to use for the collection.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2086,7 +2086,7 @@ "display_name": "Token", "advanced": false, "dynamic": false, - "info": "Authentication token for accessing AstraDB.", + "info": "Authentication token for accessing Astra DB.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2096,12 +2096,12 @@ }, "_type": "CustomComponent" }, - "description": "Searches an existing AstraDB Vector Store.", + "description": "Searches an existing Astra DB Vector Store.", "icon": "AstraDB", "base_classes": [ "Record" ], - "display_name": "AstraDB Search", + "display_name": "Astra DB Search", "documentation": "", "custom_fields": { "embedding": null, @@ -2209,7 +2209,7 @@ "display_name": "API Endpoint", "advanced": false, "dynamic": false, - "info": "API endpoint URL for the AstraDB service.", + "info": "API endpoint URL for the Astra DB service.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2296,7 +2296,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import List, Optional\n\nfrom langchain_astradb import AstraDBVectorStore\nfrom langchain_astradb.utils.astradb import SetupMode\n\nfrom langflow.custom import CustomComponent\nfrom langflow.field_typing import Embeddings, VectorStore\nfrom langflow.schema import Record\n\n\nclass AstraDBVectorStoreComponent(CustomComponent):\n display_name = \"AstraDB\"\n description = \"Builds or loads an AstraDB Vector Store.\"\n icon = \"AstraDB\"\n field_order = [\"token\", \"api_endpoint\", \"collection_name\", \"inputs\", \"embedding\"]\n\n def build_config(self):\n return {\n \"inputs\": {\n \"display_name\": \"Inputs\",\n \"info\": \"Optional list of records to be processed and stored in the vector store.\",\n },\n \"embedding\": {\"display_name\": \"Embedding\", \"info\": \"Embedding to use\"},\n \"collection_name\": {\n \"display_name\": \"Collection Name\",\n \"info\": \"The name of the collection within AstraDB where the vectors will be stored.\",\n },\n \"token\": {\n \"display_name\": \"Token\",\n \"info\": \"Authentication token for accessing AstraDB.\",\n \"password\": True,\n },\n \"api_endpoint\": {\n \"display_name\": \"API Endpoint\",\n \"info\": \"API endpoint URL for the AstraDB service.\",\n },\n \"namespace\": {\n \"display_name\": \"Namespace\",\n \"info\": \"Optional namespace within AstraDB to use for the collection.\",\n \"advanced\": True,\n },\n \"metric\": {\n \"display_name\": \"Metric\",\n \"info\": \"Optional distance metric for vector comparisons in the vector store.\",\n \"advanced\": True,\n },\n \"batch_size\": {\n \"display_name\": \"Batch Size\",\n \"info\": \"Optional number of records to process in a single batch.\",\n \"advanced\": True,\n },\n \"bulk_insert_batch_concurrency\": {\n \"display_name\": \"Bulk Insert Batch Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations.\",\n \"advanced\": True,\n },\n \"bulk_insert_overwrite_concurrency\": {\n \"display_name\": \"Bulk Insert Overwrite Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations that overwrite existing records.\",\n \"advanced\": True,\n },\n \"bulk_delete_concurrency\": {\n \"display_name\": \"Bulk Delete Concurrency\",\n \"info\": \"Optional concurrency level for bulk delete operations.\",\n \"advanced\": True,\n },\n \"setup_mode\": {\n \"display_name\": \"Setup Mode\",\n \"info\": \"Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.\",\n \"options\": [\"Sync\", \"Async\", \"Off\"],\n \"advanced\": True,\n },\n \"pre_delete_collection\": {\n \"display_name\": \"Pre Delete Collection\",\n \"info\": \"Boolean flag to determine whether to delete the collection before creating a new one.\",\n \"advanced\": True,\n },\n \"metadata_indexing_include\": {\n \"display_name\": \"Metadata Indexing Include\",\n \"info\": \"Optional list of metadata fields to include in the indexing.\",\n \"advanced\": True,\n },\n \"metadata_indexing_exclude\": {\n \"display_name\": \"Metadata Indexing Exclude\",\n \"info\": \"Optional list of metadata fields to exclude from the indexing.\",\n \"advanced\": True,\n },\n \"collection_indexing_policy\": {\n \"display_name\": \"Collection Indexing Policy\",\n \"info\": \"Optional dictionary defining the indexing policy for the collection.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n embedding: Embeddings,\n token: str,\n api_endpoint: str,\n collection_name: str,\n inputs: Optional[List[Record]] = None,\n namespace: Optional[str] = None,\n metric: Optional[str] = None,\n batch_size: Optional[int] = None,\n bulk_insert_batch_concurrency: Optional[int] = None,\n bulk_insert_overwrite_concurrency: Optional[int] = None,\n bulk_delete_concurrency: Optional[int] = None,\n setup_mode: str = \"Async\",\n pre_delete_collection: bool = False,\n metadata_indexing_include: Optional[List[str]] = None,\n metadata_indexing_exclude: Optional[List[str]] = None,\n collection_indexing_policy: Optional[dict] = None,\n ) -> VectorStore:\n try:\n setup_mode_value = SetupMode[setup_mode.upper()]\n except KeyError:\n raise ValueError(f\"Invalid setup mode: {setup_mode}\")\n if inputs:\n documents = [_input.to_lc_document() for _input in inputs]\n\n vector_store = AstraDBVectorStore.from_documents(\n documents=documents,\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode_value,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n else:\n vector_store = AstraDBVectorStore(\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode_value,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n\n return vector_store\n", + "value": "from typing import List, Optional\n\nfrom langchain_astradb import AstraDBVectorStore\nfrom langchain_astradb.utils.astradb import SetupMode\n\nfrom langflow.custom import CustomComponent\nfrom langflow.field_typing import Embeddings, VectorStore\nfrom langflow.schema import Record\n\n\nclass AstraDBVectorStoreComponent(CustomComponent):\n display_name = \"Astra DB\"\n description = \"Builds or loads an Astra DB Vector Store.\"\n icon = \"AstraDB\"\n field_order = [\"token\", \"api_endpoint\", \"collection_name\", \"inputs\", \"embedding\"]\n\n def build_config(self):\n return {\n \"inputs\": {\n \"display_name\": \"Inputs\",\n \"info\": \"Optional list of records to be processed and stored in the vector store.\",\n },\n \"embedding\": {\"display_name\": \"Embedding\", \"info\": \"Embedding to use\"},\n \"collection_name\": {\n \"display_name\": \"Collection Name\",\n \"info\": \"The name of the collection within Astra DB where the vectors will be stored.\",\n },\n \"token\": {\n \"display_name\": \"Token\",\n \"info\": \"Authentication token for accessing Astra DB.\",\n \"password\": True,\n },\n \"api_endpoint\": {\n \"display_name\": \"API Endpoint\",\n \"info\": \"API endpoint URL for the Astra DB service.\",\n },\n \"namespace\": {\n \"display_name\": \"Namespace\",\n \"info\": \"Optional namespace within Astra DB to use for the collection.\",\n \"advanced\": True,\n },\n \"metric\": {\n \"display_name\": \"Metric\",\n \"info\": \"Optional distance metric for vector comparisons in the vector store.\",\n \"advanced\": True,\n },\n \"batch_size\": {\n \"display_name\": \"Batch Size\",\n \"info\": \"Optional number of records to process in a single batch.\",\n \"advanced\": True,\n },\n \"bulk_insert_batch_concurrency\": {\n \"display_name\": \"Bulk Insert Batch Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations.\",\n \"advanced\": True,\n },\n \"bulk_insert_overwrite_concurrency\": {\n \"display_name\": \"Bulk Insert Overwrite Concurrency\",\n \"info\": \"Optional concurrency level for bulk insert operations that overwrite existing records.\",\n \"advanced\": True,\n },\n \"bulk_delete_concurrency\": {\n \"display_name\": \"Bulk Delete Concurrency\",\n \"info\": \"Optional concurrency level for bulk delete operations.\",\n \"advanced\": True,\n },\n \"setup_mode\": {\n \"display_name\": \"Setup Mode\",\n \"info\": \"Configuration mode for setting up the vector store, with options like “Sync”, “Async”, or “Off”.\",\n \"options\": [\"Sync\", \"Async\", \"Off\"],\n \"advanced\": True,\n },\n \"pre_delete_collection\": {\n \"display_name\": \"Pre Delete Collection\",\n \"info\": \"Boolean flag to determine whether to delete the collection before creating a new one.\",\n \"advanced\": True,\n },\n \"metadata_indexing_include\": {\n \"display_name\": \"Metadata Indexing Include\",\n \"info\": \"Optional list of metadata fields to include in the indexing.\",\n \"advanced\": True,\n },\n \"metadata_indexing_exclude\": {\n \"display_name\": \"Metadata Indexing Exclude\",\n \"info\": \"Optional list of metadata fields to exclude from the indexing.\",\n \"advanced\": True,\n },\n \"collection_indexing_policy\": {\n \"display_name\": \"Collection Indexing Policy\",\n \"info\": \"Optional dictionary defining the indexing policy for the collection.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n embedding: Embeddings,\n token: str,\n api_endpoint: str,\n collection_name: str,\n inputs: Optional[List[Record]] = None,\n namespace: Optional[str] = None,\n metric: Optional[str] = None,\n batch_size: Optional[int] = None,\n bulk_insert_batch_concurrency: Optional[int] = None,\n bulk_insert_overwrite_concurrency: Optional[int] = None,\n bulk_delete_concurrency: Optional[int] = None,\n setup_mode: str = \"Async\",\n pre_delete_collection: bool = False,\n metadata_indexing_include: Optional[List[str]] = None,\n metadata_indexing_exclude: Optional[List[str]] = None,\n collection_indexing_policy: Optional[dict] = None,\n ) -> VectorStore:\n try:\n setup_mode_value = SetupMode[setup_mode.upper()]\n except KeyError:\n raise ValueError(f\"Invalid setup mode: {setup_mode}\")\n if inputs:\n documents = [_input.to_lc_document() for _input in inputs]\n\n vector_store = AstraDBVectorStore.from_documents(\n documents=documents,\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode_value,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n else:\n vector_store = AstraDBVectorStore(\n embedding=embedding,\n collection_name=collection_name,\n token=token,\n api_endpoint=api_endpoint,\n namespace=namespace,\n metric=metric,\n batch_size=batch_size,\n bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,\n bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,\n bulk_delete_concurrency=bulk_delete_concurrency,\n setup_mode=setup_mode_value,\n pre_delete_collection=pre_delete_collection,\n metadata_indexing_include=metadata_indexing_include,\n metadata_indexing_exclude=metadata_indexing_exclude,\n collection_indexing_policy=collection_indexing_policy,\n )\n\n return vector_store\n", "fileTypes": [], "file_path": "", "password": false, @@ -2339,7 +2339,7 @@ "display_name": "Collection Name", "advanced": false, "dynamic": false, - "info": "The name of the collection within AstraDB where the vectors will be stored.", + "info": "The name of the collection within Astra DB where the vectors will be stored.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2424,7 +2424,7 @@ "display_name": "Namespace", "advanced": true, "dynamic": false, - "info": "Optional namespace within AstraDB to use for the collection.", + "info": "Optional namespace within Astra DB to use for the collection.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2491,7 +2491,7 @@ "display_name": "Token", "advanced": false, "dynamic": false, - "info": "Authentication token for accessing AstraDB.", + "info": "Authentication token for accessing Astra DB.", "load_from_db": false, "title_case": false, "input_types": [ @@ -2501,12 +2501,12 @@ }, "_type": "CustomComponent" }, - "description": "Builds or loads an AstraDB Vector Store.", + "description": "Builds or loads an Astra DB Vector Store.", "icon": "AstraDB", "base_classes": [ "VectorStore" ], - "display_name": "AstraDB", + "display_name": "Astra DB", "documentation": "", "custom_fields": { "embedding": null, @@ -3396,7 +3396,7 @@ "zoom": 0.2687057134854984 } }, - "description": "Visit https://pre-release.langflow.org/guides/rag-with-astradb for a detailed guide of this project.\nThis project give you both Ingestion and RAG in a single file. You'll need to visit https://astra.datastax.com/ to create an AstraDB instance, your Token and grab an API Endpoint.\nRunning this project requires you to add a file in the Files component, then define a Collection Name and click on the Play icon on the AstraDB component. \n\nAfter the ingestion ends you are ready to click on the Run button at the lower left corner and start asking questions about your data.", + "description": "Visit https://pre-release.langflow.org/guides/rag-with-astradb for a detailed guide of this project.\nThis project give you both Ingestion and RAG in a single file. You'll need to visit https://astra.datastax.com/ to create an Astra DB instance, your Token and grab an API Endpoint.\nRunning this project requires you to add a file in the Files component, then define a Collection Name and click on the Play icon on the Astra DB component. \n\nAfter the ingestion ends you are ready to click on the Run button at the lower left corner and start asking questions about your data.", "name": "Vector Store RAG", "last_tested_version": "1.0.0a0", "is_component": false