From a1fa8ec1b60dfbdf305e45fbec7a02d886ee4e9a Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 19 Nov 2024 09:38:44 -0500 Subject: [PATCH] fix: update Basic Prompting and Vector Store RAG notes (#4644) * refactor: update README content and add OpenAI API key note in Basic Prompting project * cleanup-link * refactor: improve performance of data processing functions * remove-astradb-setup-notenode * text-cleanup * test-linking * pin-notes * pin-load-data * openai-key-locations * missed-comma * resize-basic-prompting-readme * height-and-description-vector-rag * selected-false * height-styling * height * refactor: update README content and add OpenAI API key note in Basic Prompting project * cleanup-link * refactor: improve performance of data processing functions * remove-astradb-setup-notenode * text-cleanup * test-linking * pin-notes * pin-load-data * openai-key-locations * missed-comma * resize-basic-prompting-readme * height-and-description-vector-rag * selected-false * height-styling * height * basic-prompt * style * vector-rag-done --- .../starter_projects/Basic Prompting.json | 45 +- .../starter_projects/Vector Store RAG.json | 805 +++++++++++++----- 2 files changed, 639 insertions(+), 211 deletions(-) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json index ea71c7b74..fc69881b2 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json @@ -404,7 +404,7 @@ "tool_mode": false, "trace_as_input": true, "type": "prompt", - "value": "Answer the user as if you were a pirate." + "value": "Answer the user as if you were a GenAI expert, enthusiastic about helping them get started building something fresh." } }, "tool_mode": false @@ -430,16 +430,16 @@ "data": { "id": "undefined-emdQy", "node": { - "description": "### ✅ Basic System Prompting README\n\nExperiment with AI behavior control using system prompts. \n\n#### Component Overview\n- **Chat Input:** User message entry point\n- **System Message:** Sets AI personality/behavior\n- **OpenAI Model:** Processes both prompts and generates responses\n- **Chat Output:** Displays the AI response in the Playground\n\n#### Quick Start\n- Add your **OpenAI API key** to the **OpenAI Model**\n- Modify the **System Prompt** template to change AI behavior\n- Use the **Playground** to start chatting\n\nThe default prompt makes the AI respond like a pirate! Try changing it to create different AI personalities.\n\nFor more details, check the [system prompting guide](https://docs.langflow.org/guides/system-prompting).", + "description": "## 📖 README\n\nPerform basic prompting with an OpenAI model.\n\n#### Quick Start\n- Add your **OpenAI API key** to the **OpenAI Model**\n- Open the **Playground** to chat with your bot.\n\n#### Next steps:\n Experiment by changing the prompt and the OpenAI model temperature to see how the bot's responses change.", "display_name": "Read Me", "documentation": "", "template": { - "backgroundColor": "blue" + "backgroundColor": "neutral" } } }, "dragging": false, - "height": 561, + "height": 250, "id": "undefined-emdQy", "position": { "x": 66.38770028934243, @@ -450,14 +450,47 @@ "y": 749.744424427066 }, "resizing": false, - "selected": true, + "selected": false, "style": { - "height": 561, + "height": 250, "width": 600 }, "type": "noteNode", "width": 600 }, + { + "data": { + "id": "note-e6K1n", + "node": { + "description": "### 💡 Add your OpenAI API key here 👇", + "display_name": "", + "documentation": "", + "template": { + "backgroundColor": "transparent" + } + }, + "type": "note" + }, + "dragging": false, + "height": 324, + "id": "note-e6K1n", + "position": { + "x": 1075.829573520873, + "y": 657.2057655038416 + }, + "positionAbsolute": { + "x": 1075.829573520873, + "y": 657.2057655038416 + }, + "resizing": false, + "selected": false, + "style": { + "height": 324, + "width": 324 + }, + "type": "noteNode", + "width": 324 + }, { "data": { "description": "Generates text using OpenAI LLMs.", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index cb7aaca02..bc3f45113 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -9,12 +9,17 @@ "dataType": "ParseData", "id": "ParseData-pqaC7", "name": "text", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "context", "id": "Prompt-uqDcR", - "inputTypes": ["Message", "Text"], + "inputTypes": [ + "Message", + "Text" + ], "type": "str" } }, @@ -32,12 +37,16 @@ "dataType": "Prompt", "id": "Prompt-uqDcR", "name": "prompt", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "input_value", "id": "OpenAIModel-H0ANr", - "inputTypes": ["Message"], + "inputTypes": [ + "Message" + ], "type": "str" } }, @@ -55,12 +64,16 @@ "dataType": "OpenAIModel", "id": "OpenAIModel-H0ANr", "name": "text_output", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "input_value", "id": "ChatOutput-a4EPO", - "inputTypes": ["Message"], + "inputTypes": [ + "Message" + ], "type": "str" } }, @@ -78,12 +91,16 @@ "dataType": "AstraDB", "id": "AstraDB-3buPx", "name": "search_results", - "output_types": ["Data"] + "output_types": [ + "Data" + ] }, "targetHandle": { "fieldName": "data", "id": "ParseData-pqaC7", - "inputTypes": ["Data"], + "inputTypes": [ + "Data" + ], "type": "other" } }, @@ -101,12 +118,16 @@ "dataType": "OpenAIEmbeddings", "id": "OpenAIEmbeddings-CeoV9", "name": "embeddings", - "output_types": ["Embeddings"] + "output_types": [ + "Embeddings" + ] }, "targetHandle": { "fieldName": "embedding_model", "id": "AstraDB-3buPx", - "inputTypes": ["Embeddings"], + "inputTypes": [ + "Embeddings" + ], "type": "other" } }, @@ -124,12 +145,16 @@ "dataType": "ChatInput", "id": "ChatInput-nd3Fq", "name": "message", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "search_input", "id": "AstraDB-3buPx", - "inputTypes": ["Message"], + "inputTypes": [ + "Message" + ], "type": "str" } }, @@ -147,12 +172,17 @@ "dataType": "ChatInput", "id": "ChatInput-nd3Fq", "name": "message", - "output_types": ["Message"] + "output_types": [ + "Message" + ] }, "targetHandle": { "fieldName": "question", "id": "Prompt-uqDcR", - "inputTypes": ["Message", "Text"], + "inputTypes": [ + "Message", + "Text" + ], "type": "str" } }, @@ -170,12 +200,16 @@ "dataType": "SplitText", "id": "SplitText-QakmY", "name": "chunks", - "output_types": ["Data"] + "output_types": [ + "Data" + ] }, "targetHandle": { "fieldName": "ingest_data", "id": "AstraDB-laybz", - "inputTypes": ["Data"], + "inputTypes": [ + "Data" + ], "type": "other" } }, @@ -193,12 +227,16 @@ "dataType": "OpenAIEmbeddings", "id": "OpenAIEmbeddings-ANgku", "name": "embeddings", - "output_types": ["Embeddings"] + "output_types": [ + "Embeddings" + ] }, "targetHandle": { "fieldName": "embedding_model", "id": "AstraDB-laybz", - "inputTypes": ["Embeddings"], + "inputTypes": [ + "Embeddings" + ], "type": "other" } }, @@ -215,12 +253,16 @@ "dataType": "File", "id": "File-FJIuH", "name": "data", - "output_types": ["Data"] + "output_types": [ + "Data" + ] }, "targetHandle": { "fieldName": "data_inputs", "id": "SplitText-QakmY", - "inputTypes": ["Data"], + "inputTypes": [ + "Data" + ], "type": "other" } }, @@ -238,7 +280,9 @@ "display_name": "Chat Input", "id": "ChatInput-nd3Fq", "node": { - "base_classes": ["Message"], + "base_classes": [ + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -267,7 +311,9 @@ "method": "message_response", "name": "message", "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" } ], @@ -280,7 +326,9 @@ "display_name": "Background Color", "dynamic": false, "info": "The background color of the icon.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "background_color", @@ -299,7 +347,9 @@ "display_name": "Icon", "dynamic": false, "info": "The icon of the message.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "chat_icon", @@ -376,7 +426,9 @@ "display_name": "Text", "dynamic": false, "info": "Message to be passed as input.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "multiline": true, @@ -396,7 +448,10 @@ "dynamic": false, "info": "Type of sender.", "name": "sender", - "options": ["Machine", "User"], + "options": [ + "Machine", + "User" + ], "placeholder": "", "required": false, "show": true, @@ -410,7 +465,9 @@ "display_name": "Sender Name", "dynamic": false, "info": "Name of the sender.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "sender_name", @@ -428,7 +485,9 @@ "display_name": "Session ID", "dynamic": false, "info": "The session ID of the chat. If empty, the current session ID parameter will be used.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "session_id", @@ -462,7 +521,9 @@ "display_name": "Text Color", "dynamic": false, "info": "The text color of the name", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "text_color", @@ -500,7 +561,9 @@ "display_name": "Parse Data", "id": "ParseData-pqaC7", "node": { - "base_classes": ["Message"], + "base_classes": [ + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -508,7 +571,11 @@ "display_name": "Parse Data", "documentation": "", "edited": false, - "field_order": ["data", "template", "sep"], + "field_order": [ + "data", + "template", + "sep" + ], "frozen": false, "icon": "braces", "legacy": false, @@ -522,7 +589,9 @@ "method": "parse_data", "name": "text", "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" } ], @@ -552,7 +621,9 @@ "display_name": "Data", "dynamic": false, "info": "The data to convert to text.", - "input_types": ["Data"], + "input_types": [ + "Data" + ], "list": false, "name": "data", "placeholder": "", @@ -585,7 +656,9 @@ "display_name": "Template", "dynamic": false, "info": "The template to use for formatting the data. It can contain the keys {text}, {data} or any other key in the Data.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "multiline": true, @@ -624,18 +697,25 @@ "display_name": "Prompt", "id": "Prompt-uqDcR", "node": { - "base_classes": ["Message"], + "base_classes": [ + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": { - "template": ["context", "question"] + "template": [ + "context", + "question" + ] }, "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", "documentation": "", "edited": false, "error": null, - "field_order": ["template"], + "field_order": [ + "template" + ], "frozen": false, "full_path": null, "icon": "prompts", @@ -654,7 +734,9 @@ "method": "build_prompt", "name": "prompt", "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" } ], @@ -687,7 +769,10 @@ "fileTypes": [], "file_path": "", "info": "", - "input_types": ["Message", "Text"], + "input_types": [ + "Message", + "Text" + ], "list": false, "load_from_db": false, "multiline": true, @@ -707,7 +792,10 @@ "fileTypes": [], "file_path": "", "info": "", - "input_types": ["Message", "Text"], + "input_types": [ + "Message", + "Text" + ], "list": false, "load_from_db": false, "multiline": true, @@ -761,7 +849,9 @@ "display_name": "Split Text", "id": "SplitText-QakmY", "node": { - "base_classes": ["Data"], + "base_classes": [ + "Data" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -788,7 +878,9 @@ "method": "split_text", "name": "chunks", "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], @@ -848,7 +940,9 @@ "display_name": "Data Inputs", "dynamic": false, "info": "The data to split.", - "input_types": ["Data"], + "input_types": [ + "Data" + ], "list": true, "name": "data_inputs", "placeholder": "", @@ -864,7 +958,9 @@ "display_name": "Separator", "dynamic": false, "info": "The character to split on. Defaults to newline.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "separator", @@ -898,108 +994,77 @@ }, { "data": { - "id": "note-i0pUr", + "id": "note-UrQ0p", "node": { - "description": "## 📄 Load Data Flow\n\n**Purpose**: This flow is designed to ingest local files and store their contents as vectors in the database.\n\n**Steps to Use:**\n\n1. **Upload File**: Use the \"File\" component to upload your data file. Ensure the file format is supported (e.g., `txt`, `pdf`).\n2. **Text Splitting**: The \"Split Text\" component will automatically segment your document into smaller, manageable chunks for efficient processing.\n3. **Generate Embeddings**: OpenAI models will convert text chunks into vector representations that can be indexed in Astra DB.\n4. **Store Vectors**: The \"Astra DB\" component completes the process by saving these vectors, making them ready for retrieval and search operations.\n\n**Quick Tips**:\n- Always ensure your files are correctly formatted and free of errors before uploading.\n- Monitor the component indicators to confirm data processing and storage status.\n", + "description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. \n\nClick ▶️ **Run component** on the **Astra DB** component to load your data.\n\n*If you're using OSS Langflow, add your Astra DB Application Token to the Astra DB component.\n\n#### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.", "display_name": "", "documentation": "", "template": { - "backgroundColor": "blue" + "backgroundColor": "neutral" } }, "type": "note" }, "dragging": false, - "height": 445, - "id": "note-i0pUr", + "height": 324, + "id": "note-UrQ0p", "position": { - "x": 806.5400894704665, - "y": 1410.461680629912 + "x": 374.388314931542, + "y": 486.18094072679895 }, "positionAbsolute": { - "x": 806.5400894704665, - "y": 1410.461680629912 + "x": 374.388314931542, + "y": 486.18094072679895 }, "resizing": false, "selected": false, "style": { - "height": 445, - "width": 493 + "height": 324, + "width": 324 }, "type": "noteNode", - "width": 493 + "width": 324 }, { "data": { - "id": "note-9DvF2", + "id": "note-39jdn", "node": { - "description": "# Vector Store RAG Overview\n\n\nThis Vector Store RAG workflow combines data ingestion and retrieval into a unified process, allowing you to manage and query your data efficiently.\n\n**Components**:\n- **📄 Load Data**: Prepares data for vector database storage.\n - File ingestion\n - Text chunking\n - Embedding generation\n - Storage in Astra DB\n\n- **✨ Retriever**: Provides intelligent search and retrieval from the vector database.\n - User query input\n - Database search\n - Enhanced response generation using AI models\n\n**Workflow Instructions**:\n1. Initiate the **Load Data** flow to input your document into the vector database.\n2. Use the **Retriever** flow to conduct queries and obtain comprehensive responses based on your stored data.\n3. Adjust settings like API keys and collection names as needed for your specific use case.\n\n**Benefits**:\n- Streamlines data management with a single interface.\n- Fast, scalable vector-based search capabilities.\n- Integrates cutting-edge AI technology for rich, context-aware outputs.\n", + "description": "## 📖 README\n\nLoad your data into a vector database with the 📚 **Load Data** flow, and then use your data as chat context with the 🐕 **Retriever** flow.\n\n**🚨 Add your OpenAI API key as a global variable to easily add it to all of the OpenAI components in this flow.** \n\n**Quick start**:\n1. Run the 📚 **Load Data** flow.\n2. Run the 🐕 **Retriever** flow.\n\n**Next steps** \n\n- Experiment by changing the prompt and the loaded data to see how the bot's responses change. \n\nFor more info, see the [Langflow docs](https://docs.langflow.org/starter-projects-vector-store-rag).", "display_name": "Read Me", "documentation": "", "template": { - "backgroundColor": "indigo" + "backgroundColor": "neutral" } }, "type": "note" }, "dragging": false, - "height": 798, - "id": "note-9DvF2", + "id": "note-39jdn", "position": { - "x": 134.64227176140844, - "y": 307.10406179806375 + "x": 94.28986613312418, + "y": 907.6428043837066 }, "positionAbsolute": { - "x": 134.64227176140844, - "y": 307.10406179806375 + "x": 94.28986613312418, + "y": 907.6428043837066 }, "resizing": false, "selected": false, "style": { - "height": 798, + "height": 527, "width": 600 }, "type": "noteNode", "width": 600 }, - { - "data": { - "id": "note-RC6pg", - "node": { - "description": "## Astra DB Setup\n\n**Important Setup Information:**\n\nTo use the Astra DB component in this workflow, you'll need to obtain an Astra DB key. Follow these steps to set up your access:\n\n1. **Create an Account**: Visit [DataStax Accounts](https://accounts.datastax.com) and create an account if you don't already have one.\n2. **Generate a Key**: Once logged in, navigate to the \"Astra DB\" section to create a new application token. This token will serve as your authentication key for API access.\n3. **Configure Endpoint**: Note the API endpoint URL provided by Astra DB. This is essential for connecting your vector database to the workflow.\n4. **Input Token**: Enter the generated token and API endpoint URL in the appropriate fields in the Astra DB component within the workflow.\n\n", - "display_name": "", - "documentation": "", - "template": { - "backgroundColor": "blue" - } - }, - "type": "note" - }, - "dragging": false, - "height": 358, - "id": "note-RC6pg", - "position": { - "x": 1180.1117634200216, - "y": -2.4324300661269547 - }, - "positionAbsolute": { - "x": 1180.1117634200216, - "y": -2.4324300661269547 - }, - "resizing": false, - "selected": false, - "style": { - "height": 358, - "width": 412 - }, - "type": "noteNode", - "width": 412 - }, { "data": { "id": "OpenAIModel-H0ANr", "node": { - "base_classes": ["LanguageModel", "Message"], + "base_classes": [ + "LanguageModel", + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -1036,7 +1101,9 @@ "name": "text_output", "required_inputs": [], "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" }, { @@ -1046,7 +1113,9 @@ "name": "model_output", "required_inputs": [], "selected": "LanguageModel", - "types": ["LanguageModel"], + "types": [ + "LanguageModel" + ], "value": "__UNDEFINED__" } ], @@ -1059,7 +1128,9 @@ "display_name": "OpenAI API Key", "dynamic": false, "info": "The OpenAI API Key to use for the OpenAI model.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "api_key", "password": true, @@ -1094,7 +1165,9 @@ "display_name": "Input", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "input_value", @@ -1210,7 +1283,9 @@ "display_name": "Output Parser", "dynamic": false, "info": "The parser to use to parse the output of the model", - "input_types": ["OutputParser"], + "input_types": [ + "OutputParser" + ], "list": false, "name": "output_parser", "placeholder": "", @@ -1275,7 +1350,9 @@ "display_name": "System Message", "dynamic": false, "info": "System message to pass to the model.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "system_message", @@ -1331,7 +1408,9 @@ "display_name": "Chat Output", "id": "ChatOutput-a4EPO", "node": { - "base_classes": ["Message"], + "base_classes": [ + "Message" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -1363,7 +1442,9 @@ "method": "message_response", "name": "message", "selected": "Message", - "types": ["Message"], + "types": [ + "Message" + ], "value": "__UNDEFINED__" } ], @@ -1376,7 +1457,9 @@ "display_name": "Background Color", "dynamic": false, "info": "The background color of the icon.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "background_color", @@ -1396,7 +1479,9 @@ "display_name": "Icon", "dynamic": false, "info": "The icon of the message.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "chat_icon", @@ -1434,7 +1519,9 @@ "display_name": "Data Template", "dynamic": false, "info": "Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "data_template", @@ -1454,7 +1541,9 @@ "display_name": "Text", "dynamic": false, "info": "Message to be passed as output.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "input_value", @@ -1475,7 +1564,10 @@ "dynamic": false, "info": "Type of sender.", "name": "sender", - "options": ["Machine", "User"], + "options": [ + "Machine", + "User" + ], "placeholder": "", "required": false, "show": true, @@ -1491,7 +1583,9 @@ "display_name": "Sender Name", "dynamic": false, "info": "Name of the sender.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "sender_name", @@ -1511,7 +1605,9 @@ "display_name": "Session ID", "dynamic": false, "info": "The session ID of the chat. If empty, the current session ID parameter will be used.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "session_id", @@ -1547,7 +1643,9 @@ "display_name": "Text Color", "dynamic": false, "info": "The text color of the name", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "text_color", @@ -1585,7 +1683,10 @@ "data": { "id": "AstraDB-3buPx", "node": { - "base_classes": ["Data", "Retriever"], + "base_classes": [ + "Data", + "Retriever" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -1631,7 +1732,9 @@ "name": "base_retriever", "required_inputs": [], "selected": "Retriever", - "types": ["Retriever"], + "types": [ + "Retriever" + ], "value": "__UNDEFINED__" }, { @@ -1639,22 +1742,47 @@ "display_name": "Search Results", "method": "search_documents", "name": "search_results", - "required_inputs": ["api_endpoint", "collection_name", "token"], + "required_inputs": [ + "api_endpoint", + "collection_name", + "token" + ], "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], "pinned": false, "template": { "_type": "Component", + "advanced_search_filter": { + "_input_type": "NestedDictInput", + "advanced": true, + "display_name": "Search Metadata Filter", + "dynamic": false, + "info": "Optional dictionary of filters to apply to the search query.", + "list": false, + "name": "advanced_search_filter", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": {} + }, "api_endpoint": { "_input_type": "SecretStrInput", "advanced": false, "display_name": "API Endpoint", "dynamic": false, "info": "API endpoint URL for the Astra DB service.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "api_endpoint", "password": true, @@ -1745,7 +1873,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\n\nimport orjson\nfrom astrapy.admin import parse_api_endpoint\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n self.del_fields(build_config, [\"embedding\"])\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding\": new_parameter})\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.embedding_provider, [None])[0] or kwargs.get(\n \"embedding_provider\"\n )\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding_choice == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Fetch values from kwargs if any self.* attributes are None\n dict_options = vectorize_options or self.build_vectorize_options()\n\n # Set the embedding dictionary\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n collection_name=self.collection_name,\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.namespace or None,\n environment=parse_api_endpoint(self.api_endpoint).environment if self.api_endpoint else None,\n metric=self.metric or None,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n msg = f\"Error performing search in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n self.log(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\n\nimport orjson\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"[DEPRECATED] Search Metadata Filter\",\n info=\"Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n self.del_fields(build_config, [\"embedding\"])\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding\": new_parameter})\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.embedding_provider, [None])[0] or kwargs.get(\n \"embedding_provider\"\n )\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding_choice == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Fetch values from kwargs if any self.* attributes are None\n dict_options = vectorize_options or self.build_vectorize_options()\n\n # Set the embedding dictionary\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n collection_name=self.collection_name,\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.keyspace or None,\n environment=parse_api_endpoint(self.api_endpoint).environment if self.api_endpoint else None,\n metric=self.metric or None,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None\n search_filter = (\n {k: v for k, v in self.search_filter.items() if k and v and k.strip()} if self.search_filter else None\n )\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter or search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n\n if search_filter:\n self.log(self.log(f\"`search_filter` is deprecated. Use `advanced_search_filter`. Cleaned: {search_filter}\"))\n filter_arg.update(search_filter)\n\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_indexing_policy": { "_input_type": "StrInput", @@ -1781,6 +1909,25 @@ "type": "str", "value": "" }, + "embedding": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Embedding Model", + "dynamic": false, + "info": "Allows an embedding model configuration.", + "input_types": [ + "Embeddings" + ], + "list": false, + "name": "embedding", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, "embedding_choice": { "_input_type": "DropdownInput", "advanced": false, @@ -1789,7 +1936,10 @@ "dynamic": false, "info": "Determines whether to use Astra Vectorize for the collection.", "name": "embedding_choice", - "options": ["Embedding Model", "Astra Vectorize"], + "options": [ + "Embedding Model", + "Astra Vectorize" + ], "placeholder": "", "real_time_refresh": true, "required": false, @@ -1823,7 +1973,9 @@ "display_name": "Ingest Data", "dynamic": false, "info": "", - "input_types": ["Data"], + "input_types": [ + "Data" + ], "list": true, "name": "ingest_data", "placeholder": "", @@ -1836,6 +1988,23 @@ "type": "other", "value": "" }, + "keyspace": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Keyspace", + "dynamic": false, + "info": "Optional keyspace within Astra DB to use for the collection.", + "list": false, + "load_from_db": false, + "name": "keyspace", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, "metadata_indexing_exclude": { "_input_type": "StrInput", "advanced": true, @@ -1878,7 +2047,11 @@ "dynamic": false, "info": "Optional distance metric for vector comparisons in the vector store.", "name": "metric", - "options": ["cosine", "dot_product", "euclidean"], + "options": [ + "cosine", + "dot_product", + "euclidean" + ], "placeholder": "", "required": false, "show": true, @@ -1888,23 +2061,6 @@ "type": "str", "value": "cosine" }, - "namespace": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Namespace", - "dynamic": false, - "info": "Optional namespace within Astra DB to use for the collection.", - "list": false, - "load_from_db": false, - "name": "namespace", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, "number_of_results": { "_input_type": "IntInput", "advanced": true, @@ -1940,9 +2096,9 @@ "search_filter": { "_input_type": "DictInput", "advanced": true, - "display_name": "Search Metadata Filter", + "display_name": "[DEPRECATED] Search Metadata Filter", "dynamic": false, - "info": "Optional dictionary of filters to apply to the search query.", + "info": "Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.", "list": true, "name": "search_filter", "placeholder": "", @@ -1959,7 +2115,9 @@ "display_name": "Search Input", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "multiline": true, @@ -2020,7 +2178,10 @@ "dynamic": false, "info": "Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.", "name": "setup_mode", - "options": ["Sync", "Off"], + "options": [ + "Sync", + "Off" + ], "placeholder": "", "required": false, "show": true, @@ -2036,7 +2197,9 @@ "display_name": "Astra DB Application Token", "dynamic": false, "info": "Authentication token for accessing Astra DB.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "token", "password": true, @@ -2071,7 +2234,9 @@ "data": { "id": "OpenAIEmbeddings-CeoV9", "node": { - "base_classes": ["Embeddings"], + "base_classes": [ + "Embeddings" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -2116,7 +2281,9 @@ "name": "embeddings", "required_inputs": [], "selected": "Embeddings", - "types": ["Embeddings"], + "types": [ + "Embeddings" + ], "value": "__UNDEFINED__" } ], @@ -2145,7 +2312,9 @@ "display_name": "Client", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "client", @@ -2215,7 +2384,9 @@ "display_name": "Deployment", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "deployment", @@ -2321,7 +2492,9 @@ "display_name": "OpenAI API Base", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_base", @@ -2341,7 +2514,9 @@ "display_name": "OpenAI API Key", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "openai_api_key", "password": true, @@ -2358,7 +2533,9 @@ "display_name": "OpenAI API Type", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_type", @@ -2378,7 +2555,9 @@ "display_name": "OpenAI API Version", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_version", @@ -2398,7 +2577,9 @@ "display_name": "OpenAI Organization", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_organization", @@ -2418,7 +2599,9 @@ "display_name": "OpenAI Proxy", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_proxy", @@ -2502,7 +2685,9 @@ "display_name": "TikToken Model Name", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "tiktoken_model_name", @@ -2540,7 +2725,10 @@ "data": { "id": "AstraDB-laybz", "node": { - "base_classes": ["Data", "Retriever"], + "base_classes": [ + "Data", + "Retriever" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -2586,7 +2774,9 @@ "name": "base_retriever", "required_inputs": [], "selected": "Retriever", - "types": ["Retriever"], + "types": [ + "Retriever" + ], "value": "__UNDEFINED__" }, { @@ -2594,22 +2784,47 @@ "display_name": "Search Results", "method": "search_documents", "name": "search_results", - "required_inputs": ["api_endpoint", "collection_name", "token"], + "required_inputs": [ + "api_endpoint", + "collection_name", + "token" + ], "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], "pinned": false, "template": { "_type": "Component", + "advanced_search_filter": { + "_input_type": "NestedDictInput", + "advanced": true, + "display_name": "Search Metadata Filter", + "dynamic": false, + "info": "Optional dictionary of filters to apply to the search query.", + "list": false, + "name": "advanced_search_filter", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": {} + }, "api_endpoint": { "_input_type": "SecretStrInput", "advanced": false, "display_name": "API Endpoint", "dynamic": false, "info": "API endpoint URL for the Astra DB service.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "api_endpoint", "password": true, @@ -2700,7 +2915,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\n\nimport orjson\nfrom astrapy.admin import parse_api_endpoint\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"namespace\",\n display_name=\"Namespace\",\n info=\"Optional namespace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n self.del_fields(build_config, [\"embedding\"])\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding\": new_parameter})\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.embedding_provider, [None])[0] or kwargs.get(\n \"embedding_provider\"\n )\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding_choice == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Fetch values from kwargs if any self.* attributes are None\n dict_options = vectorize_options or self.build_vectorize_options()\n\n # Set the embedding dictionary\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n collection_name=self.collection_name,\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.namespace or None,\n environment=parse_api_endpoint(self.api_endpoint).environment if self.api_endpoint else None,\n metric=self.metric or None,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n args = {\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n\n if self.search_filter:\n clean_filter = {k: v for k, v in self.search_filter.items() if k and v}\n if len(clean_filter) > 0:\n args[\"filter\"] = clean_filter\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n if not vector_store:\n vector_store = self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n if self.search_input and isinstance(self.search_input, str) and self.search_input.strip():\n try:\n search_type = self._map_search_type()\n search_args = self._build_search_args()\n\n docs = vector_store.search(query=self.search_input, search_type=search_type, **search_args)\n except Exception as e:\n msg = f\"Error performing search in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n self.log(\"No search input provided. Skipping search.\")\n return []\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import os\n\nimport orjson\nfrom astrapy.admin import parse_api_endpoint\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n VECTORIZE_PROVIDERS_MAPPING = {\n \"Azure OpenAI\": [\"azureOpenAI\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"NVIDIA\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n }\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n StrInput(\n name=\"collection_name\",\n display_name=\"Collection Name\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n is_list=True,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n is_list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"[DEPRECATED] Search Metadata Filter\",\n info=\"Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n is_list=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n if field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n self.del_fields(build_config, [\"embedding\"])\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=self.VECTORIZE_PROVIDERS_MAPPING.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n new_parameter = HandleInput(\n name=\"embedding\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding\": new_parameter})\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n model_options = self.VECTORIZE_PROVIDERS_MAPPING[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n is_list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n is_list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_value = self.VECTORIZE_PROVIDERS_MAPPING.get(self.embedding_provider, [None])[0] or kwargs.get(\n \"embedding_provider\"\n )\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or kwargs.get(\"z_04_authentication\", {}))}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding_choice == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Fetch values from kwargs if any self.* attributes are None\n dict_options = vectorize_options or self.build_vectorize_options()\n\n # Set the embedding dictionary\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n collection_name=self.collection_name,\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.keyspace or None,\n environment=parse_api_endpoint(self.api_endpoint).environment if self.api_endpoint else None,\n metric=self.metric or None,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None\n search_filter = (\n {k: v for k, v in self.search_filter.items() if k and v and k.strip()} if self.search_filter else None\n )\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter or search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n\n if search_filter:\n self.log(self.log(f\"`search_filter` is deprecated. Use `advanced_search_filter`. Cleaned: {search_filter}\"))\n filter_arg.update(search_filter)\n\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_indexing_policy": { "_input_type": "StrInput", @@ -2736,6 +2951,25 @@ "type": "str", "value": "test" }, + "embedding": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Embedding Model", + "dynamic": false, + "info": "Allows an embedding model configuration.", + "input_types": [ + "Embeddings" + ], + "list": false, + "name": "embedding", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, "embedding_choice": { "_input_type": "DropdownInput", "advanced": false, @@ -2744,7 +2978,10 @@ "dynamic": false, "info": "Determines whether to use Astra Vectorize for the collection.", "name": "embedding_choice", - "options": ["Embedding Model", "Astra Vectorize"], + "options": [ + "Embedding Model", + "Astra Vectorize" + ], "placeholder": "", "real_time_refresh": true, "required": false, @@ -2778,7 +3015,9 @@ "display_name": "Ingest Data", "dynamic": false, "info": "", - "input_types": ["Data"], + "input_types": [ + "Data" + ], "list": true, "name": "ingest_data", "placeholder": "", @@ -2791,6 +3030,23 @@ "type": "other", "value": "" }, + "keyspace": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Keyspace", + "dynamic": false, + "info": "Optional keyspace within Astra DB to use for the collection.", + "list": false, + "load_from_db": false, + "name": "keyspace", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, "metadata_indexing_exclude": { "_input_type": "StrInput", "advanced": true, @@ -2833,7 +3089,11 @@ "dynamic": false, "info": "Optional distance metric for vector comparisons in the vector store.", "name": "metric", - "options": ["cosine", "dot_product", "euclidean"], + "options": [ + "cosine", + "dot_product", + "euclidean" + ], "placeholder": "", "required": false, "show": true, @@ -2843,23 +3103,6 @@ "type": "str", "value": "cosine" }, - "namespace": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Namespace", - "dynamic": false, - "info": "Optional namespace within Astra DB to use for the collection.", - "list": false, - "load_from_db": false, - "name": "namespace", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, "number_of_results": { "_input_type": "IntInput", "advanced": true, @@ -2895,9 +3138,9 @@ "search_filter": { "_input_type": "DictInput", "advanced": true, - "display_name": "Search Metadata Filter", + "display_name": "[DEPRECATED] Search Metadata Filter", "dynamic": false, - "info": "Optional dictionary of filters to apply to the search query.", + "info": "Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.", "list": true, "name": "search_filter", "placeholder": "", @@ -2914,7 +3157,9 @@ "display_name": "Search Input", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "multiline": true, @@ -2975,7 +3220,10 @@ "dynamic": false, "info": "Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.", "name": "setup_mode", - "options": ["Sync", "Off"], + "options": [ + "Sync", + "Off" + ], "placeholder": "", "required": false, "show": true, @@ -2991,7 +3239,9 @@ "display_name": "Astra DB Application Token", "dynamic": false, "info": "Authentication token for accessing Astra DB.", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "token", "password": true, @@ -3022,11 +3272,43 @@ "type": "genericNode", "width": 320 }, + { + "data": { + "id": "note-igpjN", + "node": { + "description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, ```What is this document about?```\n", + "display_name": "", + "documentation": "", + "template": { + "backgroundColor": "neutral" + } + }, + "type": "note" + }, + "dragging": false, + "id": "note-igpjN", + "position": { + "x": 955.3277857006676, + "y": 1552.171191793604 + }, + "positionAbsolute": { + "x": 955.3277857006676, + "y": 1552.171191793604 + }, + "selected": false, + "type": "noteNode", + "style": { + "height": 50, + "width": 325 + } + }, { "data": { "id": "OpenAIEmbeddings-ANgku", "node": { - "base_classes": ["Embeddings"], + "base_classes": [ + "Embeddings" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -3071,7 +3353,9 @@ "name": "embeddings", "required_inputs": [], "selected": "Embeddings", - "types": ["Embeddings"], + "types": [ + "Embeddings" + ], "value": "__UNDEFINED__" } ], @@ -3100,7 +3384,9 @@ "display_name": "Client", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "client", @@ -3170,7 +3456,9 @@ "display_name": "Deployment", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "deployment", @@ -3276,7 +3564,9 @@ "display_name": "OpenAI API Base", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_base", @@ -3296,7 +3586,9 @@ "display_name": "OpenAI API Key", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "load_from_db": true, "name": "openai_api_key", "password": true, @@ -3313,7 +3605,9 @@ "display_name": "OpenAI API Type", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_type", @@ -3333,7 +3627,9 @@ "display_name": "OpenAI API Version", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_api_version", @@ -3353,7 +3649,9 @@ "display_name": "OpenAI Organization", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_organization", @@ -3373,7 +3671,9 @@ "display_name": "OpenAI Proxy", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "openai_proxy", @@ -3457,7 +3757,9 @@ "display_name": "TikToken Model Name", "dynamic": false, "info": "", - "input_types": ["Message"], + "input_types": [ + "Message" + ], "list": false, "load_from_db": false, "name": "tiktoken_model_name", @@ -3495,7 +3797,9 @@ "data": { "id": "File-FJIuH", "node": { - "base_classes": ["Data"], + "base_classes": [ + "Data" + ], "beta": false, "conditional_paths": [], "custom_fields": {}, @@ -3521,7 +3825,9 @@ "method": "load_file", "name": "data", "selected": "Data", - "types": ["Data"], + "types": [ + "Data" + ], "value": "__UNDEFINED__" } ], @@ -3651,6 +3957,90 @@ "selected": false, "type": "genericNode", "width": 320 + }, + { + "data": { + "id": "note-O3TZQ", + "node": { + "description": "### 💡 Add your OpenAI API key here 👇", + "display_name": "", + "documentation": "", + "template": { + "backgroundColor": "transparent" + } + }, + "type": "note" + }, + "dragging": false, + "height": 324, + "id": "note-O3TZQ", + "position": { + "x": 1692.2322233423606, + "y": 1821.9077961087607 + }, + "positionAbsolute": { + "x": 1692.2322233423606, + "y": 1821.9077961087607 + }, + "selected": false, + "type": "noteNode", + "width": 324 + }, + { + "data": { + "id": "note-M7loH", + "node": { + "description": "### 💡 Add your OpenAI API key here 👇", + "display_name": "", + "documentation": "", + "template": { + "backgroundColor": "transparent" + } + }, + "type": "note" + }, + "dragging": false, + "height": 324, + "id": "note-M7loH", + "position": { + "x": 824.1003268813427, + "y": 698.6951695764802 + }, + "positionAbsolute": { + "x": 824.1003268813427, + "y": 698.6951695764802 + }, + "selected": false, + "type": "noteNode", + "width": 324 + }, + { + "data": { + "id": "note-26oaj", + "node": { + "description": "### 💡 Add your OpenAI API key here 👇", + "display_name": "", + "documentation": "", + "template": { + "backgroundColor": "transparent" + } + }, + "type": "note" + }, + "dragging": false, + "height": 324, + "id": "note-26oaj", + "position": { + "x": 2350.297636215281, + "y": 525.0687902842766 + }, + "positionAbsolute": { + "x": 2350.297636215281, + "y": 525.0687902842766 + }, + "selected": false, + "type": "noteNode", + "width": 324 } ], "viewport": { @@ -3661,11 +4051,16 @@ }, "description": "Get started with Retrieval-Augmented Generation (RAG) by ingesting data from documents and retrieving relevant chunks through vector similarity to provide contextual answers.", "endpoint_name": null, + "gradient": "5", "icon": "Database", "id": "c63bc197-85d6-4f39-87dc-2bc35523ec4e", - "gradient": "5", "is_component": false, "last_tested_version": "1.0.19.post2", "name": "Vector Store RAG", - "tags": ["openai", "astradb", "rag", "q-a"] -} + "tags": [ + "openai", + "astradb", + "rag", + "q-a" + ] +} \ No newline at end of file