From 7a74c01a4bcb50652a59201cb028c1500068c1f4 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 2 Dec 2024 12:11:44 -0800 Subject: [PATCH] fix: update vector RAG flow for Astra DB comp changes (#5006) --- .../components/vectorstores/astradb.py | 23 +- .../starter_projects/Vector Store RAG.json | 2642 +++++++++-------- 2 files changed, 1358 insertions(+), 1307 deletions(-) diff --git a/src/backend/base/langflow/components/vectorstores/astradb.py b/src/backend/base/langflow/components/vectorstores/astradb.py index a90a0ac86..690deb361 100644 --- a/src/backend/base/langflow/components/vectorstores/astradb.py +++ b/src/backend/base/langflow/components/vectorstores/astradb.py @@ -113,7 +113,15 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): required=True, real_time_refresh=True, refresh_button=True, - options=[], + options=["+ Create new collection"], + value="+ Create new collection", + ), + StrInput( + name="collection_name_new", + display_name="Collection Name", + info="Name of the new collection to create.", + advanced=False, + required=True, ), StrInput( name="keyspace", @@ -339,13 +347,8 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): build_config["embedding_choice"]["advanced"] = False build_config["embedding_choice"]["value"] = "Embedding Model" - new_parameter = StrInput( - name="collection_name_new", - display_name="Collection Name", - required=True, - ).to_dict() - - self.insert_in_dict(build_config, "embedding_choice", {"collection_name_new": new_parameter}) + build_config["collection_name_new"]["advanced"] = False + build_config["collection_name_new"]["required"] = True new_parameter = HandleInput( name="embedding_model", @@ -356,7 +359,9 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent): self.insert_in_dict(build_config, "collection_name_new", {"embedding_model": new_parameter}) elif field_name == "collection_name" and field_value != "+ Create new collection": - self.del_fields(build_config, ["collection_name_new"]) + build_config["collection_name_new"]["advanced"] = True + build_config["collection_name_new"]["required"] = False + build_config["collection_name_new"]["value"] = "" # Get the collection options collection_options = self.get_collection_options() diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 4240bcab4..09a75a9d7 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "ParseData", - "id": "ParseData-pqaC7", + "id": "ParseData-HrKjN", "name": "text", "output_types": [ "Message" @@ -15,7 +15,7 @@ }, "targetHandle": { "fieldName": "context", - "id": "Prompt-uqDcR", + "id": "Prompt-w0w8U", "inputTypes": [ "Message", "Text" @@ -23,11 +23,11 @@ "type": "str" } }, - "id": "reactflow__edge-ParseData-pqaC7{œdataTypeœ:œParseDataœ,œidœ:œParseData-pqaC7œ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-uqDcR{œfieldNameœ:œcontextœ,œidœ:œPrompt-uqDcRœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", - "source": "ParseData-pqaC7", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-pqaC7œ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-uqDcR", - "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-uqDcRœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-ParseData-HrKjN{œdataTypeœ:œParseDataœ,œidœ:œParseData-HrKjNœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-w0w8U{œfieldNameœ:œcontextœ,œidœ:œPrompt-w0w8Uœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "source": "ParseData-HrKjN", + "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-HrKjNœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-w0w8U", + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-w0w8Uœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -35,7 +35,7 @@ "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-uqDcR", + "id": "Prompt-w0w8U", "name": "prompt", "output_types": [ "Message" @@ -43,18 +43,18 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "OpenAIModel-H0ANr", + "id": "OpenAIModel-a9aIS", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-Prompt-uqDcR{œdataTypeœ:œPromptœ,œidœ:œPrompt-uqDcRœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-H0ANr{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-H0ANrœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "Prompt-uqDcR", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-uqDcRœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "OpenAIModel-H0ANr", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-H0ANrœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-Prompt-w0w8U{œdataTypeœ:œPromptœ,œidœ:œPrompt-w0w8Uœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-a9aIS{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-a9aISœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "Prompt-w0w8U", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-w0w8Uœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "OpenAIModel-a9aIS", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-a9aISœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -62,7 +62,7 @@ "data": { "sourceHandle": { "dataType": "OpenAIModel", - "id": "OpenAIModel-H0ANr", + "id": "OpenAIModel-a9aIS", "name": "text_output", "output_types": [ "Message" @@ -70,72 +70,18 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "ChatOutput-a4EPO", + "id": "ChatOutput-fRMkS", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-OpenAIModel-H0ANr{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-H0ANrœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-a4EPO{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-a4EPOœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "OpenAIModel-H0ANr", - "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-H0ANrœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", - "target": "ChatOutput-a4EPO", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-a4EPOœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "AstraDB", - "id": "AstraDB-3buPx", - "name": "search_results", - "output_types": [ - "Data" - ] - }, - "targetHandle": { - "fieldName": "data", - "id": "ParseData-pqaC7", - "inputTypes": [ - "Data" - ], - "type": "other" - } - }, - "id": "reactflow__edge-AstraDB-3buPx{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-3buPxœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParseData-pqaC7{œfieldNameœ:œdataœ,œidœ:œParseData-pqaC7œ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "AstraDB-3buPx", - "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-3buPxœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-pqaC7", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-pqaC7œ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-CeoV9", - "name": "embeddings", - "output_types": [ - "Embeddings" - ] - }, - "targetHandle": { - "fieldName": "embedding_model", - "id": "AstraDB-3buPx", - "inputTypes": [ - "Embeddings" - ], - "type": "other" - } - }, - "id": "reactflow__edge-OpenAIEmbeddings-CeoV9{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-CeoV9œ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-3buPx{œfieldNameœ:œembeddingœ,œidœ:œAstraDB-3buPxœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", - "source": "OpenAIEmbeddings-CeoV9", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-CeoV9œ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-3buPx", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-3buPxœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-OpenAIModel-a9aIS{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-a9aISœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-fRMkS{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-fRMkSœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "OpenAIModel-a9aIS", + "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-a9aISœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", + "target": "ChatOutput-fRMkS", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-fRMkSœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -143,34 +89,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-nd3Fq", - "name": "message", - "output_types": [ - "Message" - ] - }, - "targetHandle": { - "fieldName": "search_input", - "id": "AstraDB-3buPx", - "inputTypes": [ - "Message" - ], - "type": "str" - } - }, - "id": "reactflow__edge-ChatInput-nd3Fq{œdataTypeœ:œChatInputœ,œidœ:œChatInput-nd3Fqœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-3buPx{œfieldNameœ:œsearch_inputœ,œidœ:œAstraDB-3buPxœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "ChatInput-nd3Fq", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-nd3Fqœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "AstraDB-3buPx", - "targetHandle": "{œfieldNameœ: œsearch_inputœ, œidœ: œAstraDB-3buPxœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "ChatInput", - "id": "ChatInput-nd3Fq", + "id": "ChatInput-fqzZb", "name": "message", "output_types": [ "Message" @@ -178,7 +97,7 @@ }, "targetHandle": { "fieldName": "question", - "id": "Prompt-uqDcR", + "id": "Prompt-w0w8U", "inputTypes": [ "Message", "Text" @@ -186,72 +105,19 @@ "type": "str" } }, - "id": "reactflow__edge-ChatInput-nd3Fq{œdataTypeœ:œChatInputœ,œidœ:œChatInput-nd3Fqœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-uqDcR{œfieldNameœ:œquestionœ,œidœ:œPrompt-uqDcRœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", - "source": "ChatInput-nd3Fq", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-nd3Fqœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-uqDcR", - "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-uqDcRœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-ChatInput-fqzZb{œdataTypeœ:œChatInputœ,œidœ:œChatInput-fqzZbœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-w0w8U{œfieldNameœ:œquestionœ,œidœ:œPrompt-w0w8Uœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "source": "ChatInput-fqzZb", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-fqzZbœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-w0w8U", + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-w0w8Uœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "SplitText", - "id": "SplitText-QakmY", - "name": "chunks", - "output_types": [ - "Data" - ] - }, - "targetHandle": { - "fieldName": "ingest_data", - "id": "AstraDB-laybz", - "inputTypes": [ - "Data" - ], - "type": "other" - } - }, - "id": "reactflow__edge-SplitText-QakmY{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-QakmYœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-laybz{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-laybzœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "SplitText-QakmY", - "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-QakmYœ, œnameœ: œchunksœ, œoutput_typesœ: [œDataœ]}", - "target": "AstraDB-laybz", - "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-laybzœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-ANgku", - "name": "embeddings", - "output_types": [ - "Embeddings" - ] - }, - "targetHandle": { - "fieldName": "embedding_model", - "id": "AstraDB-laybz", - "inputTypes": [ - "Embeddings" - ], - "type": "other" - } - }, - "id": "reactflow__edge-OpenAIEmbeddings-ANgku{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-ANgkuœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-laybz{œfieldNameœ:œembeddingœ,œidœ:œAstraDB-laybzœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", - "source": "OpenAIEmbeddings-ANgku", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-ANgkuœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-laybz", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-laybzœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" - }, - { "className": "", "data": { "sourceHandle": { "dataType": "File", - "id": "File-FJIuH", + "id": "File-4yIsn", "name": "data", "output_types": [ "Data" @@ -259,18 +125,153 @@ }, "targetHandle": { "fieldName": "data_inputs", - "id": "SplitText-QakmY", + "id": "SplitText-oHVzS", "inputTypes": [ "Data" ], "type": "other" } }, - "id": "reactflow__edge-File-FJIuH{œdataTypeœ:œFileœ,œidœ:œFile-FJIuHœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-QakmY{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-QakmYœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "File-FJIuH", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-FJIuHœ, œnameœ: œdataœ, œoutput_typesœ: [œDataœ]}", - "target": "SplitText-QakmY", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-QakmYœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-File-4yIsn{œdataTypeœ:œFileœ,œidœ:œFile-4yIsnœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-oHVzS{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-oHVzSœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "File-4yIsn", + "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-4yIsnœ, œnameœ: œdataœ, œoutput_typesœ: [œDataœ]}", + "target": "SplitText-oHVzS", + "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-oHVzSœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "OpenAIEmbeddings", + "id": "OpenAIEmbeddings-bD6An", + "name": "embeddings", + "output_types": [ + "Embeddings" + ] + }, + "targetHandle": { + "fieldName": "embedding_model", + "id": "AstraDB-zXOmg", + "inputTypes": [ + "Embeddings" + ], + "type": "other" + } + }, + "id": "reactflow__edge-OpenAIEmbeddings-bD6An{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-bD6Anœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-zXOmg{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-zXOmgœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "source": "OpenAIEmbeddings-bD6An", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-bD6Anœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-zXOmg", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-zXOmgœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "ChatInput", + "id": "ChatInput-fqzZb", + "name": "message", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "search_input", + "id": "AstraDB-zXOmg", + "inputTypes": [ + "Message" + ], + "type": "str" + } + }, + "id": "reactflow__edge-ChatInput-fqzZb{œdataTypeœ:œChatInputœ,œidœ:œChatInput-fqzZbœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-zXOmg{œfieldNameœ:œsearch_inputœ,œidœ:œAstraDB-zXOmgœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "source": "ChatInput-fqzZb", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-fqzZbœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "AstraDB-zXOmg", + "targetHandle": "{œfieldNameœ: œsearch_inputœ, œidœ: œAstraDB-zXOmgœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "AstraDB", + "id": "AstraDB-zXOmg", + "name": "search_results", + "output_types": [ + "Data" + ] + }, + "targetHandle": { + "fieldName": "data", + "id": "ParseData-HrKjN", + "inputTypes": [ + "Data" + ], + "type": "other" + } + }, + "id": "reactflow__edge-AstraDB-zXOmg{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-zXOmgœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParseData-HrKjN{œfieldNameœ:œdataœ,œidœ:œParseData-HrKjNœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "AstraDB-zXOmg", + "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-zXOmgœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", + "target": "ParseData-HrKjN", + "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-HrKjNœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "OpenAIEmbeddings", + "id": "OpenAIEmbeddings-Qtui3", + "name": "embeddings", + "output_types": [ + "Embeddings" + ] + }, + "targetHandle": { + "fieldName": "embedding_model", + "id": "AstraDB-vNNR6", + "inputTypes": [ + "Embeddings" + ], + "type": "other" + } + }, + "id": "reactflow__edge-OpenAIEmbeddings-Qtui3{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-Qtui3œ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-vNNR6{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-vNNR6œ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "source": "OpenAIEmbeddings-Qtui3", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-Qtui3œ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-vNNR6", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-vNNR6œ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "SplitText", + "id": "SplitText-oHVzS", + "name": "chunks", + "output_types": [ + "Data" + ] + }, + "targetHandle": { + "fieldName": "ingest_data", + "id": "AstraDB-vNNR6", + "inputTypes": [ + "Data" + ], + "type": "other" + } + }, + "id": "reactflow__edge-SplitText-oHVzS{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-oHVzSœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-vNNR6{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-vNNR6œ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "source": "SplitText-oHVzS", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-oHVzSœ, œnameœ: œchunksœ, œoutput_typesœ: [œDataœ]}", + "target": "AstraDB-vNNR6", + "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-vNNR6œ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" } ], "nodes": [ @@ -278,7 +279,7 @@ "data": { "description": "Get chat inputs from the Playground.", "display_name": "Chat Input", - "id": "ChatInput-nd3Fq", + "id": "ChatInput-fqzZb", "node": { "base_classes": [ "Message" @@ -301,7 +302,7 @@ "frozen": false, "icon": "MessagesSquare", "legacy": false, - "lf_version": "1.0.19.post2", + "lf_version": "1.1.1", "metadata": {}, "output_types": [], "outputs": [ @@ -542,7 +543,7 @@ }, "dragging": false, "height": 234, - "id": "ChatInput-nd3Fq", + "id": "ChatInput-fqzZb", "position": { "x": 743.9745420290319, "y": 463.6977510207854 @@ -559,7 +560,7 @@ "data": { "description": "Convert Data into plain text following a specified template.", "display_name": "Parse Data", - "id": "ParseData-pqaC7", + "id": "ParseData-HrKjN", "node": { "base_classes": [ "Message" @@ -579,7 +580,7 @@ "frozen": false, "icon": "braces", "legacy": false, - "lf_version": "1.0.19.post2", + "lf_version": "1.1.1", "metadata": {}, "output_types": [], "outputs": [ @@ -688,8 +689,8 @@ "type": "ParseData" }, "dragging": false, - "height": 302, - "id": "ParseData-pqaC7", + "height": 350, + "id": "ParseData-HrKjN", "position": { "x": 1606.0595305373527, "y": 751.4473696960695 @@ -706,7 +707,7 @@ "data": { "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", - "id": "Prompt-uqDcR", + "id": "Prompt-w0w8U", "node": { "base_classes": [ "Message" @@ -734,7 +735,7 @@ "is_input": null, "is_output": null, "legacy": false, - "lf_version": "1.0.19.post2", + "lf_version": "1.1.1", "metadata": {}, "name": "", "output_types": [], @@ -841,7 +842,7 @@ }, "dragging": false, "height": 433, - "id": "Prompt-uqDcR", + "id": "Prompt-w0w8U", "position": { "x": 1977.9097981422992, "y": 640.5656416923846 @@ -858,7 +859,7 @@ "data": { "description": "Split text into chunks based on specified criteria.", "display_name": "Split Text", - "id": "SplitText-QakmY", + "id": "SplitText-oHVzS", "node": { "base_classes": [ "Data" @@ -879,7 +880,7 @@ "frozen": false, "icon": "scissors-line-dashed", "legacy": false, - "lf_version": "1.0.19.post2", + "lf_version": "1.1.1", "metadata": {}, "output_types": [], "outputs": [ @@ -990,7 +991,7 @@ }, "dragging": false, "height": 475, - "id": "SplitText-QakmY", + "id": "SplitText-oHVzS", "position": { "x": 1683.4543896546102, "y": 1350.7871623588553 @@ -1005,7 +1006,7 @@ }, { "data": { - "id": "note-UrQ0p", + "id": "note-gvYc6", "node": { "description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n", "display_name": "", @@ -1018,7 +1019,7 @@ }, "dragging": false, "height": 324, - "id": "note-UrQ0p", + "id": "note-gvYc6", "position": { "x": 374.388314931542, "y": 486.18094072679895 @@ -1038,7 +1039,7 @@ }, { "data": { - "id": "note-39jdn", + "id": "note-W3z3Y", "node": { "description": "## 📖 README\n\nLoad your data into a vector database with the 📚 **Load Data** flow, and then use your data as chat context with the 🐕 **Retriever** flow.\n\n**🚨 Add your OpenAI API key as a global variable to easily add it to all of the OpenAI components in this flow.** \n\n**Quick start**\n1. Run the 📚 **Load Data** flow.\n2. Run the 🐕 **Retriever** flow.\n\n**Next steps** \n\n- Experiment by changing the prompt and the loaded data to see how the bot's responses change. \n\nFor more info, see the [Langflow docs](https://docs.langflow.org/starter-projects-vector-store-rag).", "display_name": "Read Me", @@ -1050,7 +1051,8 @@ "type": "note" }, "dragging": false, - "id": "note-39jdn", + "height": 527, + "id": "note-W3z3Y", "position": { "x": 94.28986613312418, "y": 907.6428043837066 @@ -1070,7 +1072,7 @@ }, { "data": { - "id": "OpenAIModel-H0ANr", + "id": "OpenAIModel-a9aIS", "node": { "base_classes": [ "LanguageModel", @@ -1168,7 +1170,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import operator\nfrom functools import reduce\n\nfrom langchain_openai import ChatOpenAI\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, SecretStrInput, StrInput\nfrom langflow.inputs.inputs import HandleInput\n\n\nclass OpenAIModelComponent(LCModelComponent):\n display_name = \"OpenAI\"\n description = \"Generates text using OpenAI LLMs.\"\n icon = \"OpenAI\"\n name = \"OpenAIModel\"\n\n inputs = [\n *LCModelComponent._base_inputs,\n IntInput(\n name=\"max_tokens\",\n display_name=\"Max Tokens\",\n advanced=True,\n info=\"The maximum number of tokens to generate. Set to 0 for unlimited tokens.\",\n range_spec=RangeSpec(min=0, max=128000),\n ),\n DictInput(\n name=\"model_kwargs\",\n display_name=\"Model Kwargs\",\n advanced=True,\n info=\"Additional keyword arguments to pass to the model.\",\n ),\n BoolInput(\n name=\"json_mode\",\n display_name=\"JSON Mode\",\n advanced=True,\n info=\"If True, it will output JSON regardless of passing a schema.\",\n ),\n DictInput(\n name=\"output_schema\",\n is_list=True,\n display_name=\"Schema\",\n advanced=True,\n info=\"The schema for the Output of the model. \"\n \"You must pass the word JSON in the prompt. \"\n \"If left blank, JSON mode will be disabled. [DEPRECATED]\",\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n advanced=False,\n options=OPENAI_MODEL_NAMES,\n value=OPENAI_MODEL_NAMES[0],\n ),\n StrInput(\n name=\"openai_api_base\",\n display_name=\"OpenAI API Base\",\n advanced=True,\n info=\"The base URL of the OpenAI API. \"\n \"Defaults to https://api.openai.com/v1. \"\n \"You can change this to use other APIs like JinaChat, LocalAI and Prem.\",\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"The OpenAI API Key to use for the OpenAI model.\",\n advanced=False,\n value=\"OPENAI_API_KEY\",\n ),\n FloatInput(name=\"temperature\", display_name=\"Temperature\", value=0.1),\n IntInput(\n name=\"seed\",\n display_name=\"Seed\",\n info=\"The seed controls the reproducibility of the job.\",\n advanced=True,\n value=1,\n ),\n HandleInput(\n name=\"output_parser\",\n display_name=\"Output Parser\",\n info=\"The parser to use to parse the output of the model\",\n advanced=True,\n input_types=[\"OutputParser\"],\n ),\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n # self.output_schema is a list of dictionaries\n # let's convert it to a dictionary\n output_schema_dict: dict[str, str] = reduce(operator.ior, self.output_schema or {}, {})\n openai_api_key = self.api_key\n temperature = self.temperature\n model_name: str = self.model_name\n max_tokens = self.max_tokens\n model_kwargs = self.model_kwargs or {}\n openai_api_base = self.openai_api_base or \"https://api.openai.com/v1\"\n json_mode = bool(output_schema_dict) or self.json_mode\n seed = self.seed\n\n api_key = SecretStr(openai_api_key).get_secret_value() if openai_api_key else None\n output = ChatOpenAI(\n max_tokens=max_tokens or None,\n model_kwargs=model_kwargs,\n model=model_name,\n base_url=openai_api_base,\n api_key=api_key,\n temperature=temperature if temperature is not None else 0.1,\n seed=seed,\n )\n if json_mode:\n if output_schema_dict:\n output = output.with_structured_output(schema=output_schema_dict, method=\"json_mode\")\n else:\n output = output.bind(response_format={\"type\": \"json_object\"})\n\n return output\n\n def _get_exception_message(self, e: Exception):\n \"\"\"Get a message from an OpenAI exception.\n\n Args:\n e (Exception): The exception to get the message from.\n\n Returns:\n str: The message from the exception.\n \"\"\"\n try:\n from openai import BadRequestError\n except ImportError:\n return None\n if isinstance(e, BadRequestError):\n message = e.body.get(\"message\")\n if message:\n return message\n return None\n" + "value": "import operator\nfrom functools import reduce\n\nfrom langchain_openai import ChatOpenAI\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs import BoolInput, DictInput, DropdownInput, IntInput, SecretStrInput, SliderInput, StrInput\nfrom langflow.inputs.inputs import HandleInput\n\n\nclass OpenAIModelComponent(LCModelComponent):\n display_name = \"OpenAI\"\n description = \"Generates text using OpenAI LLMs.\"\n icon = \"OpenAI\"\n name = \"OpenAIModel\"\n\n inputs = [\n *LCModelComponent._base_inputs,\n IntInput(\n name=\"max_tokens\",\n display_name=\"Max Tokens\",\n advanced=True,\n info=\"The maximum number of tokens to generate. Set to 0 for unlimited tokens.\",\n range_spec=RangeSpec(min=0, max=128000),\n ),\n DictInput(\n name=\"model_kwargs\",\n display_name=\"Model Kwargs\",\n advanced=True,\n info=\"Additional keyword arguments to pass to the model.\",\n ),\n BoolInput(\n name=\"json_mode\",\n display_name=\"JSON Mode\",\n advanced=True,\n info=\"If True, it will output JSON regardless of passing a schema.\",\n ),\n DictInput(\n name=\"output_schema\",\n is_list=True,\n display_name=\"Schema\",\n advanced=True,\n info=\"The schema for the Output of the model. \"\n \"You must pass the word JSON in the prompt. \"\n \"If left blank, JSON mode will be disabled. [DEPRECATED]\",\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n advanced=False,\n options=OPENAI_MODEL_NAMES,\n value=OPENAI_MODEL_NAMES[0],\n ),\n StrInput(\n name=\"openai_api_base\",\n display_name=\"OpenAI API Base\",\n advanced=True,\n info=\"The base URL of the OpenAI API. \"\n \"Defaults to https://api.openai.com/v1. \"\n \"You can change this to use other APIs like JinaChat, LocalAI and Prem.\",\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"The OpenAI API Key to use for the OpenAI model.\",\n advanced=False,\n value=\"OPENAI_API_KEY\",\n ),\n SliderInput(name=\"temperature\", display_name=\"Temperature\", value=0.1, range_spec=RangeSpec(min=0, max=1)),\n IntInput(\n name=\"seed\",\n display_name=\"Seed\",\n info=\"The seed controls the reproducibility of the job.\",\n advanced=True,\n value=1,\n ),\n HandleInput(\n name=\"output_parser\",\n display_name=\"Output Parser\",\n info=\"The parser to use to parse the output of the model\",\n advanced=True,\n input_types=[\"OutputParser\"],\n ),\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n # self.output_schema is a list of dictionaries\n # let's convert it to a dictionary\n output_schema_dict: dict[str, str] = reduce(operator.ior, self.output_schema or {}, {})\n openai_api_key = self.api_key\n temperature = self.temperature\n model_name: str = self.model_name\n max_tokens = self.max_tokens\n model_kwargs = self.model_kwargs or {}\n openai_api_base = self.openai_api_base or \"https://api.openai.com/v1\"\n json_mode = bool(output_schema_dict) or self.json_mode\n seed = self.seed\n\n api_key = SecretStr(openai_api_key).get_secret_value() if openai_api_key else None\n output = ChatOpenAI(\n max_tokens=max_tokens or None,\n model_kwargs=model_kwargs,\n model=model_name,\n base_url=openai_api_base,\n api_key=api_key,\n temperature=temperature if temperature is not None else 0.1,\n seed=seed,\n )\n if json_mode:\n if output_schema_dict:\n output = output.with_structured_output(schema=output_schema_dict, method=\"json_mode\")\n else:\n output = output.bind(response_format={\"type\": \"json_object\"})\n\n return output\n\n def _get_exception_message(self, e: Exception):\n \"\"\"Get a message from an OpenAI exception.\n\n Args:\n e (Exception): The exception to get the message from.\n\n Returns:\n str: The message from the exception.\n \"\"\"\n try:\n from openai import BadRequestError\n except ImportError:\n return None\n if isinstance(e, BadRequestError):\n message = e.body.get(\"message\")\n if message:\n return message\n return None\n" }, "input_value": { "_input_type": "MessageInput", @@ -1399,8 +1401,8 @@ "type": "OpenAIModel" }, "dragging": false, - "height": 543, - "id": "OpenAIModel-H0ANr", + "height": 672, + "id": "OpenAIModel-a9aIS", "position": { "x": 2360.1432368563187, "y": 571.6712358167248 @@ -1417,7 +1419,7 @@ "data": { "description": "Display a chat message in the Playground.", "display_name": "Chat Output", - "id": "ChatOutput-a4EPO", + "id": "ChatOutput-fRMkS", "node": { "base_classes": [ "Message" @@ -1677,7 +1679,7 @@ }, "dragging": false, "height": 234, - "id": "ChatOutput-a4EPO", + "id": "ChatOutput-fRMkS", "position": { "x": 2734.385670401691, "y": 808.2967893015561 @@ -1692,544 +1694,7 @@ }, { "data": { - "id": "AstraDB-3buPx", - "node": { - "base_classes": [ - "Data", - "Retriever" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Implementation of Vector Store using Astra DB with search capabilities", - "display_name": "Astra DB", - "documentation": "https://docs.langflow.org/starter-projects-vector-store-rag", - "edited": false, - "field_order": [ - "token", - "api_endpoint", - "collection_name", - "search_input", - "ingest_data", - "namespace", - "embedding_choice", - "embedding_model", - "metric", - "batch_size", - "bulk_insert_batch_concurrency", - "bulk_insert_overwrite_concurrency", - "bulk_delete_concurrency", - "setup_mode", - "pre_delete_collection", - "metadata_indexing_include", - "metadata_indexing_exclude", - "collection_indexing_policy", - "number_of_results", - "search_type", - "search_score_threshold", - "search_filter" - ], - "frozen": false, - "icon": "AstraDB", - "legacy": false, - "lf_version": "1.0.19.post2", - "metadata": {}, - "output_types": [], - "outputs": [ - { - "cache": true, - "display_name": "Retriever", - "method": "build_base_retriever", - "name": "base_retriever", - "required_inputs": [], - "selected": "Retriever", - "types": [ - "Retriever" - ], - "value": "__UNDEFINED__" - }, - { - "cache": true, - "display_name": "Search Results", - "method": "search_documents", - "name": "search_results", - "required_inputs": [ - "api_endpoint", - "collection_name", - "token" - ], - "selected": "Data", - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "advanced_search_filter": { - "_input_type": "NestedDictInput", - "advanced": true, - "display_name": "Search Metadata Filter", - "dynamic": false, - "info": "Optional dictionary of filters to apply to the search query.", - "list": false, - "name": "advanced_search_filter", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "NestedDict", - "value": {} - }, - "api_endpoint": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "API Endpoint", - "dynamic": false, - "info": "API endpoint URL for the Astra DB service.", - "input_types": [ - "Message" - ], - "load_from_db": true, - "name": "api_endpoint", - "password": true, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "str", - "value": "ASTRA_DB_API_ENDPOINT" - }, - "batch_size": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Batch Size", - "dynamic": false, - "info": "Optional number of data to process in a single batch.", - "list": false, - "name": "batch_size", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "bulk_delete_concurrency": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Bulk Delete Concurrency", - "dynamic": false, - "info": "Optional concurrency level for bulk delete operations.", - "list": false, - "name": "bulk_delete_concurrency", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "bulk_insert_batch_concurrency": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Bulk Insert Batch Concurrency", - "dynamic": false, - "info": "Optional concurrency level for bulk insert operations.", - "list": false, - "name": "bulk_insert_batch_concurrency", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "bulk_insert_overwrite_concurrency": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Bulk Insert Overwrite Concurrency", - "dynamic": false, - "info": "Optional concurrency level for bulk insert operations that overwrite existing data.", - "list": false, - "name": "bulk_insert_overwrite_concurrency", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "import os\nfrom collections import defaultdict\n\nimport orjson\nfrom astrapy import DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom astrapy.exceptions import CollectionNotFoundException\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n def list_collections(self):\n client = DataAPIClient(token=self.token)\n\n database = client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n\n return database.list_collections()\n\n def _initialize_collection_options(self):\n try:\n collections = [collection.name for collection in self.list_collections()]\n except (CollectionNotFoundException, ConnectionError, ValueError) as _:\n collections = []\n\n return [*collections, \"+ Create new collection\"]\n\n VECTORIZE_PROVIDERS_MAPPING = defaultdict(\n list,\n {\n \"Azure OpenAI\": [\n \"azureOpenAI\",\n [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"],\n ],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"Nvidia\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n },\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n real_time_refresh=True,\n refresh_button=True,\n options=[],\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"[DEPRECATED] Search Metadata Filter\",\n info=\"Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n list=True,\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_providers_mapping(self):\n # If we don't have token or api_endpoint, we can't fetch the list of providers\n if not self.token or not self.api_endpoint:\n self.log(\"Astra DB token and API endpoint are required to fetch the list of Vectorize providers.\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n client = DataAPIClient(token=self.token)\n admin = client.get_admin()\n\n # Get the embedding providers\n db_admin = admin.get_database_admin(self.api_endpoint)\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n def get_collection_options(self):\n client = DataAPIClient(token=self.token)\n\n database = client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n\n collection = database.get_collection(self.collection_name)\n\n # Only get the options if the collection exists\n try:\n collection_options = collection.options()\n except CollectionNotFoundException as e:\n self.log(f\"Collection not found: {e}\")\n\n return None\n\n return collection_options.vector\n\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Refresh the collection name options\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # If the collection name is set to \"+ Create new collection\", show the advanced options\n if field_name == \"collection_name\" and field_value == \"+ Create new collection\":\n build_config[\"embedding_choice\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n new_parameter = StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n required=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"collection_name_new\": new_parameter})\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"collection_name_new\", {\"embedding_model\": new_parameter})\n elif field_name == \"collection_name\" and field_value != \"+ Create new collection\":\n self.del_fields(build_config, [\"collection_name_new\"])\n\n # Get the collection options\n collection_options = self.get_collection_options()\n\n # If the collection options are available, show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n for input_field in [\n \"embedding_provider\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n build_config[input_field][\"advanced\"] = False\n\n build_config[\"embedding_model\"][\"advanced\"] = True\n build_config[\"embedding_provider\"][\"advanced\"] = True\n build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n build_config[\"embedding_provider\"][\"value\"] = collection_options.service.provider\n\n build_config[\"model\"][\"value\"] = collection_options.service.model_name\n build_config[\"z_01_model_parameters\"][\"value\"] = collection_options.service.parameters\n\n if collection_options.service.authentication:\n build_config[\"z_02_api_key_name\"][\"value\"] = (\n collection_options.service.authentication.get(\"providerKey\")\n )\n build_config[\"z_03_provider_api_key\"][\"value\"] = (\n collection_options.service.authentication.get(\"apiKey\")\n )\n build_config[\"z_04_authentication\"][\"value\"] = collection_options.service.authentication\n else:\n for input_field in [\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n build_config[input_field][\"advanced\"] = True\n\n build_config[\"embedding_model\"][\"advanced\"] = False\n build_config[\"embedding_provider\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_model\": new_parameter})\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n self.del_fields(build_config, [\"embedding_model\"])\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_model\": new_parameter})\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.update_providers_mapping()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\n \"embedding_provider\"\n )\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if provider_key:\n authentication[\"providerKey\"] = provider_key.split(\".\")[0]\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name.split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding_choice == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding_model}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Grab the collection options if available\n collection_options = self.get_collection_options()\n\n # Ensure collection_options and its nested attributes are handled safely\n authentication = (\n getattr(self, \"z_04_authentication\", {})\n or (\n collection_options.service.authentication\n if collection_options and collection_options.service\n else {}\n )\n )\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=(\n getattr(self, \"embedding_provider\", None)\n or (\n collection_options.service.provider\n if collection_options and collection_options.service\n else None\n )\n ),\n model=(\n getattr(self, \"model\", None)\n or (\n collection_options.service.model_name\n if collection_options and collection_options.service\n else None\n )\n ),\n z_01_model_parameters=(\n getattr(self, \"z_01_model_parameters\", None)\n or (\n collection_options.service.parameters\n if collection_options and collection_options.service\n else None\n )\n ),\n z_02_api_key_name=(\n getattr(self, \"z_02_api_key_name\", None)\n or (authentication.get(\"apiKey\") if authentication else None)\n ),\n z_03_provider_api_key=(\n getattr(self, \"z_03_provider_api_key\", None)\n or (authentication.get(\"providerKey\") if authentication else None)\n ),\n z_04_authentication=authentication,\n )\n\n # Set the embedding dictionary\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.keyspace or None,\n collection_name=getattr(self, \"collection_name_new\", None) or self.collection_name,\n environment = (\n parse_api_endpoint(getattr(self, \"api_endpoint\", None)).environment\n if getattr(self, \"api_endpoint\", None)\n else None\n ),\n metric=self.metric or None,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None\n search_filter = (\n {k: v for k, v in self.search_filter.items() if k and v and k.strip()} if self.search_filter else None\n )\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter or search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n\n if search_filter:\n self.log(self.log(f\"`search_filter` is deprecated. Use `advanced_search_filter`. Cleaned: {search_filter}\"))\n filter_arg.update(search_filter)\n\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" - }, - "collection_indexing_policy": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Collection Indexing Policy", - "dynamic": false, - "info": "Optional JSON string for the \"indexing\" field of the collection. See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option", - "list": false, - "load_from_db": false, - "name": "collection_indexing_policy", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "collection_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "display_name": "Collection", - "dynamic": false, - "info": "The name of the collection within Astra DB where the vectors will be stored.", - "name": "collection_name", - "options": [], - "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "embedding_choice": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "display_name": "Embedding Model or Astra Vectorize", - "dynamic": false, - "info": "Determines whether to use Astra Vectorize for the collection.", - "name": "embedding_choice", - "options": [ - "Embedding Model", - "Astra Vectorize" - ], - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "Embedding Model" - }, - "embedding_model": { - "_input_type": "HandleInput", - "advanced": false, - "display_name": "Embedding Model", - "dynamic": false, - "info": "Allows an embedding model configuration.", - "input_types": [ - "Embeddings" - ], - "list": false, - "name": "embedding_model", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "ingest_data": { - "_input_type": "DataInput", - "advanced": false, - "display_name": "Ingest Data", - "dynamic": false, - "info": "", - "input_types": [ - "Data" - ], - "list": false, - "name": "ingest_data", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "keyspace": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Keyspace", - "dynamic": false, - "info": "Optional keyspace within Astra DB to use for the collection.", - "list": false, - "load_from_db": false, - "name": "keyspace", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "metadata_indexing_exclude": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Metadata Indexing Exclude", - "dynamic": false, - "info": "Optional list of metadata fields to exclude from the indexing.", - "list": true, - "load_from_db": false, - "name": "metadata_indexing_exclude", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "metadata_indexing_include": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Metadata Indexing Include", - "dynamic": false, - "info": "Optional list of metadata fields to include in the indexing.", - "list": true, - "load_from_db": false, - "name": "metadata_indexing_include", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "metric": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": false, - "display_name": "Metric", - "dynamic": false, - "info": "Optional distance metric for vector comparisons in the vector store.", - "name": "metric", - "options": [ - "cosine", - "dot_product", - "euclidean" - ], - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "cosine" - }, - "number_of_results": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Number of Results", - "dynamic": false, - "info": "Number of results to return.", - "list": false, - "name": "number_of_results", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": 4 - }, - "pre_delete_collection": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Pre Delete Collection", - "dynamic": false, - "info": "Boolean flag to determine whether to delete the collection before creating a new one.", - "list": false, - "name": "pre_delete_collection", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "search_filter": { - "_input_type": "DictInput", - "advanced": true, - "display_name": "[DEPRECATED] Search Metadata Filter", - "dynamic": false, - "info": "Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.", - "list": true, - "name": "search_filter", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_input": true, - "type": "dict", - "value": {} - }, - "search_input": { - "_input_type": "MultilineInput", - "advanced": false, - "display_name": "Search Input", - "dynamic": false, - "info": "", - "input_types": [ - "Message" - ], - "list": false, - "load_from_db": false, - "multiline": true, - "name": "search_input", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "search_score_threshold": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Search Score Threshold", - "dynamic": false, - "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')", - "list": false, - "name": "search_score_threshold", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "float", - "value": 0 - }, - "search_type": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": false, - "display_name": "Search Type", - "dynamic": false, - "info": "Search type to use", - "name": "search_type", - "options": [ - "Similarity", - "Similarity with score threshold", - "MMR (Max Marginal Relevance)" - ], - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "Similarity" - }, - "setup_mode": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": false, - "display_name": "Setup Mode", - "dynamic": false, - "info": "Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.", - "name": "setup_mode", - "options": [ - "Sync", - "Off" - ], - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "Sync" - }, - "token": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "Astra DB Application Token", - "dynamic": false, - "info": "Authentication token for accessing Astra DB.", - "input_types": [ - "Message" - ], - "load_from_db": true, - "name": "token", - "password": true, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "str", - "value": "ASTRA_DB_APPLICATION_TOKEN" - } - }, - "tool_mode": false - }, - "type": "AstraDB" - }, - "dragging": false, - "height": 749, - "id": "AstraDB-3buPx", - "position": { - "x": 1225.8151138573664, - "y": 369.2727294042354 - }, - "positionAbsolute": { - "x": 1225.8151138573664, - "y": 369.2727294042354 - }, - "selected": false, - "type": "genericNode", - "width": 320 - }, - { - "data": { - "id": "OpenAIEmbeddings-CeoV9", + "id": "OpenAIEmbeddings-bD6An", "node": { "base_classes": [ "Embeddings" @@ -2267,7 +1732,7 @@ "frozen": false, "icon": "OpenAI", "legacy": false, - "lf_version": "1.0.19.post2", + "lf_version": "1.1.1", "metadata": {}, "output_types": [], "outputs": [ @@ -2704,8 +2169,8 @@ "type": "OpenAIEmbeddings" }, "dragging": false, - "height": 322, - "id": "OpenAIEmbeddings-CeoV9", + "height": 320, + "id": "OpenAIEmbeddings-bD6An", "position": { "x": 825.435626932521, "y": 739.6327999745448 @@ -2720,544 +2185,7 @@ }, { "data": { - "id": "AstraDB-laybz", - "node": { - "base_classes": [ - "Data", - "Retriever" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Implementation of Vector Store using Astra DB with search capabilities", - "display_name": "Astra DB", - "documentation": "https://docs.langflow.org/starter-projects-vector-store-rag", - "edited": false, - "field_order": [ - "token", - "api_endpoint", - "collection_name", - "search_input", - "ingest_data", - "namespace", - "embedding_choice", - "embedding_model", - "metric", - "batch_size", - "bulk_insert_batch_concurrency", - "bulk_insert_overwrite_concurrency", - "bulk_delete_concurrency", - "setup_mode", - "pre_delete_collection", - "metadata_indexing_include", - "metadata_indexing_exclude", - "collection_indexing_policy", - "number_of_results", - "search_type", - "search_score_threshold", - "search_filter" - ], - "frozen": false, - "icon": "AstraDB", - "legacy": false, - "lf_version": "1.0.19.post2", - "metadata": {}, - "output_types": [], - "outputs": [ - { - "cache": true, - "display_name": "Retriever", - "method": "build_base_retriever", - "name": "base_retriever", - "required_inputs": [], - "selected": "Retriever", - "types": [ - "Retriever" - ], - "value": "__UNDEFINED__" - }, - { - "cache": true, - "display_name": "Search Results", - "method": "search_documents", - "name": "search_results", - "required_inputs": [ - "api_endpoint", - "collection_name", - "token" - ], - "selected": "Data", - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "advanced_search_filter": { - "_input_type": "NestedDictInput", - "advanced": true, - "display_name": "Search Metadata Filter", - "dynamic": false, - "info": "Optional dictionary of filters to apply to the search query.", - "list": false, - "name": "advanced_search_filter", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "NestedDict", - "value": {} - }, - "api_endpoint": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "API Endpoint", - "dynamic": false, - "info": "API endpoint URL for the Astra DB service.", - "input_types": [ - "Message" - ], - "load_from_db": true, - "name": "api_endpoint", - "password": true, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "str", - "value": "ASTRA_DB_API_ENDPOINT" - }, - "batch_size": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Batch Size", - "dynamic": false, - "info": "Optional number of data to process in a single batch.", - "list": false, - "name": "batch_size", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "bulk_delete_concurrency": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Bulk Delete Concurrency", - "dynamic": false, - "info": "Optional concurrency level for bulk delete operations.", - "list": false, - "name": "bulk_delete_concurrency", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "bulk_insert_batch_concurrency": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Bulk Insert Batch Concurrency", - "dynamic": false, - "info": "Optional concurrency level for bulk insert operations.", - "list": false, - "name": "bulk_insert_batch_concurrency", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "bulk_insert_overwrite_concurrency": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Bulk Insert Overwrite Concurrency", - "dynamic": false, - "info": "Optional concurrency level for bulk insert operations that overwrite existing data.", - "list": false, - "name": "bulk_insert_overwrite_concurrency", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "import os\nfrom collections import defaultdict\n\nimport orjson\nfrom astrapy import DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom astrapy.exceptions import CollectionNotFoundException\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n def list_collections(self):\n client = DataAPIClient(token=self.token)\n\n database = client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n\n return database.list_collections()\n\n def _initialize_collection_options(self):\n try:\n collections = [collection.name for collection in self.list_collections()]\n except (CollectionNotFoundException, ConnectionError, ValueError) as _:\n collections = []\n\n return [*collections, \"+ Create new collection\"]\n\n VECTORIZE_PROVIDERS_MAPPING = defaultdict(\n list,\n {\n \"Azure OpenAI\": [\n \"azureOpenAI\",\n [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"],\n ],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"Nvidia\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n },\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n real_time_refresh=True,\n refresh_button=True,\n options=[],\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"[DEPRECATED] Search Metadata Filter\",\n info=\"Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n list=True,\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_providers_mapping(self):\n # If we don't have token or api_endpoint, we can't fetch the list of providers\n if not self.token or not self.api_endpoint:\n self.log(\"Astra DB token and API endpoint are required to fetch the list of Vectorize providers.\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n client = DataAPIClient(token=self.token)\n admin = client.get_admin()\n\n # Get the embedding providers\n db_admin = admin.get_database_admin(self.api_endpoint)\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n def get_collection_options(self):\n client = DataAPIClient(token=self.token)\n\n database = client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n\n collection = database.get_collection(self.collection_name)\n\n # Only get the options if the collection exists\n try:\n collection_options = collection.options()\n except CollectionNotFoundException as e:\n self.log(f\"Collection not found: {e}\")\n\n return None\n\n return collection_options.vector\n\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Refresh the collection name options\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # If the collection name is set to \"+ Create new collection\", show the advanced options\n if field_name == \"collection_name\" and field_value == \"+ Create new collection\":\n build_config[\"embedding_choice\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n new_parameter = StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n required=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"collection_name_new\": new_parameter})\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"collection_name_new\", {\"embedding_model\": new_parameter})\n elif field_name == \"collection_name\" and field_value != \"+ Create new collection\":\n self.del_fields(build_config, [\"collection_name_new\"])\n\n # Get the collection options\n collection_options = self.get_collection_options()\n\n # If the collection options are available, show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n for input_field in [\n \"embedding_provider\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n build_config[input_field][\"advanced\"] = False\n\n build_config[\"embedding_model\"][\"advanced\"] = True\n build_config[\"embedding_provider\"][\"advanced\"] = True\n build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n build_config[\"embedding_provider\"][\"value\"] = collection_options.service.provider\n\n build_config[\"model\"][\"value\"] = collection_options.service.model_name\n build_config[\"z_01_model_parameters\"][\"value\"] = collection_options.service.parameters\n\n if collection_options.service.authentication:\n build_config[\"z_02_api_key_name\"][\"value\"] = (\n collection_options.service.authentication.get(\"providerKey\")\n )\n build_config[\"z_03_provider_api_key\"][\"value\"] = (\n collection_options.service.authentication.get(\"apiKey\")\n )\n build_config[\"z_04_authentication\"][\"value\"] = collection_options.service.authentication\n else:\n for input_field in [\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n build_config[input_field][\"advanced\"] = True\n\n build_config[\"embedding_model\"][\"advanced\"] = False\n build_config[\"embedding_provider\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_model\": new_parameter})\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n self.del_fields(build_config, [\"embedding_model\"])\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_model\": new_parameter})\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.update_providers_mapping()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\n \"embedding_provider\"\n )\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if provider_key:\n authentication[\"providerKey\"] = provider_key.split(\".\")[0]\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name.split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n if self.embedding_choice == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding_model}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Grab the collection options if available\n collection_options = self.get_collection_options()\n\n # Ensure collection_options and its nested attributes are handled safely\n authentication = (\n getattr(self, \"z_04_authentication\", {})\n or (\n collection_options.service.authentication\n if collection_options and collection_options.service\n else {}\n )\n )\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=(\n getattr(self, \"embedding_provider\", None)\n or (\n collection_options.service.provider\n if collection_options and collection_options.service\n else None\n )\n ),\n model=(\n getattr(self, \"model\", None)\n or (\n collection_options.service.model_name\n if collection_options and collection_options.service\n else None\n )\n ),\n z_01_model_parameters=(\n getattr(self, \"z_01_model_parameters\", None)\n or (\n collection_options.service.parameters\n if collection_options and collection_options.service\n else None\n )\n ),\n z_02_api_key_name=(\n getattr(self, \"z_02_api_key_name\", None)\n or (authentication.get(\"apiKey\") if authentication else None)\n ),\n z_03_provider_api_key=(\n getattr(self, \"z_03_provider_api_key\", None)\n or (authentication.get(\"providerKey\") if authentication else None)\n ),\n z_04_authentication=authentication,\n )\n\n # Set the embedding dictionary\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.keyspace or None,\n collection_name=getattr(self, \"collection_name_new\", None) or self.collection_name,\n environment = (\n parse_api_endpoint(getattr(self, \"api_endpoint\", None)).environment\n if getattr(self, \"api_endpoint\", None)\n else None\n ),\n metric=self.metric or None,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None\n search_filter = (\n {k: v for k, v in self.search_filter.items() if k and v and k.strip()} if self.search_filter else None\n )\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter or search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n\n if search_filter:\n self.log(self.log(f\"`search_filter` is deprecated. Use `advanced_search_filter`. Cleaned: {search_filter}\"))\n filter_arg.update(search_filter)\n\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" - }, - "collection_indexing_policy": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Collection Indexing Policy", - "dynamic": false, - "info": "Optional JSON string for the \"indexing\" field of the collection. See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option", - "list": false, - "load_from_db": false, - "name": "collection_indexing_policy", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "collection_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "display_name": "Collection", - "dynamic": false, - "info": "The name of the collection within Astra DB where the vectors will be stored.", - "name": "collection_name", - "options": [], - "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "embedding_choice": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "display_name": "Embedding Model or Astra Vectorize", - "dynamic": false, - "info": "Determines whether to use Astra Vectorize for the collection.", - "name": "embedding_choice", - "options": [ - "Embedding Model", - "Astra Vectorize" - ], - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "Embedding Model" - }, - "embedding_model": { - "_input_type": "HandleInput", - "advanced": false, - "display_name": "Embedding Model", - "dynamic": false, - "info": "Allows an embedding model configuration.", - "input_types": [ - "Embeddings" - ], - "list": false, - "name": "embedding_model", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "ingest_data": { - "_input_type": "DataInput", - "advanced": false, - "display_name": "Ingest Data", - "dynamic": false, - "info": "", - "input_types": [ - "Data" - ], - "list": false, - "name": "ingest_data", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "keyspace": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Keyspace", - "dynamic": false, - "info": "Optional keyspace within Astra DB to use for the collection.", - "list": false, - "load_from_db": false, - "name": "keyspace", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "metadata_indexing_exclude": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Metadata Indexing Exclude", - "dynamic": false, - "info": "Optional list of metadata fields to exclude from the indexing.", - "list": true, - "load_from_db": false, - "name": "metadata_indexing_exclude", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "metadata_indexing_include": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Metadata Indexing Include", - "dynamic": false, - "info": "Optional list of metadata fields to include in the indexing.", - "list": true, - "load_from_db": false, - "name": "metadata_indexing_include", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "metric": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": false, - "display_name": "Metric", - "dynamic": false, - "info": "Optional distance metric for vector comparisons in the vector store.", - "name": "metric", - "options": [ - "cosine", - "dot_product", - "euclidean" - ], - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "cosine" - }, - "number_of_results": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Number of Results", - "dynamic": false, - "info": "Number of results to return.", - "list": false, - "name": "number_of_results", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "int", - "value": 4 - }, - "pre_delete_collection": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Pre Delete Collection", - "dynamic": false, - "info": "Boolean flag to determine whether to delete the collection before creating a new one.", - "list": false, - "name": "pre_delete_collection", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "search_filter": { - "_input_type": "DictInput", - "advanced": true, - "display_name": "[DEPRECATED] Search Metadata Filter", - "dynamic": false, - "info": "Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.", - "list": true, - "name": "search_filter", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_input": true, - "type": "dict", - "value": {} - }, - "search_input": { - "_input_type": "MultilineInput", - "advanced": false, - "display_name": "Search Input", - "dynamic": false, - "info": "", - "input_types": [ - "Message" - ], - "list": false, - "load_from_db": false, - "multiline": true, - "name": "search_input", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "search_score_threshold": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Search Score Threshold", - "dynamic": false, - "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')", - "list": false, - "name": "search_score_threshold", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "float", - "value": 0 - }, - "search_type": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": false, - "display_name": "Search Type", - "dynamic": false, - "info": "Search type to use", - "name": "search_type", - "options": [ - "Similarity", - "Similarity with score threshold", - "MMR (Max Marginal Relevance)" - ], - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "Similarity" - }, - "setup_mode": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": false, - "display_name": "Setup Mode", - "dynamic": false, - "info": "Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.", - "name": "setup_mode", - "options": [ - "Sync", - "Off" - ], - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "Sync" - }, - "token": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "Astra DB Application Token", - "dynamic": false, - "info": "Authentication token for accessing Astra DB.", - "input_types": [ - "Message" - ], - "load_from_db": true, - "name": "token", - "password": true, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "str", - "value": "ASTRA_DB_APPLICATION_TOKEN" - } - }, - "tool_mode": false - }, - "type": "AstraDB" - }, - "dragging": false, - "height": 749, - "id": "AstraDB-laybz", - "position": { - "x": 2090.491421890006, - "y": 1351.6194724621473 - }, - "positionAbsolute": { - "x": 2090.491421890006, - "y": 1351.6194724621473 - }, - "selected": false, - "type": "genericNode", - "width": 320 - }, - { - "data": { - "id": "note-igpjN", + "id": "note-hfjIh", "node": { "description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. \n\nClick ▶️ **Run component** on the **Astra DB** component to load your data.\n\n* If you're using OSS Langflow, add your Astra DB Application Token to the Astra DB component.\n\n#### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.", "display_name": "", @@ -3269,7 +2197,8 @@ "type": "note" }, "dragging": false, - "id": "note-igpjN", + "height": 50, + "id": "note-hfjIh", "position": { "x": 955.3277857006676, "y": 1552.171191793604 @@ -3283,11 +2212,12 @@ "height": 50, "width": 325 }, - "type": "noteNode" + "type": "noteNode", + "width": 325 }, { "data": { - "id": "OpenAIEmbeddings-ANgku", + "id": "OpenAIEmbeddings-Qtui3", "node": { "base_classes": [ "Embeddings" @@ -3325,7 +2255,7 @@ "frozen": false, "icon": "OpenAI", "legacy": false, - "lf_version": "1.0.19.post2", + "lf_version": "1.1.1", "metadata": {}, "output_types": [], "outputs": [ @@ -3762,8 +2692,8 @@ "type": "OpenAIEmbeddings" }, "dragging": false, - "height": 322, - "id": "OpenAIEmbeddings-ANgku", + "height": 320, + "id": "OpenAIEmbeddings-Qtui3", "position": { "x": 1690.9220896443658, "y": 1866.483269483266 @@ -3778,7 +2708,7 @@ }, { "data": { - "id": "File-FJIuH", + "id": "File-4yIsn", "node": { "base_classes": [ "Data" @@ -3799,6 +2729,7 @@ "frozen": false, "icon": "file-text", "legacy": false, + "lf_version": "1.1.1", "metadata": {}, "output_types": [], "outputs": [ @@ -3873,11 +2804,12 @@ "advanced": false, "display_name": "Server File Path", "dynamic": false, - "info": "Data object with a 'file_path' property pointing to server file. Supercedes 'Path'. ", + "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", "input_types": [ - "Data" + "Data", + "Message" ], - "list": false, + "list": true, "name": "file_path", "placeholder": "", "required": false, @@ -3949,7 +2881,7 @@ "bz2", "gz" ], - "file_path": "", + "file_path": "cc5608e0-9b81-4c93-ba28-05c2b743b3b4/2024-12-02_11-44-25_1706.03762v7.pdf", "info": "Supported file extensions: txt, md, mdx, csv, json, yaml, yml, xml, html, htm, pdf, docx, py, sh, sql, js, ts, tsx; optionally bundled in file extensions: zip, tar, tgz, bz2, gz", "list": false, "name": "path", @@ -3959,7 +2891,7 @@ "title_case": false, "trace_as_metadata": true, "type": "file", - "value": "" + "value": "attention.pdf" }, "silent_errors": { "_input_type": "BoolInput", @@ -3999,8 +2931,8 @@ "type": "File" }, "dragging": false, - "height": 232, - "id": "File-FJIuH", + "height": 367, + "id": "File-4yIsn", "position": { "x": 1318.9043936921921, "y": 1486.3263312921847 @@ -4015,7 +2947,7 @@ }, { "data": { - "id": "note-O3TZQ", + "id": "note-tJJ9t", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -4028,7 +2960,7 @@ }, "dragging": false, "height": 324, - "id": "note-O3TZQ", + "id": "note-tJJ9t", "position": { "x": 1692.2322233423606, "y": 1821.9077961087607 @@ -4043,7 +2975,7 @@ }, { "data": { - "id": "note-M7loH", + "id": "note-fANlD", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -4056,7 +2988,7 @@ }, "dragging": false, "height": 324, - "id": "note-M7loH", + "id": "note-fANlD", "position": { "x": 824.1003268813427, "y": 698.6951695764802 @@ -4071,7 +3003,7 @@ }, { "data": { - "id": "note-26oaj", + "id": "note-94kiM", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -4084,7 +3016,7 @@ }, "dragging": false, "height": 324, - "id": "note-26oaj", + "id": "note-94kiM", "position": { "x": 2350.297636215281, "y": 525.0687902842766 @@ -4096,21 +3028,1135 @@ "selected": false, "type": "noteNode", "width": 324 + }, + { + "data": { + "id": "AstraDB-zXOmg", + "node": { + "base_classes": [ + "Data", + "Retriever" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Implementation of Vector Store using Astra DB with search capabilities", + "display_name": "Astra DB", + "documentation": "https://docs.langflow.org/starter-projects-vector-store-rag", + "edited": false, + "field_order": [ + "token", + "api_endpoint", + "collection_name", + "collection_name_new", + "keyspace", + "search_input", + "number_of_results", + "search_type", + "search_score_threshold", + "advanced_search_filter", + "search_filter", + "ingest_data", + "embedding_choice", + "embedding_model", + "metric", + "batch_size", + "bulk_insert_batch_concurrency", + "bulk_insert_overwrite_concurrency", + "bulk_delete_concurrency", + "setup_mode", + "pre_delete_collection", + "metadata_indexing_include", + "metadata_indexing_exclude", + "collection_indexing_policy" + ], + "frozen": false, + "icon": "AstraDB", + "legacy": false, + "lf_version": "1.1.1", + "metadata": {}, + "output_types": [], + "outputs": [ + { + "cache": true, + "display_name": "Retriever", + "method": "build_base_retriever", + "name": "base_retriever", + "required_inputs": [], + "selected": "Retriever", + "types": [ + "Retriever" + ], + "value": "__UNDEFINED__" + }, + { + "cache": true, + "display_name": "Search Results", + "method": "search_documents", + "name": "search_results", + "required_inputs": [ + "api_endpoint", + "collection_name", + "token" + ], + "selected": "Data", + "types": [ + "Data" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "advanced_search_filter": { + "_input_type": "NestedDictInput", + "advanced": true, + "display_name": "Search Metadata Filter", + "dynamic": false, + "info": "Optional dictionary of filters to apply to the search query.", + "list": false, + "name": "advanced_search_filter", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": {} + }, + "api_endpoint": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "API Endpoint", + "dynamic": false, + "info": "API endpoint URL for the Astra DB service.", + "input_types": [ + "Message" + ], + "load_from_db": true, + "name": "api_endpoint", + "password": true, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "str", + "value": "ASTRA_DB_API_ENDPOINT" + }, + "batch_size": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Batch Size", + "dynamic": false, + "info": "Optional number of data to process in a single batch.", + "list": false, + "name": "batch_size", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "bulk_delete_concurrency": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Bulk Delete Concurrency", + "dynamic": false, + "info": "Optional concurrency level for bulk delete operations.", + "list": false, + "name": "bulk_delete_concurrency", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "bulk_insert_batch_concurrency": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Bulk Insert Batch Concurrency", + "dynamic": false, + "info": "Optional concurrency level for bulk insert operations.", + "list": false, + "name": "bulk_insert_batch_concurrency", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "bulk_insert_overwrite_concurrency": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Bulk Insert Overwrite Concurrency", + "dynamic": false, + "info": "Optional concurrency level for bulk insert operations that overwrite existing data.", + "list": false, + "name": "bulk_insert_overwrite_concurrency", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import os\nfrom collections import defaultdict\n\nimport orjson\nfrom astrapy import DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom astrapy.exceptions import CollectionNotFoundException\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n def list_collections(self):\n client = DataAPIClient(token=self.token)\n\n database = client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n\n return database.list_collections()\n\n def _initialize_collection_options(self):\n try:\n collections = [collection.name for collection in self.list_collections()]\n except (CollectionNotFoundException, ConnectionError, ValueError) as _:\n collections = []\n\n return [*collections, \"+ Create new collection\"]\n\n VECTORIZE_PROVIDERS_MAPPING = defaultdict(\n list,\n {\n \"Azure OpenAI\": [\n \"azureOpenAI\",\n [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"],\n ],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"Nvidia\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n },\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n real_time_refresh=True,\n refresh_button=True,\n options=[\"+ Create new collection\"],\n value=\"+ Create new collection\",\n ),\n StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n info=\"Name of the new collection to create.\",\n advanced=False,\n required=True,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"[DEPRECATED] Search Metadata Filter\",\n info=\"Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n list=True,\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_providers_mapping(self):\n # If we don't have token or api_endpoint, we can't fetch the list of providers\n if not self.token or not self.api_endpoint:\n self.log(\"Astra DB token and API endpoint are required to fetch the list of Vectorize providers.\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n client = DataAPIClient(token=self.token)\n admin = client.get_admin()\n\n # Get the embedding providers\n db_admin = admin.get_database_admin(self.api_endpoint)\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n def get_collection_options(self):\n client = DataAPIClient(token=self.token)\n\n database = client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n\n collection = database.get_collection(self.collection_name)\n\n # Only get the options if the collection exists\n try:\n collection_options = collection.options()\n except CollectionNotFoundException as e:\n self.log(f\"Collection not found: {e}\")\n\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Refresh the collection name options\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # If the collection name is set to \"+ Create new collection\", show the advanced options\n if field_name == \"collection_name\" and field_value == \"+ Create new collection\":\n build_config[\"embedding_choice\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n build_config[\"collection_name_new\"][\"advanced\"] = False\n build_config[\"collection_name_new\"][\"required\"] = True\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"collection_name_new\", {\"embedding_model\": new_parameter})\n elif field_name == \"collection_name\" and field_value != \"+ Create new collection\":\n build_config[\"collection_name_new\"][\"advanced\"] = True\n build_config[\"collection_name_new\"][\"required\"] = False\n build_config[\"collection_name_new\"][\"value\"] = \"\"\n\n # Get the collection options\n collection_options = self.get_collection_options()\n\n # If the collection options are available, show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n for input_field in [\n \"embedding_provider\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n build_config[input_field][\"advanced\"] = False\n\n build_config[\"embedding_model\"][\"advanced\"] = True\n build_config[\"embedding_provider\"][\"advanced\"] = True\n build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n build_config[\"embedding_provider\"][\"value\"] = collection_options.service.provider\n\n build_config[\"model\"][\"value\"] = collection_options.service.model_name\n build_config[\"z_01_model_parameters\"][\"value\"] = collection_options.service.parameters\n\n if collection_options.service.authentication:\n build_config[\"z_02_api_key_name\"][\"value\"] = collection_options.service.authentication.get(\n \"providerKey\"\n )\n build_config[\"z_03_provider_api_key\"][\"value\"] = collection_options.service.authentication.get(\n \"apiKey\"\n )\n build_config[\"z_04_authentication\"][\"value\"] = collection_options.service.authentication\n else:\n for input_field in [\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n build_config[input_field][\"advanced\"] = True\n\n build_config[\"embedding_model\"][\"advanced\"] = False\n build_config[\"embedding_provider\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_model\": new_parameter})\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n self.del_fields(build_config, [\"embedding_model\"])\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_model\": new_parameter})\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.update_providers_mapping()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\"embedding_provider\")\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n if authentication:\n provider_key = None\n authentication[\"providerKey\"] = authentication[\"providerKey\"].split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n metric_value = self.metric or None\n autodetect = False\n\n if self.embedding_choice == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding_model}\n # Use autodetect if the collection name is NOT set to \"+ Create new collection\"\n elif self.collection_name != \"+ Create new collection\":\n autodetect = True\n metric_value = None\n setup_mode_value = None\n embedding_dict = {}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Grab the collection options if available\n collection_options = self.get_collection_options()\n\n # Ensure collection_options and its nested attributes are handled safely\n authentication = getattr(self, \"z_04_authentication\", {}) or (\n collection_options.service.authentication if collection_options and collection_options.service else {}\n )\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=(\n getattr(self, \"embedding_provider\", None)\n or (\n collection_options.service.provider\n if collection_options and collection_options.service\n else None\n )\n ),\n model=(\n getattr(self, \"model\", None)\n or (\n collection_options.service.model_name\n if collection_options and collection_options.service\n else None\n )\n ),\n z_01_model_parameters=(\n getattr(self, \"z_01_model_parameters\", None)\n or (\n collection_options.service.parameters\n if collection_options and collection_options.service\n else None\n )\n ),\n z_02_api_key_name=(\n getattr(self, \"z_02_api_key_name\", None)\n or (authentication.get(\"apiKey\") if authentication else None)\n ),\n z_03_provider_api_key=(\n getattr(self, \"z_03_provider_api_key\", None)\n or (authentication.get(\"providerKey\") if authentication else None)\n ),\n z_04_authentication=authentication,\n )\n\n # Set the embedding dictionary\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.keyspace or None,\n collection_name=getattr(self, \"collection_name_new\", None) or self.collection_name,\n autodetect_collection=autodetect,\n environment=(\n parse_api_endpoint(getattr(self, \"api_endpoint\", None)).environment\n if getattr(self, \"api_endpoint\", None)\n else None\n ),\n metric=metric_value,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None\n search_filter = (\n {k: v for k, v in self.search_filter.items() if k and v and k.strip()} if self.search_filter else None\n )\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter or search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n\n if search_filter:\n self.log(self.log(f\"`search_filter` is deprecated. Use `advanced_search_filter`. Cleaned: {search_filter}\"))\n filter_arg.update(search_filter)\n\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + }, + "collection_indexing_policy": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Collection Indexing Policy", + "dynamic": false, + "info": "Optional JSON string for the \"indexing\" field of the collection. See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option", + "list": false, + "load_from_db": false, + "name": "collection_indexing_policy", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "collection_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "display_name": "Collection", + "dynamic": false, + "info": "The name of the collection within Astra DB where the vectors will be stored.", + "name": "collection_name", + "options": [ + "+ Create new collection" + ], + "placeholder": "", + "real_time_refresh": true, + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "+ Create new collection" + }, + "collection_name_new": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Collection Name", + "dynamic": false, + "info": "Name of the new collection to create.", + "list": false, + "load_from_db": false, + "name": "collection_name_new", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "vector_store_rag_demo" + }, + "embedding_choice": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "display_name": "Embedding Model or Astra Vectorize", + "dynamic": false, + "info": "Determines whether to use Astra Vectorize for the collection.", + "name": "embedding_choice", + "options": [ + "Embedding Model", + "Astra Vectorize" + ], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Embedding Model" + }, + "embedding_model": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Embedding Model", + "dynamic": false, + "info": "Allows an embedding model configuration.", + "input_types": [ + "Embeddings" + ], + "list": false, + "name": "embedding_model", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "ingest_data": { + "_input_type": "DataInput", + "advanced": false, + "display_name": "Ingest Data", + "dynamic": false, + "info": "", + "input_types": [ + "Data" + ], + "list": false, + "name": "ingest_data", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "keyspace": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Keyspace", + "dynamic": false, + "info": "Optional keyspace within Astra DB to use for the collection.", + "list": false, + "load_from_db": false, + "name": "keyspace", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "metadata_indexing_exclude": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Metadata Indexing Exclude", + "dynamic": false, + "info": "Optional list of metadata fields to exclude from the indexing.", + "list": true, + "load_from_db": false, + "name": "metadata_indexing_exclude", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "metadata_indexing_include": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Metadata Indexing Include", + "dynamic": false, + "info": "Optional list of metadata fields to include in the indexing.", + "list": true, + "load_from_db": false, + "name": "metadata_indexing_include", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "metric": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "display_name": "Metric", + "dynamic": false, + "info": "Optional distance metric for vector comparisons in the vector store.", + "name": "metric", + "options": [ + "cosine", + "dot_product", + "euclidean" + ], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "cosine" + }, + "number_of_results": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Number of Results", + "dynamic": false, + "info": "Number of results to return.", + "list": false, + "name": "number_of_results", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": 4 + }, + "pre_delete_collection": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Pre Delete Collection", + "dynamic": false, + "info": "Boolean flag to determine whether to delete the collection before creating a new one.", + "list": false, + "name": "pre_delete_collection", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "search_filter": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "[DEPRECATED] Search Metadata Filter", + "dynamic": false, + "info": "Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.", + "list": true, + "name": "search_filter", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "search_input": { + "_input_type": "MultilineInput", + "advanced": false, + "display_name": "Search Input", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "multiline": true, + "name": "search_input", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "search_score_threshold": { + "_input_type": "FloatInput", + "advanced": true, + "display_name": "Search Score Threshold", + "dynamic": false, + "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')", + "list": false, + "name": "search_score_threshold", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "float", + "value": 0 + }, + "search_type": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "display_name": "Search Type", + "dynamic": false, + "info": "Search type to use", + "name": "search_type", + "options": [ + "Similarity", + "Similarity with score threshold", + "MMR (Max Marginal Relevance)" + ], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Similarity" + }, + "setup_mode": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "display_name": "Setup Mode", + "dynamic": false, + "info": "Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.", + "name": "setup_mode", + "options": [ + "Sync", + "Off" + ], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Sync" + }, + "token": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Astra DB Application Token", + "dynamic": false, + "info": "Authentication token for accessing Astra DB.", + "input_types": [ + "Message" + ], + "load_from_db": true, + "name": "token", + "password": true, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "str", + "value": "ASTRA_DB_APPLICATION_TOKEN" + } + }, + "tool_mode": false + }, + "type": "AstraDB" + }, + "dragging": false, + "height": 831, + "id": "AstraDB-zXOmg", + "position": { + "x": 1215.9500462518045, + "y": 461.154108547011 + }, + "positionAbsolute": { + "x": 1215.9500462518045, + "y": 461.154108547011 + }, + "selected": false, + "type": "genericNode", + "width": 320 + }, + { + "data": { + "id": "AstraDB-vNNR6", + "node": { + "base_classes": [ + "Data", + "Retriever" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Implementation of Vector Store using Astra DB with search capabilities", + "display_name": "Astra DB", + "documentation": "https://docs.langflow.org/starter-projects-vector-store-rag", + "edited": false, + "field_order": [ + "token", + "api_endpoint", + "collection_name", + "collection_name_new", + "keyspace", + "search_input", + "number_of_results", + "search_type", + "search_score_threshold", + "advanced_search_filter", + "search_filter", + "ingest_data", + "embedding_choice", + "embedding_model", + "metric", + "batch_size", + "bulk_insert_batch_concurrency", + "bulk_insert_overwrite_concurrency", + "bulk_delete_concurrency", + "setup_mode", + "pre_delete_collection", + "metadata_indexing_include", + "metadata_indexing_exclude", + "collection_indexing_policy" + ], + "frozen": false, + "icon": "AstraDB", + "legacy": false, + "lf_version": "1.1.1", + "metadata": {}, + "output_types": [], + "outputs": [ + { + "cache": true, + "display_name": "Retriever", + "method": "build_base_retriever", + "name": "base_retriever", + "required_inputs": [], + "selected": "Retriever", + "types": [ + "Retriever" + ], + "value": "__UNDEFINED__" + }, + { + "cache": true, + "display_name": "Search Results", + "method": "search_documents", + "name": "search_results", + "required_inputs": [ + "api_endpoint", + "collection_name", + "token" + ], + "selected": "Data", + "types": [ + "Data" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "advanced_search_filter": { + "_input_type": "NestedDictInput", + "advanced": true, + "display_name": "Search Metadata Filter", + "dynamic": false, + "info": "Optional dictionary of filters to apply to the search query.", + "list": false, + "name": "advanced_search_filter", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": {} + }, + "api_endpoint": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "API Endpoint", + "dynamic": false, + "info": "API endpoint URL for the Astra DB service.", + "input_types": [ + "Message" + ], + "load_from_db": true, + "name": "api_endpoint", + "password": true, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "str", + "value": "ASTRA_DB_API_ENDPOINT" + }, + "batch_size": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Batch Size", + "dynamic": false, + "info": "Optional number of data to process in a single batch.", + "list": false, + "name": "batch_size", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "bulk_delete_concurrency": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Bulk Delete Concurrency", + "dynamic": false, + "info": "Optional concurrency level for bulk delete operations.", + "list": false, + "name": "bulk_delete_concurrency", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "bulk_insert_batch_concurrency": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Bulk Insert Batch Concurrency", + "dynamic": false, + "info": "Optional concurrency level for bulk insert operations.", + "list": false, + "name": "bulk_insert_batch_concurrency", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "bulk_insert_overwrite_concurrency": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Bulk Insert Overwrite Concurrency", + "dynamic": false, + "info": "Optional concurrency level for bulk insert operations that overwrite existing data.", + "list": false, + "name": "bulk_insert_overwrite_concurrency", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import os\nfrom collections import defaultdict\n\nimport orjson\nfrom astrapy import DataAPIClient\nfrom astrapy.admin import parse_api_endpoint\nfrom astrapy.exceptions import CollectionNotFoundException\nfrom langchain_astradb import AstraDBVectorStore\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import DictInput, FloatInput, MessageTextInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema import Data\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Implementation of Vector Store using Astra DB with search capabilities\"\n documentation: str = \"https://docs.langflow.org/starter-projects-vector-store-rag\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n def list_collections(self):\n client = DataAPIClient(token=self.token)\n\n database = client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n\n return database.list_collections()\n\n def _initialize_collection_options(self):\n try:\n collections = [collection.name for collection in self.list_collections()]\n except (CollectionNotFoundException, ConnectionError, ValueError) as _:\n collections = []\n\n return [*collections, \"+ Create new collection\"]\n\n VECTORIZE_PROVIDERS_MAPPING = defaultdict(\n list,\n {\n \"Azure OpenAI\": [\n \"azureOpenAI\",\n [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"],\n ],\n \"Hugging Face - Dedicated\": [\"huggingfaceDedicated\", [\"endpoint-defined-model\"]],\n \"Hugging Face - Serverless\": [\n \"huggingface\",\n [\n \"sentence-transformers/all-MiniLM-L6-v2\",\n \"intfloat/multilingual-e5-large\",\n \"intfloat/multilingual-e5-large-instruct\",\n \"BAAI/bge-small-en-v1.5\",\n \"BAAI/bge-base-en-v1.5\",\n \"BAAI/bge-large-en-v1.5\",\n ],\n ],\n \"Jina AI\": [\n \"jinaAI\",\n [\n \"jina-embeddings-v2-base-en\",\n \"jina-embeddings-v2-base-de\",\n \"jina-embeddings-v2-base-es\",\n \"jina-embeddings-v2-base-code\",\n \"jina-embeddings-v2-base-zh\",\n ],\n ],\n \"Mistral AI\": [\"mistral\", [\"mistral-embed\"]],\n \"Nvidia\": [\"nvidia\", [\"NV-Embed-QA\"]],\n \"OpenAI\": [\"openai\", [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]],\n \"Upstage\": [\"upstageAI\", [\"solar-embedding-1-large\"]],\n \"Voyage AI\": [\n \"voyageAI\",\n [\"voyage-large-2-instruct\", \"voyage-law-2\", \"voyage-code-2\", \"voyage-large-2\", \"voyage-2\"],\n ],\n },\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n advanced=os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\",\n ),\n SecretStrInput(\n name=\"api_endpoint\",\n display_name=\"Database\" if os.getenv(\"ASTRA_ENHANCED\", \"false\").lower() == \"true\" else \"API Endpoint\",\n info=\"API endpoint URL for the Astra DB service.\",\n value=\"ASTRA_DB_API_ENDPOINT\",\n required=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n real_time_refresh=True,\n refresh_button=True,\n options=[\"+ Create new collection\"],\n value=\"+ Create new collection\",\n ),\n StrInput(\n name=\"collection_name_new\",\n display_name=\"Collection Name\",\n info=\"Name of the new collection to create.\",\n advanced=False,\n required=True,\n ),\n StrInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n ),\n MultilineInput(\n name=\"search_input\",\n display_name=\"Search Input\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Results\",\n info=\"Number of results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n DictInput(\n name=\"search_filter\",\n display_name=\"[DEPRECATED] Search Metadata Filter\",\n info=\"Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n list=True,\n ),\n DataInput(\n name=\"ingest_data\",\n display_name=\"Ingest Data\",\n ),\n DropdownInput(\n name=\"embedding_choice\",\n display_name=\"Embedding Model or Astra Vectorize\",\n info=\"Determines whether to use Astra Vectorize for the collection.\",\n options=[\"Embedding Model\", \"Astra Vectorize\"],\n real_time_refresh=True,\n value=\"Embedding Model\",\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ),\n DropdownInput(\n name=\"metric\",\n display_name=\"Metric\",\n info=\"Optional distance metric for vector comparisons in the vector store.\",\n options=[\"cosine\", \"dot_product\", \"euclidean\"],\n value=\"cosine\",\n advanced=True,\n ),\n IntInput(\n name=\"batch_size\",\n display_name=\"Batch Size\",\n info=\"Optional number of data to process in a single batch.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_batch_concurrency\",\n display_name=\"Bulk Insert Batch Concurrency\",\n info=\"Optional concurrency level for bulk insert operations.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_insert_overwrite_concurrency\",\n display_name=\"Bulk Insert Overwrite Concurrency\",\n info=\"Optional concurrency level for bulk insert operations that overwrite existing data.\",\n advanced=True,\n ),\n IntInput(\n name=\"bulk_delete_concurrency\",\n display_name=\"Bulk Delete Concurrency\",\n info=\"Optional concurrency level for bulk delete operations.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"setup_mode\",\n display_name=\"Setup Mode\",\n info=\"Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.\",\n options=[\"Sync\", \"Off\"],\n advanced=True,\n value=\"Sync\",\n ),\n BoolInput(\n name=\"pre_delete_collection\",\n display_name=\"Pre Delete Collection\",\n info=\"Boolean flag to determine whether to delete the collection before creating a new one.\",\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_include\",\n display_name=\"Metadata Indexing Include\",\n info=\"Optional list of metadata fields to include in the indexing.\",\n list=True,\n advanced=True,\n ),\n StrInput(\n name=\"metadata_indexing_exclude\",\n display_name=\"Metadata Indexing Exclude\",\n info=\"Optional list of metadata fields to exclude from the indexing.\",\n list=True,\n advanced=True,\n ),\n StrInput(\n name=\"collection_indexing_policy\",\n display_name=\"Collection Indexing Policy\",\n info='Optional JSON string for the \"indexing\" field of the collection. '\n \"See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option\",\n advanced=True,\n ),\n ]\n\n def del_fields(self, build_config, field_list):\n for field in field_list:\n if field in build_config:\n del build_config[field]\n\n return build_config\n\n def insert_in_dict(self, build_config, field_name, new_parameters):\n # Insert the new key-value pair after the found key\n for new_field_name, new_parameter in new_parameters.items():\n # Get all the items as a list of tuples (key, value)\n items = list(build_config.items())\n\n # Find the index of the key to insert after\n idx = len(items)\n for i, (key, _) in enumerate(items):\n if key == field_name:\n idx = i + 1\n break\n\n items.insert(idx, (new_field_name, new_parameter))\n\n # Clear the original dictionary and update with the modified items\n build_config.clear()\n build_config.update(items)\n\n return build_config\n\n def update_providers_mapping(self):\n # If we don't have token or api_endpoint, we can't fetch the list of providers\n if not self.token or not self.api_endpoint:\n self.log(\"Astra DB token and API endpoint are required to fetch the list of Vectorize providers.\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n try:\n self.log(\"Dynamically updating list of Vectorize providers.\")\n\n # Get the admin object\n client = DataAPIClient(token=self.token)\n admin = client.get_admin()\n\n # Get the embedding providers\n db_admin = admin.get_database_admin(self.api_endpoint)\n embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n vectorize_providers_mapping = {}\n\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n display_name = provider_data[\"displayName\"]\n models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as e: # noqa: BLE001\n self.log(f\"Error fetching Vectorize providers: {e}\")\n\n return self.VECTORIZE_PROVIDERS_MAPPING\n\n def get_collection_options(self):\n client = DataAPIClient(token=self.token)\n\n database = client.get_database(\n self.api_endpoint,\n token=self.token,\n )\n\n collection = database.get_collection(self.collection_name)\n\n # Only get the options if the collection exists\n try:\n collection_options = collection.options()\n except CollectionNotFoundException as e:\n self.log(f\"Collection not found: {e}\")\n\n return None\n\n return collection_options.vector\n\n def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n # Refresh the collection name options\n build_config[\"collection_name\"][\"options\"] = self._initialize_collection_options()\n\n # If the collection name is set to \"+ Create new collection\", show the advanced options\n if field_name == \"collection_name\" and field_value == \"+ Create new collection\":\n build_config[\"embedding_choice\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n build_config[\"collection_name_new\"][\"advanced\"] = False\n build_config[\"collection_name_new\"][\"required\"] = True\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"collection_name_new\", {\"embedding_model\": new_parameter})\n elif field_name == \"collection_name\" and field_value != \"+ Create new collection\":\n build_config[\"collection_name_new\"][\"advanced\"] = True\n build_config[\"collection_name_new\"][\"required\"] = False\n build_config[\"collection_name_new\"][\"value\"] = \"\"\n\n # Get the collection options\n collection_options = self.get_collection_options()\n\n # If the collection options are available, show the advanced options\n if collection_options:\n build_config[\"embedding_choice\"][\"advanced\"] = True\n\n if collection_options.service:\n for input_field in [\n \"embedding_provider\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n build_config[input_field][\"advanced\"] = False\n\n build_config[\"embedding_model\"][\"advanced\"] = True\n build_config[\"embedding_provider\"][\"advanced\"] = True\n build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n build_config[\"embedding_provider\"][\"value\"] = collection_options.service.provider\n\n build_config[\"model\"][\"value\"] = collection_options.service.model_name\n build_config[\"z_01_model_parameters\"][\"value\"] = collection_options.service.parameters\n\n if collection_options.service.authentication:\n build_config[\"z_02_api_key_name\"][\"value\"] = collection_options.service.authentication.get(\n \"providerKey\"\n )\n build_config[\"z_03_provider_api_key\"][\"value\"] = collection_options.service.authentication.get(\n \"apiKey\"\n )\n build_config[\"z_04_authentication\"][\"value\"] = collection_options.service.authentication\n else:\n for input_field in [\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n build_config[input_field][\"advanced\"] = True\n\n build_config[\"embedding_model\"][\"advanced\"] = False\n build_config[\"embedding_provider\"][\"advanced\"] = False\n build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_model\": new_parameter})\n\n elif field_name == \"embedding_choice\":\n if field_value == \"Astra Vectorize\":\n self.del_fields(build_config, [\"embedding_model\"])\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n\n new_parameter = DropdownInput(\n name=\"embedding_provider\",\n display_name=\"Embedding Provider\",\n options=vectorize_providers.keys(),\n value=\"\",\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_provider\": new_parameter})\n else:\n self.del_fields(\n build_config,\n [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ],\n )\n\n new_parameter = HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Allows an embedding model configuration.\",\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_choice\", {\"embedding_model\": new_parameter})\n\n elif field_name == \"embedding_provider\":\n self.del_fields(\n build_config,\n [\"model\", \"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n # Update the providers mapping\n vectorize_providers = self.update_providers_mapping()\n model_options = vectorize_providers[field_value][1]\n\n new_parameter = DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n info=\"The embedding model to use for the selected provider. Each provider has a different set of \"\n \"models available (full list at \"\n \"https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\\n\\n\"\n f\"{', '.join(model_options)}\",\n options=model_options,\n value=None,\n required=True,\n real_time_refresh=True,\n ).to_dict()\n\n self.insert_in_dict(build_config, \"embedding_provider\", {\"model\": new_parameter})\n\n elif field_name == \"model\":\n self.del_fields(\n build_config,\n [\"z_01_model_parameters\", \"z_02_api_key_name\", \"z_03_provider_api_key\", \"z_04_authentication\"],\n )\n\n new_parameter_1 = DictInput(\n name=\"z_01_model_parameters\",\n display_name=\"Model Parameters\",\n list=True,\n ).to_dict()\n\n new_parameter_2 = MessageTextInput(\n name=\"z_02_api_key_name\",\n display_name=\"API Key Name\",\n info=\"The name of the embeddings provider API key stored on Astra. \"\n \"If set, it will override the 'ProviderKey' in the authentication parameters.\",\n ).to_dict()\n\n new_parameter_3 = SecretStrInput(\n load_from_db=False,\n name=\"z_03_provider_api_key\",\n display_name=\"Provider API Key\",\n info=\"An alternative to the Astra Authentication that passes an API key for the provider \"\n \"with each request to Astra DB. \"\n \"This may be used when Vectorize is configured for the collection, \"\n \"but no corresponding provider secret is stored within Astra's key management system.\",\n ).to_dict()\n\n new_parameter_4 = DictInput(\n name=\"z_04_authentication\",\n display_name=\"Authentication Parameters\",\n list=True,\n ).to_dict()\n\n self.insert_in_dict(\n build_config,\n \"model\",\n {\n \"z_01_model_parameters\": new_parameter_1,\n \"z_02_api_key_name\": new_parameter_2,\n \"z_03_provider_api_key\": new_parameter_3,\n \"z_04_authentication\": new_parameter_4,\n },\n )\n\n return build_config\n\n def build_vectorize_options(self, **kwargs):\n for attribute in [\n \"embedding_provider\",\n \"model\",\n \"z_01_model_parameters\",\n \"z_02_api_key_name\",\n \"z_03_provider_api_key\",\n \"z_04_authentication\",\n ]:\n if not hasattr(self, attribute):\n setattr(self, attribute, None)\n\n # Fetch values from kwargs if any self.* attributes are None\n provider_mapping = self.update_providers_mapping()\n provider_value = provider_mapping.get(self.embedding_provider, [None])[0] or kwargs.get(\"embedding_provider\")\n model_name = self.model or kwargs.get(\"model\")\n authentication = {**(self.z_04_authentication or {}), **kwargs.get(\"z_04_authentication\", {})}\n parameters = self.z_01_model_parameters or kwargs.get(\"z_01_model_parameters\", {})\n\n # Set the API key name if provided\n api_key_name = self.z_02_api_key_name or kwargs.get(\"z_02_api_key_name\")\n provider_key = self.z_03_provider_api_key or kwargs.get(\"z_03_provider_api_key\")\n if api_key_name:\n authentication[\"providerKey\"] = api_key_name\n if authentication:\n provider_key = None\n authentication[\"providerKey\"] = authentication[\"providerKey\"].split(\".\")[0]\n\n # Set authentication and parameters to None if no values are provided\n if not authentication:\n authentication = None\n if not parameters:\n parameters = None\n\n return {\n # must match astrapy.info.CollectionVectorServiceOptions\n \"collection_vector_service_options\": {\n \"provider\": provider_value,\n \"modelName\": model_name,\n \"authentication\": authentication,\n \"parameters\": parameters,\n },\n \"collection_embedding_api_key\": provider_key,\n }\n\n @check_cached_vector_store\n def build_vector_store(self, vectorize_options=None):\n try:\n from langchain_astradb import AstraDBVectorStore\n from langchain_astradb.utils.astradb import SetupMode\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n try:\n if not self.setup_mode:\n self.setup_mode = self._inputs[\"setup_mode\"].options[0]\n\n setup_mode_value = SetupMode[self.setup_mode.upper()]\n except KeyError as e:\n msg = f\"Invalid setup mode: {self.setup_mode}\"\n raise ValueError(msg) from e\n\n metric_value = self.metric or None\n autodetect = False\n\n if self.embedding_choice == \"Embedding Model\":\n embedding_dict = {\"embedding\": self.embedding_model}\n # Use autodetect if the collection name is NOT set to \"+ Create new collection\"\n elif self.collection_name != \"+ Create new collection\":\n autodetect = True\n metric_value = None\n setup_mode_value = None\n embedding_dict = {}\n else:\n from astrapy.info import CollectionVectorServiceOptions\n\n # Grab the collection options if available\n collection_options = self.get_collection_options()\n\n # Ensure collection_options and its nested attributes are handled safely\n authentication = getattr(self, \"z_04_authentication\", {}) or (\n collection_options.service.authentication if collection_options and collection_options.service else {}\n )\n\n # Build the vectorize options dictionary\n dict_options = vectorize_options or self.build_vectorize_options(\n embedding_provider=(\n getattr(self, \"embedding_provider\", None)\n or (\n collection_options.service.provider\n if collection_options and collection_options.service\n else None\n )\n ),\n model=(\n getattr(self, \"model\", None)\n or (\n collection_options.service.model_name\n if collection_options and collection_options.service\n else None\n )\n ),\n z_01_model_parameters=(\n getattr(self, \"z_01_model_parameters\", None)\n or (\n collection_options.service.parameters\n if collection_options and collection_options.service\n else None\n )\n ),\n z_02_api_key_name=(\n getattr(self, \"z_02_api_key_name\", None)\n or (authentication.get(\"apiKey\") if authentication else None)\n ),\n z_03_provider_api_key=(\n getattr(self, \"z_03_provider_api_key\", None)\n or (authentication.get(\"providerKey\") if authentication else None)\n ),\n z_04_authentication=authentication,\n )\n\n # Set the embedding dictionary\n embedding_dict = {\n \"collection_vector_service_options\": CollectionVectorServiceOptions.from_dict(\n dict_options.get(\"collection_vector_service_options\")\n ),\n \"collection_embedding_api_key\": dict_options.get(\"collection_embedding_api_key\"),\n }\n\n try:\n vector_store = AstraDBVectorStore(\n token=self.token,\n api_endpoint=self.api_endpoint,\n namespace=self.keyspace or None,\n collection_name=getattr(self, \"collection_name_new\", None) or self.collection_name,\n autodetect_collection=autodetect,\n environment=(\n parse_api_endpoint(getattr(self, \"api_endpoint\", None)).environment\n if getattr(self, \"api_endpoint\", None)\n else None\n ),\n metric=metric_value,\n batch_size=self.batch_size or None,\n bulk_insert_batch_concurrency=self.bulk_insert_batch_concurrency or None,\n bulk_insert_overwrite_concurrency=self.bulk_insert_overwrite_concurrency or None,\n bulk_delete_concurrency=self.bulk_delete_concurrency or None,\n setup_mode=setup_mode_value,\n pre_delete_collection=self.pre_delete_collection,\n metadata_indexing_include=[s for s in self.metadata_indexing_include if s] or None,\n metadata_indexing_exclude=[s for s in self.metadata_indexing_exclude if s] or None,\n collection_indexing_policy=orjson.dumps(self.collection_indexing_policy)\n if self.collection_indexing_policy\n else None,\n **embedding_dict,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n if self.search_type == \"Similarity with score threshold\":\n return \"similarity_score_threshold\"\n if self.search_type == \"MMR (Max Marginal Relevance)\":\n return \"mmr\"\n return \"similarity\"\n\n def _build_search_args(self):\n query = self.search_input if isinstance(self.search_input, str) and self.search_input.strip() else None\n search_filter = (\n {k: v for k, v in self.search_filter.items() if k and v and k.strip()} if self.search_filter else None\n )\n\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n }\n elif self.advanced_search_filter or search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n\n if search_filter:\n self.log(self.log(f\"`search_filter` is deprecated. Use `advanced_search_filter`. Cleaned: {search_filter}\"))\n filter_arg.update(search_filter)\n\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_input}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + }, + "collection_indexing_policy": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Collection Indexing Policy", + "dynamic": false, + "info": "Optional JSON string for the \"indexing\" field of the collection. See https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#the-indexing-option", + "list": false, + "load_from_db": false, + "name": "collection_indexing_policy", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "collection_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "display_name": "Collection", + "dynamic": false, + "info": "The name of the collection within Astra DB where the vectors will be stored.", + "name": "collection_name", + "options": [ + "+ Create new collection" + ], + "placeholder": "", + "real_time_refresh": true, + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "+ Create new collection" + }, + "collection_name_new": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Collection Name", + "dynamic": false, + "info": "Name of the new collection to create.", + "list": false, + "load_from_db": false, + "name": "collection_name_new", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "vector_store_rag_demo" + }, + "embedding_choice": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "display_name": "Embedding Model or Astra Vectorize", + "dynamic": false, + "info": "Determines whether to use Astra Vectorize for the collection.", + "name": "embedding_choice", + "options": [ + "Embedding Model", + "Astra Vectorize" + ], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Embedding Model" + }, + "embedding_model": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Embedding Model", + "dynamic": false, + "info": "Allows an embedding model configuration.", + "input_types": [ + "Embeddings" + ], + "list": false, + "name": "embedding_model", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "ingest_data": { + "_input_type": "DataInput", + "advanced": false, + "display_name": "Ingest Data", + "dynamic": false, + "info": "", + "input_types": [ + "Data" + ], + "list": false, + "name": "ingest_data", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "keyspace": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Keyspace", + "dynamic": false, + "info": "Optional keyspace within Astra DB to use for the collection.", + "list": false, + "load_from_db": false, + "name": "keyspace", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "metadata_indexing_exclude": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Metadata Indexing Exclude", + "dynamic": false, + "info": "Optional list of metadata fields to exclude from the indexing.", + "list": true, + "load_from_db": false, + "name": "metadata_indexing_exclude", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "metadata_indexing_include": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Metadata Indexing Include", + "dynamic": false, + "info": "Optional list of metadata fields to include in the indexing.", + "list": true, + "load_from_db": false, + "name": "metadata_indexing_include", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "metric": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "display_name": "Metric", + "dynamic": false, + "info": "Optional distance metric for vector comparisons in the vector store.", + "name": "metric", + "options": [ + "cosine", + "dot_product", + "euclidean" + ], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "cosine" + }, + "number_of_results": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Number of Results", + "dynamic": false, + "info": "Number of results to return.", + "list": false, + "name": "number_of_results", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": 4 + }, + "pre_delete_collection": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Pre Delete Collection", + "dynamic": false, + "info": "Boolean flag to determine whether to delete the collection before creating a new one.", + "list": false, + "name": "pre_delete_collection", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "search_filter": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "[DEPRECATED] Search Metadata Filter", + "dynamic": false, + "info": "Deprecated: use advanced_search_filter. Optional dictionary of filters to apply to the search query.", + "list": true, + "name": "search_filter", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "search_input": { + "_input_type": "MultilineInput", + "advanced": false, + "display_name": "Search Input", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "multiline": true, + "name": "search_input", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "search_score_threshold": { + "_input_type": "FloatInput", + "advanced": true, + "display_name": "Search Score Threshold", + "dynamic": false, + "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')", + "list": false, + "name": "search_score_threshold", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "float", + "value": 0 + }, + "search_type": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "display_name": "Search Type", + "dynamic": false, + "info": "Search type to use", + "name": "search_type", + "options": [ + "Similarity", + "Similarity with score threshold", + "MMR (Max Marginal Relevance)" + ], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Similarity" + }, + "setup_mode": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "display_name": "Setup Mode", + "dynamic": false, + "info": "Configuration mode for setting up the vector store, with options like 'Sync' or 'Off'.", + "name": "setup_mode", + "options": [ + "Sync", + "Off" + ], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Sync" + }, + "token": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Astra DB Application Token", + "dynamic": false, + "info": "Authentication token for accessing Astra DB.", + "input_types": [ + "Message" + ], + "load_from_db": true, + "name": "token", + "password": true, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "str", + "value": "ASTRA_DB_APPLICATION_TOKEN" + } + }, + "tool_mode": false + }, + "type": "AstraDB" + }, + "dragging": false, + "height": 831, + "id": "AstraDB-vNNR6", + "position": { + "x": 2057.0640877169963, + "y": 1348.7993450183103 + }, + "positionAbsolute": { + "x": 2057.0640877169963, + "y": 1348.7993450183103 + }, + "selected": false, + "type": "genericNode", + "width": 320 } ], "viewport": { - "x": -298.6563130974548, - "y": -137.6024801797489, - "zoom": 0.5239796558908366 + "x": 107.32464183109641, + "y": -162.00929980139142, + "zoom": 0.43203902737939853 } }, "description": "Load your data for chat context with Retrieval Augmented Generation.", "endpoint_name": null, - "gradient": "5", - "icon": "Database", - "id": "c63bc197-85d6-4f39-87dc-2bc35523ec4e", + "id": "cc5608e0-9b81-4c93-ba28-05c2b743b3b4", "is_component": false, - "last_tested_version": "1.0.19.post2", + "last_tested_version": "1.1.1", "name": "Vector Store RAG", "tags": [ "openai",