diff --git a/pyproject.toml b/pyproject.toml
index 87585e7ed..52460dfff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "langflow"
-version = "1.5.0.post2"
+version = "1.6.0"
description = "A Python package with a built-in web application"
requires-python = ">=3.10,<3.14"
license = "MIT"
diff --git a/scripts/windows/build_and_run.bat b/scripts/windows/build_and_run.bat
index 249322e68..26f79867e 100644
--- a/scripts/windows/build_and_run.bat
+++ b/scripts/windows/build_and_run.bat
@@ -1,14 +1,14 @@
@echo off
echo Starting Langflow build and run process...
-REM Check if .env file exists and set env file parameter
-set "ENV_FILE_PARAM="
+REM Check if .env file exists and set env file flag
+set "USE_ENV_FILE="
REM Get the script directory and resolve project root
for %%I in ("%~dp0..\..") do set "PROJECT_ROOT=%%~fI"
set "ENV_PATH=%PROJECT_ROOT%\.env"
if exist "%ENV_PATH%" (
echo Found .env file at: %ENV_PATH%
- set "ENV_FILE_PARAM=--env-file \"%ENV_PATH%\""
+ set "USE_ENV_FILE=1"
) else (
echo .env file not found at: %ENV_PATH%
echo Langflow will use default configuration
@@ -85,8 +85,8 @@ echo Step 4: Running Langflow...
echo.
echo Attention: Wait until uvicorn is running before opening the browser
echo.
-if defined ENV_FILE_PARAM (
- uv run langflow run %ENV_FILE_PARAM%
+if defined USE_ENV_FILE (
+ uv run --env-file "%ENV_PATH%" langflow run
) else (
uv run langflow run
)
diff --git a/scripts/windows/build_and_run.ps1 b/scripts/windows/build_and_run.ps1
index e0ab1c90c..164592e8f 100644
--- a/scripts/windows/build_and_run.ps1
+++ b/scripts/windows/build_and_run.ps1
@@ -87,7 +87,7 @@ Write-Host "`nStep 4: Running Langflow..." -ForegroundColor Yellow
Write-Host "`nAttention: Wait until uvicorn is running before opening the browser" -ForegroundColor Red
try {
if ($useEnvFile) {
- & uv run langflow run --env-file $envPath
+ & uv run --env-file $envPath langflow run
} else {
& uv run langflow run
}
diff --git a/src/backend/base/langflow/base/knowledge_bases/__init__.py b/src/backend/base/langflow/base/knowledge_bases/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/backend/base/langflow/base/data/kb_utils.py b/src/backend/base/langflow/base/knowledge_bases/knowledge_base_utils.py
similarity index 100%
rename from src/backend/base/langflow/base/data/kb_utils.py
rename to src/backend/base/langflow/base/knowledge_bases/knowledge_base_utils.py
diff --git a/src/backend/base/langflow/components/agents/agent.py b/src/backend/base/langflow/components/agents/agent.py
index 13ac5dab9..d29e427a9 100644
--- a/src/backend/base/langflow/components/agents/agent.py
+++ b/src/backend/base/langflow/components/agents/agent.py
@@ -483,6 +483,7 @@ class AgentComponent(ToolCallingAgentComponent):
build_config.update(fields_to_add)
# Reset input types for agent_llm
build_config["agent_llm"]["input_types"] = []
+ build_config["agent_llm"]["display_name"] = "Model Provider"
elif field_value == "Custom":
# Delete all provider fields
self.delete_fields(build_config, ALL_PROVIDER_FIELDS)
diff --git a/src/backend/base/langflow/components/data/__init__.py b/src/backend/base/langflow/components/data/__init__.py
index 4f589c37f..6e90f0426 100644
--- a/src/backend/base/langflow/components/data/__init__.py
+++ b/src/backend/base/langflow/components/data/__init__.py
@@ -3,8 +3,6 @@ from .csv_to_data import CSVToDataComponent
from .directory import DirectoryComponent
from .file import FileComponent
from .json_to_data import JSONToDataComponent
-from .kb_ingest import KBIngestionComponent
-from .kb_retrieval import KBRetrievalComponent
from .news_search import NewsSearchComponent
from .rss import RSSReaderComponent
from .sql_executor import SQLComponent
@@ -18,8 +16,6 @@ __all__ = [
"DirectoryComponent",
"FileComponent",
"JSONToDataComponent",
- "KBIngestionComponent",
- "KBRetrievalComponent",
"NewsSearchComponent",
"RSSReaderComponent",
"SQLComponent",
diff --git a/src/backend/base/langflow/components/datastax/__init__.py b/src/backend/base/langflow/components/datastax/__init__.py
index 30bf2951d..d635a4b3a 100644
--- a/src/backend/base/langflow/components/datastax/__init__.py
+++ b/src/backend/base/langflow/components/datastax/__init__.py
@@ -8,9 +8,9 @@ if TYPE_CHECKING:
from .astra_assistant_manager import AstraAssistantManager
from .astra_db import AstraDBChatMemory
from .astra_vectorize import AstraVectorizeComponent
- from .astradb import AstraDBVectorStoreComponent
from .astradb_cql import AstraDBCQLToolComponent
from .astradb_tool import AstraDBToolComponent
+ from .astradb_vectorstore import AstraDBVectorStoreComponent
from .create_assistant import AssistantsCreateAssistant
from .create_thread import AssistantsCreateThread
from .dotenv import Dotenv
@@ -29,7 +29,7 @@ _dynamic_imports = {
"AstraDBCQLToolComponent": "astradb_cql",
"AstraDBChatMemory": "astra_db",
"AstraDBToolComponent": "astradb_tool",
- "AstraDBVectorStoreComponent": "astradb",
+ "AstraDBVectorStoreComponent": "astradb_vectorstore",
"AstraVectorizeComponent": "astra_vectorize",
"Dotenv": "dotenv",
"GetEnvVar": "getenvvar",
diff --git a/src/backend/base/langflow/components/datastax/astradb.py b/src/backend/base/langflow/components/datastax/astradb_vectorstore.py
similarity index 100%
rename from src/backend/base/langflow/components/datastax/astradb.py
rename to src/backend/base/langflow/components/datastax/astradb_vectorstore.py
diff --git a/src/backend/base/langflow/components/knowledge_bases/__init__.py b/src/backend/base/langflow/components/knowledge_bases/__init__.py
new file mode 100644
index 000000000..b14f3c5c3
--- /dev/null
+++ b/src/backend/base/langflow/components/knowledge_bases/__init__.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from langflow.components._importing import import_mod
+
+if TYPE_CHECKING:
+ from langflow.components.knowledge_bases.ingestion import KnowledgeIngestionComponent
+ from langflow.components.knowledge_bases.retrieval import KnowledgeRetrievalComponent
+
+_dynamic_imports = {
+ "KnowledgeIngestionComponent": "ingestion",
+ "KnowledgeRetrievalComponent": "retrieval",
+}
+
+__all__ = ["KnowledgeIngestionComponent", "KnowledgeRetrievalComponent"]
+
+
+def __getattr__(attr_name: str) -> Any:
+ """Lazily import input/output components on attribute access."""
+ if attr_name not in _dynamic_imports:
+ msg = f"module '{__name__}' has no attribute '{attr_name}'"
+ raise AttributeError(msg)
+ try:
+ result = import_mod(attr_name, _dynamic_imports[attr_name], __spec__.parent)
+ except (ModuleNotFoundError, ImportError, AttributeError) as e:
+ msg = f"Could not import '{attr_name}' from '{__name__}': {e}"
+ raise AttributeError(msg) from e
+ globals()[attr_name] = result
+ return result
+
+
+def __dir__() -> list[str]:
+ return list(__all__)
diff --git a/src/backend/base/langflow/components/data/kb_ingest.py b/src/backend/base/langflow/components/knowledge_bases/ingestion.py
similarity index 95%
rename from src/backend/base/langflow/components/data/kb_ingest.py
rename to src/backend/base/langflow/components/knowledge_bases/ingestion.py
index 4a734ba81..7a2078a8e 100644
--- a/src/backend/base/langflow/components/data/kb_ingest.py
+++ b/src/backend/base/langflow/components/knowledge_bases/ingestion.py
@@ -9,17 +9,18 @@ import uuid
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
import pandas as pd
from cryptography.fernet import InvalidToken
from langchain_chroma import Chroma
from loguru import logger
-from langflow.base.data.kb_utils import get_knowledge_bases
+from langflow.base.knowledge_bases.knowledge_base_utils import get_knowledge_bases
from langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES
+from langflow.components.processing.converter import convert_to_dataframe
from langflow.custom import Component
-from langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput, TableInput
+from langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output, SecretStrInput, StrInput, TableInput
from langflow.schema.data import Data
from langflow.schema.dotdict import dotdict # noqa: TC001
from langflow.schema.table import EditMode
@@ -27,6 +28,9 @@ from langflow.services.auth.utils import decrypt_api_key, encrypt_api_key
from langflow.services.database.models.user.crud import get_user_by_id
from langflow.services.deps import get_settings_service, get_variable_service, session_scope
+if TYPE_CHECKING:
+ from langflow.schema.dataframe import DataFrame
+
HUGGINGFACE_MODEL_NAMES = ["sentence-transformers/all-MiniLM-L6-v2", "sentence-transformers/all-mpnet-base-v2"]
COHERE_MODEL_NAMES = ["embed-english-v3.0", "embed-multilingual-v3.0"]
@@ -38,14 +42,14 @@ if not knowledge_directory:
KNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()
-class KBIngestionComponent(Component):
+class KnowledgeIngestionComponent(Component):
"""Create or append to Langflow Knowledge from a DataFrame."""
# ------ UI metadata ---------------------------------------------------
display_name = "Knowledge Ingestion"
description = "Create or update knowledge in Langflow."
- icon = "database"
- name = "KBIngestion"
+ icon = "upload"
+ name = "KnowledgeIngestion"
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
@@ -101,12 +105,17 @@ class KBIngestionComponent(Component):
required=True,
options=[],
refresh_button=True,
+ real_time_refresh=True,
dialog_inputs=asdict(NewKnowledgeBaseInput()),
),
- DataFrameInput(
+ HandleInput(
name="input_df",
- display_name="Data",
- info="Table with all original columns (already chunked / processed).",
+ display_name="Input",
+ info=(
+ "Table with all original columns (already chunked / processed). "
+ "Accepts Data or DataFrame. If Data is provided, it is converted to a DataFrame automatically."
+ ),
+ input_types=["Data", "DataFrame"],
required=True,
),
TableInput(
@@ -171,7 +180,7 @@ class KBIngestionComponent(Component):
]
# ------ Outputs -------------------------------------------------------
- outputs = [Output(display_name="DataFrame", name="dataframe", method="build_kb_info")]
+ outputs = [Output(display_name="Results", name="dataframe_output", method="build_kb_info")]
# ------ Internal helpers ---------------------------------------------
def _get_kb_root(self) -> Path:
@@ -503,8 +512,8 @@ class KBIngestionComponent(Component):
async def build_kb_info(self) -> Data:
"""Main ingestion routine → returns a dict with KB metadata."""
try:
- # Get source DataFrame
- df_source: pd.DataFrame = self.input_df
+ input_value = self.input_df[0] if isinstance(self.input_df, list) else self.input_df
+ df_source: DataFrame = convert_to_dataframe(input_value)
# Validate column configuration (using Structured Output patterns)
config_list = self._validate_column_config(df_source)
@@ -559,9 +568,8 @@ class KBIngestionComponent(Component):
return Data(data=meta)
except (OSError, ValueError, RuntimeError, KeyError) as e:
- self.log(f"Error in KB ingestion: {e}")
- self.status = f"❌ KB ingestion failed: {e}"
- return Data(data={"error": str(e), "kb_name": self.knowledge_base})
+ msg = f"Error during KB ingestion: {e}"
+ raise RuntimeError(msg) from e
async def _get_api_key_variable(self, field_value: dict[str, Any]):
async with session_scope() as db:
diff --git a/src/backend/base/langflow/components/data/kb_retrieval.py b/src/backend/base/langflow/components/knowledge_bases/retrieval.py
similarity index 88%
rename from src/backend/base/langflow/components/data/kb_retrieval.py
rename to src/backend/base/langflow/components/knowledge_bases/retrieval.py
index d633c62b0..6c5fa3873 100644
--- a/src/backend/base/langflow/components/data/kb_retrieval.py
+++ b/src/backend/base/langflow/components/knowledge_bases/retrieval.py
@@ -7,7 +7,7 @@ from langchain_chroma import Chroma
from loguru import logger
from pydantic import SecretStr
-from langflow.base.data.kb_utils import get_knowledge_bases
+from langflow.base.knowledge_bases.knowledge_base_utils import get_knowledge_bases
from langflow.custom import Component
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
from langflow.schema.data import Data
@@ -24,11 +24,11 @@ if not knowledge_directory:
KNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()
-class KBRetrievalComponent(Component):
+class KnowledgeRetrievalComponent(Component):
display_name = "Knowledge Retrieval"
description = "Search and retrieve data from knowledge."
- icon = "database"
- name = "KBRetrieval"
+ icon = "download"
+ name = "KnowledgeRetrieval"
inputs = [
DropdownInput(
@@ -51,6 +51,7 @@ class KBRetrievalComponent(Component):
name="search_query",
display_name="Search Query",
info="Optional search query to filter knowledge base data.",
+ tool_mode=True,
),
IntInput(
name="top_k",
@@ -63,17 +64,24 @@ class KBRetrievalComponent(Component):
BoolInput(
name="include_metadata",
display_name="Include Metadata",
- info="Whether to include all metadata and embeddings in the output. If false, only content is returned.",
+ info="Whether to include all metadata in the output. If false, only content is returned.",
value=True,
advanced=False,
),
+ BoolInput(
+ name="include_embeddings",
+ display_name="Include Embeddings",
+ info="Whether to include embeddings in the output. Only applicable if 'Include Metadata' is enabled.",
+ value=False,
+ advanced=True,
+ ),
]
outputs = [
Output(
- name="chroma_kb_data",
+ name="retrieve_data",
display_name="Results",
- method="get_chroma_kb_data",
+ method="retrieve_data",
info="Returns the data from the selected knowledge base.",
),
]
@@ -162,7 +170,7 @@ class KBRetrievalComponent(Component):
msg = f"Embedding provider '{provider}' is not supported for retrieval."
raise NotImplementedError(msg)
- async def get_chroma_kb_data(self) -> DataFrame:
+ async def retrieve_data(self) -> DataFrame:
"""Retrieve data from the selected knowledge base by reading the Chroma collection.
Returns:
@@ -212,16 +220,16 @@ class KBRetrievalComponent(Component):
# For each result, make it a tuple to match the expected output format
results = [(doc, 0) for doc in results] # Assign a dummy score of 0
- # If metadata is enabled, get embeddings for the results
+ # If include_embeddings is enabled, get embeddings for the results
id_to_embedding = {}
- if self.include_metadata and results:
+ if self.include_embeddings and results:
doc_ids = [doc[0].metadata.get("_id") for doc in results if doc[0].metadata.get("_id")]
# Only proceed if we have valid document IDs
if doc_ids:
# Access underlying client to get embeddings
collection = chroma._client.get_collection(name=self.knowledge_base)
- embeddings_result = collection.get(where={"_id": {"$in": doc_ids}}, include=["embeddings", "metadatas"])
+ embeddings_result = collection.get(where={"_id": {"$in": doc_ids}}, include=["metadatas", "embeddings"])
# Create a mapping from document ID to embedding
for i, metadata in enumerate(embeddings_result.get("metadatas", [])):
@@ -231,20 +239,16 @@ class KBRetrievalComponent(Component):
# Build output data based on include_metadata setting
data_list = []
for doc in results:
+ kwargs = {
+ "content": doc[0].page_content,
+ }
+ if self.search_query:
+ kwargs["_score"] = -1 * doc[1]
if self.include_metadata:
# Include all metadata, embeddings, and content
- kwargs = {
- "content": doc[0].page_content,
- **doc[0].metadata,
- }
- if self.search_query:
- kwargs["_score"] = -1 * doc[1]
+ kwargs.update(doc[0].metadata)
+ if self.include_embeddings:
kwargs["_embeddings"] = id_to_embedding.get(doc[0].metadata.get("_id"))
- else:
- # Only include content
- kwargs = {
- "content": doc[0].page_content,
- }
data_list.append(Data(**kwargs))
diff --git a/src/backend/base/langflow/components/processing/dataframe_operations.py b/src/backend/base/langflow/components/processing/dataframe_operations.py
index 91f3599f5..c41fef5a9 100644
--- a/src/backend/base/langflow/components/processing/dataframe_operations.py
+++ b/src/backend/base/langflow/components/processing/dataframe_operations.py
@@ -79,7 +79,16 @@ class DataFrameOperationsComponent(Component):
DropdownInput(
name="filter_operator",
display_name="Filter Operator",
- options=["equals", "not equals", "contains", "starts with", "ends with", "greater than", "less than"],
+ options=[
+ "equals",
+ "not equals",
+ "contains",
+ "not contains",
+ "starts with",
+ "ends with",
+ "greater than",
+ "less than",
+ ],
value="equals",
info="The operator to apply for filtering rows.",
advanced=False,
@@ -254,6 +263,8 @@ class DataFrameOperationsComponent(Component):
mask = column != filter_value
elif operator == "contains":
mask = column.astype(str).str.contains(str(filter_value), na=False)
+ elif operator == "not contains":
+ mask = ~column.astype(str).str.contains(str(filter_value), na=False)
elif operator == "starts with":
mask = column.astype(str).str.startswith(str(filter_value), na=False)
elif operator == "ends with":
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json
index 76196de19..238c6ae15 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json
@@ -7,7 +7,7 @@
"data": {
"sourceHandle": {
"dataType": "ParserComponent",
- "id": "ParserComponent-3Wxa2",
+ "id": "ParserComponent-0KvmM",
"name": "parsed_text",
"output_types": [
"Message"
@@ -15,7 +15,7 @@
},
"targetHandle": {
"fieldName": "input_value",
- "id": "ChatOutput-811h1",
+ "id": "ChatOutput-zViXc",
"inputTypes": [
"Data",
"DataFrame",
@@ -24,69 +24,12 @@
"type": "other"
}
},
- "id": "reactflow__edge-ParserComponent-3Wxa2{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-3Wxa2œ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-811h1{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-811h1œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
+ "id": "reactflow__edge-ParserComponent-0KvmM{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-0KvmMœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-zViXc{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-zViXcœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
"selected": false,
- "source": "ParserComponent-3Wxa2",
- "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-3Wxa2œ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
- "target": "ChatOutput-811h1",
- "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-811h1œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
- },
- {
- "animated": false,
- "className": "",
- "data": {
- "sourceHandle": {
- "dataType": "ChatInput",
- "id": "ChatInput-uZ7jn",
- "name": "message",
- "output_types": [
- "Message"
- ]
- },
- "targetHandle": {
- "fieldName": "search_query",
- "id": "AstraDB-7gXip",
- "inputTypes": [
- "Message"
- ],
- "type": "query"
- }
- },
- "id": "reactflow__edge-ChatInput-uZ7jn{œdataTypeœ:œChatInputœ,œidœ:œChatInput-uZ7jnœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-7gXip{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-7gXipœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
- "selected": false,
- "source": "ChatInput-uZ7jn",
- "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-uZ7jnœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
- "target": "AstraDB-7gXip",
- "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-7gXipœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
- },
- {
- "animated": false,
- "className": "",
- "data": {
- "sourceHandle": {
- "dataType": "AstraDB",
- "id": "AstraDB-7gXip",
- "name": "dataframe",
- "output_types": [
- "DataFrame"
- ]
- },
- "targetHandle": {
- "fieldName": "input_data",
- "id": "ParserComponent-3Wxa2",
- "inputTypes": [
- "DataFrame",
- "Data"
- ],
- "type": "other"
- }
- },
- "id": "reactflow__edge-AstraDB-7gXip{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-7gXipœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-3Wxa2{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-3Wxa2œ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
- "selected": false,
- "source": "AstraDB-7gXip",
- "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-7gXipœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
- "target": "ParserComponent-3Wxa2",
- "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-3Wxa2œ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
+ "source": "ParserComponent-0KvmM",
+ "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-0KvmMœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "ChatOutput-zViXc",
+ "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-zViXcœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
},
{
"animated": false,
@@ -94,7 +37,7 @@
"data": {
"sourceHandle": {
"dataType": "LanguageModelComponent",
- "id": "LanguageModelComponent-NEQ8S",
+ "id": "LanguageModelComponent-CRZxx",
"name": "text_output",
"output_types": [
"Message"
@@ -102,19 +45,19 @@
},
"targetHandle": {
"fieldName": "input_value",
- "id": "StructuredOutput-n8Y3t",
+ "id": "StructuredOutput-AUzID",
"inputTypes": [
"Message"
],
"type": "str"
}
},
- "id": "reactflow__edge-LanguageModelComponent-NEQ8S{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-NEQ8Sœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-StructuredOutput-n8Y3t{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-n8Y3tœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
+ "id": "reactflow__edge-LanguageModelComponent-CRZxx{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-CRZxxœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-StructuredOutput-AUzID{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
"selected": false,
- "source": "LanguageModelComponent-NEQ8S",
- "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-NEQ8Sœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}",
- "target": "StructuredOutput-n8Y3t",
- "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œStructuredOutput-n8Y3tœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
+ "source": "LanguageModelComponent-CRZxx",
+ "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-CRZxxœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "StructuredOutput-AUzID",
+ "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œStructuredOutput-AUzIDœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
},
{
"animated": false,
@@ -122,7 +65,7 @@
"data": {
"sourceHandle": {
"dataType": "StructuredOutput",
- "id": "StructuredOutput-n8Y3t",
+ "id": "StructuredOutput-AUzID",
"name": "structured_output",
"output_types": [
"Data"
@@ -130,7 +73,7 @@
},
"targetHandle": {
"fieldName": "input_data",
- "id": "ParserComponent-Kb474",
+ "id": "ParserComponent-6wYbr",
"inputTypes": [
"DataFrame",
"Data"
@@ -138,12 +81,12 @@
"type": "other"
}
},
- "id": "reactflow__edge-StructuredOutput-n8Y3t{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-n8Y3tœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}-ParserComponent-Kb474{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-Kb474œ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
+ "id": "reactflow__edge-StructuredOutput-AUzID{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-AUzIDœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}-ParserComponent-6wYbr{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-6wYbrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
"selected": false,
- "source": "StructuredOutput-n8Y3t",
- "sourceHandle": "{œdataTypeœ: œStructuredOutputœ, œidœ: œStructuredOutput-n8Y3tœ, œnameœ: œstructured_outputœ, œoutput_typesœ: [œDataœ]}",
- "target": "ParserComponent-Kb474",
- "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-Kb474œ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
+ "source": "StructuredOutput-AUzID",
+ "sourceHandle": "{œdataTypeœ: œStructuredOutputœ, œidœ: œStructuredOutput-AUzIDœ, œnameœ: œstructured_outputœ, œoutput_typesœ: [œDataœ]}",
+ "target": "ParserComponent-6wYbr",
+ "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-6wYbrœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
},
{
"animated": false,
@@ -151,7 +94,7 @@
"data": {
"sourceHandle": {
"dataType": "LanguageModelComponent",
- "id": "LanguageModelComponent-pB4iD",
+ "id": "LanguageModelComponent-MD9V5",
"name": "model_output",
"output_types": [
"LanguageModel"
@@ -159,25 +102,104 @@
},
"targetHandle": {
"fieldName": "llm",
- "id": "StructuredOutput-n8Y3t",
+ "id": "StructuredOutput-AUzID",
"inputTypes": [
"LanguageModel"
],
"type": "other"
}
},
- "id": "reactflow__edge-LanguageModelComponent-pB4iD{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-pB4iDœ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}-StructuredOutput-n8Y3t{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-n8Y3tœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}",
+ "id": "reactflow__edge-LanguageModelComponent-MD9V5{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-MD9V5œ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}-StructuredOutput-AUzID{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}",
"selected": false,
- "source": "LanguageModelComponent-pB4iD",
- "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-pB4iDœ, œnameœ: œmodel_outputœ, œoutput_typesœ: [œLanguageModelœ]}",
- "target": "StructuredOutput-n8Y3t",
- "targetHandle": "{œfieldNameœ: œllmœ, œidœ: œStructuredOutput-n8Y3tœ, œinputTypesœ: [œLanguageModelœ], œtypeœ: œotherœ}"
+ "source": "LanguageModelComponent-MD9V5",
+ "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-MD9V5œ, œnameœ: œmodel_outputœ, œoutput_typesœ: [œLanguageModelœ]}",
+ "target": "StructuredOutput-AUzID",
+ "targetHandle": "{œfieldNameœ: œllmœ, œidœ: œStructuredOutput-AUzIDœ, œinputTypesœ: [œLanguageModelœ], œtypeœ: œotherœ}"
+ },
+ {
+ "className": "",
+ "data": {
+ "sourceHandle": {
+ "dataType": "ParserComponent",
+ "id": "ParserComponent-6wYbr",
+ "name": "parsed_text",
+ "output_types": [
+ "Message"
+ ]
+ },
+ "targetHandle": {
+ "fieldName": "lexical_terms",
+ "id": "AstraDB-93cal",
+ "inputTypes": [
+ "Message"
+ ],
+ "type": "query"
+ }
+ },
+ "id": "xy-edge__ParserComponent-6wYbr{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-6wYbrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-AstraDB-93cal{œfieldNameœ:œlexical_termsœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
+ "source": "ParserComponent-6wYbr",
+ "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-6wYbrœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "AstraDB-93cal",
+ "targetHandle": "{œfieldNameœ: œlexical_termsœ, œidœ: œAstraDB-93calœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
+ },
+ {
+ "className": "",
+ "data": {
+ "sourceHandle": {
+ "dataType": "ChatInput",
+ "id": "ChatInput-2JUiB",
+ "name": "message",
+ "output_types": [
+ "Message"
+ ]
+ },
+ "targetHandle": {
+ "fieldName": "search_query",
+ "id": "AstraDB-93cal",
+ "inputTypes": [
+ "Message"
+ ],
+ "type": "query"
+ }
+ },
+ "id": "xy-edge__ChatInput-2JUiB{œdataTypeœ:œChatInputœ,œidœ:œChatInput-2JUiBœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-93cal{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
+ "source": "ChatInput-2JUiB",
+ "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-2JUiBœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "AstraDB-93cal",
+ "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-93calœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
+ },
+ {
+ "className": "",
+ "data": {
+ "sourceHandle": {
+ "dataType": "AstraDB",
+ "id": "AstraDB-93cal",
+ "name": "search_results",
+ "output_types": [
+ "Data"
+ ]
+ },
+ "targetHandle": {
+ "fieldName": "input_data",
+ "id": "ParserComponent-0KvmM",
+ "inputTypes": [
+ "DataFrame",
+ "Data"
+ ],
+ "type": "other"
+ }
+ },
+ "id": "xy-edge__AstraDB-93cal{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-93calœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParserComponent-0KvmM{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-0KvmMœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
+ "source": "AstraDB-93cal",
+ "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-93calœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}",
+ "target": "ParserComponent-0KvmM",
+ "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-0KvmMœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
}
],
"nodes": [
{
"data": {
- "id": "ChatInput-uZ7jn",
+ "id": "ChatInput-2JUiB",
"node": {
"base_classes": [
"Message"
@@ -487,7 +509,7 @@
"type": "ChatInput"
},
"dragging": false,
- "id": "ChatInput-uZ7jn",
+ "id": "ChatInput-2JUiB",
"measured": {
"height": 48,
"width": 192
@@ -501,7 +523,7 @@
},
{
"data": {
- "id": "ParserComponent-Kb474",
+ "id": "ParserComponent-6wYbr",
"node": {
"base_classes": [
"Message"
@@ -672,7 +694,7 @@
"type": "ParserComponent"
},
"dragging": false,
- "id": "ParserComponent-Kb474",
+ "id": "ParserComponent-6wYbr",
"measured": {
"height": 329,
"width": 320
@@ -686,7 +708,7 @@
},
{
"data": {
- "id": "ChatOutput-811h1",
+ "id": "ChatOutput-zViXc",
"node": {
"base_classes": [
"Message"
@@ -1000,7 +1022,7 @@
"type": "ChatOutput"
},
"dragging": false,
- "id": "ChatOutput-811h1",
+ "id": "ChatOutput-zViXc",
"measured": {
"height": 48,
"width": 192
@@ -1014,7 +1036,7 @@
},
{
"data": {
- "id": "ParserComponent-3Wxa2",
+ "id": "ParserComponent-0KvmM",
"node": {
"base_classes": [
"Message"
@@ -1185,7 +1207,7 @@
"type": "ParserComponent"
},
"dragging": false,
- "id": "ParserComponent-3Wxa2",
+ "id": "ParserComponent-0KvmM",
"measured": {
"height": 246,
"width": 320
@@ -1199,48 +1221,32 @@
},
{
"data": {
- "id": "AstraDB-7gXip",
+ "id": "LanguageModelComponent-CRZxx",
"node": {
"base_classes": [
- "Data",
- "DataFrame",
- "VectorStore"
+ "LanguageModel",
+ "Message"
],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
- "description": "Ingest and search documents in Astra DB",
- "display_name": "Astra DB",
- "documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
+ "description": "Runs a language model given a specified provider. ",
+ "display_name": "Language Model",
+ "documentation": "",
"edited": false,
"field_order": [
- "token",
- "environment",
- "database_name",
- "api_endpoint",
- "keyspace",
- "collection_name",
- "embedding_model",
- "ingest_data",
- "search_query",
- "should_cache_vector_store",
- "search_method",
- "reranker",
- "lexical_terms",
- "number_of_results",
- "search_type",
- "search_score_threshold",
- "advanced_search_filter",
- "autodetect_collection",
- "content_field",
- "deletion_field",
- "ignore_invalid_documents",
- "astradb_vectorstore_kwargs"
+ "provider",
+ "model_name",
+ "api_key",
+ "input_value",
+ "system_message",
+ "stream",
+ "temperature"
],
"frozen": false,
- "icon": "AstraDB",
+ "icon": "brain-circuit",
+ "last_updated": "2025-08-26T16:33:20.961Z",
"legacy": false,
- "lf_version": "1.4.3",
"metadata": {
"code_hash": "23fbe9daca09",
"dependencies": {
@@ -1268,811 +1274,6 @@
},
"minimized": false,
"output_types": [],
- "outputs": [
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "Search Results",
- "group_outputs": false,
- "method": "search_documents",
- "name": "search_results",
- "selected": "Data",
- "tool_mode": true,
- "types": [
- "Data"
- ],
- "value": "__UNDEFINED__"
- },
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "DataFrame",
- "group_outputs": false,
- "method": "as_dataframe",
- "name": "dataframe",
- "selected": "DataFrame",
- "tool_mode": true,
- "types": [
- "DataFrame"
- ],
- "value": "__UNDEFINED__"
- },
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "Vector Store Connection",
- "group_outputs": false,
- "hidden": true,
- "method": "as_vector_store",
- "name": "vectorstoreconnection",
- "selected": "VectorStore",
- "tool_mode": true,
- "types": [
- "VectorStore"
- ],
- "value": "__UNDEFINED__"
- }
- ],
- "pinned": false,
- "template": {
- "_type": "Component",
- "advanced_search_filter": {
- "_input_type": "NestedDictInput",
- "advanced": true,
- "display_name": "Search Metadata Filter",
- "dynamic": false,
- "info": "Optional dictionary of filters to apply to the search query.",
- "list": false,
- "list_add_label": "Add More",
- "name": "advanced_search_filter",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "NestedDict",
- "value": {}
- },
- "api_endpoint": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Astra DB API Endpoint",
- "dynamic": false,
- "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": true,
- "name": "api_endpoint",
- "placeholder": "",
- "required": false,
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "ASTRA_DB_API_ENDPOINT"
- },
- "astradb_vectorstore_kwargs": {
- "_input_type": "NestedDictInput",
- "advanced": true,
- "display_name": "AstraDBVectorStore Parameters",
- "dynamic": false,
- "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
- "list": false,
- "list_add_label": "Add More",
- "name": "astradb_vectorstore_kwargs",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "NestedDict",
- "value": {}
- },
- "autodetect_collection": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Autodetect Collection",
- "dynamic": false,
- "info": "Boolean flag to determine whether to autodetect the collection.",
- "list": false,
- "list_add_label": "Add More",
- "name": "autodetect_collection",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": true
- },
- "code": {
- "advanced": true,
- "dynamic": true,
- "fileTypes": [],
- "file_path": "",
- "info": "",
- "list": false,
- "load_from_db": false,
- "multiline": true,
- "name": "code",
- "password": false,
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "type": "code",
- "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
- },
- "collection_name": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": true,
- "dialog_inputs": {
- "fields": {
- "data": {
- "node": {
- "description": "Please allow several seconds for creation to complete.",
- "display_name": "Create new collection",
- "field_order": [
- "01_new_collection_name",
- "02_embedding_generation_provider",
- "03_embedding_generation_model",
- "04_dimension"
- ],
- "name": "create_collection",
- "template": {
- "01_new_collection_name": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Name",
- "dynamic": false,
- "info": "Name of the new collection to create in Astra DB.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "new_collection_name",
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "02_embedding_generation_provider": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Embedding generation method",
- "dynamic": false,
- "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB.",
- "info": "Provider to use for generating embeddings.",
- "name": "embedding_generation_provider",
- "options": [
- "Bring your own",
- "Nvidia"
- ],
- "options_metadata": [
- {
- "icon": "vectorstores"
- },
- {
- "icon": "NVIDIA"
- }
- ],
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "03_embedding_generation_model": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Embedding model",
- "dynamic": false,
- "info": "Model to use for generating embeddings.",
- "name": "embedding_generation_model",
- "options": [],
- "options_metadata": [],
- "placeholder": null,
- "readonly": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": null
- },
- "04_dimension": {
- "_input_type": "IntInput",
- "advanced": false,
- "display_name": "Dimensions",
- "dynamic": false,
- "info": "Dimensions of the embeddings to generate.",
- "list": false,
- "list_add_label": "Add More",
- "name": "dimension",
- "placeholder": 1024,
- "readonly": true,
- "required": "",
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "int",
- "value": 1024
- }
- }
- }
- }
- },
- "functionality": "create"
- },
- "display_name": "Collection",
- "dynamic": false,
- "info": "The name of the collection within Astra DB where the vectors will be stored.",
- "load_from_db": false,
- "name": "collection_name",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "refresh_button": true,
- "required": true,
- "show": false,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "content_field": {
- "_input_type": "StrInput",
- "advanced": true,
- "display_name": "Content Field",
- "dynamic": false,
- "info": "Field to use as the text content field for the vector store.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "content_field",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "database_name": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": true,
- "dialog_inputs": {
- "fields": {
- "data": {
- "node": {
- "description": "Please allow several minutes for creation to complete.",
- "display_name": "Create new database",
- "field_order": [
- "01_new_database_name",
- "02_cloud_provider",
- "03_region"
- ],
- "name": "create_database",
- "template": {
- "01_new_database_name": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Name",
- "dynamic": false,
- "info": "Name of the new database to create in Astra DB.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "new_database_name",
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "02_cloud_provider": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Cloud provider",
- "dynamic": false,
- "info": "Cloud provider for the new database.",
- "name": "cloud_provider",
- "options": [
- "Amazon Web Services",
- "Google Cloud Platform",
- "Microsoft Azure"
- ],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "03_region": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Region",
- "dynamic": false,
- "info": "Region for the new database.",
- "name": "region",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- }
- }
- }
- }
- },
- "functionality": "create"
- },
- "display_name": "Database",
- "dynamic": false,
- "info": "The Database name for the Astra DB instance.",
- "load_from_db": false,
- "name": "database_name",
- "options": [],
- "options_metadata": [
- {
- "api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com",
- "collections": 5,
- "keyspaces": [
- "default_keyspace",
- "samples_dataflow"
- ],
- "org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3",
- "status": null
- }
- ],
- "placeholder": "",
- "real_time_refresh": true,
- "refresh_button": true,
- "required": true,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "deletion_field": {
- "_input_type": "StrInput",
- "advanced": true,
- "display_name": "Deletion Based On Field",
- "dynamic": false,
- "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "deletion_field",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "embedding_model": {
- "_input_type": "HandleInput",
- "advanced": false,
- "display_name": "Embedding Model",
- "dynamic": false,
- "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
- "input_types": [
- "Embeddings"
- ],
- "list": false,
- "list_add_label": "Add More",
- "name": "embedding_model",
- "placeholder": "",
- "required": false,
- "show": false,
- "title_case": false,
- "trace_as_metadata": true,
- "type": "other",
- "value": ""
- },
- "environment": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": true,
- "dialog_inputs": {},
- "display_name": "Environment",
- "dynamic": false,
- "info": "The environment for the Astra DB API Endpoint.",
- "name": "environment",
- "options": [
- "prod",
- "test",
- "dev"
- ],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "prod"
- },
- "ignore_invalid_documents": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Ignore Invalid Documents",
- "dynamic": false,
- "info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
- "list": false,
- "list_add_label": "Add More",
- "name": "ignore_invalid_documents",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": false
- },
- "ingest_data": {
- "_input_type": "HandleInput",
- "advanced": false,
- "display_name": "Ingest Data",
- "dynamic": false,
- "info": "",
- "input_types": [
- "Data",
- "DataFrame"
- ],
- "list": true,
- "list_add_label": "Add More",
- "name": "ingest_data",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "trace_as_metadata": true,
- "type": "other",
- "value": ""
- },
- "keyspace": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Keyspace",
- "dynamic": false,
- "info": "Optional keyspace within Astra DB to use for the collection.",
- "load_from_db": false,
- "name": "keyspace",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "default_keyspace"
- },
- "lexical_terms": {
- "_input_type": "QueryInput",
- "advanced": false,
- "display_name": "Lexical Terms",
- "dynamic": false,
- "info": "Add additional terms/keywords to augment search precision.",
- "input_types": [
- "Message"
- ],
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "lexical_terms",
- "placeholder": "Enter terms to search...",
- "required": false,
- "separator": " ",
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "query",
- "value": ""
- },
- "number_of_results": {
- "_input_type": "IntInput",
- "advanced": true,
- "display_name": "Number of Search Results",
- "dynamic": false,
- "info": "Number of search results to return.",
- "list": false,
- "list_add_label": "Add More",
- "name": "number_of_results",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "int",
- "value": 4
- },
- "reranker": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Reranker",
- "dynamic": false,
- "info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
- "load_from_db": false,
- "name": "reranker",
- "options": [],
- "options_metadata": [
- {
- "icon": "NVIDIA"
- }
- ],
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "toggle": true,
- "toggle_disable": true,
- "toggle_value": true,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "nvidia/llama-3.2-nv-rerankqa-1b-v2"
- },
- "search_method": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Search Method",
- "dynamic": false,
- "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
- "load_from_db": false,
- "name": "search_method",
- "options": [
- "Hybrid Search",
- "Vector Search"
- ],
- "options_metadata": [
- {
- "icon": "SearchHybrid"
- },
- {
- "icon": "SearchVector"
- }
- ],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "Hybrid Search"
- },
- "search_query": {
- "_input_type": "QueryInput",
- "advanced": false,
- "display_name": "Search Query",
- "dynamic": false,
- "info": "Enter a query to run a similarity search.",
- "input_types": [
- "Message"
- ],
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "search_query",
- "placeholder": "Enter a query...",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": true,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "query",
- "value": ""
- },
- "search_score_threshold": {
- "_input_type": "FloatInput",
- "advanced": true,
- "display_name": "Search Score Threshold",
- "dynamic": false,
- "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
- "list": false,
- "list_add_label": "Add More",
- "name": "search_score_threshold",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "float",
- "value": 0
- },
- "search_type": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Search Type",
- "dynamic": false,
- "info": "Search type to use",
- "name": "search_type",
- "options": [
- "Similarity",
- "Similarity with score threshold",
- "MMR (Max Marginal Relevance)"
- ],
- "options_metadata": [],
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "toggle": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "Similarity"
- },
- "should_cache_vector_store": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Cache Vector Store",
- "dynamic": false,
- "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
- "list": false,
- "list_add_label": "Add More",
- "name": "should_cache_vector_store",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": true
- },
- "token": {
- "_input_type": "SecretStrInput",
- "advanced": false,
- "display_name": "Astra DB Application Token",
- "dynamic": false,
- "info": "Authentication token for accessing Astra DB.",
- "input_types": [],
- "load_from_db": true,
- "name": "token",
- "password": true,
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "type": "str",
- "value": "ASTRA_DB_APPLICATION_TOKEN"
- }
- },
- "tool_mode": false
- },
- "selected_output": "dataframe",
- "showNode": true,
- "type": "AstraDB"
- },
- "dragging": false,
- "id": "AstraDB-7gXip",
- "measured": {
- "height": 540,
- "width": 320
- },
- "position": {
- "x": 1548.269269836593,
- "y": 162.5619344372189
- },
- "selected": false,
- "type": "genericNode"
- },
- {
- "data": {
- "id": "LanguageModelComponent-NEQ8S",
- "node": {
- "base_classes": [
- "LanguageModel",
- "Message"
- ],
- "beta": false,
- "conditional_paths": [],
- "custom_fields": {},
- "description": "Runs a language model given a specified provider. ",
- "display_name": "Language Model",
- "documentation": "",
- "edited": false,
- "field_order": [
- "provider",
- "model_name",
- "api_key",
- "input_value",
- "system_message",
- "stream",
- "temperature"
- ],
- "frozen": false,
- "icon": "brain-circuit",
- "legacy": false,
- "metadata": {
- "keywords": [
- "model",
- "llm",
- "language model",
- "large language model"
- ]
- },
- "minimized": false,
- "output_types": [],
"outputs": [
{
"allows_loop": false,
@@ -2317,7 +1518,7 @@
"type": "LanguageModelComponent"
},
"dragging": false,
- "id": "LanguageModelComponent-NEQ8S",
+ "id": "LanguageModelComponent-CRZxx",
"measured": {
"height": 451,
"width": 320
@@ -2331,7 +1532,7 @@
},
{
"data": {
- "id": "LanguageModelComponent-pB4iD",
+ "id": "LanguageModelComponent-MD9V5",
"node": {
"base_classes": [
"LanguageModel",
@@ -2355,6 +1556,7 @@
],
"frozen": false,
"icon": "brain-circuit",
+ "last_updated": "2025-08-26T16:33:20.962Z",
"legacy": false,
"metadata": {
"keywords": [
@@ -2610,7 +1812,7 @@
"type": "LanguageModelComponent"
},
"dragging": false,
- "id": "LanguageModelComponent-pB4iD",
+ "id": "LanguageModelComponent-MD9V5",
"measured": {
"height": 451,
"width": 320
@@ -2624,7 +1826,7 @@
},
{
"data": {
- "id": "StructuredOutput-n8Y3t",
+ "id": "StructuredOutput-AUzID",
"node": {
"base_classes": [
"Data"
@@ -2691,7 +1893,7 @@
"group_outputs": false,
"method": "build_structured_dataframe",
"name": "dataframe_output",
- "selected": "DataFrame",
+ "selected": null,
"tool_mode": true,
"types": [
"DataFrame"
@@ -2905,11 +2107,12 @@
},
"tool_mode": false
},
+ "selected_output": "structured_output",
"showNode": true,
"type": "StructuredOutput"
},
"dragging": false,
- "id": "StructuredOutput-n8Y3t",
+ "id": "StructuredOutput-AUzID",
"measured": {
"height": 349,
"width": 320
@@ -2923,7 +2126,7 @@
},
{
"data": {
- "id": "note-AJ1HC",
+ "id": "note-IkRDS",
"node": {
"description": "# Hybrid Search RAG\n\nHybrid search performs a vector similarity search and a lexical search, compares the results of both searches, and then returns the most relevant results overall.\n\n## Prerequisites\n\n* An [OpenAI API key](https://platform.openai.com/)\n* An [Astra DB Application Token](https://docs.datastax.com/en/astra-db-serverless/databases/create-database.html) for the Astra DB component.\n\n## Quickstart\n\n1. In the Astra DB component, add your Astra DB Application Token.\nThis connects Langflow to your Astra database.\n2. Select an Astra collection that is hybrid-enabled.\nFor more information, see the [Datastax documentation](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html).\nThe connection appears between the Parser component and the Astra DB component when a vector database is connected.\n3. Ensure the **Lexical Terms** and **Parsed Text** ports are connected.\n4. Add your OpenAI API key in the **Language Model** model component.\n5. Open the Playground and ask a question, like \"What are the features of my data?\"",
"display_name": "",
@@ -2935,7 +2138,7 @@
"type": "note"
},
"dragging": false,
- "id": "note-AJ1HC",
+ "id": "note-IkRDS",
"measured": {
"height": 601,
"width": 575
@@ -2946,19 +2149,820 @@
},
"selected": false,
"type": "noteNode"
+ },
+ {
+ "data": {
+ "id": "AstraDB-93cal",
+ "node": {
+ "base_classes": [
+ "Data",
+ "DataFrame",
+ "VectorStore"
+ ],
+ "beta": false,
+ "conditional_paths": [],
+ "custom_fields": {},
+ "description": "Ingest and search documents in Astra DB",
+ "display_name": "Astra DB",
+ "documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
+ "edited": false,
+ "field_order": [
+ "token",
+ "environment",
+ "database_name",
+ "api_endpoint",
+ "keyspace",
+ "collection_name",
+ "embedding_model",
+ "ingest_data",
+ "search_query",
+ "should_cache_vector_store",
+ "search_method",
+ "reranker",
+ "lexical_terms",
+ "number_of_results",
+ "search_type",
+ "search_score_threshold",
+ "advanced_search_filter",
+ "autodetect_collection",
+ "content_field",
+ "deletion_field",
+ "ignore_invalid_documents",
+ "astradb_vectorstore_kwargs"
+ ],
+ "frozen": false,
+ "icon": "AstraDB",
+ "legacy": false,
+ "metadata": {
+ "code_hash": "23fbe9daca09",
+ "dependencies": {
+ "dependencies": [
+ {
+ "name": "astrapy",
+ "version": "2.0.1"
+ },
+ {
+ "name": "langchain_astradb",
+ "version": "0.6.0"
+ },
+ {
+ "name": "langchain_core",
+ "version": "0.3.75"
+ },
+ {
+ "name": "langflow",
+ "version": null
+ }
+ ],
+ "total_dependencies": 4
+ },
+ "module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent"
+ },
+ "minimized": false,
+ "output_types": [],
+ "outputs": [
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "Search Results",
+ "group_outputs": false,
+ "method": "search_documents",
+ "name": "search_results",
+ "selected": "Data",
+ "tool_mode": true,
+ "types": [
+ "Data"
+ ],
+ "value": "__UNDEFINED__"
+ },
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "DataFrame",
+ "group_outputs": false,
+ "method": "as_dataframe",
+ "name": "dataframe",
+ "selected": "DataFrame",
+ "tool_mode": true,
+ "types": [
+ "DataFrame"
+ ],
+ "value": "__UNDEFINED__"
+ },
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "Vector Store Connection",
+ "group_outputs": false,
+ "hidden": true,
+ "method": "as_vector_store",
+ "name": "vectorstoreconnection",
+ "selected": "VectorStore",
+ "tool_mode": true,
+ "types": [
+ "VectorStore"
+ ],
+ "value": "__UNDEFINED__"
+ }
+ ],
+ "pinned": false,
+ "template": {
+ "_type": "Component",
+ "advanced_search_filter": {
+ "_input_type": "NestedDictInput",
+ "advanced": true,
+ "display_name": "Search Metadata Filter",
+ "dynamic": false,
+ "info": "Optional dictionary of filters to apply to the search query.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "advanced_search_filter",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "NestedDict",
+ "value": {}
+ },
+ "api_endpoint": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Astra DB API Endpoint",
+ "dynamic": false,
+ "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
+ "name": "api_endpoint",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "astradb_vectorstore_kwargs": {
+ "_input_type": "NestedDictInput",
+ "advanced": true,
+ "display_name": "AstraDBVectorStore Parameters",
+ "dynamic": false,
+ "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "astradb_vectorstore_kwargs",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "NestedDict",
+ "value": {}
+ },
+ "autodetect_collection": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Autodetect Collection",
+ "dynamic": false,
+ "info": "Boolean flag to determine whether to autodetect the collection.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "autodetect_collection",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": true
+ },
+ "code": {
+ "advanced": true,
+ "dynamic": true,
+ "fileTypes": [],
+ "file_path": "",
+ "info": "",
+ "list": false,
+ "load_from_db": false,
+ "multiline": true,
+ "name": "code",
+ "password": false,
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "type": "code",
+ "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
+ },
+ "collection_name": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": true,
+ "dialog_inputs": {
+ "fields": {
+ "data": {
+ "node": {
+ "description": "Please allow several seconds for creation to complete.",
+ "display_name": "Create new collection",
+ "field_order": [
+ "01_new_collection_name",
+ "02_embedding_generation_provider",
+ "03_embedding_generation_model",
+ "04_dimension"
+ ],
+ "name": "create_collection",
+ "template": {
+ "01_new_collection_name": {
+ "_input_type": "StrInput",
+ "advanced": false,
+ "display_name": "Name",
+ "dynamic": false,
+ "info": "Name of the new collection to create in Astra DB.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "new_collection_name",
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "02_embedding_generation_provider": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Embedding generation method",
+ "dynamic": false,
+ "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB",
+ "info": "Provider to use for generating embeddings.",
+ "name": "embedding_generation_provider",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "03_embedding_generation_model": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Embedding model",
+ "dynamic": false,
+ "info": "Model to use for generating embeddings.",
+ "name": "embedding_generation_model",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "04_dimension": {
+ "_input_type": "IntInput",
+ "advanced": false,
+ "display_name": "Dimensions",
+ "dynamic": false,
+ "info": "Dimensions of the embeddings to generate.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "dimension",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "int"
+ }
+ }
+ }
+ }
+ },
+ "functionality": "create"
+ },
+ "display_name": "Collection",
+ "dynamic": false,
+ "info": "The name of the collection within Astra DB where the vectors will be stored.",
+ "name": "collection_name",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "refresh_button": true,
+ "required": true,
+ "show": false,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "content_field": {
+ "_input_type": "StrInput",
+ "advanced": true,
+ "display_name": "Content Field",
+ "dynamic": false,
+ "info": "Field to use as the text content field for the vector store.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "content_field",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "database_name": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": true,
+ "dialog_inputs": {
+ "fields": {
+ "data": {
+ "node": {
+ "description": "Please allow several minutes for creation to complete.",
+ "display_name": "Create new database",
+ "field_order": [
+ "01_new_database_name",
+ "02_cloud_provider",
+ "03_region"
+ ],
+ "name": "create_database",
+ "template": {
+ "01_new_database_name": {
+ "_input_type": "StrInput",
+ "advanced": false,
+ "display_name": "Name",
+ "dynamic": false,
+ "info": "Name of the new database to create in Astra DB.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "new_database_name",
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "02_cloud_provider": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Cloud provider",
+ "dynamic": false,
+ "info": "Cloud provider for the new database.",
+ "name": "cloud_provider",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "03_region": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Region",
+ "dynamic": false,
+ "info": "Region for the new database.",
+ "name": "region",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ }
+ }
+ }
+ }
+ },
+ "functionality": "create"
+ },
+ "display_name": "Database",
+ "dynamic": false,
+ "info": "The Database name for the Astra DB instance.",
+ "name": "database_name",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "refresh_button": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "deletion_field": {
+ "_input_type": "StrInput",
+ "advanced": true,
+ "display_name": "Deletion Based On Field",
+ "dynamic": false,
+ "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "deletion_field",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "embedding_model": {
+ "_input_type": "HandleInput",
+ "advanced": false,
+ "display_name": "Embedding Model",
+ "dynamic": false,
+ "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
+ "input_types": [
+ "Embeddings"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "embedding_model",
+ "placeholder": "",
+ "required": false,
+ "show": false,
+ "title_case": false,
+ "trace_as_metadata": true,
+ "type": "other",
+ "value": ""
+ },
+ "environment": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": true,
+ "dialog_inputs": {},
+ "display_name": "Environment",
+ "dynamic": false,
+ "info": "The environment for the Astra DB API Endpoint.",
+ "name": "environment",
+ "options": [
+ "prod",
+ "test",
+ "dev"
+ ],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "prod"
+ },
+ "ignore_invalid_documents": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Ignore Invalid Documents",
+ "dynamic": false,
+ "info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "ignore_invalid_documents",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": false
+ },
+ "ingest_data": {
+ "_input_type": "HandleInput",
+ "advanced": false,
+ "display_name": "Ingest Data",
+ "dynamic": false,
+ "info": "",
+ "input_types": [
+ "Data",
+ "DataFrame"
+ ],
+ "list": true,
+ "list_add_label": "Add More",
+ "name": "ingest_data",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "trace_as_metadata": true,
+ "type": "other",
+ "value": ""
+ },
+ "keyspace": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Keyspace",
+ "dynamic": false,
+ "info": "Optional keyspace within Astra DB to use for the collection.",
+ "name": "keyspace",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "lexical_terms": {
+ "_input_type": "QueryInput",
+ "advanced": false,
+ "display_name": "Lexical Terms",
+ "dynamic": false,
+ "info": "Add additional terms/keywords to augment search precision.",
+ "input_types": [
+ "Message"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "lexical_terms",
+ "placeholder": "Enter terms to search...",
+ "required": false,
+ "separator": " ",
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "query",
+ "value": ""
+ },
+ "number_of_results": {
+ "_input_type": "IntInput",
+ "advanced": true,
+ "display_name": "Number of Search Results",
+ "dynamic": false,
+ "info": "Number of search results to return.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "number_of_results",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "int",
+ "value": 4
+ },
+ "reranker": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Reranker",
+ "dynamic": false,
+ "info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
+ "name": "reranker",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": false,
+ "title_case": false,
+ "toggle": true,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "search_method": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Search Method",
+ "dynamic": false,
+ "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
+ "name": "search_method",
+ "options": [
+ "Hybrid Search",
+ "Vector Search"
+ ],
+ "options_metadata": [
+ {
+ "icon": "SearchHybrid"
+ },
+ {
+ "icon": "SearchVector"
+ }
+ ],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "Vector Search"
+ },
+ "search_query": {
+ "_input_type": "QueryInput",
+ "advanced": false,
+ "display_name": "Search Query",
+ "dynamic": false,
+ "info": "Enter a query to run a similarity search.",
+ "input_types": [
+ "Message"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "search_query",
+ "placeholder": "Enter a query...",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": true,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "query",
+ "value": ""
+ },
+ "search_score_threshold": {
+ "_input_type": "FloatInput",
+ "advanced": true,
+ "display_name": "Search Score Threshold",
+ "dynamic": false,
+ "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "search_score_threshold",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "float",
+ "value": 0
+ },
+ "search_type": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Search Type",
+ "dynamic": false,
+ "info": "Search type to use",
+ "name": "search_type",
+ "options": [
+ "Similarity",
+ "Similarity with score threshold",
+ "MMR (Max Marginal Relevance)"
+ ],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "Similarity"
+ },
+ "should_cache_vector_store": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Cache Vector Store",
+ "dynamic": false,
+ "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "should_cache_vector_store",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": true
+ },
+ "token": {
+ "_input_type": "SecretStrInput",
+ "advanced": false,
+ "display_name": "Astra DB Application Token",
+ "dynamic": false,
+ "info": "Authentication token for accessing Astra DB.",
+ "input_types": [],
+ "load_from_db": true,
+ "name": "token",
+ "password": true,
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "type": "str",
+ "value": "ASTRA_DB_APPLICATION_TOKEN"
+ }
+ },
+ "tool_mode": false
+ },
+ "selected_output": "search_results",
+ "showNode": true,
+ "type": "AstraDB"
+ },
+ "dragging": false,
+ "id": "AstraDB-93cal",
+ "measured": {
+ "height": 540,
+ "width": 320
+ },
+ "position": {
+ "x": 1552.5270288197573,
+ "y": 310.92605536703144
+ },
+ "selected": false,
+ "type": "genericNode"
}
],
"viewport": {
- "x": 28.84866644052977,
- "y": 276.30129659855504,
- "zoom": 0.5265349644912217
+ "x": -29.911832824936937,
+ "y": 88.77245200098008,
+ "zoom": 0.582863818810844
}
},
"description": "Explore Hybrid Search with a vector database.",
"endpoint_name": null,
- "id": "cd58d400-fe60-47c6-b2b5-4a7d3eada7b1",
+ "id": "be9c7480-a8a2-4a12-ab32-67c1432e1504",
"is_component": false,
- "last_tested_version": "1.4.3",
+ "last_tested_version": "1.5.0.post2",
"name": "Hybrid Search RAG",
"tags": [
"openai",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Instagram Copywriter.json b/src/backend/base/langflow/initial_setup/starter_projects/Instagram Copywriter.json
index 3fa0a918f..ba5635414 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Instagram Copywriter.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Instagram Copywriter.json
@@ -2208,7 +2208,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Invoice Summarizer.json b/src/backend/base/langflow/initial_setup/starter_projects/Invoice Summarizer.json
index 109be1847..51c7c1ac5 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Invoice Summarizer.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Invoice Summarizer.json
@@ -1389,7 +1389,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Ingestion.json b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Ingestion.json
index c4ea590da..bdce3a82f 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Ingestion.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Ingestion.json
@@ -7,7 +7,7 @@
"data": {
"sourceHandle": {
"dataType": "URLComponent",
- "id": "URLComponent-6JEUC",
+ "id": "URLComponent-WBxJx",
"name": "page_results",
"output_types": [
"DataFrame"
@@ -15,7 +15,7 @@
},
"targetHandle": {
"fieldName": "data_inputs",
- "id": "SplitText-gvHe2",
+ "id": "SplitText-edq28",
"inputTypes": [
"Data",
"DataFrame",
@@ -24,20 +24,19 @@
"type": "other"
}
},
- "id": "reactflow__edge-URLComponent-6JEUC{œdataTypeœ:œURLComponentœ,œidœ:œURLComponent-6JEUCœ,œnameœ:œpage_resultsœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-gvHe2{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-gvHe2œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
+ "id": "reactflow__edge-URLComponent-WBxJx{œdataTypeœ:œURLComponentœ,œidœ:œURLComponent-WBxJxœ,œnameœ:œpage_resultsœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-edq28{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-edq28œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
"selected": false,
- "source": "URLComponent-6JEUC",
- "sourceHandle": "{œdataTypeœ: œURLComponentœ, œidœ: œURLComponent-6JEUCœ, œnameœ: œpage_resultsœ, œoutput_typesœ: [œDataFrameœ]}",
- "target": "SplitText-gvHe2",
- "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-gvHe2œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
+ "source": "URLComponent-WBxJx",
+ "sourceHandle": "{œdataTypeœ: œURLComponentœ, œidœ: œURLComponent-WBxJxœ, œnameœ: œpage_resultsœ, œoutput_typesœ: [œDataFrameœ]}",
+ "target": "SplitText-edq28",
+ "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-edq28œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
},
{
- "animated": false,
"className": "",
"data": {
"sourceHandle": {
"dataType": "SplitText",
- "id": "SplitText-gvHe2",
+ "id": "SplitText-edq28",
"name": "dataframe",
"output_types": [
"DataFrame"
@@ -45,25 +44,25 @@
},
"targetHandle": {
"fieldName": "input_df",
- "id": "KBIngestion-jj5iW",
+ "id": "KnowledgeIngestion-uSOy6",
"inputTypes": [
+ "Data",
"DataFrame"
],
"type": "other"
}
},
- "id": "xy-edge__SplitText-gvHe2{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-gvHe2œ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-KBIngestion-jj5iW{œfieldNameœ:œinput_dfœ,œidœ:œKBIngestion-jj5iWœ,œinputTypesœ:[œDataFrameœ],œtypeœ:œotherœ}",
- "selected": false,
- "source": "SplitText-gvHe2",
- "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-gvHe2œ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
- "target": "KBIngestion-jj5iW",
- "targetHandle": "{œfieldNameœ: œinput_dfœ, œidœ: œKBIngestion-jj5iWœ, œinputTypesœ: [œDataFrameœ], œtypeœ: œotherœ}"
+ "id": "xy-edge__SplitText-edq28{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-edq28œ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-KnowledgeIngestion-uSOy6{œfieldNameœ:œinput_dfœ,œidœ:œKnowledgeIngestion-uSOy6œ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}",
+ "source": "SplitText-edq28",
+ "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-edq28œ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
+ "target": "KnowledgeIngestion-uSOy6",
+ "targetHandle": "{œfieldNameœ: œinput_dfœ, œidœ: œKnowledgeIngestion-uSOy6œ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}"
}
],
"nodes": [
{
"data": {
- "id": "SplitText-gvHe2",
+ "id": "SplitText-edq28",
"node": {
"base_classes": [
"DataFrame"
@@ -280,7 +279,7 @@
"type": "SplitText"
},
"dragging": false,
- "id": "SplitText-gvHe2",
+ "id": "SplitText-edq28",
"measured": {
"height": 413,
"width": 320
@@ -294,7 +293,7 @@
},
{
"data": {
- "id": "note-bpWz8",
+ "id": "note-httIY",
"node": {
"description": "## Knowledge Ingestion\n\nThis flow shows the basics of the creation and ingestion of knowledge bases in Langflow. Here we use the `URL` component to dynamically fetch page data from the Langflow website, split it into chunks of 100 tokens, then ingest into a Knowledge Base.\n\n1. (Optional) Change the URL or switch to a different input data source as desired.\n2. (Optional) Adjust the Chunk Size as desired.\n3. Select or Create a new knowledge base.\n4. Ensure the column you wish to Vectorize is properly reflected in the Column Configuration table.",
"display_name": "",
@@ -305,7 +304,7 @@
},
"dragging": false,
"height": 401,
- "id": "note-bpWz8",
+ "id": "note-httIY",
"measured": {
"height": 401,
"width": 388
@@ -315,13 +314,13 @@
"y": 75.97023827444744
},
"resizing": false,
- "selected": true,
+ "selected": false,
"type": "noteNode",
"width": 388
},
{
"data": {
- "id": "URLComponent-6JEUC",
+ "id": "URLComponent-WBxJx",
"node": {
"base_classes": [
"DataFrame",
@@ -697,7 +696,7 @@
"type": "URLComponent"
},
"dragging": false,
- "id": "URLComponent-6JEUC",
+ "id": "URLComponent-WBxJx",
"measured": {
"height": 292,
"width": 320
@@ -711,7 +710,7 @@
},
{
"data": {
- "id": "KBIngestion-jj5iW",
+ "id": "KnowledgeIngestion-uSOy6",
"node": {
"base_classes": [
"Data"
@@ -732,11 +731,11 @@
"allow_duplicates"
],
"frozen": false,
- "icon": "database",
- "last_updated": "2025-08-13T19:45:49.122Z",
+ "icon": "upload",
+ "last_updated": "2025-08-26T18:21:03.358Z",
"legacy": false,
"metadata": {
- "code_hash": "6c62063f2c09",
+ "code_hash": "ce9549373934",
"dependencies": {
"dependencies": [
{
@@ -774,7 +773,7 @@
],
"total_dependencies": 8
},
- "module": "langflow.components.data.kb_ingest.KBIngestionComponent"
+ "module": "langflow.components.knowledge_bases.ingestion.KnowledgeIngestionComponent"
},
"minimized": false,
"output_types": [],
@@ -782,10 +781,10 @@
{
"allows_loop": false,
"cache": true,
- "display_name": "DataFrame",
+ "display_name": "Results",
"group_outputs": false,
"method": "build_kb_info",
- "name": "dataframe",
+ "name": "dataframe_output",
"selected": "Data",
"tool_mode": true,
"types": [
@@ -866,7 +865,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "from __future__ import annotations\n\nimport asyncio\nimport contextlib\nimport hashlib\nimport json\nimport re\nimport uuid\nfrom dataclasses import asdict, dataclass, field\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\n\nimport pandas as pd\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.base.data.kb_utils import get_knowledge_bases\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput, TableInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict # noqa: TC001\nfrom langflow.schema.table import EditMode\nfrom langflow.services.auth.utils import decrypt_api_key, encrypt_api_key\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_settings_service, get_variable_service, session_scope\n\nHUGGINGFACE_MODEL_NAMES = [\"sentence-transformers/all-MiniLM-L6-v2\", \"sentence-transformers/all-mpnet-base-v2\"]\nCOHERE_MODEL_NAMES = [\"embed-english-v3.0\", \"embed-multilingual-v3.0\"]\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KBIngestionComponent(Component):\n \"\"\"Create or append to Langflow Knowledge from a DataFrame.\"\"\"\n\n # ------ UI metadata ---------------------------------------------------\n display_name = \"Knowledge Ingestion\"\n description = \"Create or update knowledge in Langflow.\"\n icon = \"database\"\n name = \"KBIngestion\"\n\n def __init__(self, *args, **kwargs) -> None:\n super().__init__(*args, **kwargs)\n self._cached_kb_path: Path | None = None\n\n @dataclass\n class NewKnowledgeBaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_knowledge_base\",\n \"description\": \"Create new knowledge in Langflow.\",\n \"display_name\": \"Create new knowledge\",\n \"field_order\": [\"01_new_kb_name\", \"02_embedding_model\", \"03_api_key\"],\n \"template\": {\n \"01_new_kb_name\": StrInput(\n name=\"new_kb_name\",\n display_name=\"Knowledge Name\",\n info=\"Name of the new knowledge to create.\",\n required=True,\n ),\n \"02_embedding_model\": DropdownInput(\n name=\"embedding_model\",\n display_name=\"Model Name\",\n info=\"Select the embedding model to use for this knowledge base.\",\n required=True,\n options=OPENAI_EMBEDDING_MODEL_NAMES + HUGGINGFACE_MODEL_NAMES + COHERE_MODEL_NAMES,\n options_metadata=[{\"icon\": \"OpenAI\"} for _ in OPENAI_EMBEDDING_MODEL_NAMES]\n + [{\"icon\": \"HuggingFace\"} for _ in HUGGINGFACE_MODEL_NAMES]\n + [{\"icon\": \"Cohere\"} for _ in COHERE_MODEL_NAMES],\n ),\n \"03_api_key\": SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"Provider API key for embedding model\",\n required=True,\n load_from_db=False,\n ),\n },\n },\n }\n }\n )\n\n # ------ Inputs --------------------------------------------------------\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[],\n refresh_button=True,\n dialog_inputs=asdict(NewKnowledgeBaseInput()),\n ),\n DataFrameInput(\n name=\"input_df\",\n display_name=\"Data\",\n info=\"Table with all original columns (already chunked / processed).\",\n required=True,\n ),\n TableInput(\n name=\"column_config\",\n display_name=\"Column Configuration\",\n info=\"Configure column behavior for the knowledge base.\",\n required=True,\n table_schema=[\n {\n \"name\": \"column_name\",\n \"display_name\": \"Column Name\",\n \"type\": \"str\",\n \"description\": \"Name of the column in the source DataFrame\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"vectorize\",\n \"display_name\": \"Vectorize\",\n \"type\": \"boolean\",\n \"description\": \"Create embeddings for this column\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"identifier\",\n \"display_name\": \"Identifier\",\n \"type\": \"boolean\",\n \"description\": \"Use this column as unique identifier\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"column_name\": \"text\",\n \"vectorize\": True,\n \"identifier\": True,\n },\n ],\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"Batch size for processing embeddings\",\n advanced=True,\n value=1000,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"allow_duplicates\",\n display_name=\"Allow Duplicates\",\n info=\"Allow duplicate rows in the knowledge base\",\n advanced=True,\n value=False,\n ),\n ]\n\n # ------ Outputs -------------------------------------------------------\n outputs = [Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"build_kb_info\")]\n\n # ------ Internal helpers ---------------------------------------------\n def _get_kb_root(self) -> Path:\n \"\"\"Return the root directory for knowledge bases.\"\"\"\n return KNOWLEDGE_BASES_ROOT_PATH\n\n def _validate_column_config(self, df_source: pd.DataFrame) -> list[dict[str, Any]]:\n \"\"\"Validate column configuration using Structured Output patterns.\"\"\"\n if not self.column_config:\n msg = \"Column configuration cannot be empty\"\n raise ValueError(msg)\n\n # Convert table input to list of dicts (similar to Structured Output)\n config_list = self.column_config if isinstance(self.column_config, list) else []\n\n # Validate column names exist in DataFrame\n df_columns = set(df_source.columns)\n for config in config_list:\n col_name = config.get(\"column_name\")\n if col_name not in df_columns:\n msg = f\"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}\"\n raise ValueError(msg)\n\n return config_list\n\n def _get_embedding_provider(self, embedding_model: str) -> str:\n \"\"\"Get embedding provider by matching model name to lists.\"\"\"\n if embedding_model in OPENAI_EMBEDDING_MODEL_NAMES:\n return \"OpenAI\"\n if embedding_model in HUGGINGFACE_MODEL_NAMES:\n return \"HuggingFace\"\n if embedding_model in COHERE_MODEL_NAMES:\n return \"Cohere\"\n return \"Custom\"\n\n def _build_embeddings(self, embedding_model: str, api_key: str):\n \"\"\"Build embedding model using provider patterns.\"\"\"\n # Get provider by matching model name to lists\n provider = self._get_embedding_provider(embedding_model)\n\n # Validate provider and model\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=embedding_model,\n api_key=api_key,\n chunk_size=self.chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=embedding_model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=embedding_model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def _build_embedding_metadata(self, embedding_model, api_key) -> dict[str, Any]:\n \"\"\"Build embedding model metadata.\"\"\"\n # Get provider by matching model name to lists\n embedding_provider = self._get_embedding_provider(embedding_model)\n\n api_key_to_save = None\n if api_key and hasattr(api_key, \"get_secret_value\"):\n api_key_to_save = api_key.get_secret_value()\n elif isinstance(api_key, str):\n api_key_to_save = api_key\n\n encrypted_api_key = None\n if api_key_to_save:\n settings_service = get_settings_service()\n try:\n encrypted_api_key = encrypt_api_key(api_key_to_save, settings_service=settings_service)\n except (TypeError, ValueError) as e:\n self.log(f\"Could not encrypt API key: {e}\")\n logger.error(f\"Could not encrypt API key: {e}\")\n\n return {\n \"embedding_provider\": embedding_provider,\n \"embedding_model\": embedding_model,\n \"api_key\": encrypted_api_key,\n \"api_key_used\": bool(api_key),\n \"chunk_size\": self.chunk_size,\n \"created_at\": datetime.now(timezone.utc).isoformat(),\n }\n\n def _save_embedding_metadata(self, kb_path: Path, embedding_model: str, api_key: str) -> None:\n \"\"\"Save embedding model metadata.\"\"\"\n embedding_metadata = self._build_embedding_metadata(embedding_model, api_key)\n metadata_path = kb_path / \"embedding_metadata.json\"\n metadata_path.write_text(json.dumps(embedding_metadata, indent=2))\n\n def _save_kb_files(\n self,\n kb_path: Path,\n config_list: list[dict[str, Any]],\n ) -> None:\n \"\"\"Save KB files using File Component storage patterns.\"\"\"\n try:\n # Create directory (following File Component patterns)\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save column configuration\n # Only do this if the file doesn't exist already\n cfg_path = kb_path / \"schema.json\"\n if not cfg_path.exists():\n cfg_path.write_text(json.dumps(config_list, indent=2))\n\n except (OSError, TypeError, ValueError) as e:\n self.log(f\"Error saving KB files: {e}\")\n\n def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:\n \"\"\"Build detailed column metadata.\"\"\"\n metadata: dict[str, Any] = {\n \"total_columns\": len(df_source.columns),\n \"mapped_columns\": len(config_list),\n \"unmapped_columns\": len(df_source.columns) - len(config_list),\n \"columns\": [],\n \"summary\": {\"vectorized_columns\": [], \"identifier_columns\": []},\n }\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n # Add to columns list\n metadata[\"columns\"].append(\n {\n \"name\": col_name,\n \"vectorize\": vectorize,\n \"identifier\": identifier,\n }\n )\n\n # Update summary\n if vectorize:\n metadata[\"summary\"][\"vectorized_columns\"].append(col_name)\n if identifier:\n metadata[\"summary\"][\"identifier_columns\"].append(col_name)\n\n return metadata\n\n async def _create_vector_store(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str\n ) -> None:\n \"\"\"Create vector store following Local DB component pattern.\"\"\"\n try:\n # Set up vector store directory\n vector_store_dir = await self._kb_path()\n if not vector_store_dir:\n msg = \"Knowledge base path is not set. Please create a new knowledge base first.\"\n raise ValueError(msg)\n vector_store_dir.mkdir(parents=True, exist_ok=True)\n\n # Create embeddings model\n embedding_function = self._build_embeddings(embedding_model, api_key)\n\n # Convert DataFrame to Data objects (following Local DB pattern)\n data_objects = await self._convert_df_to_data_objects(df_source, config_list)\n\n # Create vector store\n chroma = Chroma(\n persist_directory=str(vector_store_dir),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # Convert Data objects to LangChain Documents\n documents = []\n for data_obj in data_objects:\n doc = data_obj.to_lc_document()\n documents.append(doc)\n\n # Add documents to vector store\n if documents:\n chroma.add_documents(documents)\n self.log(f\"Added {len(documents)} documents to vector store '{self.knowledge_base}'\")\n\n except (OSError, ValueError, RuntimeError) as e:\n self.log(f\"Error creating vector store: {e}\")\n\n async def _convert_df_to_data_objects(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]\n ) -> list[Data]:\n \"\"\"Convert DataFrame to Data objects for vector store.\"\"\"\n data_objects: list[Data] = []\n\n # Set up vector store directory\n kb_path = await self._kb_path()\n\n # If we don't allow duplicates, we need to get the existing hashes\n chroma = Chroma(\n persist_directory=str(kb_path),\n collection_name=self.knowledge_base,\n )\n\n # Get all documents and their metadata\n all_docs = chroma.get()\n\n # Extract all _id values from metadata\n id_list = [metadata.get(\"_id\") for metadata in all_docs[\"metadatas\"] if metadata.get(\"_id\")]\n\n # Get column roles\n content_cols = []\n identifier_cols = []\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n if vectorize:\n content_cols.append(col_name)\n elif identifier:\n identifier_cols.append(col_name)\n\n # Convert each row to a Data object\n for _, row in df_source.iterrows():\n # Build content text from identifier columns using list comprehension\n identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]\n\n # Join all parts into a single string\n page_content = \" \".join(identifier_parts)\n\n # Build metadata from NON-vectorized columns only (simple key-value pairs)\n data_dict = {\n \"text\": page_content, # Main content for vectorization\n }\n\n # Add identifier columns if they exist\n if identifier_cols:\n identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]\n page_content = \" \".join(identifier_parts)\n\n # Add metadata columns as simple key-value pairs\n for col in df_source.columns:\n if col not in content_cols and col in row and pd.notna(row[col]):\n # Convert to simple types for Chroma metadata\n value = row[col]\n data_dict[col] = str(value) # Convert complex types to string\n\n # Hash the page_content for unique ID\n page_content_hash = hashlib.sha256(page_content.encode()).hexdigest()\n data_dict[\"_id\"] = page_content_hash\n\n # If duplicates are disallowed, and hash exists, prevent adding this row\n if not self.allow_duplicates and page_content_hash in id_list:\n self.log(f\"Skipping duplicate row with hash {page_content_hash}\")\n continue\n\n # Create Data object - everything except \"text\" becomes metadata\n data_obj = Data(data=data_dict)\n data_objects.append(data_obj)\n\n return data_objects\n\n def is_valid_collection_name(self, name, min_length: int = 3, max_length: int = 63) -> bool:\n \"\"\"Validates collection name against conditions 1-3.\n\n 1. Contains 3-63 characters\n 2. Starts and ends with alphanumeric character\n 3. Contains only alphanumeric characters, underscores, or hyphens.\n\n Args:\n name (str): Collection name to validate\n min_length (int): Minimum length of the name\n max_length (int): Maximum length of the name\n\n Returns:\n bool: True if valid, False otherwise\n \"\"\"\n # Check length (condition 1)\n if not (min_length <= len(name) <= max_length):\n return False\n\n # Check start/end with alphanumeric (condition 2)\n if not (name[0].isalnum() and name[-1].isalnum()):\n return False\n\n # Check allowed characters (condition 3)\n return re.match(r\"^[a-zA-Z0-9_-]+$\", name) is not None\n\n async def _kb_path(self) -> Path | None:\n # Check if we already have the path cached\n cached_path = getattr(self, \"_cached_kb_path\", None)\n if cached_path is not None:\n return cached_path\n\n # If not cached, compute it\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching knowledge base path.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n kb_user = current_user.username\n\n kb_root = self._get_kb_root()\n\n # Cache the result\n self._cached_kb_path = kb_root / kb_user / self.knowledge_base\n\n return self._cached_kb_path\n\n # ---------------------------------------------------------------------\n # OUTPUT METHODS\n # ---------------------------------------------------------------------\n async def build_kb_info(self) -> Data:\n \"\"\"Main ingestion routine → returns a dict with KB metadata.\"\"\"\n try:\n # Get source DataFrame\n df_source: pd.DataFrame = self.input_df\n\n # Validate column configuration (using Structured Output patterns)\n config_list = self._validate_column_config(df_source)\n column_metadata = self._build_column_metadata(config_list, df_source)\n\n # Read the embedding info from the knowledge base folder\n kb_path = await self._kb_path()\n if not kb_path:\n msg = \"Knowledge base path is not set. Please create a new knowledge base first.\"\n raise ValueError(msg)\n metadata_path = kb_path / \"embedding_metadata.json\"\n\n # If the API key is not provided, try to read it from the metadata file\n if metadata_path.exists():\n settings_service = get_settings_service()\n metadata = json.loads(metadata_path.read_text())\n embedding_model = metadata.get(\"embedding_model\")\n try:\n api_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. Error: {e}\")\n\n # Check if a custom API key was provided, update metadata if so\n if self.api_key:\n api_key = self.api_key\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=embedding_model,\n api_key=api_key,\n )\n\n # Create vector store following Local DB component pattern\n await self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)\n\n # Save KB files (using File Component storage patterns)\n self._save_kb_files(kb_path, config_list)\n\n # Build metadata response\n meta: dict[str, Any] = {\n \"kb_id\": str(uuid.uuid4()),\n \"kb_name\": self.knowledge_base,\n \"rows\": len(df_source),\n \"column_metadata\": column_metadata,\n \"path\": str(kb_path),\n \"config_columns\": len(config_list),\n \"timestamp\": datetime.now(tz=timezone.utc).isoformat(),\n }\n\n # Set status message\n self.status = f\"✅ KB **{self.knowledge_base}** saved · {len(df_source)} chunks.\"\n\n return Data(data=meta)\n\n except (OSError, ValueError, RuntimeError, KeyError) as e:\n self.log(f\"Error in KB ingestion: {e}\")\n self.status = f\"❌ KB ingestion failed: {e}\"\n return Data(data={\"error\": str(e), \"kb_name\": self.knowledge_base})\n\n async def _get_api_key_variable(self, field_value: dict[str, Any]):\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching global variables.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n variable_service = get_variable_service()\n\n # Process the api_key field variable\n return await variable_service.get_variable(\n user_id=current_user.id,\n name=field_value[\"03_api_key\"],\n field=\"\",\n session=db,\n )\n\n async def update_build_config(\n self,\n build_config: dotdict,\n field_value: Any,\n field_name: str | None = None,\n ) -> dotdict:\n \"\"\"Update build configuration based on provider selection.\"\"\"\n # Create a new knowledge base\n if field_name == \"knowledge_base\":\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching knowledge base list.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n kb_user = current_user.username\n if isinstance(field_value, dict) and \"01_new_kb_name\" in field_value:\n # Validate the knowledge base name - Make sure it follows these rules:\n if not self.is_valid_collection_name(field_value[\"01_new_kb_name\"]):\n msg = f\"Invalid knowledge base name: {field_value['01_new_kb_name']}\"\n raise ValueError(msg)\n\n api_key = field_value.get(\"03_api_key\", None)\n with contextlib.suppress(Exception):\n # If the API key is a variable, resolve it\n api_key = await self._get_api_key_variable(field_value)\n\n # Make sure api_key is a string\n if not isinstance(api_key, str):\n msg = \"API key must be a string.\"\n raise ValueError(msg)\n\n # We need to test the API Key one time against the embedding model\n embed_model = self._build_embeddings(embedding_model=field_value[\"02_embedding_model\"], api_key=api_key)\n\n # Try to generate a dummy embedding to validate the API key without blocking the event loop\n try:\n await asyncio.wait_for(\n asyncio.to_thread(embed_model.embed_query, \"test\"),\n timeout=10,\n )\n except TimeoutError as e:\n msg = \"Embedding validation timed out. Please verify network connectivity and key.\"\n raise ValueError(msg) from e\n except Exception as e:\n msg = f\"Embedding validation failed: {e!s}\"\n raise ValueError(msg) from e\n\n # Create the new knowledge base directory\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / field_value[\"01_new_kb_name\"]\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save the embedding metadata\n build_config[\"knowledge_base\"][\"value\"] = field_value[\"01_new_kb_name\"]\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=field_value[\"02_embedding_model\"],\n api_key=api_key,\n )\n\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = await get_knowledge_bases(\n KNOWLEDGE_BASES_ROOT_PATH,\n user_id=self.user_id,\n )\n\n # If the selected knowledge base is not available, reset it\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n"
+ "value": "from __future__ import annotations\n\nimport asyncio\nimport contextlib\nimport hashlib\nimport json\nimport re\nimport uuid\nfrom dataclasses import asdict, dataclass, field\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, Any\n\nimport pandas as pd\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\n\nfrom langflow.base.knowledge_bases.knowledge_base_utils import get_knowledge_bases\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.components.processing.converter import convert_to_dataframe\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output, SecretStrInput, StrInput, TableInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict # noqa: TC001\nfrom langflow.schema.table import EditMode\nfrom langflow.services.auth.utils import decrypt_api_key, encrypt_api_key\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_settings_service, get_variable_service, session_scope\n\nif TYPE_CHECKING:\n from langflow.schema.dataframe import DataFrame\n\nHUGGINGFACE_MODEL_NAMES = [\"sentence-transformers/all-MiniLM-L6-v2\", \"sentence-transformers/all-mpnet-base-v2\"]\nCOHERE_MODEL_NAMES = [\"embed-english-v3.0\", \"embed-multilingual-v3.0\"]\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KnowledgeIngestionComponent(Component):\n \"\"\"Create or append to Langflow Knowledge from a DataFrame.\"\"\"\n\n # ------ UI metadata ---------------------------------------------------\n display_name = \"Knowledge Ingestion\"\n description = \"Create or update knowledge in Langflow.\"\n icon = \"upload\"\n name = \"KnowledgeIngestion\"\n\n def __init__(self, *args, **kwargs) -> None:\n super().__init__(*args, **kwargs)\n self._cached_kb_path: Path | None = None\n\n @dataclass\n class NewKnowledgeBaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_knowledge_base\",\n \"description\": \"Create new knowledge in Langflow.\",\n \"display_name\": \"Create new knowledge\",\n \"field_order\": [\"01_new_kb_name\", \"02_embedding_model\", \"03_api_key\"],\n \"template\": {\n \"01_new_kb_name\": StrInput(\n name=\"new_kb_name\",\n display_name=\"Knowledge Name\",\n info=\"Name of the new knowledge to create.\",\n required=True,\n ),\n \"02_embedding_model\": DropdownInput(\n name=\"embedding_model\",\n display_name=\"Model Name\",\n info=\"Select the embedding model to use for this knowledge base.\",\n required=True,\n options=OPENAI_EMBEDDING_MODEL_NAMES + HUGGINGFACE_MODEL_NAMES + COHERE_MODEL_NAMES,\n options_metadata=[{\"icon\": \"OpenAI\"} for _ in OPENAI_EMBEDDING_MODEL_NAMES]\n + [{\"icon\": \"HuggingFace\"} for _ in HUGGINGFACE_MODEL_NAMES]\n + [{\"icon\": \"Cohere\"} for _ in COHERE_MODEL_NAMES],\n ),\n \"03_api_key\": SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"Provider API key for embedding model\",\n required=True,\n load_from_db=False,\n ),\n },\n },\n }\n }\n )\n\n # ------ Inputs --------------------------------------------------------\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[],\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewKnowledgeBaseInput()),\n ),\n HandleInput(\n name=\"input_df\",\n display_name=\"Input\",\n info=(\n \"Table with all original columns (already chunked / processed). \"\n \"Accepts Data or DataFrame. If Data is provided, it is converted to a DataFrame automatically.\"\n ),\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n TableInput(\n name=\"column_config\",\n display_name=\"Column Configuration\",\n info=\"Configure column behavior for the knowledge base.\",\n required=True,\n table_schema=[\n {\n \"name\": \"column_name\",\n \"display_name\": \"Column Name\",\n \"type\": \"str\",\n \"description\": \"Name of the column in the source DataFrame\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"vectorize\",\n \"display_name\": \"Vectorize\",\n \"type\": \"boolean\",\n \"description\": \"Create embeddings for this column\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"identifier\",\n \"display_name\": \"Identifier\",\n \"type\": \"boolean\",\n \"description\": \"Use this column as unique identifier\",\n \"default\": False,\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"column_name\": \"text\",\n \"vectorize\": True,\n \"identifier\": True,\n },\n ],\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"Batch size for processing embeddings\",\n advanced=True,\n value=1000,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"allow_duplicates\",\n display_name=\"Allow Duplicates\",\n info=\"Allow duplicate rows in the knowledge base\",\n advanced=True,\n value=False,\n ),\n ]\n\n # ------ Outputs -------------------------------------------------------\n outputs = [Output(display_name=\"Results\", name=\"dataframe_output\", method=\"build_kb_info\")]\n\n # ------ Internal helpers ---------------------------------------------\n def _get_kb_root(self) -> Path:\n \"\"\"Return the root directory for knowledge bases.\"\"\"\n return KNOWLEDGE_BASES_ROOT_PATH\n\n def _validate_column_config(self, df_source: pd.DataFrame) -> list[dict[str, Any]]:\n \"\"\"Validate column configuration using Structured Output patterns.\"\"\"\n if not self.column_config:\n msg = \"Column configuration cannot be empty\"\n raise ValueError(msg)\n\n # Convert table input to list of dicts (similar to Structured Output)\n config_list = self.column_config if isinstance(self.column_config, list) else []\n\n # Validate column names exist in DataFrame\n df_columns = set(df_source.columns)\n for config in config_list:\n col_name = config.get(\"column_name\")\n if col_name not in df_columns:\n msg = f\"Column '{col_name}' not found in DataFrame. Available columns: {sorted(df_columns)}\"\n raise ValueError(msg)\n\n return config_list\n\n def _get_embedding_provider(self, embedding_model: str) -> str:\n \"\"\"Get embedding provider by matching model name to lists.\"\"\"\n if embedding_model in OPENAI_EMBEDDING_MODEL_NAMES:\n return \"OpenAI\"\n if embedding_model in HUGGINGFACE_MODEL_NAMES:\n return \"HuggingFace\"\n if embedding_model in COHERE_MODEL_NAMES:\n return \"Cohere\"\n return \"Custom\"\n\n def _build_embeddings(self, embedding_model: str, api_key: str):\n \"\"\"Build embedding model using provider patterns.\"\"\"\n # Get provider by matching model name to lists\n provider = self._get_embedding_provider(embedding_model)\n\n # Validate provider and model\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=embedding_model,\n api_key=api_key,\n chunk_size=self.chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=embedding_model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=embedding_model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def _build_embedding_metadata(self, embedding_model, api_key) -> dict[str, Any]:\n \"\"\"Build embedding model metadata.\"\"\"\n # Get provider by matching model name to lists\n embedding_provider = self._get_embedding_provider(embedding_model)\n\n api_key_to_save = None\n if api_key and hasattr(api_key, \"get_secret_value\"):\n api_key_to_save = api_key.get_secret_value()\n elif isinstance(api_key, str):\n api_key_to_save = api_key\n\n encrypted_api_key = None\n if api_key_to_save:\n settings_service = get_settings_service()\n try:\n encrypted_api_key = encrypt_api_key(api_key_to_save, settings_service=settings_service)\n except (TypeError, ValueError) as e:\n self.log(f\"Could not encrypt API key: {e}\")\n logger.error(f\"Could not encrypt API key: {e}\")\n\n return {\n \"embedding_provider\": embedding_provider,\n \"embedding_model\": embedding_model,\n \"api_key\": encrypted_api_key,\n \"api_key_used\": bool(api_key),\n \"chunk_size\": self.chunk_size,\n \"created_at\": datetime.now(timezone.utc).isoformat(),\n }\n\n def _save_embedding_metadata(self, kb_path: Path, embedding_model: str, api_key: str) -> None:\n \"\"\"Save embedding model metadata.\"\"\"\n embedding_metadata = self._build_embedding_metadata(embedding_model, api_key)\n metadata_path = kb_path / \"embedding_metadata.json\"\n metadata_path.write_text(json.dumps(embedding_metadata, indent=2))\n\n def _save_kb_files(\n self,\n kb_path: Path,\n config_list: list[dict[str, Any]],\n ) -> None:\n \"\"\"Save KB files using File Component storage patterns.\"\"\"\n try:\n # Create directory (following File Component patterns)\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save column configuration\n # Only do this if the file doesn't exist already\n cfg_path = kb_path / \"schema.json\"\n if not cfg_path.exists():\n cfg_path.write_text(json.dumps(config_list, indent=2))\n\n except (OSError, TypeError, ValueError) as e:\n self.log(f\"Error saving KB files: {e}\")\n\n def _build_column_metadata(self, config_list: list[dict[str, Any]], df_source: pd.DataFrame) -> dict[str, Any]:\n \"\"\"Build detailed column metadata.\"\"\"\n metadata: dict[str, Any] = {\n \"total_columns\": len(df_source.columns),\n \"mapped_columns\": len(config_list),\n \"unmapped_columns\": len(df_source.columns) - len(config_list),\n \"columns\": [],\n \"summary\": {\"vectorized_columns\": [], \"identifier_columns\": []},\n }\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n # Add to columns list\n metadata[\"columns\"].append(\n {\n \"name\": col_name,\n \"vectorize\": vectorize,\n \"identifier\": identifier,\n }\n )\n\n # Update summary\n if vectorize:\n metadata[\"summary\"][\"vectorized_columns\"].append(col_name)\n if identifier:\n metadata[\"summary\"][\"identifier_columns\"].append(col_name)\n\n return metadata\n\n async def _create_vector_store(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str\n ) -> None:\n \"\"\"Create vector store following Local DB component pattern.\"\"\"\n try:\n # Set up vector store directory\n vector_store_dir = await self._kb_path()\n if not vector_store_dir:\n msg = \"Knowledge base path is not set. Please create a new knowledge base first.\"\n raise ValueError(msg)\n vector_store_dir.mkdir(parents=True, exist_ok=True)\n\n # Create embeddings model\n embedding_function = self._build_embeddings(embedding_model, api_key)\n\n # Convert DataFrame to Data objects (following Local DB pattern)\n data_objects = await self._convert_df_to_data_objects(df_source, config_list)\n\n # Create vector store\n chroma = Chroma(\n persist_directory=str(vector_store_dir),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # Convert Data objects to LangChain Documents\n documents = []\n for data_obj in data_objects:\n doc = data_obj.to_lc_document()\n documents.append(doc)\n\n # Add documents to vector store\n if documents:\n chroma.add_documents(documents)\n self.log(f\"Added {len(documents)} documents to vector store '{self.knowledge_base}'\")\n\n except (OSError, ValueError, RuntimeError) as e:\n self.log(f\"Error creating vector store: {e}\")\n\n async def _convert_df_to_data_objects(\n self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]\n ) -> list[Data]:\n \"\"\"Convert DataFrame to Data objects for vector store.\"\"\"\n data_objects: list[Data] = []\n\n # Set up vector store directory\n kb_path = await self._kb_path()\n\n # If we don't allow duplicates, we need to get the existing hashes\n chroma = Chroma(\n persist_directory=str(kb_path),\n collection_name=self.knowledge_base,\n )\n\n # Get all documents and their metadata\n all_docs = chroma.get()\n\n # Extract all _id values from metadata\n id_list = [metadata.get(\"_id\") for metadata in all_docs[\"metadatas\"] if metadata.get(\"_id\")]\n\n # Get column roles\n content_cols = []\n identifier_cols = []\n\n for config in config_list:\n col_name = config.get(\"column_name\")\n vectorize = config.get(\"vectorize\") == \"True\" or config.get(\"vectorize\") is True\n identifier = config.get(\"identifier\") == \"True\" or config.get(\"identifier\") is True\n\n if vectorize:\n content_cols.append(col_name)\n elif identifier:\n identifier_cols.append(col_name)\n\n # Convert each row to a Data object\n for _, row in df_source.iterrows():\n # Build content text from identifier columns using list comprehension\n identifier_parts = [str(row[col]) for col in content_cols if col in row and pd.notna(row[col])]\n\n # Join all parts into a single string\n page_content = \" \".join(identifier_parts)\n\n # Build metadata from NON-vectorized columns only (simple key-value pairs)\n data_dict = {\n \"text\": page_content, # Main content for vectorization\n }\n\n # Add identifier columns if they exist\n if identifier_cols:\n identifier_parts = [str(row[col]) for col in identifier_cols if col in row and pd.notna(row[col])]\n page_content = \" \".join(identifier_parts)\n\n # Add metadata columns as simple key-value pairs\n for col in df_source.columns:\n if col not in content_cols and col in row and pd.notna(row[col]):\n # Convert to simple types for Chroma metadata\n value = row[col]\n data_dict[col] = str(value) # Convert complex types to string\n\n # Hash the page_content for unique ID\n page_content_hash = hashlib.sha256(page_content.encode()).hexdigest()\n data_dict[\"_id\"] = page_content_hash\n\n # If duplicates are disallowed, and hash exists, prevent adding this row\n if not self.allow_duplicates and page_content_hash in id_list:\n self.log(f\"Skipping duplicate row with hash {page_content_hash}\")\n continue\n\n # Create Data object - everything except \"text\" becomes metadata\n data_obj = Data(data=data_dict)\n data_objects.append(data_obj)\n\n return data_objects\n\n def is_valid_collection_name(self, name, min_length: int = 3, max_length: int = 63) -> bool:\n \"\"\"Validates collection name against conditions 1-3.\n\n 1. Contains 3-63 characters\n 2. Starts and ends with alphanumeric character\n 3. Contains only alphanumeric characters, underscores, or hyphens.\n\n Args:\n name (str): Collection name to validate\n min_length (int): Minimum length of the name\n max_length (int): Maximum length of the name\n\n Returns:\n bool: True if valid, False otherwise\n \"\"\"\n # Check length (condition 1)\n if not (min_length <= len(name) <= max_length):\n return False\n\n # Check start/end with alphanumeric (condition 2)\n if not (name[0].isalnum() and name[-1].isalnum()):\n return False\n\n # Check allowed characters (condition 3)\n return re.match(r\"^[a-zA-Z0-9_-]+$\", name) is not None\n\n async def _kb_path(self) -> Path | None:\n # Check if we already have the path cached\n cached_path = getattr(self, \"_cached_kb_path\", None)\n if cached_path is not None:\n return cached_path\n\n # If not cached, compute it\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching knowledge base path.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n kb_user = current_user.username\n\n kb_root = self._get_kb_root()\n\n # Cache the result\n self._cached_kb_path = kb_root / kb_user / self.knowledge_base\n\n return self._cached_kb_path\n\n # ---------------------------------------------------------------------\n # OUTPUT METHODS\n # ---------------------------------------------------------------------\n async def build_kb_info(self) -> Data:\n \"\"\"Main ingestion routine → returns a dict with KB metadata.\"\"\"\n try:\n input_value = self.input_df[0] if isinstance(self.input_df, list) else self.input_df\n df_source: DataFrame = convert_to_dataframe(input_value)\n\n # Validate column configuration (using Structured Output patterns)\n config_list = self._validate_column_config(df_source)\n column_metadata = self._build_column_metadata(config_list, df_source)\n\n # Read the embedding info from the knowledge base folder\n kb_path = await self._kb_path()\n if not kb_path:\n msg = \"Knowledge base path is not set. Please create a new knowledge base first.\"\n raise ValueError(msg)\n metadata_path = kb_path / \"embedding_metadata.json\"\n\n # If the API key is not provided, try to read it from the metadata file\n if metadata_path.exists():\n settings_service = get_settings_service()\n metadata = json.loads(metadata_path.read_text())\n embedding_model = metadata.get(\"embedding_model\")\n try:\n api_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. Error: {e}\")\n\n # Check if a custom API key was provided, update metadata if so\n if self.api_key:\n api_key = self.api_key\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=embedding_model,\n api_key=api_key,\n )\n\n # Create vector store following Local DB component pattern\n await self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)\n\n # Save KB files (using File Component storage patterns)\n self._save_kb_files(kb_path, config_list)\n\n # Build metadata response\n meta: dict[str, Any] = {\n \"kb_id\": str(uuid.uuid4()),\n \"kb_name\": self.knowledge_base,\n \"rows\": len(df_source),\n \"column_metadata\": column_metadata,\n \"path\": str(kb_path),\n \"config_columns\": len(config_list),\n \"timestamp\": datetime.now(tz=timezone.utc).isoformat(),\n }\n\n # Set status message\n self.status = f\"✅ KB **{self.knowledge_base}** saved · {len(df_source)} chunks.\"\n\n return Data(data=meta)\n\n except (OSError, ValueError, RuntimeError, KeyError) as e:\n msg = f\"Error during KB ingestion: {e}\"\n raise RuntimeError(msg) from e\n\n async def _get_api_key_variable(self, field_value: dict[str, Any]):\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching global variables.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n variable_service = get_variable_service()\n\n # Process the api_key field variable\n return await variable_service.get_variable(\n user_id=current_user.id,\n name=field_value[\"03_api_key\"],\n field=\"\",\n session=db,\n )\n\n async def update_build_config(\n self,\n build_config: dotdict,\n field_value: Any,\n field_name: str | None = None,\n ) -> dotdict:\n \"\"\"Update build configuration based on provider selection.\"\"\"\n # Create a new knowledge base\n if field_name == \"knowledge_base\":\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching knowledge base list.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n kb_user = current_user.username\n if isinstance(field_value, dict) and \"01_new_kb_name\" in field_value:\n # Validate the knowledge base name - Make sure it follows these rules:\n if not self.is_valid_collection_name(field_value[\"01_new_kb_name\"]):\n msg = f\"Invalid knowledge base name: {field_value['01_new_kb_name']}\"\n raise ValueError(msg)\n\n api_key = field_value.get(\"03_api_key\", None)\n with contextlib.suppress(Exception):\n # If the API key is a variable, resolve it\n api_key = await self._get_api_key_variable(field_value)\n\n # Make sure api_key is a string\n if not isinstance(api_key, str):\n msg = \"API key must be a string.\"\n raise ValueError(msg)\n\n # We need to test the API Key one time against the embedding model\n embed_model = self._build_embeddings(embedding_model=field_value[\"02_embedding_model\"], api_key=api_key)\n\n # Try to generate a dummy embedding to validate the API key without blocking the event loop\n try:\n await asyncio.wait_for(\n asyncio.to_thread(embed_model.embed_query, \"test\"),\n timeout=10,\n )\n except TimeoutError as e:\n msg = \"Embedding validation timed out. Please verify network connectivity and key.\"\n raise ValueError(msg) from e\n except Exception as e:\n msg = f\"Embedding validation failed: {e!s}\"\n raise ValueError(msg) from e\n\n # Create the new knowledge base directory\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / field_value[\"01_new_kb_name\"]\n kb_path.mkdir(parents=True, exist_ok=True)\n\n # Save the embedding metadata\n build_config[\"knowledge_base\"][\"value\"] = field_value[\"01_new_kb_name\"]\n self._save_embedding_metadata(\n kb_path=kb_path,\n embedding_model=field_value[\"02_embedding_model\"],\n api_key=api_key,\n )\n\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = await get_knowledge_bases(\n KNOWLEDGE_BASES_ROOT_PATH,\n user_id=self.user_id,\n )\n\n # If the selected knowledge base is not available, reset it\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n"
},
"column_config": {
"_input_type": "TableInput",
@@ -933,18 +932,19 @@
"value": [
{
"column_name": "text",
- "identifier": false,
+ "identifier": true,
"vectorize": true
}
]
},
"input_df": {
- "_input_type": "DataFrameInput",
+ "_input_type": "HandleInput",
"advanced": false,
- "display_name": "Data",
+ "display_name": "Input",
"dynamic": false,
- "info": "Table with all original columns (already chunked / processed).",
+ "info": "Table with all original columns (already chunked / processed). Accepts Data or DataFrame. If Data is provided, it is converted to a DataFrame automatically.",
"input_types": [
+ "Data",
"DataFrame"
],
"list": false,
@@ -954,8 +954,6 @@
"required": true,
"show": true,
"title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
"trace_as_metadata": true,
"type": "other",
"value": ""
@@ -1054,7 +1052,7 @@
"dynamic": false,
"info": "Provider API key for embedding model",
"input_types": [],
- "load_from_db": true,
+ "load_from_db": false,
"name": "api_key",
"password": true,
"placeholder": "",
@@ -1077,6 +1075,7 @@
"options": [],
"options_metadata": [],
"placeholder": "",
+ "real_time_refresh": true,
"refresh_button": true,
"required": true,
"show": true,
@@ -1091,33 +1090,33 @@
"tool_mode": false
},
"showNode": true,
- "type": "KBIngestion"
+ "type": "KnowledgeIngestion"
},
"dragging": false,
- "id": "KBIngestion-jj5iW",
+ "id": "KnowledgeIngestion-uSOy6",
"measured": {
"height": 333,
"width": 320
},
"position": {
- "x": 1000.4023842644599,
- "y": 101.77068666606948
+ "x": 1001.4259863865477,
+ "y": 110.75185048793182
},
"selected": false,
"type": "genericNode"
}
],
"viewport": {
- "x": 280.03407172860966,
- "y": 131.39479654897661,
- "zoom": 0.9295918751284687
+ "x": 283.8838616133703,
+ "y": 160.13925386069144,
+ "zoom": 0.937778878472336
}
},
"description": "An example of creating a Knowledge Base and ingesting data into it from a web URL.",
"endpoint_name": null,
- "id": "dfffa40b-547b-46ae-9c4a-6539851990bf",
+ "id": "fd03902d-38ec-4bc5-ac00-c308bbcca359",
"is_component": false,
- "last_tested_version": "1.5.0.post1",
+ "last_tested_version": "1.5.0.post2",
"name": "Knowledge Ingestion",
"tags": []
}
\ No newline at end of file
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json
index afe778eef..97d40d442 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Knowledge Retrieval.json
@@ -2,46 +2,19 @@
"data": {
"edges": [
{
- "animated": false,
+ "className": "",
"data": {
"sourceHandle": {
- "dataType": "TextInput",
- "id": "TextInput-WyJxO",
- "name": "text",
- "output_types": [
- "Message"
- ]
- },
- "targetHandle": {
- "fieldName": "search_query",
- "id": "KBRetrieval-zz3I0",
- "inputTypes": [
- "Message"
- ],
- "type": "str"
- }
- },
- "id": "xy-edge__TextInput-WyJxO{œdataTypeœ:œTextInputœ,œidœ:œTextInput-WyJxOœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-KBRetrieval-zz3I0{œfieldNameœ:œsearch_queryœ,œidœ:œKBRetrieval-zz3I0œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
- "selected": false,
- "source": "TextInput-WyJxO",
- "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-WyJxOœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}",
- "target": "KBRetrieval-zz3I0",
- "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œKBRetrieval-zz3I0œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
- },
- {
- "animated": false,
- "data": {
- "sourceHandle": {
- "dataType": "KBRetrieval",
- "id": "KBRetrieval-zz3I0",
- "name": "chroma_kb_data",
+ "dataType": "KnowledgeRetrieval",
+ "id": "KnowledgeRetrieval-kgwih",
+ "name": "retrieve_data",
"output_types": [
"DataFrame"
]
},
"targetHandle": {
"fieldName": "input_value",
- "id": "ChatOutput-N7nxz",
+ "id": "ChatOutput-OG4M9",
"inputTypes": [
"Data",
"DataFrame",
@@ -50,18 +23,43 @@
"type": "other"
}
},
- "id": "xy-edge__KBRetrieval-zz3I0{œdataTypeœ:œKBRetrievalœ,œidœ:œKBRetrieval-zz3I0œ,œnameœ:œchroma_kb_dataœ,œoutput_typesœ:[œDataFrameœ]}-ChatOutput-N7nxz{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-N7nxzœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
- "selected": false,
- "source": "KBRetrieval-zz3I0",
- "sourceHandle": "{œdataTypeœ: œKBRetrievalœ, œidœ: œKBRetrieval-zz3I0œ, œnameœ: œchroma_kb_dataœ, œoutput_typesœ: [œDataFrameœ]}",
- "target": "ChatOutput-N7nxz",
- "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-N7nxzœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
+ "id": "xy-edge__KnowledgeRetrieval-kgwih{œdataTypeœ:œKnowledgeRetrievalœ,œidœ:œKnowledgeRetrieval-kgwihœ,œnameœ:œretrieve_dataœ,œoutput_typesœ:[œDataFrameœ]}-ChatOutput-OG4M9{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-OG4M9œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
+ "source": "KnowledgeRetrieval-kgwih",
+ "sourceHandle": "{œdataTypeœ: œKnowledgeRetrievalœ, œidœ: œKnowledgeRetrieval-kgwihœ, œnameœ: œretrieve_dataœ, œoutput_typesœ: [œDataFrameœ]}",
+ "target": "ChatOutput-OG4M9",
+ "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-OG4M9œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
+ },
+ {
+ "className": "",
+ "data": {
+ "sourceHandle": {
+ "dataType": "TextInput",
+ "id": "TextInput-k48NL",
+ "name": "text",
+ "output_types": [
+ "Message"
+ ]
+ },
+ "targetHandle": {
+ "fieldName": "search_query",
+ "id": "KnowledgeRetrieval-kgwih",
+ "inputTypes": [
+ "Message"
+ ],
+ "type": "str"
+ }
+ },
+ "id": "xy-edge__TextInput-k48NL{œdataTypeœ:œTextInputœ,œidœ:œTextInput-k48NLœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-KnowledgeRetrieval-kgwih{œfieldNameœ:œsearch_queryœ,œidœ:œKnowledgeRetrieval-kgwihœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
+ "source": "TextInput-k48NL",
+ "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-k48NLœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "KnowledgeRetrieval-kgwih",
+ "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œKnowledgeRetrieval-kgwihœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
}
],
"nodes": [
{
"data": {
- "id": "note-f86G8",
+ "id": "note-6CE3d",
"node": {
"description": "## Knowledge Retrieval\n\nA stand-alone component handles the retrieval of ingested knowledge from existing knowledge bases. To retrieve knowledge:\n\n1. Select your knowledge base from the Knowledge Base dropdown. If you do not see it, choose \"Refresh List\".\n2. (Optional) Enter a Search Query to be performed against the knowledge base.\n\nNote that by default, 5 results are returned, which can be configured by clicking Controls at the top of the component.\n",
"display_name": "",
@@ -72,7 +70,7 @@
},
"dragging": false,
"height": 384,
- "id": "note-f86G8",
+ "id": "note-6CE3d",
"measured": {
"height": 384,
"width": 371
@@ -88,7 +86,7 @@
},
{
"data": {
- "id": "TextInput-WyJxO",
+ "id": "TextInput-k48NL",
"node": {
"base_classes": [
"Message"
@@ -191,7 +189,7 @@
"type": "TextInput"
},
"dragging": false,
- "id": "TextInput-WyJxO",
+ "id": "TextInput-k48NL",
"measured": {
"height": 204,
"width": 320
@@ -205,7 +203,7 @@
},
{
"data": {
- "id": "ChatOutput-N7nxz",
+ "id": "ChatOutput-OG4M9",
"node": {
"base_classes": [
"Message"
@@ -520,7 +518,7 @@
"type": "ChatOutput"
},
"dragging": false,
- "id": "ChatOutput-N7nxz",
+ "id": "ChatOutput-OG4M9",
"measured": {
"height": 48,
"width": 192
@@ -534,7 +532,7 @@
},
{
"data": {
- "id": "KBRetrieval-zz3I0",
+ "id": "KnowledgeRetrieval-kgwih",
"node": {
"base_classes": [
"DataFrame"
@@ -551,14 +549,15 @@
"api_key",
"search_query",
"top_k",
- "include_metadata"
+ "include_metadata",
+ "include_embeddings"
],
"frozen": false,
- "icon": "database",
- "last_updated": "2025-08-14T17:19:22.182Z",
+ "icon": "download",
+ "last_updated": "2025-08-26T16:19:16.681Z",
"legacy": false,
"metadata": {
- "code_hash": "6fcf86be1aca",
+ "code_hash": "1eaf43b9b167",
"dependencies": {
"dependencies": [
{
@@ -596,7 +595,7 @@
],
"total_dependencies": 8
},
- "module": "langflow.components.data.kb_retrieval.KBRetrievalComponent"
+ "module": "langflow.components.knowledge_bases.retrieval.KnowledgeRetrievalComponent"
},
"minimized": false,
"output_types": [],
@@ -606,8 +605,8 @@
"cache": true,
"display_name": "Results",
"group_outputs": false,
- "method": "get_chroma_kb_data",
- "name": "chroma_kb_data",
+ "method": "retrieve_data",
+ "name": "retrieve_data",
"selected": "DataFrame",
"tool_mode": true,
"types": [
@@ -652,14 +651,32 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nfrom pathlib import Path\nfrom typing import Any\n\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\nfrom pydantic import SecretStr\n\nfrom langflow.base.data.kb_utils import get_knowledge_bases\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.services.auth.utils import decrypt_api_key\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_settings_service, session_scope\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KBRetrievalComponent(Component):\n display_name = \"Knowledge Retrieval\"\n description = \"Search and retrieve data from knowledge.\"\n icon = \"database\"\n name = \"KBRetrieval\"\n\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[],\n refresh_button=True,\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n info=\"Optional search query to filter knowledge base data.\",\n ),\n IntInput(\n name=\"top_k\",\n display_name=\"Top K Results\",\n info=\"Number of top results to return from the knowledge base.\",\n value=5,\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"include_metadata\",\n display_name=\"Include Metadata\",\n info=\"Whether to include all metadata and embeddings in the output. If false, only content is returned.\",\n value=True,\n advanced=False,\n ),\n ]\n\n outputs = [\n Output(\n name=\"chroma_kb_data\",\n display_name=\"Results\",\n method=\"get_chroma_kb_data\",\n info=\"Returns the data from the selected knowledge base.\",\n ),\n ]\n\n async def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002\n if field_name == \"knowledge_base\":\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = await get_knowledge_bases(\n KNOWLEDGE_BASES_ROOT_PATH,\n user_id=self.user_id, # Use the user_id from the component context\n )\n\n # If the selected knowledge base is not available, reset it\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n\n def _get_kb_metadata(self, kb_path: Path) -> dict:\n \"\"\"Load and process knowledge base metadata.\"\"\"\n metadata: dict[str, Any] = {}\n metadata_file = kb_path / \"embedding_metadata.json\"\n if not metadata_file.exists():\n logger.warning(f\"Embedding metadata file not found at {metadata_file}\")\n return metadata\n\n try:\n with metadata_file.open(\"r\", encoding=\"utf-8\") as f:\n metadata = json.load(f)\n except json.JSONDecodeError:\n logger.error(f\"Error decoding JSON from {metadata_file}\")\n return {}\n\n # Decrypt API key if it exists\n if \"api_key\" in metadata and metadata.get(\"api_key\"):\n settings_service = get_settings_service()\n try:\n decrypted_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n metadata[\"api_key\"] = decrypted_key\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. Error: {e}\")\n metadata[\"api_key\"] = None\n return metadata\n\n def _build_embeddings(self, metadata: dict):\n \"\"\"Build embedding model from metadata.\"\"\"\n runtime_api_key = self.api_key.get_secret_value() if isinstance(self.api_key, SecretStr) else self.api_key\n provider = metadata.get(\"embedding_provider\")\n model = metadata.get(\"embedding_model\")\n api_key = runtime_api_key or metadata.get(\"api_key\")\n chunk_size = metadata.get(\"chunk_size\")\n\n # Handle various providers\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required. Provide it in the component's advanced settings.\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=model,\n api_key=api_key,\n chunk_size=chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n # Add other providers here if they become supported in ingest\n msg = f\"Embedding provider '{provider}' is not supported for retrieval.\"\n raise NotImplementedError(msg)\n\n async def get_chroma_kb_data(self) -> DataFrame:\n \"\"\"Retrieve data from the selected knowledge base by reading the Chroma collection.\n\n Returns:\n A DataFrame containing the data rows from the knowledge base.\n \"\"\"\n # Get the current user\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching Knowledge Base data.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n kb_user = current_user.username\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / self.knowledge_base\n\n metadata = self._get_kb_metadata(kb_path)\n if not metadata:\n msg = f\"Metadata not found for knowledge base: {self.knowledge_base}. Ensure it has been indexed.\"\n raise ValueError(msg)\n\n # Build the embedder for the knowledge base\n embedding_function = self._build_embeddings(metadata)\n\n # Load vector store\n chroma = Chroma(\n persist_directory=str(kb_path),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # If a search query is provided, perform a similarity search\n if self.search_query:\n # Use the search query to perform a similarity search\n logger.info(f\"Performing similarity search with query: {self.search_query}\")\n results = chroma.similarity_search_with_score(\n query=self.search_query or \"\",\n k=self.top_k,\n )\n else:\n results = chroma.similarity_search(\n query=self.search_query or \"\",\n k=self.top_k,\n )\n\n # For each result, make it a tuple to match the expected output format\n results = [(doc, 0) for doc in results] # Assign a dummy score of 0\n\n # If metadata is enabled, get embeddings for the results\n id_to_embedding = {}\n if self.include_metadata and results:\n doc_ids = [doc[0].metadata.get(\"_id\") for doc in results if doc[0].metadata.get(\"_id\")]\n\n # Only proceed if we have valid document IDs\n if doc_ids:\n # Access underlying client to get embeddings\n collection = chroma._client.get_collection(name=self.knowledge_base)\n embeddings_result = collection.get(where={\"_id\": {\"$in\": doc_ids}}, include=[\"embeddings\", \"metadatas\"])\n\n # Create a mapping from document ID to embedding\n for i, metadata in enumerate(embeddings_result.get(\"metadatas\", [])):\n if metadata and \"_id\" in metadata:\n id_to_embedding[metadata[\"_id\"]] = embeddings_result[\"embeddings\"][i]\n\n # Build output data based on include_metadata setting\n data_list = []\n for doc in results:\n if self.include_metadata:\n # Include all metadata, embeddings, and content\n kwargs = {\n \"content\": doc[0].page_content,\n **doc[0].metadata,\n }\n if self.search_query:\n kwargs[\"_score\"] = -1 * doc[1]\n kwargs[\"_embeddings\"] = id_to_embedding.get(doc[0].metadata.get(\"_id\"))\n else:\n # Only include content\n kwargs = {\n \"content\": doc[0].page_content,\n }\n\n data_list.append(Data(**kwargs))\n\n # Return the DataFrame containing the data\n return DataFrame(data=data_list)\n"
+ "value": "import json\nfrom pathlib import Path\nfrom typing import Any\n\nfrom cryptography.fernet import InvalidToken\nfrom langchain_chroma import Chroma\nfrom loguru import logger\nfrom pydantic import SecretStr\n\nfrom langflow.base.knowledge_bases.knowledge_base_utils import get_knowledge_bases\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.services.auth.utils import decrypt_api_key\nfrom langflow.services.database.models.user.crud import get_user_by_id\nfrom langflow.services.deps import get_settings_service, session_scope\n\nsettings = get_settings_service().settings\nknowledge_directory = settings.knowledge_bases_dir\nif not knowledge_directory:\n msg = \"Knowledge bases directory is not set in the settings.\"\n raise ValueError(msg)\nKNOWLEDGE_BASES_ROOT_PATH = Path(knowledge_directory).expanduser()\n\n\nclass KnowledgeRetrievalComponent(Component):\n display_name = \"Knowledge Retrieval\"\n description = \"Search and retrieve data from knowledge.\"\n icon = \"download\"\n name = \"KnowledgeRetrieval\"\n\n inputs = [\n DropdownInput(\n name=\"knowledge_base\",\n display_name=\"Knowledge\",\n info=\"Select the knowledge to load data from.\",\n required=True,\n options=[],\n refresh_button=True,\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Embedding Provider API Key\",\n info=\"API key for the embedding provider to generate embeddings.\",\n advanced=True,\n required=False,\n ),\n MessageTextInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n info=\"Optional search query to filter knowledge base data.\",\n tool_mode=True,\n ),\n IntInput(\n name=\"top_k\",\n display_name=\"Top K Results\",\n info=\"Number of top results to return from the knowledge base.\",\n value=5,\n advanced=True,\n required=False,\n ),\n BoolInput(\n name=\"include_metadata\",\n display_name=\"Include Metadata\",\n info=\"Whether to include all metadata in the output. If false, only content is returned.\",\n value=True,\n advanced=False,\n ),\n BoolInput(\n name=\"include_embeddings\",\n display_name=\"Include Embeddings\",\n info=\"Whether to include embeddings in the output. Only applicable if 'Include Metadata' is enabled.\",\n value=False,\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"retrieve_data\",\n display_name=\"Results\",\n method=\"retrieve_data\",\n info=\"Returns the data from the selected knowledge base.\",\n ),\n ]\n\n async def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002\n if field_name == \"knowledge_base\":\n # Update the knowledge base options dynamically\n build_config[\"knowledge_base\"][\"options\"] = await get_knowledge_bases(\n KNOWLEDGE_BASES_ROOT_PATH,\n user_id=self.user_id, # Use the user_id from the component context\n )\n\n # If the selected knowledge base is not available, reset it\n if build_config[\"knowledge_base\"][\"value\"] not in build_config[\"knowledge_base\"][\"options\"]:\n build_config[\"knowledge_base\"][\"value\"] = None\n\n return build_config\n\n def _get_kb_metadata(self, kb_path: Path) -> dict:\n \"\"\"Load and process knowledge base metadata.\"\"\"\n metadata: dict[str, Any] = {}\n metadata_file = kb_path / \"embedding_metadata.json\"\n if not metadata_file.exists():\n logger.warning(f\"Embedding metadata file not found at {metadata_file}\")\n return metadata\n\n try:\n with metadata_file.open(\"r\", encoding=\"utf-8\") as f:\n metadata = json.load(f)\n except json.JSONDecodeError:\n logger.error(f\"Error decoding JSON from {metadata_file}\")\n return {}\n\n # Decrypt API key if it exists\n if \"api_key\" in metadata and metadata.get(\"api_key\"):\n settings_service = get_settings_service()\n try:\n decrypted_key = decrypt_api_key(metadata[\"api_key\"], settings_service)\n metadata[\"api_key\"] = decrypted_key\n except (InvalidToken, TypeError, ValueError) as e:\n logger.error(f\"Could not decrypt API key. Please provide it manually. Error: {e}\")\n metadata[\"api_key\"] = None\n return metadata\n\n def _build_embeddings(self, metadata: dict):\n \"\"\"Build embedding model from metadata.\"\"\"\n runtime_api_key = self.api_key.get_secret_value() if isinstance(self.api_key, SecretStr) else self.api_key\n provider = metadata.get(\"embedding_provider\")\n model = metadata.get(\"embedding_model\")\n api_key = runtime_api_key or metadata.get(\"api_key\")\n chunk_size = metadata.get(\"chunk_size\")\n\n # Handle various providers\n if provider == \"OpenAI\":\n from langchain_openai import OpenAIEmbeddings\n\n if not api_key:\n msg = \"OpenAI API key is required. Provide it in the component's advanced settings.\"\n raise ValueError(msg)\n return OpenAIEmbeddings(\n model=model,\n api_key=api_key,\n chunk_size=chunk_size,\n )\n if provider == \"HuggingFace\":\n from langchain_huggingface import HuggingFaceEmbeddings\n\n return HuggingFaceEmbeddings(\n model=model,\n )\n if provider == \"Cohere\":\n from langchain_cohere import CohereEmbeddings\n\n if not api_key:\n msg = \"Cohere API key is required when using Cohere provider\"\n raise ValueError(msg)\n return CohereEmbeddings(\n model=model,\n cohere_api_key=api_key,\n )\n if provider == \"Custom\":\n # For custom embedding models, we would need additional configuration\n msg = \"Custom embedding models not yet supported\"\n raise NotImplementedError(msg)\n # Add other providers here if they become supported in ingest\n msg = f\"Embedding provider '{provider}' is not supported for retrieval.\"\n raise NotImplementedError(msg)\n\n async def retrieve_data(self) -> DataFrame:\n \"\"\"Retrieve data from the selected knowledge base by reading the Chroma collection.\n\n Returns:\n A DataFrame containing the data rows from the knowledge base.\n \"\"\"\n # Get the current user\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for fetching Knowledge Base data.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n if not current_user:\n msg = f\"User with ID {self.user_id} not found.\"\n raise ValueError(msg)\n kb_user = current_user.username\n kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / self.knowledge_base\n\n metadata = self._get_kb_metadata(kb_path)\n if not metadata:\n msg = f\"Metadata not found for knowledge base: {self.knowledge_base}. Ensure it has been indexed.\"\n raise ValueError(msg)\n\n # Build the embedder for the knowledge base\n embedding_function = self._build_embeddings(metadata)\n\n # Load vector store\n chroma = Chroma(\n persist_directory=str(kb_path),\n embedding_function=embedding_function,\n collection_name=self.knowledge_base,\n )\n\n # If a search query is provided, perform a similarity search\n if self.search_query:\n # Use the search query to perform a similarity search\n logger.info(f\"Performing similarity search with query: {self.search_query}\")\n results = chroma.similarity_search_with_score(\n query=self.search_query or \"\",\n k=self.top_k,\n )\n else:\n results = chroma.similarity_search(\n query=self.search_query or \"\",\n k=self.top_k,\n )\n\n # For each result, make it a tuple to match the expected output format\n results = [(doc, 0) for doc in results] # Assign a dummy score of 0\n\n # If include_embeddings is enabled, get embeddings for the results\n id_to_embedding = {}\n if self.include_embeddings and results:\n doc_ids = [doc[0].metadata.get(\"_id\") for doc in results if doc[0].metadata.get(\"_id\")]\n\n # Only proceed if we have valid document IDs\n if doc_ids:\n # Access underlying client to get embeddings\n collection = chroma._client.get_collection(name=self.knowledge_base)\n embeddings_result = collection.get(where={\"_id\": {\"$in\": doc_ids}}, include=[\"metadatas\", \"embeddings\"])\n\n # Create a mapping from document ID to embedding\n for i, metadata in enumerate(embeddings_result.get(\"metadatas\", [])):\n if metadata and \"_id\" in metadata:\n id_to_embedding[metadata[\"_id\"]] = embeddings_result[\"embeddings\"][i]\n\n # Build output data based on include_metadata setting\n data_list = []\n for doc in results:\n kwargs = {\n \"content\": doc[0].page_content,\n }\n if self.search_query:\n kwargs[\"_score\"] = -1 * doc[1]\n if self.include_metadata:\n # Include all metadata, embeddings, and content\n kwargs.update(doc[0].metadata)\n if self.include_embeddings:\n kwargs[\"_embeddings\"] = id_to_embedding.get(doc[0].metadata.get(\"_id\"))\n\n data_list.append(Data(**kwargs))\n\n # Return the DataFrame containing the data\n return DataFrame(data=data_list)\n"
+ },
+ "include_embeddings": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Include Embeddings",
+ "dynamic": false,
+ "info": "Whether to include embeddings in the output. Only applicable if 'Include Metadata' is enabled.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "include_embeddings",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": false
},
"include_metadata": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Include Metadata",
"dynamic": false,
- "info": "Whether to include all metadata and embeddings in the output. If false, only content is returned.",
+ "info": "Whether to include all metadata in the output. If false, only content is returned.",
"list": false,
"list_add_label": "Add More",
"name": "include_metadata",
@@ -712,7 +729,7 @@
"required": false,
"show": true,
"title_case": false,
- "tool_mode": false,
+ "tool_mode": true,
"trace_as_input": true,
"trace_as_metadata": true,
"type": "str",
@@ -740,33 +757,33 @@
"tool_mode": false
},
"showNode": true,
- "type": "KBRetrieval"
+ "type": "KnowledgeRetrieval"
},
"dragging": false,
- "id": "KBRetrieval-zz3I0",
+ "id": "KnowledgeRetrieval-kgwih",
"measured": {
"height": 329,
"width": 320
},
"position": {
- "x": 616.6226476085393,
- "y": -343.13068334363356
+ "x": 635.7729107873928,
+ "y": -337.735556504938
},
"selected": false,
"type": "genericNode"
}
],
"viewport": {
- "x": 177.06633386268413,
- "y": 482.8027480187026,
- "zoom": 0.8999566725119924
+ "x": 90.80151692622985,
+ "y": 591.5552904741861,
+ "zoom": 1.0053880437940252
}
},
"description": "An example of performing a vector search against data in a Knowledge Base to retrieve relevant documents.",
"endpoint_name": null,
- "id": "5487ee05-73d5-4b12-9b41-bc4c3a2f9326",
+ "id": "e262d49f-c800-4962-948b-c94f79eb5fb8",
"is_component": false,
- "last_tested_version": "1.5.0.post1",
+ "last_tested_version": "1.5.0.post2",
"name": "Knowledge Retrieval",
"tags": []
}
\ No newline at end of file
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json b/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json
index b918c4fe2..765f0f157 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json
@@ -2269,7 +2269,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json b/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json
index b99e10fb6..9184e74c1 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json
@@ -1585,7 +1585,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Nvidia Remix.json b/src/backend/base/langflow/initial_setup/starter_projects/Nvidia Remix.json
index 777a95b46..f67e2b807 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Nvidia Remix.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Nvidia Remix.json
@@ -1059,7 +1059,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Pokédex Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Pokédex Agent.json
index aaef58590..3f1e395c7 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Pokédex Agent.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Pokédex Agent.json
@@ -1474,7 +1474,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Price Deal Finder.json b/src/backend/base/langflow/initial_setup/starter_projects/Price Deal Finder.json
index 85d8ea479..ec7915810 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Price Deal Finder.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Price Deal Finder.json
@@ -1841,7 +1841,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Research Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Research Agent.json
index e2b9a22bd..d7433ca01 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Research Agent.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Research Agent.json
@@ -2752,7 +2752,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/SaaS Pricing.json b/src/backend/base/langflow/initial_setup/starter_projects/SaaS Pricing.json
index 8ea3e511a..8cd5e7fd9 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/SaaS Pricing.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/SaaS Pricing.json
@@ -1057,7 +1057,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Search agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Search agent.json
index 276a2c0a2..63894fc60 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Search agent.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Search agent.json
@@ -1180,7 +1180,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Sequential Tasks Agents.json b/src/backend/base/langflow/initial_setup/starter_projects/Sequential Tasks Agents.json
index bf8f1d1ce..7a409f1c2 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Sequential Tasks Agents.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Sequential Tasks Agents.json
@@ -503,7 +503,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
@@ -1054,7 +1054,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
@@ -2419,7 +2419,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Simple Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Simple Agent.json
index a3f16926e..754ab958b 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Simple Agent.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Simple Agent.json
@@ -1168,7 +1168,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Social Media Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Social Media Agent.json
index e94f1c919..f821594fe 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Social Media Agent.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Social Media Agent.json
@@ -1526,7 +1526,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json b/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json
index 4109bc120..a86d26dff 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json
@@ -1892,7 +1892,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
@@ -2436,7 +2436,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
@@ -2980,7 +2980,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json
index c3c6571b9..9d4785b1f 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json
@@ -7,7 +7,7 @@
"data": {
"sourceHandle": {
"dataType": "ChatInput",
- "id": "ChatInput-et7o5",
+ "id": "ChatInput-insg9",
"name": "message",
"output_types": [
"Message"
@@ -15,7 +15,7 @@
},
"targetHandle": {
"fieldName": "question",
- "id": "Prompt-V3tlJ",
+ "id": "Prompt-wgAUs",
"inputTypes": [
"Message",
"Text"
@@ -23,12 +23,12 @@
"type": "str"
}
},
- "id": "reactflow__edge-ChatInput-et7o5{œdataTypeœ:œChatInputœ,œidœ:œChatInput-et7o5œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-V3tlJ{œfieldNameœ:œquestionœ,œidœ:œPrompt-V3tlJœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}",
+ "id": "reactflow__edge-ChatInput-insg9{œdataTypeœ:œChatInputœ,œidœ:œChatInput-insg9œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-wgAUs{œfieldNameœ:œquestionœ,œidœ:œPrompt-wgAUsœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}",
"selected": false,
- "source": "ChatInput-et7o5",
- "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-et7o5œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
- "target": "Prompt-V3tlJ",
- "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-V3tlJœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}"
+ "source": "ChatInput-insg9",
+ "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-insg9œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "Prompt-wgAUs",
+ "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-wgAUsœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}"
},
{
"animated": false,
@@ -36,7 +36,7 @@
"data": {
"sourceHandle": {
"dataType": "parser",
- "id": "parser-WUXPk",
+ "id": "parser-gcmKF",
"name": "parsed_text",
"output_types": [
"Message"
@@ -44,7 +44,7 @@
},
"targetHandle": {
"fieldName": "context",
- "id": "Prompt-V3tlJ",
+ "id": "Prompt-wgAUs",
"inputTypes": [
"Message",
"Text"
@@ -52,154 +52,12 @@
"type": "str"
}
},
- "id": "reactflow__edge-parser-WUXPk{œdataTypeœ:œparserœ,œidœ:œparser-WUXPkœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-V3tlJ{œfieldNameœ:œcontextœ,œidœ:œPrompt-V3tlJœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}",
+ "id": "reactflow__edge-parser-gcmKF{œdataTypeœ:œparserœ,œidœ:œparser-gcmKFœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-wgAUs{œfieldNameœ:œcontextœ,œidœ:œPrompt-wgAUsœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}",
"selected": false,
- "source": "parser-WUXPk",
- "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-WUXPkœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
- "target": "Prompt-V3tlJ",
- "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-V3tlJœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}"
- },
- {
- "animated": false,
- "className": "",
- "data": {
- "sourceHandle": {
- "dataType": "OpenAIEmbeddings",
- "id": "OpenAIEmbeddings-oFtHy",
- "name": "embeddings",
- "output_types": [
- "Embeddings"
- ]
- },
- "targetHandle": {
- "fieldName": "embedding_model",
- "id": "AstraDB-W6NB4",
- "inputTypes": [
- "Embeddings"
- ],
- "type": "other"
- }
- },
- "id": "reactflow__edge-OpenAIEmbeddings-oFtHy{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-oFtHyœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-W6NB4{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-W6NB4œ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}",
- "selected": false,
- "source": "OpenAIEmbeddings-oFtHy",
- "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-oFtHyœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}",
- "target": "AstraDB-W6NB4",
- "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-W6NB4œ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}"
- },
- {
- "animated": false,
- "className": "",
- "data": {
- "sourceHandle": {
- "dataType": "OpenAIEmbeddings",
- "id": "OpenAIEmbeddings-v0rcw",
- "name": "embeddings",
- "output_types": [
- "Embeddings"
- ]
- },
- "targetHandle": {
- "fieldName": "embedding_model",
- "id": "AstraDB-JsRrT",
- "inputTypes": [
- "Embeddings"
- ],
- "type": "other"
- }
- },
- "id": "reactflow__edge-OpenAIEmbeddings-v0rcw{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-v0rcwœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-JsRrT{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-JsRrTœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}",
- "selected": false,
- "source": "OpenAIEmbeddings-v0rcw",
- "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-v0rcwœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}",
- "target": "AstraDB-JsRrT",
- "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-JsRrTœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}"
- },
- {
- "animated": false,
- "className": "",
- "data": {
- "sourceHandle": {
- "dataType": "ChatInput",
- "id": "ChatInput-et7o5",
- "name": "message",
- "output_types": [
- "Message"
- ]
- },
- "targetHandle": {
- "fieldName": "search_query",
- "id": "AstraDB-JsRrT",
- "inputTypes": [
- "Message"
- ],
- "type": "query"
- }
- },
- "id": "reactflow__edge-ChatInput-et7o5{œdataTypeœ:œChatInputœ,œidœ:œChatInput-et7o5œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-JsRrT{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-JsRrTœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
- "selected": false,
- "source": "ChatInput-et7o5",
- "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-et7o5œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
- "target": "AstraDB-JsRrT",
- "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-JsRrTœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
- },
- {
- "animated": false,
- "className": "",
- "data": {
- "sourceHandle": {
- "dataType": "AstraDB",
- "id": "AstraDB-JsRrT",
- "name": "dataframe",
- "output_types": [
- "DataFrame"
- ]
- },
- "targetHandle": {
- "fieldName": "input_data",
- "id": "parser-WUXPk",
- "inputTypes": [
- "DataFrame",
- "Data"
- ],
- "type": "other"
- }
- },
- "id": "reactflow__edge-AstraDB-JsRrT{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-JsRrTœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-WUXPk{œfieldNameœ:œinput_dataœ,œidœ:œparser-WUXPkœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
- "selected": false,
- "source": "AstraDB-JsRrT",
- "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-JsRrTœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
- "target": "parser-WUXPk",
- "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-WUXPkœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
- },
- {
- "animated": false,
- "className": "",
- "data": {
- "sourceHandle": {
- "dataType": "SplitText",
- "id": "SplitText-6H5cD",
- "name": "dataframe",
- "output_types": [
- "DataFrame"
- ]
- },
- "targetHandle": {
- "fieldName": "ingest_data",
- "id": "AstraDB-W6NB4",
- "inputTypes": [
- "Data",
- "DataFrame"
- ],
- "type": "other"
- }
- },
- "id": "reactflow__edge-SplitText-6H5cD{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-6H5cDœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-W6NB4{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-W6NB4œ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}",
- "selected": false,
- "source": "SplitText-6H5cD",
- "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-6H5cDœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
- "target": "AstraDB-W6NB4",
- "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-W6NB4œ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}"
+ "source": "parser-gcmKF",
+ "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-gcmKFœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "Prompt-wgAUs",
+ "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-wgAUsœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}"
},
{
"animated": false,
@@ -207,7 +65,7 @@
"data": {
"sourceHandle": {
"dataType": "File",
- "id": "File-vusZ2",
+ "id": "File-wqFzl",
"name": "message",
"output_types": [
"Message"
@@ -215,7 +73,7 @@
},
"targetHandle": {
"fieldName": "data_inputs",
- "id": "SplitText-6H5cD",
+ "id": "SplitText-50nDI",
"inputTypes": [
"Data",
"DataFrame",
@@ -224,12 +82,12 @@
"type": "other"
}
},
- "id": "reactflow__edge-File-vusZ2{œdataTypeœ:œFileœ,œidœ:œFile-vusZ2œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-6H5cD{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-6H5cDœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
+ "id": "reactflow__edge-File-wqFzl{œdataTypeœ:œFileœ,œidœ:œFile-wqFzlœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-50nDI{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-50nDIœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
"selected": false,
- "source": "File-vusZ2",
- "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-vusZ2œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
- "target": "SplitText-6H5cD",
- "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-6H5cDœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
+ "source": "File-wqFzl",
+ "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-wqFzlœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "SplitText-50nDI",
+ "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-50nDIœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}"
},
{
"animated": false,
@@ -237,7 +95,7 @@
"data": {
"sourceHandle": {
"dataType": "Prompt",
- "id": "Prompt-V3tlJ",
+ "id": "Prompt-wgAUs",
"name": "prompt",
"output_types": [
"Message"
@@ -245,19 +103,19 @@
},
"targetHandle": {
"fieldName": "input_value",
- "id": "LanguageModelComponent-1uhUK",
+ "id": "LanguageModelComponent-nQYc0",
"inputTypes": [
"Message"
],
"type": "str"
}
},
- "id": "reactflow__edge-Prompt-V3tlJ{œdataTypeœ:œPromptœ,œidœ:œPrompt-V3tlJœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-1uhUK{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-1uhUKœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
+ "id": "reactflow__edge-Prompt-wgAUs{œdataTypeœ:œPromptœ,œidœ:œPrompt-wgAUsœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-nQYc0{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-nQYc0œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
"selected": false,
- "source": "Prompt-V3tlJ",
- "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-V3tlJœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}",
- "target": "LanguageModelComponent-1uhUK",
- "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œLanguageModelComponent-1uhUKœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
+ "source": "Prompt-wgAUs",
+ "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-wgAUsœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "LanguageModelComponent-nQYc0",
+ "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œLanguageModelComponent-nQYc0œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}"
},
{
"animated": false,
@@ -265,7 +123,7 @@
"data": {
"sourceHandle": {
"dataType": "LanguageModelComponent",
- "id": "LanguageModelComponent-1uhUK",
+ "id": "LanguageModelComponent-nQYc0",
"name": "text_output",
"output_types": [
"Message"
@@ -273,7 +131,7 @@
},
"targetHandle": {
"fieldName": "input_value",
- "id": "ChatOutput-ZaYDW",
+ "id": "ChatOutput-VG394",
"inputTypes": [
"Data",
"DataFrame",
@@ -282,12 +140,94 @@
"type": "str"
}
},
- "id": "reactflow__edge-LanguageModelComponent-1uhUK{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-1uhUKœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-ZaYDW{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-ZaYDWœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}",
+ "id": "reactflow__edge-LanguageModelComponent-nQYc0{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-nQYc0œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-VG394{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-VG394œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}",
"selected": false,
- "source": "LanguageModelComponent-1uhUK",
- "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-1uhUKœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}",
- "target": "ChatOutput-ZaYDW",
- "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-ZaYDWœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}"
+ "source": "LanguageModelComponent-nQYc0",
+ "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-nQYc0œ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "ChatOutput-VG394",
+ "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-VG394œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}"
+ },
+ {
+ "animated": false,
+ "className": "",
+ "data": {
+ "sourceHandle": {
+ "dataType": "SplitText",
+ "id": "SplitText-50nDI",
+ "name": "dataframe",
+ "output_types": [
+ "DataFrame"
+ ]
+ },
+ "targetHandle": {
+ "fieldName": "ingest_data",
+ "id": "AstraDB-t8lcj",
+ "inputTypes": [
+ "Data",
+ "DataFrame"
+ ],
+ "type": "other"
+ }
+ },
+ "id": "reactflow__edge-SplitText-50nDI{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-50nDIœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-t8lcj{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-t8lcjœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}",
+ "selected": false,
+ "source": "SplitText-50nDI",
+ "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-50nDIœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
+ "target": "AstraDB-t8lcj",
+ "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-t8lcjœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}"
+ },
+ {
+ "className": "",
+ "data": {
+ "sourceHandle": {
+ "dataType": "ChatInput",
+ "id": "ChatInput-insg9",
+ "name": "message",
+ "output_types": [
+ "Message"
+ ]
+ },
+ "targetHandle": {
+ "fieldName": "search_query",
+ "id": "AstraDB-CLCyc",
+ "inputTypes": [
+ "Message"
+ ],
+ "type": "query"
+ }
+ },
+ "id": "xy-edge__ChatInput-insg9{œdataTypeœ:œChatInputœ,œidœ:œChatInput-insg9œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-CLCyc{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-CLCycœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}",
+ "source": "ChatInput-insg9",
+ "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-insg9œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}",
+ "target": "AstraDB-CLCyc",
+ "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-CLCycœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}"
+ },
+ {
+ "className": "",
+ "data": {
+ "sourceHandle": {
+ "dataType": "AstraDB",
+ "id": "AstraDB-CLCyc",
+ "name": "dataframe",
+ "output_types": [
+ "DataFrame"
+ ]
+ },
+ "targetHandle": {
+ "fieldName": "input_data",
+ "id": "parser-gcmKF",
+ "inputTypes": [
+ "DataFrame",
+ "Data"
+ ],
+ "type": "other"
+ }
+ },
+ "id": "xy-edge__AstraDB-CLCyc{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-CLCycœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-gcmKF{œfieldNameœ:œinput_dataœ,œidœ:œparser-gcmKFœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
+ "source": "AstraDB-CLCyc",
+ "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-CLCycœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}",
+ "target": "parser-gcmKF",
+ "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-gcmKFœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}"
}
],
"nodes": [
@@ -295,7 +235,7 @@
"data": {
"description": "Get chat inputs from the Playground.",
"display_name": "Chat Input",
- "id": "ChatInput-et7o5",
+ "id": "ChatInput-insg9",
"node": {
"base_classes": [
"Message"
@@ -574,7 +514,7 @@
},
"dragging": false,
"height": 234,
- "id": "ChatInput-et7o5",
+ "id": "ChatInput-insg9",
"measured": {
"height": 234,
"width": 320
@@ -595,7 +535,7 @@
"data": {
"description": "Create a prompt template with dynamic variables.",
"display_name": "Prompt",
- "id": "Prompt-V3tlJ",
+ "id": "Prompt-wgAUs",
"node": {
"base_classes": [
"Message"
@@ -759,7 +699,7 @@
},
"dragging": false,
"height": 433,
- "id": "Prompt-V3tlJ",
+ "id": "Prompt-wgAUs",
"measured": {
"height": 433,
"width": 320
@@ -780,7 +720,7 @@
"data": {
"description": "Split text into chunks based on specified criteria.",
"display_name": "Split Text",
- "id": "SplitText-6H5cD",
+ "id": "SplitText-50nDI",
"node": {
"base_classes": [
"Data"
@@ -982,7 +922,7 @@
},
"dragging": false,
"height": 475,
- "id": "SplitText-6H5cD",
+ "id": "SplitText-50nDI",
"measured": {
"height": 475,
"width": 320
@@ -1001,7 +941,7 @@
},
{
"data": {
- "id": "note-rR7dl",
+ "id": "note-0MDGm",
"node": {
"description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n",
"display_name": "",
@@ -1014,7 +954,7 @@
},
"dragging": false,
"height": 324,
- "id": "note-rR7dl",
+ "id": "note-0MDGm",
"measured": {
"height": 324,
"width": 324
@@ -1038,7 +978,7 @@
},
{
"data": {
- "id": "note-zTYux",
+ "id": "note-mSH4A",
"node": {
"description": "Retrieval Augmented Generation (RAG) is a way of providing additional context to a Large Language Model (LLM) by preloading a vector database with embeddings for relevant content. When a user chats with the LLM, a _similarity search_ retrieves relevant content by comparing an embedding for the user's query against the embeddings in the vector database.\nFor example, a RAG chatbot could be pre-loaded with product data, and then it can help customers find specific products based on their queries.\nThis template has two sub-flows. One flow loads data into your vector store, and the other is the user-driven chat flow that compares a new query against the existing content in your vector database.\n\n## Quickstart\n1. Add your OpenAI API key to the **Language Model** component and the two **Embeddings** components.\n2. Add an Astra application token to the **Astra DB** vector store components, or replace these components with other vector store components available in the **Components** menu.\n**💡 Store your credentials as Langflow global variables 🌐 to simplify token management and reuse in your flows.**\n\n## Run the flows\n1. Load your data into a vector database with the 📚 **Load Data** flow. Select a file to upload in the **File** component, and then click **Play** ▶️ on the **Astra DB** component to run the **Load Data** flow.\n2. Open the **Playground** to start a chat with the 🐕 **Retriever** flow.\n\nOnly the run the **Load Data** flow when you need to populate your vector database with baseline content, such as product data.\nThe **Retriever** flow is the user-facing chat flow. This flow generates an embedding from chat input, runs a similarity search against the vector database to retrieve relevant content, and then passes the original query and the retrieved content to the LLM, which produces the chat response sent to the user.\n\n## Next steps\nExperiment by changing the prompt and the loaded data to see how the LLM's responses change.",
"display_name": "Read Me",
@@ -1051,7 +991,7 @@
},
"dragging": false,
"height": 556,
- "id": "note-zTYux",
+ "id": "note-mSH4A",
"measured": {
"height": 556,
"width": 389
@@ -1077,7 +1017,7 @@
"data": {
"description": "Display a chat message in the Playground.",
"display_name": "Chat Output",
- "id": "ChatOutput-ZaYDW",
+ "id": "ChatOutput-VG394",
"node": {
"base_classes": [
"Message"
@@ -1380,7 +1320,7 @@
},
"dragging": false,
"height": 234,
- "id": "ChatOutput-ZaYDW",
+ "id": "ChatOutput-VG394",
"measured": {
"height": 234,
"width": 320
@@ -1399,7 +1339,7 @@
},
{
"data": {
- "id": "OpenAIEmbeddings-v0rcw",
+ "id": "OpenAIEmbeddings-y2ymc",
"node": {
"base_classes": [
"Embeddings"
@@ -1892,7 +1832,7 @@
},
"dragging": false,
"height": 320,
- "id": "OpenAIEmbeddings-v0rcw",
+ "id": "OpenAIEmbeddings-y2ymc",
"measured": {
"height": 320,
"width": 320
@@ -1911,7 +1851,7 @@
},
{
"data": {
- "id": "note-i07aq",
+ "id": "note-8ieVo",
"node": {
"description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. \n\nClick **Run component** on the **Astra DB** component to load your data.\n\n\n### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.",
"display_name": "",
@@ -1924,7 +1864,7 @@
},
"dragging": false,
"height": 460,
- "id": "note-i07aq",
+ "id": "note-8ieVo",
"measured": {
"height": 460,
"width": 340
@@ -1948,7 +1888,7 @@
},
{
"data": {
- "id": "OpenAIEmbeddings-oFtHy",
+ "id": "OpenAIEmbeddings-cusQX",
"node": {
"base_classes": [
"Embeddings"
@@ -2441,7 +2381,7 @@
},
"dragging": false,
"height": 320,
- "id": "OpenAIEmbeddings-oFtHy",
+ "id": "OpenAIEmbeddings-cusQX",
"measured": {
"height": 320,
"width": 320
@@ -2460,7 +2400,7 @@
},
{
"data": {
- "id": "note-YhXtV",
+ "id": "note-Sk1It",
"node": {
"description": "### 💡 Add your OpenAI API key here 👇",
"display_name": "",
@@ -2473,7 +2413,7 @@
},
"dragging": false,
"height": 324,
- "id": "note-YhXtV",
+ "id": "note-Sk1It",
"measured": {
"height": 324,
"width": 324
@@ -2492,7 +2432,7 @@
},
{
"data": {
- "id": "note-AdydJ",
+ "id": "note-qmLAt",
"node": {
"description": "### 💡 Add your OpenAI API key here 👇",
"display_name": "",
@@ -2505,7 +2445,7 @@
},
"dragging": false,
"height": 324,
- "id": "note-AdydJ",
+ "id": "note-qmLAt",
"measured": {
"height": 324,
"width": 324
@@ -2524,7 +2464,7 @@
},
{
"data": {
- "id": "note-aBKhj",
+ "id": "note-UCSuM",
"node": {
"description": "### 💡 Add your OpenAI API key here 👇",
"display_name": "",
@@ -2537,7 +2477,7 @@
},
"dragging": false,
"height": 324,
- "id": "note-aBKhj",
+ "id": "note-UCSuM",
"measured": {
"height": 324,
"width": 324
@@ -2556,7 +2496,7 @@
},
{
"data": {
- "id": "parser-WUXPk",
+ "id": "parser-gcmKF",
"node": {
"base_classes": [
"Message"
@@ -2718,7 +2658,7 @@
"type": "parser"
},
"dragging": false,
- "id": "parser-WUXPk",
+ "id": "parser-gcmKF",
"measured": {
"height": 361,
"width": 320
@@ -2732,1600 +2672,7 @@
},
{
"data": {
- "id": "AstraDB-JsRrT",
- "node": {
- "base_classes": [
- "Data",
- "DataFrame",
- "VectorStore"
- ],
- "beta": false,
- "conditional_paths": [],
- "custom_fields": {},
- "description": "Ingest and search documents in Astra DB",
- "display_name": "Astra DB",
- "documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
- "edited": false,
- "field_order": [
- "token",
- "environment",
- "database_name",
- "api_endpoint",
- "keyspace",
- "collection_name",
- "embedding_model",
- "ingest_data",
- "search_query",
- "should_cache_vector_store",
- "search_method",
- "reranker",
- "lexical_terms",
- "number_of_results",
- "search_type",
- "search_score_threshold",
- "advanced_search_filter",
- "autodetect_collection",
- "content_field",
- "deletion_field",
- "ignore_invalid_documents",
- "astradb_vectorstore_kwargs"
- ],
- "frozen": false,
- "icon": "AstraDB",
- "legacy": false,
- "metadata": {
- "code_hash": "23fbe9daca09",
- "dependencies": {
- "dependencies": [
- {
- "name": "astrapy",
- "version": "2.0.1"
- },
- {
- "name": "langchain_astradb",
- "version": "0.6.0"
- },
- {
- "name": "langchain_core",
- "version": "0.3.75"
- },
- {
- "name": "langflow",
- "version": null
- }
- ],
- "total_dependencies": 4
- },
- "module": "langflow.components.datastax.astradb.AstraDBVectorStoreComponent"
- },
- "minimized": false,
- "output_types": [],
- "outputs": [
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "Search Results",
- "group_outputs": false,
- "method": "search_documents",
- "name": "search_results",
- "selected": "Data",
- "tool_mode": true,
- "types": [
- "Data"
- ],
- "value": "__UNDEFINED__"
- },
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "DataFrame",
- "group_outputs": false,
- "method": "as_dataframe",
- "name": "dataframe",
- "selected": "DataFrame",
- "tool_mode": true,
- "types": [
- "DataFrame"
- ],
- "value": "__UNDEFINED__"
- },
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "Vector Store Connection",
- "group_outputs": false,
- "hidden": true,
- "method": "as_vector_store",
- "name": "vectorstoreconnection",
- "selected": "VectorStore",
- "tool_mode": true,
- "types": [
- "VectorStore"
- ],
- "value": "__UNDEFINED__"
- }
- ],
- "pinned": false,
- "selected_output": "dataframe",
- "template": {
- "_type": "Component",
- "advanced_search_filter": {
- "_input_type": "NestedDictInput",
- "advanced": true,
- "display_name": "Search Metadata Filter",
- "dynamic": false,
- "info": "Optional dictionary of filters to apply to the search query.",
- "list": false,
- "list_add_label": "Add More",
- "name": "advanced_search_filter",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "NestedDict",
- "value": {}
- },
- "api_endpoint": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Astra DB API Endpoint",
- "dynamic": false,
- "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "api_endpoint",
- "placeholder": "",
- "required": false,
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "astradb_vectorstore_kwargs": {
- "_input_type": "NestedDictInput",
- "advanced": true,
- "display_name": "AstraDBVectorStore Parameters",
- "dynamic": false,
- "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
- "list": false,
- "list_add_label": "Add More",
- "name": "astradb_vectorstore_kwargs",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "NestedDict",
- "value": {}
- },
- "autodetect_collection": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Autodetect Collection",
- "dynamic": false,
- "info": "Boolean flag to determine whether to autodetect the collection.",
- "list": false,
- "list_add_label": "Add More",
- "name": "autodetect_collection",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": true
- },
- "code": {
- "advanced": true,
- "dynamic": true,
- "fileTypes": [],
- "file_path": "",
- "info": "",
- "list": false,
- "load_from_db": false,
- "multiline": true,
- "name": "code",
- "password": false,
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "type": "code",
- "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
- },
- "collection_name": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": true,
- "dialog_inputs": {
- "fields": {
- "data": {
- "node": {
- "description": "Please allow several seconds for creation to complete.",
- "display_name": "Create new collection",
- "field_order": [
- "01_new_collection_name",
- "02_embedding_generation_provider",
- "03_embedding_generation_model",
- "04_dimension"
- ],
- "name": "create_collection",
- "template": {
- "01_new_collection_name": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Name",
- "dynamic": false,
- "info": "Name of the new collection to create in Astra DB.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "new_collection_name",
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "02_embedding_generation_provider": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Embedding generation method",
- "dynamic": false,
- "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB",
- "info": "Provider to use for generating embeddings.",
- "name": "embedding_generation_provider",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "03_embedding_generation_model": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Embedding model",
- "dynamic": false,
- "info": "Model to use for generating embeddings.",
- "name": "embedding_generation_model",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "04_dimension": {
- "_input_type": "IntInput",
- "advanced": false,
- "display_name": "Dimensions",
- "dynamic": false,
- "info": "Dimensions of the embeddings to generate.",
- "list": false,
- "list_add_label": "Add More",
- "name": "dimension",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "int"
- }
- }
- }
- }
- },
- "functionality": "create"
- },
- "display_name": "Collection",
- "dynamic": false,
- "info": "The name of the collection within Astra DB where the vectors will be stored.",
- "name": "collection_name",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "refresh_button": true,
- "required": true,
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "content_field": {
- "_input_type": "StrInput",
- "advanced": true,
- "display_name": "Content Field",
- "dynamic": false,
- "info": "Field to use as the text content field for the vector store.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "content_field",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "database_name": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": true,
- "dialog_inputs": {
- "fields": {
- "data": {
- "node": {
- "description": "Please allow several minutes for creation to complete.",
- "display_name": "Create new database",
- "field_order": [
- "01_new_database_name",
- "02_cloud_provider",
- "03_region"
- ],
- "name": "create_database",
- "template": {
- "01_new_database_name": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Name",
- "dynamic": false,
- "info": "Name of the new database to create in Astra DB.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "new_database_name",
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "02_cloud_provider": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Cloud provider",
- "dynamic": false,
- "info": "Cloud provider for the new database.",
- "name": "cloud_provider",
- "options": [
- "Amazon Web Services",
- "Google Cloud Platform",
- "Microsoft Azure"
- ],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "03_region": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Region",
- "dynamic": false,
- "info": "Region for the new database.",
- "name": "region",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- }
- }
- }
- }
- },
- "functionality": "create"
- },
- "display_name": "Database",
- "dynamic": false,
- "info": "The Database name for the Astra DB instance.",
- "name": "database_name",
- "options": [],
- "options_metadata": [
- {
- "api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com",
- "collections": 5,
- "keyspaces": [
- "default_keyspace",
- "samples_dataflow"
- ],
- "org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3",
- "status": null
- }
- ],
- "placeholder": "",
- "real_time_refresh": true,
- "refresh_button": true,
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "deletion_field": {
- "_input_type": "StrInput",
- "advanced": true,
- "display_name": "Deletion Based On Field",
- "dynamic": false,
- "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "deletion_field",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "embedding_model": {
- "_input_type": "HandleInput",
- "advanced": false,
- "display_name": "Embedding Model",
- "dynamic": false,
- "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
- "input_types": [
- "Embeddings"
- ],
- "list": false,
- "list_add_label": "Add More",
- "name": "embedding_model",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "trace_as_metadata": true,
- "type": "other",
- "value": ""
- },
- "environment": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": true,
- "dialog_inputs": {},
- "display_name": "Environment",
- "dynamic": false,
- "info": "The environment for the Astra DB API Endpoint.",
- "name": "environment",
- "options": [
- "prod",
- "test",
- "dev"
- ],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "prod"
- },
- "ignore_invalid_documents": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Ignore Invalid Documents",
- "dynamic": false,
- "info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
- "list": false,
- "list_add_label": "Add More",
- "name": "ignore_invalid_documents",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": false
- },
- "ingest_data": {
- "_input_type": "HandleInput",
- "advanced": false,
- "display_name": "Ingest Data",
- "dynamic": false,
- "info": "",
- "input_types": [
- "Data",
- "DataFrame"
- ],
- "list": true,
- "list_add_label": "Add More",
- "name": "ingest_data",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "trace_as_metadata": true,
- "type": "other",
- "value": ""
- },
- "keyspace": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Keyspace",
- "dynamic": false,
- "info": "Optional keyspace within Astra DB to use for the collection.",
- "name": "keyspace",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "lexical_terms": {
- "_input_type": "QueryInput",
- "advanced": true,
- "display_name": "Lexical Terms",
- "dynamic": false,
- "info": "Add additional terms/keywords to augment search precision.",
- "input_types": [
- "Message"
- ],
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "lexical_terms",
- "placeholder": "Enter terms to search...",
- "required": false,
- "separator": " ",
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "query",
- "value": ""
- },
- "number_of_results": {
- "_input_type": "IntInput",
- "advanced": true,
- "display_name": "Number of Search Results",
- "dynamic": false,
- "info": "Number of search results to return.",
- "list": false,
- "list_add_label": "Add More",
- "name": "number_of_results",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "int",
- "value": 4
- },
- "reranker": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Reranker",
- "dynamic": false,
- "info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
- "name": "reranker",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "required": false,
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "search_method": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Search Method",
- "dynamic": false,
- "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
- "name": "search_method",
- "options": [
- "Hybrid Search",
- "Vector Search"
- ],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "Vector Search"
- },
- "search_query": {
- "_input_type": "QueryInput",
- "advanced": false,
- "display_name": "Search Query",
- "dynamic": false,
- "info": "Enter a query to run a similarity search.",
- "input_types": [
- "Message"
- ],
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "search_query",
- "placeholder": "Enter a query...",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": true,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "query",
- "value": ""
- },
- "search_score_threshold": {
- "_input_type": "FloatInput",
- "advanced": true,
- "display_name": "Search Score Threshold",
- "dynamic": false,
- "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
- "list": false,
- "list_add_label": "Add More",
- "name": "search_score_threshold",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "float",
- "value": 0
- },
- "search_type": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Search Type",
- "dynamic": false,
- "info": "Search type to use",
- "name": "search_type",
- "options": [
- "Similarity",
- "Similarity with score threshold",
- "MMR (Max Marginal Relevance)"
- ],
- "options_metadata": [],
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "Similarity"
- },
- "should_cache_vector_store": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Cache Vector Store",
- "dynamic": false,
- "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
- "list": false,
- "list_add_label": "Add More",
- "name": "should_cache_vector_store",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": true
- },
- "token": {
- "_input_type": "SecretStrInput",
- "advanced": false,
- "display_name": "Astra DB Application Token",
- "dynamic": false,
- "info": "Authentication token for accessing Astra DB.",
- "input_types": [],
- "load_from_db": true,
- "name": "token",
- "password": true,
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "type": "str",
- "value": "ASTRA_DB_APPLICATION_TOKEN"
- }
- },
- "tool_mode": false
- },
- "selected_output": "dataframe",
- "showNode": true,
- "type": "AstraDB"
- },
- "dragging": false,
- "id": "AstraDB-JsRrT",
- "measured": {
- "height": 502,
- "width": 320
- },
- "position": {
- "x": 1206.2272993725155,
- "y": 491.41485400844977
- },
- "selected": false,
- "type": "genericNode"
- },
- {
- "data": {
- "id": "AstraDB-W6NB4",
- "node": {
- "base_classes": [
- "Data",
- "DataFrame",
- "VectorStore"
- ],
- "beta": false,
- "conditional_paths": [],
- "custom_fields": {},
- "description": "Ingest and search documents in Astra DB",
- "display_name": "Astra DB",
- "documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
- "edited": false,
- "field_order": [
- "token",
- "environment",
- "database_name",
- "api_endpoint",
- "keyspace",
- "collection_name",
- "embedding_model",
- "ingest_data",
- "search_query",
- "should_cache_vector_store",
- "search_method",
- "reranker",
- "lexical_terms",
- "number_of_results",
- "search_type",
- "search_score_threshold",
- "advanced_search_filter",
- "autodetect_collection",
- "content_field",
- "deletion_field",
- "ignore_invalid_documents",
- "astradb_vectorstore_kwargs"
- ],
- "frozen": false,
- "icon": "AstraDB",
- "legacy": false,
- "metadata": {
- "code_hash": "23fbe9daca09",
- "dependencies": {
- "dependencies": [
- {
- "name": "astrapy",
- "version": "2.0.1"
- },
- {
- "name": "langchain_astradb",
- "version": "0.6.0"
- },
- {
- "name": "langchain_core",
- "version": "0.3.75"
- },
- {
- "name": "langflow",
- "version": null
- }
- ],
- "total_dependencies": 4
- },
- "module": "langflow.components.datastax.astradb.AstraDBVectorStoreComponent"
- },
- "minimized": false,
- "output_types": [],
- "outputs": [
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "Search Results",
- "group_outputs": false,
- "method": "search_documents",
- "name": "search_results",
- "selected": "Data",
- "tool_mode": true,
- "types": [
- "Data"
- ],
- "value": "__UNDEFINED__"
- },
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "DataFrame",
- "group_outputs": false,
- "method": "as_dataframe",
- "name": "dataframe",
- "selected": "DataFrame",
- "tool_mode": true,
- "types": [
- "DataFrame"
- ],
- "value": "__UNDEFINED__"
- },
- {
- "allows_loop": false,
- "cache": true,
- "display_name": "Vector Store Connection",
- "group_outputs": false,
- "hidden": true,
- "method": "as_vector_store",
- "name": "vectorstoreconnection",
- "selected": "VectorStore",
- "tool_mode": true,
- "types": [
- "VectorStore"
- ],
- "value": "__UNDEFINED__"
- }
- ],
- "pinned": false,
- "template": {
- "_type": "Component",
- "advanced_search_filter": {
- "_input_type": "NestedDictInput",
- "advanced": true,
- "display_name": "Search Metadata Filter",
- "dynamic": false,
- "info": "Optional dictionary of filters to apply to the search query.",
- "list": false,
- "list_add_label": "Add More",
- "name": "advanced_search_filter",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "NestedDict",
- "value": {}
- },
- "api_endpoint": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Astra DB API Endpoint",
- "dynamic": false,
- "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "api_endpoint",
- "placeholder": "",
- "required": false,
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "astradb_vectorstore_kwargs": {
- "_input_type": "NestedDictInput",
- "advanced": true,
- "display_name": "AstraDBVectorStore Parameters",
- "dynamic": false,
- "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
- "list": false,
- "list_add_label": "Add More",
- "name": "astradb_vectorstore_kwargs",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "NestedDict",
- "value": {}
- },
- "autodetect_collection": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Autodetect Collection",
- "dynamic": false,
- "info": "Boolean flag to determine whether to autodetect the collection.",
- "list": false,
- "list_add_label": "Add More",
- "name": "autodetect_collection",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": true
- },
- "code": {
- "advanced": true,
- "dynamic": true,
- "fileTypes": [],
- "file_path": "",
- "info": "",
- "list": false,
- "load_from_db": false,
- "multiline": true,
- "name": "code",
- "password": false,
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "type": "code",
- "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
- },
- "collection_name": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": true,
- "dialog_inputs": {
- "fields": {
- "data": {
- "node": {
- "description": "Please allow several seconds for creation to complete.",
- "display_name": "Create new collection",
- "field_order": [
- "01_new_collection_name",
- "02_embedding_generation_provider",
- "03_embedding_generation_model",
- "04_dimension"
- ],
- "name": "create_collection",
- "template": {
- "01_new_collection_name": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Name",
- "dynamic": false,
- "info": "Name of the new collection to create in Astra DB.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "new_collection_name",
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "02_embedding_generation_provider": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Embedding generation method",
- "dynamic": false,
- "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB",
- "info": "Provider to use for generating embeddings.",
- "name": "embedding_generation_provider",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "03_embedding_generation_model": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Embedding model",
- "dynamic": false,
- "info": "Model to use for generating embeddings.",
- "name": "embedding_generation_model",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "04_dimension": {
- "_input_type": "IntInput",
- "advanced": false,
- "display_name": "Dimensions",
- "dynamic": false,
- "info": "Dimensions of the embeddings to generate.",
- "list": false,
- "list_add_label": "Add More",
- "name": "dimension",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "int"
- }
- }
- }
- }
- },
- "functionality": "create"
- },
- "display_name": "Collection",
- "dynamic": false,
- "info": "The name of the collection within Astra DB where the vectors will be stored.",
- "name": "collection_name",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "refresh_button": true,
- "required": true,
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "content_field": {
- "_input_type": "StrInput",
- "advanced": true,
- "display_name": "Content Field",
- "dynamic": false,
- "info": "Field to use as the text content field for the vector store.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "content_field",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "database_name": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": true,
- "dialog_inputs": {
- "fields": {
- "data": {
- "node": {
- "description": "Please allow several minutes for creation to complete.",
- "display_name": "Create new database",
- "field_order": [
- "01_new_database_name",
- "02_cloud_provider",
- "03_region"
- ],
- "name": "create_database",
- "template": {
- "01_new_database_name": {
- "_input_type": "StrInput",
- "advanced": false,
- "display_name": "Name",
- "dynamic": false,
- "info": "Name of the new database to create in Astra DB.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "new_database_name",
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "02_cloud_provider": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Cloud provider",
- "dynamic": false,
- "info": "Cloud provider for the new database.",
- "name": "cloud_provider",
- "options": [
- "Amazon Web Services",
- "Google Cloud Platform",
- "Microsoft Azure"
- ],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "03_region": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Region",
- "dynamic": false,
- "info": "Region for the new database.",
- "name": "region",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- }
- }
- }
- }
- },
- "functionality": "create"
- },
- "display_name": "Database",
- "dynamic": false,
- "info": "The Database name for the Astra DB instance.",
- "name": "database_name",
- "options": [],
- "options_metadata": [
- {
- "api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com",
- "collections": 5,
- "keyspaces": [
- "default_keyspace",
- "samples_dataflow"
- ],
- "org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3",
- "status": null
- }
- ],
- "placeholder": "",
- "real_time_refresh": true,
- "refresh_button": true,
- "required": true,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "deletion_field": {
- "_input_type": "StrInput",
- "advanced": true,
- "display_name": "Deletion Based On Field",
- "dynamic": false,
- "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "deletion_field",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "embedding_model": {
- "_input_type": "HandleInput",
- "advanced": false,
- "display_name": "Embedding Model",
- "dynamic": false,
- "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
- "input_types": [
- "Embeddings"
- ],
- "list": false,
- "list_add_label": "Add More",
- "name": "embedding_model",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "trace_as_metadata": true,
- "type": "other",
- "value": ""
- },
- "environment": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": true,
- "dialog_inputs": {},
- "display_name": "Environment",
- "dynamic": false,
- "info": "The environment for the Astra DB API Endpoint.",
- "name": "environment",
- "options": [
- "prod",
- "test",
- "dev"
- ],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "prod"
- },
- "ignore_invalid_documents": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Ignore Invalid Documents",
- "dynamic": false,
- "info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
- "list": false,
- "list_add_label": "Add More",
- "name": "ignore_invalid_documents",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": false
- },
- "ingest_data": {
- "_input_type": "HandleInput",
- "advanced": false,
- "display_name": "Ingest Data",
- "dynamic": false,
- "info": "",
- "input_types": [
- "Data",
- "DataFrame"
- ],
- "list": true,
- "list_add_label": "Add More",
- "name": "ingest_data",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "trace_as_metadata": true,
- "type": "other",
- "value": ""
- },
- "keyspace": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Keyspace",
- "dynamic": false,
- "info": "Optional keyspace within Astra DB to use for the collection.",
- "name": "keyspace",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "lexical_terms": {
- "_input_type": "QueryInput",
- "advanced": true,
- "display_name": "Lexical Terms",
- "dynamic": false,
- "info": "Add additional terms/keywords to augment search precision.",
- "input_types": [
- "Message"
- ],
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "lexical_terms",
- "placeholder": "Enter terms to search...",
- "required": false,
- "separator": " ",
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "query",
- "value": ""
- },
- "number_of_results": {
- "_input_type": "IntInput",
- "advanced": true,
- "display_name": "Number of Search Results",
- "dynamic": false,
- "info": "Number of search results to return.",
- "list": false,
- "list_add_label": "Add More",
- "name": "number_of_results",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "int",
- "value": 4
- },
- "reranker": {
- "_input_type": "DropdownInput",
- "advanced": false,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Reranker",
- "dynamic": false,
- "info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
- "name": "reranker",
- "options": [],
- "options_metadata": [],
- "placeholder": "",
- "required": false,
- "show": false,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": ""
- },
- "search_method": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Search Method",
- "dynamic": false,
- "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
- "name": "search_method",
- "options": [
- "Hybrid Search",
- "Vector Search"
- ],
- "options_metadata": [],
- "placeholder": "",
- "real_time_refresh": true,
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "Vector Search"
- },
- "search_query": {
- "_input_type": "QueryInput",
- "advanced": false,
- "display_name": "Search Query",
- "dynamic": false,
- "info": "Enter a query to run a similarity search.",
- "input_types": [
- "Message"
- ],
- "list": false,
- "list_add_label": "Add More",
- "load_from_db": false,
- "name": "search_query",
- "placeholder": "Enter a query...",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": true,
- "trace_as_input": true,
- "trace_as_metadata": true,
- "type": "query",
- "value": ""
- },
- "search_score_threshold": {
- "_input_type": "FloatInput",
- "advanced": true,
- "display_name": "Search Score Threshold",
- "dynamic": false,
- "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
- "list": false,
- "list_add_label": "Add More",
- "name": "search_score_threshold",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "float",
- "value": 0
- },
- "search_type": {
- "_input_type": "DropdownInput",
- "advanced": true,
- "combobox": false,
- "dialog_inputs": {},
- "display_name": "Search Type",
- "dynamic": false,
- "info": "Search type to use",
- "name": "search_type",
- "options": [
- "Similarity",
- "Similarity with score threshold",
- "MMR (Max Marginal Relevance)"
- ],
- "options_metadata": [],
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "str",
- "value": "Similarity"
- },
- "should_cache_vector_store": {
- "_input_type": "BoolInput",
- "advanced": true,
- "display_name": "Cache Vector Store",
- "dynamic": false,
- "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
- "list": false,
- "list_add_label": "Add More",
- "name": "should_cache_vector_store",
- "placeholder": "",
- "required": false,
- "show": true,
- "title_case": false,
- "tool_mode": false,
- "trace_as_metadata": true,
- "type": "bool",
- "value": true
- },
- "token": {
- "_input_type": "SecretStrInput",
- "advanced": false,
- "display_name": "Astra DB Application Token",
- "dynamic": false,
- "info": "Authentication token for accessing Astra DB.",
- "input_types": [],
- "load_from_db": true,
- "name": "token",
- "password": true,
- "placeholder": "",
- "real_time_refresh": true,
- "required": true,
- "show": true,
- "title_case": false,
- "type": "str",
- "value": "ASTRA_DB_APPLICATION_TOKEN"
- }
- },
- "tool_mode": false
- },
- "selected_output": "search_results",
- "showNode": true,
- "type": "AstraDB"
- },
- "dragging": false,
- "id": "AstraDB-W6NB4",
- "measured": {
- "height": 502,
- "width": 320
- },
- "position": {
- "x": 2060.799531746744,
- "y": 1507.872099528214
- },
- "selected": false,
- "type": "genericNode"
- },
- {
- "data": {
- "id": "File-vusZ2",
+ "id": "File-wqFzl",
"node": {
"base_classes": [
"Message"
@@ -4350,8 +2697,33 @@
],
"frozen": false,
"icon": "file-text",
+ "last_updated": "2025-08-27T14:19:16.203Z",
"legacy": false,
- "metadata": {},
+ "metadata": {
+ "code_hash": "23fbe9daca09",
+ "dependencies": {
+ "dependencies": [
+ {
+ "name": "astrapy",
+ "version": "2.0.1"
+ },
+ {
+ "name": "langchain_astradb",
+ "version": "0.6.0"
+ },
+ {
+ "name": "langchain_core",
+ "version": "0.3.75"
+ },
+ {
+ "name": "langflow",
+ "version": null
+ }
+ ],
+ "total_dependencies": 4
+ },
+ "module": "langflow.components.datastax.astradb.AstraDBVectorStoreComponent"
+ },
"minimized": false,
"output_types": [],
"outputs": [
@@ -4593,7 +2965,7 @@
"type": "File"
},
"dragging": false,
- "id": "File-vusZ2",
+ "id": "File-wqFzl",
"measured": {
"height": 230,
"width": 320
@@ -4607,7 +2979,7 @@
},
{
"data": {
- "id": "LanguageModelComponent-1uhUK",
+ "id": "LanguageModelComponent-nQYc0",
"node": {
"base_classes": [
"LanguageModel",
@@ -4631,6 +3003,7 @@
],
"frozen": false,
"icon": "brain-circuit",
+ "last_updated": "2025-08-27T14:19:16.085Z",
"legacy": false,
"metadata": {
"keywords": [
@@ -4886,7 +3259,7 @@
"type": "LanguageModelComponent"
},
"dragging": false,
- "id": "LanguageModelComponent-1uhUK",
+ "id": "LanguageModelComponent-nQYc0",
"measured": {
"height": 451,
"width": 320
@@ -4897,19 +3270,1631 @@
},
"selected": false,
"type": "genericNode"
+ },
+ {
+ "data": {
+ "id": "AstraDB-t8lcj",
+ "node": {
+ "base_classes": [
+ "Data",
+ "DataFrame",
+ "VectorStore"
+ ],
+ "beta": false,
+ "conditional_paths": [],
+ "custom_fields": {},
+ "description": "Ingest and search documents in Astra DB",
+ "display_name": "Astra DB",
+ "documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
+ "edited": false,
+ "field_order": [
+ "token",
+ "environment",
+ "database_name",
+ "api_endpoint",
+ "keyspace",
+ "collection_name",
+ "embedding_model",
+ "ingest_data",
+ "search_query",
+ "should_cache_vector_store",
+ "search_method",
+ "reranker",
+ "lexical_terms",
+ "number_of_results",
+ "search_type",
+ "search_score_threshold",
+ "advanced_search_filter",
+ "autodetect_collection",
+ "content_field",
+ "deletion_field",
+ "ignore_invalid_documents",
+ "astradb_vectorstore_kwargs"
+ ],
+ "frozen": false,
+ "icon": "AstraDB",
+ "last_updated": "2025-08-27T14:19:16.085Z",
+ "legacy": false,
+ "metadata": {
+ "code_hash": "23fbe9daca09",
+ "dependencies": {
+ "dependencies": [
+ {
+ "name": "astrapy",
+ "version": "2.0.1"
+ },
+ {
+ "name": "langchain_astradb",
+ "version": "0.6.0"
+ },
+ {
+ "name": "langchain_core",
+ "version": "0.3.75"
+ },
+ {
+ "name": "langflow",
+ "version": null
+ }
+ ],
+ "total_dependencies": 4
+ },
+ "module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent"
+ },
+ "minimized": false,
+ "output_types": [],
+ "outputs": [
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "Search Results",
+ "group_outputs": false,
+ "method": "search_documents",
+ "name": "search_results",
+ "selected": "Data",
+ "tool_mode": true,
+ "types": [
+ "Data"
+ ],
+ "value": "__UNDEFINED__"
+ },
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "DataFrame",
+ "group_outputs": false,
+ "method": "as_dataframe",
+ "name": "dataframe",
+ "selected": "DataFrame",
+ "tool_mode": true,
+ "types": [
+ "DataFrame"
+ ],
+ "value": "__UNDEFINED__"
+ },
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "Vector Store Connection",
+ "group_outputs": false,
+ "hidden": true,
+ "method": "as_vector_store",
+ "name": "vectorstoreconnection",
+ "selected": "VectorStore",
+ "tool_mode": true,
+ "types": [
+ "VectorStore"
+ ],
+ "value": "__UNDEFINED__"
+ }
+ ],
+ "pinned": false,
+ "template": {
+ "_type": "Component",
+ "advanced_search_filter": {
+ "_input_type": "NestedDictInput",
+ "advanced": true,
+ "display_name": "Search Metadata Filter",
+ "dynamic": false,
+ "info": "Optional dictionary of filters to apply to the search query.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "advanced_search_filter",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "NestedDict",
+ "value": {}
+ },
+ "api_endpoint": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Astra DB API Endpoint",
+ "dynamic": false,
+ "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
+ "name": "api_endpoint",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "astradb_vectorstore_kwargs": {
+ "_input_type": "NestedDictInput",
+ "advanced": true,
+ "display_name": "AstraDBVectorStore Parameters",
+ "dynamic": false,
+ "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "astradb_vectorstore_kwargs",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "NestedDict",
+ "value": {}
+ },
+ "autodetect_collection": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Autodetect Collection",
+ "dynamic": false,
+ "info": "Boolean flag to determine whether to autodetect the collection.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "autodetect_collection",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": true
+ },
+ "code": {
+ "advanced": true,
+ "dynamic": true,
+ "fileTypes": [],
+ "file_path": "",
+ "info": "",
+ "list": false,
+ "load_from_db": false,
+ "multiline": true,
+ "name": "code",
+ "password": false,
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "type": "code",
+ "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
+ },
+ "collection_name": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": true,
+ "dialog_inputs": {
+ "fields": {
+ "data": {
+ "node": {
+ "description": "Please allow several seconds for creation to complete.",
+ "display_name": "Create new collection",
+ "field_order": [
+ "01_new_collection_name",
+ "02_embedding_generation_provider",
+ "03_embedding_generation_model",
+ "04_dimension"
+ ],
+ "name": "create_collection",
+ "template": {
+ "01_new_collection_name": {
+ "_input_type": "StrInput",
+ "advanced": false,
+ "display_name": "Name",
+ "dynamic": false,
+ "info": "Name of the new collection to create in Astra DB.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "new_collection_name",
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "02_embedding_generation_provider": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Embedding generation method",
+ "dynamic": false,
+ "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB",
+ "info": "Provider to use for generating embeddings.",
+ "name": "embedding_generation_provider",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "03_embedding_generation_model": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Embedding model",
+ "dynamic": false,
+ "info": "Model to use for generating embeddings.",
+ "name": "embedding_generation_model",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "04_dimension": {
+ "_input_type": "IntInput",
+ "advanced": false,
+ "display_name": "Dimensions",
+ "dynamic": false,
+ "info": "Dimensions of the embeddings to generate.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "dimension",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "int"
+ }
+ }
+ }
+ }
+ },
+ "functionality": "create"
+ },
+ "display_name": "Collection",
+ "dynamic": false,
+ "info": "The name of the collection within Astra DB where the vectors will be stored.",
+ "name": "collection_name",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "refresh_button": true,
+ "required": true,
+ "show": false,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "content_field": {
+ "_input_type": "StrInput",
+ "advanced": true,
+ "display_name": "Content Field",
+ "dynamic": false,
+ "info": "Field to use as the text content field for the vector store.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "content_field",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "database_name": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": true,
+ "dialog_inputs": {
+ "fields": {
+ "data": {
+ "node": {
+ "description": "Please allow several minutes for creation to complete.",
+ "display_name": "Create new database",
+ "field_order": [
+ "01_new_database_name",
+ "02_cloud_provider",
+ "03_region"
+ ],
+ "name": "create_database",
+ "template": {
+ "01_new_database_name": {
+ "_input_type": "StrInput",
+ "advanced": false,
+ "display_name": "Name",
+ "dynamic": false,
+ "info": "Name of the new database to create in Astra DB.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "new_database_name",
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "02_cloud_provider": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Cloud provider",
+ "dynamic": false,
+ "info": "Cloud provider for the new database.",
+ "name": "cloud_provider",
+ "options": [
+ "Amazon Web Services",
+ "Google Cloud Platform",
+ "Microsoft Azure"
+ ],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "03_region": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Region",
+ "dynamic": false,
+ "info": "Region for the new database.",
+ "name": "region",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ }
+ }
+ }
+ }
+ },
+ "functionality": "create"
+ },
+ "display_name": "Database",
+ "dynamic": false,
+ "info": "The Database name for the Astra DB instance.",
+ "name": "database_name",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "refresh_button": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "deletion_field": {
+ "_input_type": "StrInput",
+ "advanced": true,
+ "display_name": "Deletion Based On Field",
+ "dynamic": false,
+ "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "deletion_field",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "embedding_model": {
+ "_input_type": "HandleInput",
+ "advanced": false,
+ "display_name": "Embedding Model",
+ "dynamic": false,
+ "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
+ "input_types": [
+ "Embeddings"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "embedding_model",
+ "placeholder": "",
+ "required": false,
+ "show": false,
+ "title_case": false,
+ "trace_as_metadata": true,
+ "type": "other",
+ "value": ""
+ },
+ "environment": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": true,
+ "dialog_inputs": {},
+ "display_name": "Environment",
+ "dynamic": false,
+ "info": "The environment for the Astra DB API Endpoint.",
+ "name": "environment",
+ "options": [
+ "prod",
+ "test",
+ "dev"
+ ],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "prod"
+ },
+ "ignore_invalid_documents": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Ignore Invalid Documents",
+ "dynamic": false,
+ "info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "ignore_invalid_documents",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": false
+ },
+ "ingest_data": {
+ "_input_type": "HandleInput",
+ "advanced": false,
+ "display_name": "Ingest Data",
+ "dynamic": false,
+ "info": "",
+ "input_types": [
+ "Data",
+ "DataFrame"
+ ],
+ "list": true,
+ "list_add_label": "Add More",
+ "name": "ingest_data",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "trace_as_metadata": true,
+ "type": "other",
+ "value": ""
+ },
+ "keyspace": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Keyspace",
+ "dynamic": false,
+ "info": "Optional keyspace within Astra DB to use for the collection.",
+ "name": "keyspace",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "lexical_terms": {
+ "_input_type": "QueryInput",
+ "advanced": false,
+ "display_name": "Lexical Terms",
+ "dynamic": false,
+ "info": "Add additional terms/keywords to augment search precision.",
+ "input_types": [
+ "Message"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "lexical_terms",
+ "placeholder": "Enter terms to search...",
+ "required": false,
+ "separator": " ",
+ "show": false,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "query",
+ "value": ""
+ },
+ "number_of_results": {
+ "_input_type": "IntInput",
+ "advanced": true,
+ "display_name": "Number of Search Results",
+ "dynamic": false,
+ "info": "Number of search results to return.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "number_of_results",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "int",
+ "value": 4
+ },
+ "reranker": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Reranker",
+ "dynamic": false,
+ "info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
+ "name": "reranker",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": false,
+ "title_case": false,
+ "toggle": true,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "search_method": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Search Method",
+ "dynamic": false,
+ "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
+ "name": "search_method",
+ "options": [
+ "Hybrid Search",
+ "Vector Search"
+ ],
+ "options_metadata": [
+ {
+ "icon": "SearchHybrid"
+ },
+ {
+ "icon": "SearchVector"
+ }
+ ],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "Vector Search"
+ },
+ "search_query": {
+ "_input_type": "QueryInput",
+ "advanced": false,
+ "display_name": "Search Query",
+ "dynamic": false,
+ "info": "Enter a query to run a similarity search.",
+ "input_types": [
+ "Message"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "search_query",
+ "placeholder": "Enter a query...",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": true,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "query",
+ "value": ""
+ },
+ "search_score_threshold": {
+ "_input_type": "FloatInput",
+ "advanced": true,
+ "display_name": "Search Score Threshold",
+ "dynamic": false,
+ "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "search_score_threshold",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "float",
+ "value": 0
+ },
+ "search_type": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Search Type",
+ "dynamic": false,
+ "info": "Search type to use",
+ "name": "search_type",
+ "options": [
+ "Similarity",
+ "Similarity with score threshold",
+ "MMR (Max Marginal Relevance)"
+ ],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "Similarity"
+ },
+ "should_cache_vector_store": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Cache Vector Store",
+ "dynamic": false,
+ "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "should_cache_vector_store",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": true
+ },
+ "token": {
+ "_input_type": "SecretStrInput",
+ "advanced": false,
+ "display_name": "Astra DB Application Token",
+ "dynamic": false,
+ "info": "Authentication token for accessing Astra DB.",
+ "input_types": [],
+ "load_from_db": true,
+ "name": "token",
+ "password": true,
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "type": "str",
+ "value": "ASTRA_DB_APPLICATION_TOKEN"
+ }
+ },
+ "tool_mode": false
+ },
+ "selected_output": "search_results",
+ "showNode": true,
+ "type": "AstraDB"
+ },
+ "dragging": false,
+ "id": "AstraDB-t8lcj",
+ "measured": {
+ "height": 457,
+ "width": 320
+ },
+ "position": {
+ "x": 2095.3200255891484,
+ "y": 1501.719729125222
+ },
+ "selected": false,
+ "type": "genericNode"
+ },
+ {
+ "data": {
+ "id": "AstraDB-CLCyc",
+ "node": {
+ "base_classes": [
+ "Data",
+ "DataFrame",
+ "VectorStore"
+ ],
+ "beta": false,
+ "conditional_paths": [],
+ "custom_fields": {},
+ "description": "Ingest and search documents in Astra DB",
+ "display_name": "Astra DB",
+ "documentation": "https://docs.datastax.com/en/langflow/astra-components.html",
+ "edited": false,
+ "field_order": [
+ "token",
+ "environment",
+ "database_name",
+ "api_endpoint",
+ "keyspace",
+ "collection_name",
+ "embedding_model",
+ "ingest_data",
+ "search_query",
+ "should_cache_vector_store",
+ "search_method",
+ "reranker",
+ "lexical_terms",
+ "number_of_results",
+ "search_type",
+ "search_score_threshold",
+ "advanced_search_filter",
+ "autodetect_collection",
+ "content_field",
+ "deletion_field",
+ "ignore_invalid_documents",
+ "astradb_vectorstore_kwargs"
+ ],
+ "frozen": false,
+ "icon": "AstraDB",
+ "last_updated": "2025-08-27T14:19:33.967Z",
+ "legacy": false,
+ "metadata": {
+ "code_hash": "23fbe9daca09",
+ "dependencies": {
+ "dependencies": [
+ {
+ "name": "astrapy",
+ "version": "2.0.1"
+ },
+ {
+ "name": "langchain_astradb",
+ "version": "0.6.0"
+ },
+ {
+ "name": "langchain_core",
+ "version": "0.3.75"
+ },
+ {
+ "name": "langflow",
+ "version": null
+ }
+ ],
+ "total_dependencies": 4
+ },
+ "module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent"
+ },
+ "minimized": false,
+ "output_types": [],
+ "outputs": [
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "Search Results",
+ "group_outputs": false,
+ "method": "search_documents",
+ "name": "search_results",
+ "selected": "Data",
+ "tool_mode": true,
+ "types": [
+ "Data"
+ ],
+ "value": "__UNDEFINED__"
+ },
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "DataFrame",
+ "group_outputs": false,
+ "method": "as_dataframe",
+ "name": "dataframe",
+ "selected": "DataFrame",
+ "tool_mode": true,
+ "types": [
+ "DataFrame"
+ ],
+ "value": "__UNDEFINED__"
+ },
+ {
+ "allows_loop": false,
+ "cache": true,
+ "display_name": "Vector Store Connection",
+ "group_outputs": false,
+ "hidden": true,
+ "method": "as_vector_store",
+ "name": "vectorstoreconnection",
+ "selected": "VectorStore",
+ "tool_mode": true,
+ "types": [
+ "VectorStore"
+ ],
+ "value": "__UNDEFINED__"
+ }
+ ],
+ "pinned": false,
+ "template": {
+ "_type": "Component",
+ "advanced_search_filter": {
+ "_input_type": "NestedDictInput",
+ "advanced": true,
+ "display_name": "Search Metadata Filter",
+ "dynamic": false,
+ "info": "Optional dictionary of filters to apply to the search query.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "advanced_search_filter",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "NestedDict",
+ "value": {}
+ },
+ "api_endpoint": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Astra DB API Endpoint",
+ "dynamic": false,
+ "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.",
+ "name": "api_endpoint",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "astradb_vectorstore_kwargs": {
+ "_input_type": "NestedDictInput",
+ "advanced": true,
+ "display_name": "AstraDBVectorStore Parameters",
+ "dynamic": false,
+ "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "astradb_vectorstore_kwargs",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "NestedDict",
+ "value": {}
+ },
+ "autodetect_collection": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Autodetect Collection",
+ "dynamic": false,
+ "info": "Boolean flag to determine whether to autodetect the collection.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "autodetect_collection",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": true
+ },
+ "code": {
+ "advanced": true,
+ "dynamic": true,
+ "fileTypes": [],
+ "file_path": "",
+ "info": "",
+ "list": false,
+ "load_from_db": false,
+ "multiline": true,
+ "name": "code",
+ "password": false,
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "type": "code",
+ "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n"
+ },
+ "collection_name": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": true,
+ "dialog_inputs": {
+ "fields": {
+ "data": {
+ "node": {
+ "description": "Please allow several seconds for creation to complete.",
+ "display_name": "Create new collection",
+ "field_order": [
+ "01_new_collection_name",
+ "02_embedding_generation_provider",
+ "03_embedding_generation_model",
+ "04_dimension"
+ ],
+ "name": "create_collection",
+ "template": {
+ "01_new_collection_name": {
+ "_input_type": "StrInput",
+ "advanced": false,
+ "display_name": "Name",
+ "dynamic": false,
+ "info": "Name of the new collection to create in Astra DB.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "new_collection_name",
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "02_embedding_generation_provider": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Embedding generation method",
+ "dynamic": false,
+ "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB",
+ "info": "Provider to use for generating embeddings.",
+ "name": "embedding_generation_provider",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "03_embedding_generation_model": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Embedding model",
+ "dynamic": false,
+ "info": "Model to use for generating embeddings.",
+ "name": "embedding_generation_model",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "04_dimension": {
+ "_input_type": "IntInput",
+ "advanced": false,
+ "display_name": "Dimensions",
+ "dynamic": false,
+ "info": "Dimensions of the embeddings to generate.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "dimension",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "int"
+ }
+ }
+ }
+ }
+ },
+ "functionality": "create"
+ },
+ "display_name": "Collection",
+ "dynamic": false,
+ "info": "The name of the collection within Astra DB where the vectors will be stored.",
+ "name": "collection_name",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "refresh_button": true,
+ "required": true,
+ "show": false,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "content_field": {
+ "_input_type": "StrInput",
+ "advanced": true,
+ "display_name": "Content Field",
+ "dynamic": false,
+ "info": "Field to use as the text content field for the vector store.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "content_field",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "database_name": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": true,
+ "dialog_inputs": {
+ "fields": {
+ "data": {
+ "node": {
+ "description": "Please allow several minutes for creation to complete.",
+ "display_name": "Create new database",
+ "field_order": [
+ "01_new_database_name",
+ "02_cloud_provider",
+ "03_region"
+ ],
+ "name": "create_database",
+ "template": {
+ "01_new_database_name": {
+ "_input_type": "StrInput",
+ "advanced": false,
+ "display_name": "Name",
+ "dynamic": false,
+ "info": "Name of the new database to create in Astra DB.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "new_database_name",
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "02_cloud_provider": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Cloud provider",
+ "dynamic": false,
+ "info": "Cloud provider for the new database.",
+ "name": "cloud_provider",
+ "options": [
+ "Amazon Web Services",
+ "Google Cloud Platform",
+ "Microsoft Azure"
+ ],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "03_region": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Region",
+ "dynamic": false,
+ "info": "Region for the new database.",
+ "name": "region",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ }
+ }
+ }
+ }
+ },
+ "functionality": "create"
+ },
+ "display_name": "Database",
+ "dynamic": false,
+ "info": "The Database name for the Astra DB instance.",
+ "name": "database_name",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "refresh_button": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "deletion_field": {
+ "_input_type": "StrInput",
+ "advanced": true,
+ "display_name": "Deletion Based On Field",
+ "dynamic": false,
+ "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.",
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "deletion_field",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "embedding_model": {
+ "_input_type": "HandleInput",
+ "advanced": false,
+ "display_name": "Embedding Model",
+ "dynamic": false,
+ "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.",
+ "input_types": [
+ "Embeddings"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "embedding_model",
+ "placeholder": "",
+ "required": false,
+ "show": false,
+ "title_case": false,
+ "trace_as_metadata": true,
+ "type": "other",
+ "value": ""
+ },
+ "environment": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": true,
+ "dialog_inputs": {},
+ "display_name": "Environment",
+ "dynamic": false,
+ "info": "The environment for the Astra DB API Endpoint.",
+ "name": "environment",
+ "options": [
+ "prod",
+ "test",
+ "dev"
+ ],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "prod"
+ },
+ "ignore_invalid_documents": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Ignore Invalid Documents",
+ "dynamic": false,
+ "info": "Boolean flag to determine whether to ignore invalid documents at runtime.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "ignore_invalid_documents",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": false
+ },
+ "ingest_data": {
+ "_input_type": "HandleInput",
+ "advanced": false,
+ "display_name": "Ingest Data",
+ "dynamic": false,
+ "info": "",
+ "input_types": [
+ "Data",
+ "DataFrame"
+ ],
+ "list": true,
+ "list_add_label": "Add More",
+ "name": "ingest_data",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "trace_as_metadata": true,
+ "type": "other",
+ "value": ""
+ },
+ "keyspace": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Keyspace",
+ "dynamic": false,
+ "info": "Optional keyspace within Astra DB to use for the collection.",
+ "name": "keyspace",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "lexical_terms": {
+ "_input_type": "QueryInput",
+ "advanced": false,
+ "display_name": "Lexical Terms",
+ "dynamic": false,
+ "info": "Add additional terms/keywords to augment search precision.",
+ "input_types": [
+ "Message"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "lexical_terms",
+ "placeholder": "Enter terms to search...",
+ "required": false,
+ "separator": " ",
+ "show": false,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "query",
+ "value": ""
+ },
+ "number_of_results": {
+ "_input_type": "IntInput",
+ "advanced": true,
+ "display_name": "Number of Search Results",
+ "dynamic": false,
+ "info": "Number of search results to return.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "number_of_results",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "int",
+ "value": 4
+ },
+ "reranker": {
+ "_input_type": "DropdownInput",
+ "advanced": false,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Reranker",
+ "dynamic": false,
+ "info": "Post-retrieval model that re-scores results for optimal relevance ranking.",
+ "name": "reranker",
+ "options": [],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": false,
+ "title_case": false,
+ "toggle": true,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": ""
+ },
+ "search_method": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Search Method",
+ "dynamic": false,
+ "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.",
+ "name": "search_method",
+ "options": [
+ "Hybrid Search",
+ "Vector Search"
+ ],
+ "options_metadata": [
+ {
+ "icon": "SearchHybrid"
+ },
+ {
+ "icon": "SearchVector"
+ }
+ ],
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "Vector Search"
+ },
+ "search_query": {
+ "_input_type": "QueryInput",
+ "advanced": false,
+ "display_name": "Search Query",
+ "dynamic": false,
+ "info": "Enter a query to run a similarity search.",
+ "input_types": [
+ "Message"
+ ],
+ "list": false,
+ "list_add_label": "Add More",
+ "load_from_db": false,
+ "name": "search_query",
+ "placeholder": "Enter a query...",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": true,
+ "trace_as_input": true,
+ "trace_as_metadata": true,
+ "type": "query",
+ "value": ""
+ },
+ "search_score_threshold": {
+ "_input_type": "FloatInput",
+ "advanced": true,
+ "display_name": "Search Score Threshold",
+ "dynamic": false,
+ "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "search_score_threshold",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "float",
+ "value": 0
+ },
+ "search_type": {
+ "_input_type": "DropdownInput",
+ "advanced": true,
+ "combobox": false,
+ "dialog_inputs": {},
+ "display_name": "Search Type",
+ "dynamic": false,
+ "info": "Search type to use",
+ "name": "search_type",
+ "options": [
+ "Similarity",
+ "Similarity with score threshold",
+ "MMR (Max Marginal Relevance)"
+ ],
+ "options_metadata": [],
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "toggle": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "str",
+ "value": "Similarity"
+ },
+ "should_cache_vector_store": {
+ "_input_type": "BoolInput",
+ "advanced": true,
+ "display_name": "Cache Vector Store",
+ "dynamic": false,
+ "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
+ "list": false,
+ "list_add_label": "Add More",
+ "name": "should_cache_vector_store",
+ "placeholder": "",
+ "required": false,
+ "show": true,
+ "title_case": false,
+ "tool_mode": false,
+ "trace_as_metadata": true,
+ "type": "bool",
+ "value": true
+ },
+ "token": {
+ "_input_type": "SecretStrInput",
+ "advanced": false,
+ "display_name": "Astra DB Application Token",
+ "dynamic": false,
+ "info": "Authentication token for accessing Astra DB.",
+ "input_types": [],
+ "load_from_db": true,
+ "name": "token",
+ "password": true,
+ "placeholder": "",
+ "real_time_refresh": true,
+ "required": true,
+ "show": true,
+ "title_case": false,
+ "type": "str",
+ "value": "ASTRA_DB_APPLICATION_TOKEN"
+ }
+ },
+ "tool_mode": false
+ },
+ "selected_output": "dataframe",
+ "showNode": true,
+ "type": "AstraDB"
+ },
+ "dragging": false,
+ "id": "AstraDB-CLCyc",
+ "measured": {
+ "height": 457,
+ "width": 320
+ },
+ "position": {
+ "x": 1210.9603958451855,
+ "y": 519.26557834351
+ },
+ "selected": false,
+ "type": "genericNode"
}
],
"viewport": {
- "x": -22.84629031494228,
- "y": -151.44728538879235,
- "zoom": 0.45963552948592706
+ "x": 100.76434400868948,
+ "y": -143.93713359276438,
+ "zoom": 0.43125741996443906
}
},
"description": "Load your data for chat context with Retrieval Augmented Generation.",
"endpoint_name": null,
- "id": "b65cafc6-9f8e-4137-ad2b-4c663822f512",
+ "id": "e4776e4e-d3cf-42f3-9160-981f4b18325a",
"is_component": false,
- "last_tested_version": "1.4.3",
+ "last_tested_version": "1.6.0",
"name": "Vector Store RAG",
"tags": [
"openai",
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Youtube Analysis.json b/src/backend/base/langflow/initial_setup/starter_projects/Youtube Analysis.json
index 8d7d69735..2e4a5bc43 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Youtube Analysis.json
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Youtube Analysis.json
@@ -905,7 +905,7 @@
"show": true,
"title_case": false,
"type": "code",
- "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
+ "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import ToolCallingAgentComponent\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output, TableInput\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"Groq\", \"OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST, \"Custom\"],\n value=\"OpenAI\",\n real_time_refresh=True,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{\"icon\": \"brain\"}],\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n Output(name=\"structured_response\", display_name=\"Structured Response\", method=\"json_response\", tool_mode=False),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\"true\", \"1\", \"t\", \"y\", \"yes\"]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (ExceptionWithMessageError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (ExceptionWithMessageError, ValueError, TypeError, NotImplementedError, AttributeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(session_id=self.graph.session_id, order=\"Ascending\", n_messages=self.n_messages)\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Iterate over all providers in the MODEL_PROVIDERS_DICT\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n if field_value == \"OpenAI\" and not any(field in build_config for field in fields_to_add):\n build_config.update(fields_to_add)\n else:\n build_config.update(fields_to_add)\n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n elif field_value == \"Custom\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n options=[*sorted(MODEL_PROVIDERS), \"Custom\"],\n value=\"Custom\",\n real_time_refresh=True,\n input_types=[\"LanguageModel\"],\n options_metadata=[MODELS_METADATA[key] for key in sorted(MODELS_METADATA.keys())]\n + [{\"icon\": \"brain\"}],\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\", tool_description=description, callbacks=self.get_langchain_callbacks()\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n"
},
"handle_parsing_errors": {
"_input_type": "BoolInput",
diff --git a/src/backend/base/pyproject.toml b/src/backend/base/pyproject.toml
index 11d6e8999..6854703de 100644
--- a/src/backend/base/pyproject.toml
+++ b/src/backend/base/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "langflow-base"
-version = "0.5.0.post2"
+version = "0.6.0"
description = "A Python package with a built-in web application"
requires-python = ">=3.10,<3.14"
license = "MIT"
diff --git a/src/backend/tests/unit/base/data/test_kb_utils.py b/src/backend/tests/unit/base/data/test_kb_utils.py
index 0d6b3441e..806a161fe 100644
--- a/src/backend/tests/unit/base/data/test_kb_utils.py
+++ b/src/backend/tests/unit/base/data/test_kb_utils.py
@@ -1,5 +1,5 @@
import pytest
-from langflow.base.data.kb_utils import compute_bm25, compute_tfidf
+from langflow.base.knowledge_bases.knowledge_base_utils import compute_bm25, compute_tfidf
class TestKBUtils:
diff --git a/src/backend/tests/unit/base/mcp/test_mcp_util.py b/src/backend/tests/unit/base/mcp/test_mcp_util.py
index 3e81a0978..074103b75 100644
--- a/src/backend/tests/unit/base/mcp/test_mcp_util.py
+++ b/src/backend/tests/unit/base/mcp/test_mcp_util.py
@@ -594,6 +594,11 @@ class TestMCPSseClientWithDeepWikiServer:
# Test valid URL
valid_url = "https://mcp.deepwiki.com/sse"
is_valid, error = await sse_client.validate_url(valid_url)
+ # Either valid or accessible, or rate-limited (429) which indicates server is reachable
+ if not is_valid and "429" in error:
+ # Rate limiting indicates the server is accessible but limiting requests
+ # This is a transient network issue, not a test failure
+ pytest.skip(f"DeepWiki server is rate limiting requests: {error}")
assert is_valid or error == "" # Either valid or accessible
# Test invalid URL
diff --git a/src/backend/tests/unit/components/knowledge_bases/__init__.py b/src/backend/tests/unit/components/knowledge_bases/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/backend/tests/unit/components/data/test_kb_ingest.py b/src/backend/tests/unit/components/knowledge_bases/test_ingestion.py
similarity index 92%
rename from src/backend/tests/unit/components/data/test_kb_ingest.py
rename to src/backend/tests/unit/components/knowledge_bases/test_ingestion.py
index ad9c2afe3..c97e4a764 100644
--- a/src/backend/tests/unit/components/data/test_kb_ingest.py
+++ b/src/backend/tests/unit/components/knowledge_bases/test_ingestion.py
@@ -2,25 +2,25 @@ import json
import uuid
from unittest.mock import AsyncMock, MagicMock, patch
-import pandas as pd
import pytest
-from langflow.base.data.kb_utils import get_knowledge_bases
-from langflow.components.data.kb_ingest import KBIngestionComponent
+from langflow.base.knowledge_bases.knowledge_base_utils import get_knowledge_bases
+from langflow.components.knowledge_bases.ingestion import KnowledgeIngestionComponent
from langflow.schema.data import Data
+from langflow.schema.dataframe import DataFrame
from tests.base import ComponentTestBaseWithoutClient
-class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
+class TestKnowledgeIngestionComponent(ComponentTestBaseWithoutClient):
@pytest.fixture
def component_class(self):
"""Return the component class to test."""
- return KBIngestionComponent
+ return KnowledgeIngestionComponent
@pytest.fixture(autouse=True)
def mock_knowledge_base_path(self, tmp_path):
"""Mock the knowledge base root path directly."""
- with patch("langflow.components.data.kb_ingest.KNOWLEDGE_BASES_ROOT_PATH", tmp_path):
+ with patch("langflow.components.knowledge_bases.ingestion.KNOWLEDGE_BASES_ROOT_PATH", tmp_path):
yield
class MockUser:
@@ -39,14 +39,14 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
def setup_mocks(self, mock_user_data):
"""Mock the component's user_id attribute and User object."""
with (
- patch.object(KBIngestionComponent, "user_id", mock_user_data["user_id"]),
+ patch.object(KnowledgeIngestionComponent, "user_id", mock_user_data["user_id"]),
patch(
- "langflow.components.data.kb_ingest.get_user_by_id",
+ "langflow.components.knowledge_bases.ingestion.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user_data["user_obj"],
),
patch(
- "langflow.base.data.kb_utils.get_user_by_id",
+ "langflow.base.knowledge_bases.knowledge_base_utils.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user_data["user_obj"],
),
@@ -62,7 +62,7 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
def default_kwargs(self, tmp_path, mock_user_id):
"""Return default kwargs for component instantiation."""
# Create a sample DataFrame
- data_df = pd.DataFrame(
+ data_df = DataFrame(
{"text": ["Sample text 1", "Sample text 2"], "title": ["Title 1", "Title 2"], "category": ["cat1", "cat2"]}
)
@@ -209,8 +209,8 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
with pytest.raises(NotImplementedError, match="Custom embedding models not yet supported"):
component._build_embeddings("custom-model", "test-key")
- @patch("langflow.components.data.kb_ingest.get_settings_service")
- @patch("langflow.components.data.kb_ingest.encrypt_api_key")
+ @patch("langflow.components.knowledge_bases.ingestion.get_settings_service")
+ @patch("langflow.components.knowledge_bases.ingestion.encrypt_api_key")
def test_build_embedding_metadata(self, mock_encrypt, mock_get_settings, component_class, default_kwargs):
"""Test building embedding metadata."""
component = component_class(**default_kwargs)
@@ -250,7 +250,7 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
config_list = default_kwargs["column_config"]
# Mock Chroma to avoid actual vector store operations
- with patch("langflow.components.data.kb_ingest.Chroma") as mock_chroma:
+ with patch("langflow.components.knowledge_bases.ingestion.Chroma") as mock_chroma:
mock_chroma_instance = MagicMock()
mock_chroma_instance.get.return_value = {"metadatas": []}
mock_chroma.return_value = mock_chroma_instance
@@ -275,7 +275,7 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
config_list = default_kwargs["column_config"]
# Mock Chroma with existing hash
- with patch("langflow.components.data.kb_ingest.Chroma") as mock_chroma:
+ with patch("langflow.components.knowledge_bases.ingestion.Chroma") as mock_chroma:
# Simulate existing document with same hash
existing_hash = "some_existing_hash"
mock_chroma_instance = MagicMock()
@@ -283,7 +283,7 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
mock_chroma.return_value = mock_chroma_instance
# Mock hashlib to return the existing hash for first row
- with patch("langflow.components.data.kb_ingest.hashlib.sha256") as mock_hash:
+ with patch("langflow.components.knowledge_bases.ingestion.hashlib.sha256") as mock_hash:
mock_hash_obj = MagicMock()
mock_hash_obj.hexdigest.side_effect = [existing_hash, "different_hash"]
mock_hash.return_value = mock_hash_obj
@@ -309,8 +309,8 @@ class TestKBIngestionComponent(ComponentTestBaseWithoutClient):
assert component.is_valid_collection_name("invalid_") is False # Ends with underscore
assert component.is_valid_collection_name("invalid@name") is False # Invalid character
- @patch("langflow.components.data.kb_ingest.json.loads")
- @patch("langflow.components.data.kb_ingest.decrypt_api_key")
+ @patch("langflow.components.knowledge_bases.ingestion.json.loads")
+ @patch("langflow.components.knowledge_bases.ingestion.decrypt_api_key")
async def test_build_kb_info_success(self, mock_decrypt, mock_json_loads, component_class, default_kwargs):
"""Test successful KB info building."""
component = component_class(**default_kwargs)
diff --git a/src/backend/tests/unit/components/data/test_kb_retrieval.py b/src/backend/tests/unit/components/knowledge_bases/test_retrieval.py
similarity index 90%
rename from src/backend/tests/unit/components/data/test_kb_retrieval.py
rename to src/backend/tests/unit/components/knowledge_bases/test_retrieval.py
index d40d2070a..1f46facdd 100644
--- a/src/backend/tests/unit/components/data/test_kb_retrieval.py
+++ b/src/backend/tests/unit/components/knowledge_bases/test_retrieval.py
@@ -5,23 +5,23 @@ from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
-from langflow.base.data.kb_utils import get_knowledge_bases
-from langflow.components.data.kb_retrieval import KBRetrievalComponent
+from langflow.base.knowledge_bases.knowledge_base_utils import get_knowledge_bases
+from langflow.components.knowledge_bases.retrieval import KnowledgeRetrievalComponent
from pydantic import SecretStr
from tests.base import ComponentTestBaseWithoutClient
-class TestKBRetrievalComponent(ComponentTestBaseWithoutClient):
+class TestKnowledgeRetrievalComponent(ComponentTestBaseWithoutClient):
@pytest.fixture
def component_class(self):
"""Return the component class to test."""
- return KBRetrievalComponent
+ return KnowledgeRetrievalComponent
@pytest.fixture(autouse=True)
def mock_knowledge_base_path(self, tmp_path):
"""Mock the knowledge base root path directly."""
- with patch("langflow.components.data.kb_retrieval.KNOWLEDGE_BASES_ROOT_PATH", tmp_path):
+ with patch("langflow.components.knowledge_bases.retrieval.KNOWLEDGE_BASES_ROOT_PATH", tmp_path):
yield
class MockUser:
@@ -40,14 +40,14 @@ class TestKBRetrievalComponent(ComponentTestBaseWithoutClient):
def setup_mocks(self, mock_user_data):
"""Mock the component's user_id attribute and User object."""
with (
- patch.object(KBRetrievalComponent, "user_id", mock_user_data["user_id"]),
+ patch.object(KnowledgeRetrievalComponent, "user_id", mock_user_data["user_id"]),
patch(
- "langflow.components.data.kb_retrieval.get_user_by_id",
+ "langflow.components.knowledge_bases.retrieval.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user_data["user_obj"],
),
patch(
- "langflow.base.data.kb_utils.get_user_by_id",
+ "langflow.base.knowledge_bases.knowledge_base_utils.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user_data["user_obj"],
),
@@ -138,7 +138,7 @@ class TestKBRetrievalComponent(ComponentTestBaseWithoutClient):
component = component_class(**default_kwargs)
kb_path = Path(default_kwargs["kb_root_path"]) / mock_user_id["user"] / default_kwargs["knowledge_base"]
- with patch("langflow.components.data.kb_retrieval.decrypt_api_key") as mock_decrypt:
+ with patch("langflow.components.knowledge_bases.retrieval.decrypt_api_key") as mock_decrypt:
mock_decrypt.return_value = "decrypted_key"
metadata = component._get_kb_metadata(kb_path)
@@ -185,7 +185,7 @@ class TestKBRetrievalComponent(ComponentTestBaseWithoutClient):
}
(kb_path / "embedding_metadata.json").write_text(json.dumps(metadata))
- with patch("langflow.components.data.kb_retrieval.decrypt_api_key") as mock_decrypt:
+ with patch("langflow.components.knowledge_bases.retrieval.decrypt_api_key") as mock_decrypt:
mock_decrypt.side_effect = ValueError("Decryption failed")
result = component._get_kb_metadata(kb_path)
@@ -327,7 +327,7 @@ class TestKBRetrievalComponent(ComponentTestBaseWithoutClient):
chunk_size=1000,
)
- async def test_get_chroma_kb_data_no_metadata(self, component_class, default_kwargs, tmp_path, mock_user_id):
+ async def test_retrieve_data_no_metadata(self, component_class, default_kwargs, tmp_path, mock_user_id):
"""Test retrieving data when metadata is missing."""
# Remove metadata file
kb_path = tmp_path / mock_user_id["user"] / default_kwargs["knowledge_base"]
@@ -338,10 +338,10 @@ class TestKBRetrievalComponent(ComponentTestBaseWithoutClient):
component = component_class(**default_kwargs)
with pytest.raises(ValueError, match="Metadata not found for knowledge base"):
- await component.get_chroma_kb_data()
+ await component.retrieve_data()
- def test_get_chroma_kb_data_path_construction(self, component_class, default_kwargs):
- """Test that get_chroma_kb_data constructs the correct paths."""
+ def test_retrieve_data_path_construction(self, component_class, default_kwargs):
+ """Test that retrieve_data constructs the correct paths."""
component = component_class(**default_kwargs)
# Test that the component correctly builds the KB path
@@ -354,17 +354,17 @@ class TestKBRetrievalComponent(ComponentTestBaseWithoutClient):
assert expanded_path.exists() # tmp_path should exist
# Verify method exists with correct parameters
- assert hasattr(component, "get_chroma_kb_data")
+ assert hasattr(component, "retrieve_data")
assert hasattr(component, "search_query")
assert hasattr(component, "top_k")
assert hasattr(component, "include_embeddings")
- async def test_get_chroma_kb_data_method_exists(self, component_class, default_kwargs):
- """Test that get_chroma_kb_data method exists and can be called."""
+ async def test_retrieve_data_method_exists(self, component_class, default_kwargs):
+ """Test that retrieve_data method exists and can be called."""
component = component_class(**default_kwargs)
# Just verify the method exists and has the right signature
- assert hasattr(component, "get_chroma_kb_data"), "Component should have get_chroma_kb_data method"
+ assert hasattr(component, "retrieve_data"), "Component should have retrieve_data method"
# Mock all external calls to avoid integration issues
with (
@@ -377,7 +377,7 @@ class TestKBRetrievalComponent(ComponentTestBaseWithoutClient):
# This is a unit test focused on the component's internal logic
with contextlib.suppress(Exception):
- await component.get_chroma_kb_data()
+ await component.retrieve_data()
# Verify internal methods were called
mock_get_metadata.assert_called_once()
diff --git a/src/frontend/src/utils/styleUtils.ts b/src/frontend/src/utils/styleUtils.ts
index 02c80e5cf..20add9b96 100644
--- a/src/frontend/src/utils/styleUtils.ts
+++ b/src/frontend/src/utils/styleUtils.ts
@@ -211,6 +211,11 @@ export const SIDEBAR_CATEGORIES = [
{ display_name: "Agents", name: "agents", icon: "Bot" },
{ display_name: "Models", name: "models", icon: "BrainCog" },
{ display_name: "Data", name: "data", icon: "Database" },
+ {
+ display_name: "Knowledge Bases",
+ name: "knowledge_bases",
+ icon: "Library",
+ },
{ display_name: "Vector Stores", name: "vectorstores", icon: "Layers" },
{ display_name: "Processing", name: "processing", icon: "ListFilter" },
{ display_name: "Logic", name: "logic", icon: "ArrowRightLeft" },
diff --git a/src/frontend/tests/core/integrations/starter-projects.spec.ts b/src/frontend/tests/core/integrations/starter-projects.spec.ts
index eea3ef605..c768b4b3a 100644
--- a/src/frontend/tests/core/integrations/starter-projects.spec.ts
+++ b/src/frontend/tests/core/integrations/starter-projects.spec.ts
@@ -97,7 +97,9 @@ test(
const nodesFromServer = astraStarterProject?.data.nodes.length;
expect(
- edges === edgesFromServer || edges === edgesFromServer - 1,
+ edges === edgesFromServer ||
+ edges === edgesFromServer - 1 ||
+ edges === edgesFromServer - 2,
).toBeTruthy();
expect(nodes).toBe(nodesFromServer);
},
diff --git a/uv.lock b/uv.lock
index ba8fe500b..f00cda8ba 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4975,7 +4975,7 @@ wheels = [
[[package]]
name = "langflow"
-version = "1.5.0.post2"
+version = "1.6.0"
source = { editable = "." }
dependencies = [
{ name = "aiofile" },
@@ -5360,7 +5360,7 @@ dev = [
[[package]]
name = "langflow-base"
-version = "0.5.0.post2"
+version = "0.6.0"
source = { editable = "src/backend/base" }
dependencies = [
{ name = "aiofile" },