feat: Add dual output support to Agent component with structured JSON parsing (#8836)

* feat: Add dual output support to Agent component with structured JSON parsing ## Summary - Add "Structured Response" output alongside existing "Response" output - Filter out conflicting json_mode field from OpenAI inputs - Implement robust JSON parsing with fallback handling ## Changes Made ### Agent Component (agent.py) - Add second output: "Structured Response" (Data type) with tool_mode=False - Filter json_mode from OpenAI inputs to prevent UI conflicts - Add json_response() method with multi-stage JSON parsing: - Direct JSON parsing for valid responses - Regex extraction for embedded JSON in text - Graceful error handling with diagnostic info - Share execution between outputs (no duplicate agent runs) - Fix model building to handle missing json_mode attribute ### Tests (test_agent_component.py) - Add 9 comprehensive test cases covering: - Dual output structure validation - Input filtering verification - JSON parsing (valid, embedded, error cases) - Model building without json_mode - Shared execution efficiency - Frontend node structure - Component initialization ## Benefits - Users get both Message and Data output types to choose from - Clean UI without confusing duplicate JSON toggles - Robust JSON parsing handles various response formats - Efficient single-execution approach - Maintains backward compatibility 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * [autofix.ci] apply automated fixes * update to templates with model list update * [autofix.ci] apply automated fixes * Update test_agent_component.py * update to the test and update to templates * [autofix.ci] apply automated fixes --------- Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Edwin Jose <edwin.jose@datastax.com>
2025-07-21 13:33:30 -03:00 · 2025-07-21 13:33:30 -03:00 · 3bb22b29cc
commit 3bb22b29cc
parent 400ff562c4
18 changed files with 400 additions and 251 deletions
--- a/src/backend/base/langflow/base/models/openai_constants.py
+++ b/src/backend/base/langflow/base/models/openai_constants.py
@ -8,7 +8,9 @@ OPENAI_MODELS_DETAILED = [
    create_model_metadata(provider="OpenAI", name="gpt-4.1", icon="OpenAI", tool_calling=True),
    create_model_metadata(provider="OpenAI", name="gpt-4.1-mini", icon="OpenAI", tool_calling=True),
    create_model_metadata(provider="OpenAI", name="gpt-4.1-nano", icon="OpenAI", tool_calling=True),
-    create_model_metadata(provider="OpenAI", name="gpt-4.5-preview", icon="OpenAI", tool_calling=True, preview=True),
+    create_model_metadata(
+        provider="OpenAI", name="gpt-4.5-preview", icon="OpenAI", tool_calling=True, preview=True, not_supported=True
+    ),
    create_model_metadata(provider="OpenAI", name="gpt-4-turbo", icon="OpenAI", tool_calling=True),
    create_model_metadata(
        provider="OpenAI", name="gpt-4-turbo-preview", icon="OpenAI", tool_calling=True, preview=True
@ -17,8 +19,8 @@ OPENAI_MODELS_DETAILED = [
    create_model_metadata(provider="OpenAI", name="gpt-3.5-turbo", icon="OpenAI", tool_calling=True),
    # Reasoning Models
    create_model_metadata(provider="OpenAI", name="o1", icon="OpenAI", reasoning=True),
-    create_model_metadata(provider="OpenAI", name="o1-mini", icon="OpenAI", reasoning=True),
-    create_model_metadata(provider="OpenAI", name="o1-pro", icon="OpenAI", reasoning=True),
+    create_model_metadata(provider="OpenAI", name="o1-mini", icon="OpenAI", reasoning=True, not_supported=True),
+    create_model_metadata(provider="OpenAI", name="o1-pro", icon="OpenAI", reasoning=True, not_supported=True),
    create_model_metadata(provider="OpenAI", name="o3-mini", icon="OpenAI", reasoning=True),
    create_model_metadata(provider="OpenAI", name="o3", icon="OpenAI", reasoning=True),
    create_model_metadata(provider="OpenAI", name="o3-pro", icon="OpenAI", reasoning=True),
--- a/src/backend/base/langflow/components/agents/agent.py
+++ b/src/backend/base/langflow/components/agents/agent.py
@ -1,3 +1,6 @@
+import json
+import re
+
 from langchain_core.tools import StructuredTool

 from langflow.base.agents.agent import LCToolsAgentComponent
@ -18,6 +21,7 @@ from langflow.custom.utils import update_component_build_config
 from langflow.field_typing import Tool
 from langflow.io import BoolInput, DropdownInput, IntInput, MultilineInput, Output
 from langflow.logging import logger
+from langflow.schema.data import Data
 from langflow.schema.dotdict import dotdict
 from langflow.schema.message import Message

@ -40,6 +44,13 @@ class AgentComponent(ToolCallingAgentComponent):

    memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]

+    # The agent exposes structured output through its own dedicated output, so the
+    # OpenAI provider's json_mode input is dropped from the inherited input list.
+    openai_inputs_filtered = [
+        provider_input
+        for provider_input in MODEL_PROVIDERS_DICT["OpenAI"]["inputs"]
+        if getattr(provider_input, "name", None) != "json_mode"
+    ]
+
    inputs = [
        DropdownInput(
            name="agent_llm",
@ -51,7 +62,7 @@ class AgentComponent(ToolCallingAgentComponent):
            input_types=[],
            options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST] + [{"icon": "brain"}],
        ),
-        *MODEL_PROVIDERS_DICT["OpenAI"]["inputs"],
+        *openai_inputs_filtered,
        MultilineInput(
            name="system_prompt",
            display_name="Agent Instructions",
@ -78,7 +89,10 @@ class AgentComponent(ToolCallingAgentComponent):
            value=True,
        ),
    ]
-    outputs = [Output(name="response", display_name="Response", method="message_response")]
+    # Two outputs: "response" is produced by message_response, and
+    # "structured_response" is produced by json_response (declared with tool_mode=False).
+    outputs = [
+        Output(name="response", display_name="Response", method="message_response"),
+        Output(name="structured_response", display_name="Structured Response", method="json_response", tool_mode=False),
+    ]

    async def message_response(self) -> Message:
        try:
@ -114,7 +128,11 @@ class AgentComponent(ToolCallingAgentComponent):
                system_prompt=self.system_prompt,
            )
            agent = self.create_agent_runnable()
-            return await self.run_agent(agent)
+            result = await self.run_agent(agent)
+
+            # Store result for potential JSON output
+            self._agent_result = result
+            # return result

        except (ValueError, TypeError, KeyError) as e:
            logger.error(f"{type(e).__name__}: {e!s}")
@ -125,6 +143,41 @@ class AgentComponent(ToolCallingAgentComponent):
        except Exception as e:
            logger.error(f"Unexpected error: {e!s}")
            raise
+        else:
+            return result
+
+    async def json_response(self) -> Data:
+        """Return the agent's reply as structured ``Data``.
+
+        The agent is run through ``message_response`` only when no result has been
+        stored on the component yet, so both outputs can share one execution.
+
+        Parsing is attempted in stages:
+
+        1. The whole reply is parsed as JSON. An object is returned as-is; any other
+           JSON value (list, number, string, ...) is wrapped as ``{"result": value}``.
+        2. Otherwise the first decodable JSON object embedded in the text is returned.
+        3. Otherwise ``{"content": <raw text>, "error": "Could not parse as JSON"}``.
+
+        Returns:
+            Data: The parsed object, the wrapped value, or the error payload.
+        """
+        # NOTE(review): the stored result is never cleared in this method - confirm that
+        # a component instance is not reused across runs with a stale `_agent_result`.
+        if hasattr(self, "_agent_result"):
+            result = self._agent_result
+        else:
+            returned = await self.message_response()
+            # Fall back to the return value in case an overridden/mocked
+            # message_response did not store the result on the component.
+            result = getattr(self, "_agent_result", returned)
+
+        # Extract the textual payload from the result
+        if hasattr(result, "content"):
+            content = result.content
+        elif hasattr(result, "text"):
+            content = result.text
+        else:
+            content = str(result)
+
+        # json.loads raises TypeError (not JSONDecodeError) for non-text input,
+        # so normalise to str before any parsing is attempted.
+        if not isinstance(content, str):
+            content = str(content)
+
+        try:
+            parsed = json.loads(content)
+        except json.JSONDecodeError:
+            # Not pure JSON: scan for the first position where a complete JSON object
+            # can be decoded. Unlike a greedy "{.*}" match this tolerates stray braces
+            # and trailing text after the object.
+            decoder = json.JSONDecoder()
+            start = content.find("{")
+            while start != -1:
+                try:
+                    embedded, _ = decoder.raw_decode(content, start)
+                except json.JSONDecodeError:
+                    start = content.find("{", start + 1)
+                else:
+                    return Data(data=embedded)
+
+            # Nothing decodable: hand back the raw content with a diagnostic
+            return Data(data={"content": content, "error": "Could not parse as JSON"})
+
+        if isinstance(parsed, dict):
+            return Data(data=parsed)
+
+        # Valid JSON that is not an object. Every other path here builds Data from a
+        # dict, so wrap the value to keep the payload a mapping.
+        # NOTE(review): presumably Data rejects non-dict payloads - confirm.
+        return Data(data={"result": parsed})

    async def get_memory_data(self):
        # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.
@ -171,7 +224,11 @@ class AgentComponent(ToolCallingAgentComponent):
        if provider_info:
            inputs = provider_info.get("inputs")
            prefix = provider_info.get("prefix")
-            model_kwargs = {input_.name: getattr(self, f"{prefix}{input_.name}") for input_ in inputs}
+            # Filter out json_mode and only use attributes that exist on this component
+            model_kwargs = {}
+            for input_ in inputs:
+                if hasattr(self, f"{prefix}{input_.name}"):
+                    model_kwargs[input_.name] = getattr(self, f"{prefix}{input_.name}")

            return component.set(**model_kwargs)
        return component
--- a/src/backend/base/langflow/initial_setup/starter_projects/Instagram
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Instagram
--- a/src/backend/base/langflow/initial_setup/starter_projects/Invoice
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Invoice
--- a/src/backend/base/langflow/initial_setup/starter_projects/Market
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Market
--- a/src/backend/base/langflow/initial_setup/starter_projects/News
+++ b/src/backend/base/langflow/initial_setup/starter_projects/News
--- a/src/backend/base/langflow/initial_setup/starter_projects/Nvidia
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Nvidia
--- a/src/backend/base/langflow/initial_setup/starter_projects/Pokédex
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Pokédex
--- a/src/backend/base/langflow/initial_setup/starter_projects/Price
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Price
--- a/src/backend/base/langflow/initial_setup/starter_projects/Research
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Research
--- a/src/backend/base/langflow/initial_setup/starter_projects/SaaS
+++ b/src/backend/base/langflow/initial_setup/starter_projects/SaaS
--- a/src/backend/base/langflow/initial_setup/starter_projects/Search
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Search
--- a/src/backend/base/langflow/initial_setup/starter_projects/Sequential
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Sequential
--- a/src/backend/base/langflow/initial_setup/starter_projects/Simple
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Simple
--- a/src/backend/base/langflow/initial_setup/starter_projects/Social
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Social
--- a/src/backend/base/langflow/initial_setup/starter_projects/Travel
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Travel
--- a/src/backend/base/langflow/initial_setup/starter_projects/Youtube
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Youtube
--- a/src/backend/tests/unit/components/agents/test_agent_component.py
+++ b/src/backend/tests/unit/components/agents/test_agent_component.py
@ -1,6 +1,5 @@
 import os
 from typing import Any
-from unittest.mock import AsyncMock, patch
 from uuid import uuid4

 import pytest
@ -15,7 +14,6 @@ from langflow.base.models.openai_constants import (
 from langflow.components.agents.agent import AgentComponent
 from langflow.components.tools.calculator import CalculatorToolComponent
 from langflow.custom import Component
-from langflow.services.database.session import NoopSession

 from tests.base import ComponentTestBaseWithClient, ComponentTestBaseWithoutClient
 from tests.unit.mock_language_model import MockLanguageModel
@ -23,7 +21,7 @@ from tests.unit.mock_language_model import MockLanguageModel
 # Load environment variables from .env file


-class TestAgentComponentWithoutClient(ComponentTestBaseWithoutClient):
+class TestAgentComponent(ComponentTestBaseWithoutClient):
    @pytest.fixture
    def component_class(self):
        return AgentComponent
@ -101,6 +99,157 @@ class TestAgentComponentWithoutClient(ComponentTestBaseWithoutClient):
        # Verify model_name field is cleared for Custom
        assert "model_name" not in updated_config

+    async def test_agent_has_dual_outputs(self, component_class, default_kwargs):
+        """Check that the Agent exposes exactly the Response and Structured Response outputs."""
+        component = await self.component_setup(component_class, default_kwargs)
+
+        assert len(component.outputs) == 2
+
+        response_output = component.outputs[0]
+        assert (response_output.name, response_output.display_name, response_output.method) == (
+            "response",
+            "Response",
+            "message_response",
+        )
+
+        structured_output = component.outputs[1]
+        assert (structured_output.name, structured_output.display_name, structured_output.method) == (
+            "structured_response",
+            "Structured Response",
+            "json_response",
+        )
+        assert structured_output.tool_mode is False
+
+    async def test_json_mode_filtered_from_openai_inputs(self, component_class, default_kwargs):
+        """Check that json_mode is absent from the agent's inputs while other OpenAI inputs remain."""
+        component = await self.component_setup(component_class, default_kwargs)
+        input_names = [field.name for field in component.inputs if hasattr(field, "name")]
+
+        # The filtered field must be gone ...
+        assert "json_mode" not in input_names
+
+        # ... while the remaining OpenAI inputs are untouched.
+        for expected_name in ("model_name", "api_key", "temperature"):
+            assert expected_name in input_names
+
+    async def test_json_response_parsing_valid_json(self, component_class, default_kwargs):
+        """Check that a reply consisting solely of a JSON object is parsed into Data."""
+        from langflow.schema.data import Data
+
+        component = await self.component_setup(component_class, default_kwargs)
+
+        # Pre-seed the stored agent result with a stand-in whose content is valid JSON.
+        fake_result_cls = type("MockResult", (), {"content": '{"name": "test", "value": 123}'})
+        component._agent_result = fake_result_cls()
+
+        parsed = await component.json_response()
+
+        assert isinstance(parsed, Data)
+        assert parsed.data == {"name": "test", "value": 123}
+
+    async def test_json_response_parsing_embedded_json(self, component_class, default_kwargs):
+        """Check that a JSON object surrounded by free text is extracted and parsed."""
+        from langflow.schema.data import Data
+
+        component = await self.component_setup(component_class, default_kwargs)
+
+        # Pre-seed the stored agent result with prose that wraps a JSON object.
+        fake_result_cls = type("MockResult", (), {"content": 'Here is the result: {"status": "success"} - done!'})
+        component._agent_result = fake_result_cls()
+
+        parsed = await component.json_response()
+
+        assert isinstance(parsed, Data)
+        assert parsed.data == {"status": "success"}
+
+    async def test_json_response_error_handling(self, component_class, default_kwargs):
+        """Check that a reply without any JSON yields the error payload carrying the raw content."""
+        from langflow.schema.data import Data
+
+        plain_text = "This is just plain text with no JSON"
+        component = await self.component_setup(component_class, default_kwargs)
+
+        # Pre-seed the stored agent result with text that contains no JSON at all.
+        fake_result_cls = type("MockResult", (), {"content": plain_text})
+        component._agent_result = fake_result_cls()
+
+        parsed = await component.json_response()
+
+        assert isinstance(parsed, Data)
+        assert "error" in parsed.data
+        assert parsed.data["content"] == plain_text
+
+    async def test_model_building_without_json_mode(self, component_class, default_kwargs):
+        """Check that set_component_params succeeds although the agent has no json_mode attribute."""
+        from unittest.mock import Mock
+
+        component = await self.component_setup(component_class, default_kwargs)
+        component.agent_llm = "OpenAI"
+
+        # Stand-in model component whose set() returns itself.
+        model_stub = Mock()
+        model_stub.set.return_value = model_stub
+
+        # Must not raise AttributeError for the missing json_mode attribute.
+        configured = component.set_component_params(model_stub)
+
+        assert configured is not None
+        # Reaching set() exactly once means the kwargs were assembled without error.
+        model_stub.set.assert_called_once()
+
+    async def test_shared_execution_between_outputs(self, component_class, default_kwargs):
+        """Check that json_response calls message_response once and parses the result it stored."""
+        from unittest.mock import AsyncMock
+
+        component = await self.component_setup(component_class, default_kwargs)
+        stored_result = type("MockResult", (), {"content": '{"shared": "result"}'})()
+
+        async def fake_message_response():
+            # Mirror message_response: store the result on the component, then return it.
+            component._agent_result = stored_result
+            return stored_result
+
+        component.message_response = AsyncMock(side_effect=fake_message_response)
+
+        # Request the structured output before anything else.
+        structured = await component.json_response()
+
+        # Exactly one agent execution must have happened.
+        component.message_response.assert_called_once()
+
+        # The stored result is what json_response parsed.
+        assert hasattr(component, "_agent_result")
+        assert structured.data == {"shared": "result"}
+
+    async def test_agent_component_initialization(self, component_class, default_kwargs):
+        """Check the basic identity, inputs and outputs of a freshly set-up Agent component."""
+        # Setup itself must complete without raising.
+        component = await self.component_setup(component_class, default_kwargs)
+
+        assert (component.display_name, component.name) == ("Agent", "Agent")
+        assert len(component.inputs) >= 1
+        assert len(component.outputs) == 2
+
+    async def test_frontend_node_structure(self, component_class, default_kwargs):
+        """Check that the frontend node template omits json_mode and keeps the agent's own fields."""
+        component = await self.component_setup(component_class, default_kwargs)
+
+        node = component.to_frontend_node()
+        template = node["data"]["node"]["template"]
+
+        # The filtered field must not leak into the build config.
+        assert "json_mode" not in template
+
+        # Fields declared by the agent itself are still present.
+        for field_name in ("agent_llm", "system_prompt", "add_current_date_tool"):
+            assert field_name in template
+
+
+class TestAgentComponentWithClient(ComponentTestBaseWithClient):
+    @pytest.fixture
+    def component_class(self):
+        """Return the component class under test (AgentComponent)."""
+        return AgentComponent
+
+    @pytest.fixture
+    def file_names_mapping(self):
+        """Return an empty list of file-name mappings.
+
+        NOTE(review): presumably consumed by the ComponentTestBaseWithClient base class - confirm.
+        """
+        return []
+
    @pytest.mark.api_key_required
    @pytest.mark.no_blockbuster
    async def test_agent_component_with_calculator(self):
@ -111,113 +260,6 @@ class TestAgentComponentWithoutClient(ComponentTestBaseWithoutClient):

        temperature = 0.1

-        # Initialize the AgentComponent with mocked inputs
-        agent = AgentComponent(
-            tools=tools,
-            input_value=input_value,
-            api_key=api_key,
-            model_name="gpt-4.1-nano",
-            llm_type="OpenAI",
-            temperature=temperature,
-            _session_id=str(uuid4()),
-        )
-
-        with (
-            patch.object(NoopSession, "add", new_callable=AsyncMock) as mock_add,
-            patch.object(NoopSession, "commit", new_callable=AsyncMock) as mock_commit,
-        ):
-            response = await agent.message_response()
-            assert mock_add.called
-            assert mock_commit.called
-        assert "4" in response.data.get("text")
-
-    @pytest.mark.api_key_required
-    @pytest.mark.no_blockbuster
-    async def test_agent_component_with_all_openai_models(self):
-        # Mock inputs
-        api_key = os.getenv("OPENAI_API_KEY")
-        input_value = "What is 2 + 2?"
-
-        # Iterate over all OpenAI models
-        failed_models = {}
-        for model_name in OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES:
-            try:
-                # Initialize the AgentComponent with mocked inputs
-                tools = [CalculatorToolComponent().build_tool()]  # Use the Calculator component as a tool
-                agent = AgentComponent(
-                    tools=tools,
-                    input_value=input_value,
-                    api_key=api_key,
-                    model_name=model_name,
-                    agent_llm=None,
-                    llm_type="OpenAI",
-                    temperature=0.1,
-                    _session_id=str(uuid4()),
-                )
-
-                response = await agent.message_response()
-                if "4" not in response.data.get("text"):
-                    failed_models[model_name] = f"Expected '4' in response but got: {response.data.get('text')}"
-            except Exception as e:  # noqa: BLE001
-                failed_models[model_name] = f"Exception occurred: {e!s}"
-
-        assert not failed_models, f"The following models failed the test: {failed_models}"
-
-    @pytest.mark.api_key_required
-    @pytest.mark.no_blockbuster
-    async def test_agent_component_with_all_anthropic_models(self):
-        # Mock inputs
-        api_key = os.getenv("ANTHROPIC_API_KEY")
-        input_value = "What is 2 + 2?"
-
-        # Iterate over all Anthropic models
-        failed_models = {}
-
-        for model_name in ANTHROPIC_MODELS:
-            try:
-                # Initialize the AgentComponent with mocked inputs
-                tools = [CalculatorToolComponent().build_tool()]
-                agent = AgentComponent(
-                    tools=tools,
-                    input_value=input_value,
-                    api_key=api_key,
-                    model_name=model_name,
-                    agent_llm="Anthropic",
-                    _session_id=str(uuid4()),
-                )
-
-                response = await agent.message_response()
-                response_text = response.data.get("text", "")
-
-                if "4" not in response_text:
-                    failed_models[model_name] = f"Expected '4' in response but got: {response_text}"
-
-            except Exception as e:  # noqa: BLE001
-                failed_models[model_name] = f"Exception occurred: {e!s}"
-
-        assert not failed_models, "The following models failed the test:\n" + "\n".join(
-            f"{model}: {error}" for model, error in failed_models.items()
-        )
-
-
-class TestAgentComponentWithClient(ComponentTestBaseWithClient):
-    @pytest.fixture
-    def component_class(self):
-        return AgentComponent
-
-    @pytest.fixture
-    def file_names_mapping(self):
-        return []
-
-    @pytest.mark.api_key_required
-    @pytest.mark.no_blockbuster
-    async def test_agent_component_with_calculator(self):
-        api_key = os.getenv("OPENAI_API_KEY")
-        tools = [CalculatorToolComponent().build_tool()]
-        input_value = "What is 2 + 2?"
-
-        temperature = 0.1
-
        # Initialize the AgentComponent with mocked inputs
        agent = AgentComponent(
            tools=tools,
@ -228,33 +270,34 @@ class TestAgentComponentWithClient(ComponentTestBaseWithClient):
            temperature=temperature,
            _session_id=str(uuid4()),
        )
+
        response = await agent.message_response()
        assert "4" in response.data.get("text")

    @pytest.mark.api_key_required
    @pytest.mark.no_blockbuster
    async def test_agent_component_with_all_openai_models(self):
+        # Mock inputs
        api_key = os.getenv("OPENAI_API_KEY")
        input_value = "What is 2 + 2?"

        # Iterate over all OpenAI models
-        failed_models = {}
+        failed_models = []
        for model_name in OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES:
-            try:
-                tools = [CalculatorToolComponent().build_tool()]
-                agent = AgentComponent(
-                    tools=tools,
-                    input_value=input_value,
-                    api_key=api_key,
-                    model_name=model_name,
-                    agent_llm="OpenAI",
-                    _session_id=str(uuid4()),
-                )
-                response = await agent.message_response()
-                if "4" not in response.data.get("text"):
-                    failed_models[model_name] = f"Expected '4' in response but got: {response.data.get('text')}"
-            except Exception as e:  # noqa: BLE001
-                failed_models[model_name] = f"Exception occurred: {e!s}"
+            # Initialize the AgentComponent with mocked inputs
+            tools = [CalculatorToolComponent().build_tool()]  # Use the Calculator component as a tool
+            agent = AgentComponent(
+                tools=tools,
+                input_value=input_value,
+                api_key=api_key,
+                model_name=model_name,
+                agent_llm="OpenAI",
+                _session_id=str(uuid4()),
+            )
+
+            response = await agent.message_response()
+            if "4" not in response.data.get("text"):
+                failed_models.append(model_name)

        assert not failed_models, f"The following models failed the test: {failed_models}"