refactor: update build_structured_output to return only last output element (#8467)

* Update structured_output.py * template updates * Update Financial Report Parser.json * Update structured_output.py * Update structured_output.py * update strcutred output with optimisations * Update test_structured_output_component.py * fix lint * update templates * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Update structured_output.py * [autofix.ci] apply automated fixes * Update test_structured_output_component.py * [autofix.ci] apply automated fixes * fix image sentiment analysis * [autofix.ci] apply automated fixes * Update Image Sentiment Analysis.json * [autofix.ci] apply automated fixes * update template with new language model component --------- Co-authored-by: Cristhian Zanforlin Lousa <cristhian.lousa@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-06-13 18:02:48 -05:00 · 2025-06-13 18:02:48 -05:00 · e7f78d99cb
commit e7f78d99cb
parent c18330d449
7 changed files with 1366 additions and 962 deletions
--- a/src/backend/base/langflow/components/processing/structured_output.py
+++ b/src/backend/base/langflow/components/processing/structured_output.py
@ -119,7 +119,7 @@ class StructuredOutputComponent(Component):
        ),
    ]

-    def build_structured_output_base(self) -> Data:
+    def build_structured_output_base(self):
        schema_name = self.schema_name or "OutputModel"

        if not hasattr(self.llm, "with_structured_output"):
@ -142,6 +142,7 @@ class StructuredOutputComponent(Component):
        except NotImplementedError as exc:
            msg = f"{self.llm.__class__.__name__} does not support structured output."
            raise TypeError(msg) from exc
+
        config_dict = {
            "run_name": self.display_name,
            "project_name": self.get_project_name(),
@ -153,16 +154,31 @@ class StructuredOutputComponent(Component):
            input_value=self.input_value,
            config=config_dict,
        )
-        if isinstance(result, BaseModel):
-            result = result.model_dump()
-        if responses := result.get("responses"):
-            result = responses[0].model_dump()
-        if result and "objects" in result:
-            return result["objects"]

-        return result
+        # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure
+        # Handle non-dict responses (shouldn't happen with trustcall, but defensive)
+        if not isinstance(result, dict):
+            return result
+
+        # Extract first response and convert BaseModel to dict
+        responses = result.get("responses", [])
+        if not responses:
+            return result
+
+        # Convert BaseModel to dict (creates the "objects" key)
+        first_response = responses[0]
+        structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response
+
+        # Extract the objects array (guaranteed to exist due to our Pydantic model structure)
+        return structured_data.get("objects", structured_data)

    def build_structured_output(self) -> Data:
        output = self.build_structured_output_base()
-
-        return Data(text_key="results", data={"results": output})
+        if not isinstance(output, list) or not output:
+            # handle empty or unexpected type case
+            msg = "No structured output returned"
+            raise ValueError(msg)
+        if len(output) != 1:
+            msg = "Multiple structured outputs returned"
+            raise ValueError(msg)
+        return Data(data=output[0])
--- a/src/backend/base/langflow/initial_setup/starter_projects/Financial
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Financial
--- a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid
--- a/src/backend/base/langflow/initial_setup/starter_projects/Image
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Image
--- a/src/backend/base/langflow/initial_setup/starter_projects/Market
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Market
--- a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio
--- a/src/backend/tests/unit/components/processing/test_structured_output_component.py
+++ b/src/backend/tests/unit/components/processing/test_structured_output_component.py
@ -42,7 +42,13 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
                def model_dump(self, **__):
                    return {"objects": [{"field": "value"}]}

-            return MockBaseModel()
+            # Return trustcall-style response structure
+            return {
+                "messages": ["mock_message"],
+                "responses": [MockBaseModel()],
+                "response_metadata": [{"id": "mock_id"}],
+                "attempts": 1,
+            }

        component = StructuredOutputComponent(
            llm=MockLanguageModel(),
@ -185,11 +191,16 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
            def model_dump(self, **__):
                return {"objects": self.objects}

-        mock_llm = MockLanguageModel()
-        mock_get_chat_result.return_value = ParentModel()
+        # Update to return trustcall-style response
+        mock_get_chat_result.return_value = {
+            "messages": ["mock_message"],
+            "responses": [ParentModel()],
+            "response_metadata": [{"id": "mock_id"}],
+            "attempts": 1,
+        }

        component = StructuredOutputComponent(
-            llm=mock_llm,
+            llm=MockLanguageModel(),
            input_value="Test input",
            schema_name="NestedSchema",
            output_schema=[
@ -218,7 +229,13 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
            def model_dump(self, **__):
                return {"objects": self.objects}

-        mock_get_chat_result.return_value = MockBaseModel()
+        # Update to return trustcall-style response
+        mock_get_chat_result.return_value = {
+            "messages": ["mock_message"],
+            "responses": [MockBaseModel()],
+            "response_metadata": [{"id": "mock_id"}],
+            "attempts": 1,
+        }

        component = StructuredOutputComponent(
            llm=MockLanguageModel(),
@ -291,8 +308,16 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
        with pytest.raises(openai.BadRequestError) as exc_info:
            component.build_structured_output_base()

-        # Verify the error message contains expected content
-        assert "max_tokens was reached" in str(exc_info.value)
+        # Verify the error message contains expected content (updated to match actual OpenAI error format)
+        error_message = str(exc_info.value)
+        assert any(
+            phrase in error_message
+            for phrase in [
+                "max_tokens was reached",
+                "max_tokens or model output limit was reached",
+                "Could not finish the message because max_tokens",
+            ]
+        ), f"Expected max_tokens error but got: {error_message}"

    @pytest.mark.skipif(
        "OPENAI_API_KEY" not in os.environ,
@ -438,3 +463,251 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
        # The test is expected to fail with a 400 Bad Request error
        with pytest.raises(Exception, match="400 Bad Request"):
            component.build_structured_output_base()
+
+    def test_structured_output_returns_dict_when_no_objects_key(self):
+        """Test that when trustcall returns a dict without 'objects' key, we return the dict directly."""
+
+        def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
+            # Return trustcall-style response but without BaseModel that creates "objects" key
+            return {
+                "messages": ["mock_message"],
+                "responses": [{"field": "value", "another_field": "another_value"}],  # Direct dict, not BaseModel
+                "response_metadata": [{"id": "mock_id"}],
+                "attempts": 1,
+            }
+
+        component = StructuredOutputComponent(
+            llm=MockLanguageModel(),
+            input_value="Test input",
+            schema_name="TestSchema",
+            output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
+            multiple=False,
+            system_prompt="Test system prompt",
+        )
+
+        with patch("langflow.components.processing.structured_output.get_chat_result", mock_get_chat_result):
+            result = component.build_structured_output_base()
+            # Should return the dict directly since there's no "objects" key
+            assert isinstance(result, dict)
+            assert result == {"field": "value", "another_field": "another_value"}
+
+    def test_structured_output_returns_direct_response_when_not_dict(self):
+        """Test that when trustcall returns a non-dict response, we return it directly."""
+
+        def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
+            # Return a string response (edge case)
+            return "Simple string response"
+
+        component = StructuredOutputComponent(
+            llm=MockLanguageModel(),
+            input_value="Test input",
+            schema_name="TestSchema",
+            output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
+            multiple=False,
+            system_prompt="Test system prompt",
+        )
+
+        with patch("langflow.components.processing.structured_output.get_chat_result", mock_get_chat_result):
+            result = component.build_structured_output_base()
+            # Should return the string directly
+            assert isinstance(result, str)
+            assert result == "Simple string response"
+
+    def test_structured_output_handles_empty_responses_array(self):
+        """Test that when trustcall returns empty responses array, we return the result dict."""
+
+        def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
+            # Return trustcall-style response with empty responses
+            return {
+                "messages": ["mock_message"],
+                "responses": [],  # Empty responses array
+                "response_metadata": [],
+                "attempts": 1,
+                "fallback_data": {"field": "fallback_value"},  # Some other data in the result
+            }
+
+        component = StructuredOutputComponent(
+            llm=MockLanguageModel(),
+            input_value="Test input",
+            schema_name="TestSchema",
+            output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
+            multiple=False,
+            system_prompt="Test system prompt",
+        )
+
+        with patch("langflow.components.processing.structured_output.get_chat_result", mock_get_chat_result):
+            result = component.build_structured_output_base()
+            # Should return the entire result dict when responses is empty
+            assert isinstance(result, dict)
+            assert "messages" in result
+            assert "responses" in result
+            assert "fallback_data" in result
+
+    def test_build_structured_output_fails_when_base_returns_non_list(self):
+        """Test that build_structured_output() fails when base method returns non-list."""
+
+        def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
+            # Return a dict instead of list with objects
+            return {
+                "messages": ["mock_message"],
+                "responses": [{"single_item": "value"}],  # Dict without "objects" key
+                "response_metadata": [{"id": "mock_id"}],
+                "attempts": 1,
+            }
+
+        component = StructuredOutputComponent(
+            llm=MockLanguageModel(),
+            input_value="Test input",
+            schema_name="TestSchema",
+            output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
+            multiple=False,
+            system_prompt="Test system prompt",
+        )
+
+        with (
+            patch("langflow.components.processing.structured_output.get_chat_result", mock_get_chat_result),
+            pytest.raises(ValueError, match="No structured output returned"),
+        ):
+            component.build_structured_output()
+
+    def test_build_structured_output_returns_data_with_dict(self):
+        """Test that build_structured_output() returns Data object with dict data."""
+
+        def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
+            class MockBaseModel(BaseModel):
+                def model_dump(self, **__):
+                    return {"objects": [{"field": "value2", "number": 24}]}  # Return only one object
+
+            # Return trustcall-style response structure
+            return {
+                "messages": ["mock_message"],
+                "responses": [MockBaseModel()],
+                "response_metadata": [{"id": "mock_id"}],
+                "attempts": 1,
+            }
+
+        component = StructuredOutputComponent(
+            llm=MockLanguageModel(),
+            input_value="Test input",
+            schema_name="TestSchema",
+            output_schema=[
+                {"name": "field", "type": "str", "description": "A test field"},
+                {"name": "number", "type": "int", "description": "A test number"},
+            ],
+            multiple=False,
+            system_prompt="Test system prompt",
+        )
+
+        with patch("langflow.components.processing.structured_output.get_chat_result", mock_get_chat_result):
+            result = component.build_structured_output()
+
+            # Check that result is a Data object
+            from langflow.schema.data import Data
+
+            assert isinstance(result, Data)
+
+            # Check that result.data is a dict
+            assert isinstance(result.data, dict)
+
+            # Check the content of the dict
+            assert result.data == {"field": "value2", "number": 24}
+
+            # Verify the data has the expected keys
+            assert "field" in result.data
+            assert "number" in result.data
+            assert result.data["field"] == "value2"
+            assert result.data["number"] == 24
+
+    def test_build_structured_output_returns_data_with_single_item(self):
+        """Test that build_structured_output() returns Data object when only one item in objects."""
+
+        def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
+            class MockBaseModel(BaseModel):
+                def model_dump(self, **__):
+                    return {"objects": [{"name": "John Doe", "age": 30}]}
+
+            return {
+                "messages": ["mock_message"],
+                "responses": [MockBaseModel()],
+                "response_metadata": [{"id": "mock_id"}],
+                "attempts": 1,
+            }
+
+        component = StructuredOutputComponent(
+            llm=MockLanguageModel(),
+            input_value="Extract name and age from: John Doe is 30 years old",
+            schema_name="PersonInfo",
+            output_schema=[
+                {"name": "name", "type": "str", "description": "Person's name"},
+                {"name": "age", "type": "int", "description": "Person's age"},
+            ],
+            multiple=False,
+            system_prompt="Extract person info",
+        )
+
+        with patch("langflow.components.processing.structured_output.get_chat_result", mock_get_chat_result):
+            result = component.build_structured_output()
+
+            # Check that result is a Data object
+            from langflow.schema.data import Data
+
+            assert isinstance(result, Data)
+
+            # Check that result.data is a dict
+            assert isinstance(result.data, dict)
+
+            # Check the content matches exactly
+            assert result.data == {"name": "John Doe", "age": 30}
+
+    def test_build_structured_output_data_object_properties(self):
+        """Test that the returned Data object has proper properties."""
+
+        def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
+            class MockBaseModel(BaseModel):
+                def model_dump(self, **__):
+                    return {"objects": [{"product": "iPhone", "price": 999.99, "available": True}]}
+
+            return {
+                "messages": ["mock_message"],
+                "responses": [MockBaseModel()],
+                "response_metadata": [{"id": "mock_id"}],
+                "attempts": 1,
+            }
+
+        component = StructuredOutputComponent(
+            llm=MockLanguageModel(),
+            input_value="Product info: iPhone costs $999.99 and is available",
+            schema_name="ProductInfo",
+            output_schema=[
+                {"name": "product", "type": "str", "description": "Product name"},
+                {"name": "price", "type": "float", "description": "Product price"},
+                {"name": "available", "type": "bool", "description": "Product availability"},
+            ],
+            multiple=False,
+            system_prompt="Extract product info",
+        )
+
+        with patch("langflow.components.processing.structured_output.get_chat_result", mock_get_chat_result):
+            result = component.build_structured_output()
+
+            # Check that result is a Data object
+            from langflow.schema.data import Data
+
+            assert isinstance(result, Data)
+
+            # Check that result.data is a dict with correct types
+            assert isinstance(result.data, dict)
+            assert isinstance(result.data["product"], str)
+            assert isinstance(result.data["price"], float)
+            assert isinstance(result.data["available"], bool)
+
+            # Check values
+            assert result.data["product"] == "iPhone"
+            assert result.data["price"] == 999.99
+            assert result.data["available"] is True
+
+            # Test Data object methods if they exist
+            if hasattr(result, "get_text"):
+                # Data object should be able to represent itself as text
+                text_repr = result.get_text()
+                assert isinstance(text_repr, str)