refactor: update build_structured_output to return only last output element (#8467)

* Update structured_output.py

* template updates

* Update Financial Report Parser.json

* Update structured_output.py

* Update structured_output.py

* update structured output with optimisations

* Update test_structured_output_component.py

* fix lint

* update templates

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

* Update structured_output.py

* [autofix.ci] apply automated fixes

* Update test_structured_output_component.py

* [autofix.ci] apply automated fixes

* fix image sentiment analysis

* [autofix.ci] apply automated fixes

* Update Image Sentiment Analysis.json

* [autofix.ci] apply automated fixes

* update template with new language model component

---------

Co-authored-by: Cristhian Zanforlin Lousa <cristhian.lousa@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Edwin Jose 2025-06-13 18:02:48 -05:00 committed by GitHub
commit e7f78d99cb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 1366 additions and 962 deletions

View file

@ -119,7 +119,7 @@ class StructuredOutputComponent(Component):
),
]
def build_structured_output_base(self) -> Data:
def build_structured_output_base(self):
schema_name = self.schema_name or "OutputModel"
if not hasattr(self.llm, "with_structured_output"):
@ -142,6 +142,7 @@ class StructuredOutputComponent(Component):
except NotImplementedError as exc:
msg = f"{self.llm.__class__.__name__} does not support structured output."
raise TypeError(msg) from exc
config_dict = {
"run_name": self.display_name,
"project_name": self.get_project_name(),
@ -153,16 +154,31 @@ class StructuredOutputComponent(Component):
input_value=self.input_value,
config=config_dict,
)
if isinstance(result, BaseModel):
result = result.model_dump()
if responses := result.get("responses"):
result = responses[0].model_dump()
if result and "objects" in result:
return result["objects"]
return result
# OPTIMIZATION NOTE: Simplified processing based on trustcall response structure
# Handle non-dict responses (shouldn't happen with trustcall, but defensive)
if not isinstance(result, dict):
return result
# Extract first response and convert BaseModel to dict
responses = result.get("responses", [])
if not responses:
return result
# Convert BaseModel to dict (creates the "objects" key)
first_response = responses[0]
structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response
# Extract the objects array (guaranteed to exist due to our Pydantic model structure)
return structured_data.get("objects", structured_data)
def build_structured_output(self) -> Data:
output = self.build_structured_output_base()
return Data(text_key="results", data={"results": output})
if not isinstance(output, list) or not output:
# handle empty or unexpected type case
msg = "No structured output returned"
raise ValueError(msg)
if len(output) != 1:
msg = "Multiple structured outputs returned"
raise ValueError(msg)
return Data(data=output[0])

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -42,7 +42,13 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
def model_dump(self, **__):
return {"objects": [{"field": "value"}]}
return MockBaseModel()
# Return trustcall-style response structure
return {
"messages": ["mock_message"],
"responses": [MockBaseModel()],
"response_metadata": [{"id": "mock_id"}],
"attempts": 1,
}
component = StructuredOutputComponent(
llm=MockLanguageModel(),
@ -185,11 +191,16 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
def model_dump(self, **__):
return {"objects": self.objects}
mock_llm = MockLanguageModel()
mock_get_chat_result.return_value = ParentModel()
# Update to return trustcall-style response
mock_get_chat_result.return_value = {
"messages": ["mock_message"],
"responses": [ParentModel()],
"response_metadata": [{"id": "mock_id"}],
"attempts": 1,
}
component = StructuredOutputComponent(
llm=mock_llm,
llm=MockLanguageModel(),
input_value="Test input",
schema_name="NestedSchema",
output_schema=[
@ -218,7 +229,13 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
def model_dump(self, **__):
return {"objects": self.objects}
mock_get_chat_result.return_value = MockBaseModel()
# Update to return trustcall-style response
mock_get_chat_result.return_value = {
"messages": ["mock_message"],
"responses": [MockBaseModel()],
"response_metadata": [{"id": "mock_id"}],
"attempts": 1,
}
component = StructuredOutputComponent(
llm=MockLanguageModel(),
@ -291,8 +308,16 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
with pytest.raises(openai.BadRequestError) as exc_info:
component.build_structured_output_base()
# Verify the error message contains expected content
assert "max_tokens was reached" in str(exc_info.value)
# Verify the error message contains expected content (updated to match actual OpenAI error format)
error_message = str(exc_info.value)
assert any(
phrase in error_message
for phrase in [
"max_tokens was reached",
"max_tokens or model output limit was reached",
"Could not finish the message because max_tokens",
]
), f"Expected max_tokens error but got: {error_message}"
@pytest.mark.skipif(
"OPENAI_API_KEY" not in os.environ,
@ -438,3 +463,251 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
# The test is expected to fail with a 400 Bad Request error
with pytest.raises(Exception, match="400 Bad Request"):
component.build_structured_output_base()
def test_structured_output_returns_dict_when_no_objects_key(self):
    """A plain-dict first response lacking an 'objects' key comes back from the base builder as that dict."""
    expected = {"field": "value", "another_field": "another_value"}

    def fake_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # Trustcall-shaped payload whose single response is a bare dict rather than a BaseModel,
        # so no "objects" wrapper is ever produced.
        return {
            "messages": ["mock_message"],
            "responses": [dict(expected)],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    schema_fields = [{"name": "field", "type": "str", "description": "A test field"}]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema_fields,
        multiple=False,
        system_prompt="Test system prompt",
    )

    patch_target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(patch_target, fake_chat_result):
        outcome = component.build_structured_output_base()

    # With no "objects" key to unwrap, the response dict itself is the result.
    assert isinstance(outcome, dict)
    assert outcome == expected
def test_structured_output_returns_direct_response_when_not_dict(self):
    """A non-dict chat result (here a string) is handed back untouched by the base builder."""
    raw_reply = "Simple string response"

    def fake_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # Edge case: the chat call yields a bare string instead of a trustcall-style dict.
        return raw_reply

    schema_fields = [{"name": "field", "type": "str", "description": "A test field"}]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema_fields,
        multiple=False,
        system_prompt="Test system prompt",
    )

    patch_target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(patch_target, fake_chat_result):
        outcome = component.build_structured_output_base()

    # The string passes straight through.
    assert isinstance(outcome, str)
    assert outcome == "Simple string response"
def test_structured_output_handles_empty_responses_array(self):
    """An empty 'responses' list makes the base builder return the whole result dict."""

    def fake_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # Trustcall-shaped payload with nothing in "responses"; "fallback_data" is an extra
        # key used to confirm the full dict is what comes back.
        payload = {
            "messages": ["mock_message"],
            "responses": [],
            "response_metadata": [],
            "attempts": 1,
            "fallback_data": {"field": "fallback_value"},
        }
        return payload

    schema_fields = [{"name": "field", "type": "str", "description": "A test field"}]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema_fields,
        multiple=False,
        system_prompt="Test system prompt",
    )

    patch_target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(patch_target, fake_chat_result):
        outcome = component.build_structured_output_base()

    # The entire result dict is returned when there is no response to extract.
    assert isinstance(outcome, dict)
    for required_key in ("messages", "responses", "fallback_data"):
        assert required_key in outcome
def test_build_structured_output_fails_when_base_returns_non_list(self):
    """build_structured_output() raises ValueError when the first response is a dict with no 'objects' key."""

    def fake_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # The only response is a bare dict with no "objects" key.
        bare_response = {"single_item": "value"}
        return {
            "messages": ["mock_message"],
            "responses": [bare_response],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    schema_fields = [{"name": "field", "type": "str", "description": "A test field"}]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema_fields,
        multiple=False,
        system_prompt="Test system prompt",
    )

    patch_target = "langflow.components.processing.structured_output.get_chat_result"
    with (
        patch(patch_target, fake_chat_result),
        pytest.raises(ValueError, match="No structured output returned"),
    ):
        component.build_structured_output()
def test_build_structured_output_returns_data_with_dict(self):
    """build_structured_output() wraps the single extracted object in a Data whose .data is that dict."""
    expected = {"field": "value2", "number": 24}

    def fake_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        class SingleObjectModel(BaseModel):
            def model_dump(self, **__):
                # Exactly one object in the list.
                return {"objects": [{"field": "value2", "number": 24}]}

        # Trustcall-style response structure.
        return {
            "messages": ["mock_message"],
            "responses": [SingleObjectModel()],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    schema_fields = [
        {"name": "field", "type": "str", "description": "A test field"},
        {"name": "number", "type": "int", "description": "A test number"},
    ]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema_fields,
        multiple=False,
        system_prompt="Test system prompt",
    )

    patch_target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(patch_target, fake_chat_result):
        outcome = component.build_structured_output()

    from langflow.schema.data import Data

    # The result is a Data object carrying a plain dict payload.
    assert isinstance(outcome, Data)
    assert isinstance(outcome.data, dict)

    # The payload matches as a whole, then key by key.
    assert outcome.data == expected
    for key in expected:
        assert key in outcome.data
    for key, value in expected.items():
        assert outcome.data[key] == value
def test_build_structured_output_returns_data_with_single_item(self):
    """With exactly one entry under 'objects', build_structured_output() returns it inside a Data object."""

    def fake_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        class PersonModel(BaseModel):
            def model_dump(self, **__):
                return {"objects": [{"name": "John Doe", "age": 30}]}

        trustcall_payload = {
            "messages": ["mock_message"],
            "responses": [PersonModel()],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }
        return trustcall_payload

    schema_fields = [
        {"name": "name", "type": "str", "description": "Person's name"},
        {"name": "age", "type": "int", "description": "Person's age"},
    ]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Extract name and age from: John Doe is 30 years old",
        schema_name="PersonInfo",
        output_schema=schema_fields,
        multiple=False,
        system_prompt="Extract person info",
    )

    patch_target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(patch_target, fake_chat_result):
        outcome = component.build_structured_output()

    from langflow.schema.data import Data

    # A Data object wrapping a dict is expected.
    assert isinstance(outcome, Data)
    assert isinstance(outcome.data, dict)

    # The content must match exactly.
    assert outcome.data == {"name": "John Doe", "age": 30}
def test_build_structured_output_data_object_properties(self):
    """The Data returned by build_structured_output() keeps each field's Python type and value."""

    def fake_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        class ProductModel(BaseModel):
            def model_dump(self, **__):
                return {"objects": [{"product": "iPhone", "price": 999.99, "available": True}]}

        trustcall_payload = {
            "messages": ["mock_message"],
            "responses": [ProductModel()],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }
        return trustcall_payload

    schema_fields = [
        {"name": "product", "type": "str", "description": "Product name"},
        {"name": "price", "type": "float", "description": "Product price"},
        {"name": "available", "type": "bool", "description": "Product availability"},
    ]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Product info: iPhone costs $999.99 and is available",
        schema_name="ProductInfo",
        output_schema=schema_fields,
        multiple=False,
        system_prompt="Extract product info",
    )

    patch_target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(patch_target, fake_chat_result):
        outcome = component.build_structured_output()

    from langflow.schema.data import Data

    # A Data object wrapping a dict is expected.
    assert isinstance(outcome, Data)
    payload = outcome.data
    assert isinstance(payload, dict)

    # Each field keeps its Python type...
    assert isinstance(payload["product"], str)
    assert isinstance(payload["price"], float)
    assert isinstance(payload["available"], bool)

    # ...and its value.
    assert payload["product"] == "iPhone"
    assert payload["price"] == 999.99
    assert payload["available"] is True

    # Only exercised when the Data object exposes get_text.
    if hasattr(outcome, "get_text"):
        rendered = outcome.get_text()
        assert isinstance(rendered, str)