refactor: update build_structured_output to return only last output element (#8467)
* Update structured_output.py * template updates * Update Financial Report Parser.json * Update structured_output.py * Update structured_output.py * update structured output with optimisations * Update test_structured_output_component.py * fix lint * update templates * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Update structured_output.py * [autofix.ci] apply automated fixes * Update test_structured_output_component.py * [autofix.ci] apply automated fixes * fix image sentiment analysis * [autofix.ci] apply automated fixes * Update Image Sentiment Analysis.json * [autofix.ci] apply automated fixes * update template with new language model component --------- Co-authored-by: Cristhian Zanforlin Lousa <cristhian.lousa@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
c18330d449
commit
e7f78d99cb
7 changed files with 1366 additions and 962 deletions
|
|
@ -119,7 +119,7 @@ class StructuredOutputComponent(Component):
|
|||
),
|
||||
]
|
||||
|
||||
def build_structured_output_base(self) -> Data:
|
||||
def build_structured_output_base(self):
|
||||
schema_name = self.schema_name or "OutputModel"
|
||||
|
||||
if not hasattr(self.llm, "with_structured_output"):
|
||||
|
|
@ -142,6 +142,7 @@ class StructuredOutputComponent(Component):
|
|||
except NotImplementedError as exc:
|
||||
msg = f"{self.llm.__class__.__name__} does not support structured output."
|
||||
raise TypeError(msg) from exc
|
||||
|
||||
config_dict = {
|
||||
"run_name": self.display_name,
|
||||
"project_name": self.get_project_name(),
|
||||
|
|
@ -153,16 +154,31 @@ class StructuredOutputComponent(Component):
|
|||
input_value=self.input_value,
|
||||
config=config_dict,
|
||||
)
|
||||
if isinstance(result, BaseModel):
|
||||
result = result.model_dump()
|
||||
if responses := result.get("responses"):
|
||||
result = responses[0].model_dump()
|
||||
if result and "objects" in result:
|
||||
return result["objects"]
|
||||
|
||||
return result
|
||||
# OPTIMIZATION NOTE: Simplified processing based on trustcall response structure
|
||||
# Handle non-dict responses (shouldn't happen with trustcall, but defensive)
|
||||
if not isinstance(result, dict):
|
||||
return result
|
||||
|
||||
# Extract first response and convert BaseModel to dict
|
||||
responses = result.get("responses", [])
|
||||
if not responses:
|
||||
return result
|
||||
|
||||
# Convert BaseModel to dict (creates the "objects" key)
|
||||
first_response = responses[0]
|
||||
structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response
|
||||
|
||||
# Extract the objects array (guaranteed to exist due to our Pydantic model structure)
|
||||
return structured_data.get("objects", structured_data)
|
||||
|
||||
def build_structured_output(self) -> Data:
    """Return the single structured object produced by the model as a ``Data``.

    Delegates to ``build_structured_output_base()`` and requires its result to
    be a list holding exactly one item; that item becomes the ``data`` payload
    of the returned ``Data`` object.

    Returns:
        Data: wraps the only element of the base output.

    Raises:
        ValueError: if the base output is not a list or is empty
            ("No structured output returned"), or if it holds more than one
            item ("Multiple structured outputs returned").
    """
    output = self.build_structured_output_base()

    # The earlier unconditional return that wrapped the raw output under a
    # "results" key is dropped here; it made the validation below unreachable.
    if not isinstance(output, list) or not output:
        # handle empty or unexpected type case
        msg = "No structured output returned"
        raise ValueError(msg)
    if len(output) != 1:
        msg = "Multiple structured outputs returned"
        raise ValueError(msg)
    return Data(data=output[0])
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -42,7 +42,13 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
|
|||
def model_dump(self, **__):
|
||||
return {"objects": [{"field": "value"}]}
|
||||
|
||||
return MockBaseModel()
|
||||
# Return trustcall-style response structure
|
||||
return {
|
||||
"messages": ["mock_message"],
|
||||
"responses": [MockBaseModel()],
|
||||
"response_metadata": [{"id": "mock_id"}],
|
||||
"attempts": 1,
|
||||
}
|
||||
|
||||
component = StructuredOutputComponent(
|
||||
llm=MockLanguageModel(),
|
||||
|
|
@ -185,11 +191,16 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
|
|||
def model_dump(self, **__):
|
||||
return {"objects": self.objects}
|
||||
|
||||
mock_llm = MockLanguageModel()
|
||||
mock_get_chat_result.return_value = ParentModel()
|
||||
# Update to return trustcall-style response
|
||||
mock_get_chat_result.return_value = {
|
||||
"messages": ["mock_message"],
|
||||
"responses": [ParentModel()],
|
||||
"response_metadata": [{"id": "mock_id"}],
|
||||
"attempts": 1,
|
||||
}
|
||||
|
||||
component = StructuredOutputComponent(
|
||||
llm=mock_llm,
|
||||
llm=MockLanguageModel(),
|
||||
input_value="Test input",
|
||||
schema_name="NestedSchema",
|
||||
output_schema=[
|
||||
|
|
@ -218,7 +229,13 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
|
|||
def model_dump(self, **__):
|
||||
return {"objects": self.objects}
|
||||
|
||||
mock_get_chat_result.return_value = MockBaseModel()
|
||||
# Update to return trustcall-style response
|
||||
mock_get_chat_result.return_value = {
|
||||
"messages": ["mock_message"],
|
||||
"responses": [MockBaseModel()],
|
||||
"response_metadata": [{"id": "mock_id"}],
|
||||
"attempts": 1,
|
||||
}
|
||||
|
||||
component = StructuredOutputComponent(
|
||||
llm=MockLanguageModel(),
|
||||
|
|
@ -291,8 +308,16 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
|
|||
with pytest.raises(openai.BadRequestError) as exc_info:
|
||||
component.build_structured_output_base()
|
||||
|
||||
# Verify the error message contains expected content
|
||||
assert "max_tokens was reached" in str(exc_info.value)
|
||||
# Verify the error message contains expected content (updated to match actual OpenAI error format)
|
||||
error_message = str(exc_info.value)
|
||||
assert any(
|
||||
phrase in error_message
|
||||
for phrase in [
|
||||
"max_tokens was reached",
|
||||
"max_tokens or model output limit was reached",
|
||||
"Could not finish the message because max_tokens",
|
||||
]
|
||||
), f"Expected max_tokens error but got: {error_message}"
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"OPENAI_API_KEY" not in os.environ,
|
||||
|
|
@ -438,3 +463,251 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
|
|||
# The test is expected to fail with a 400 Bad Request error
|
||||
with pytest.raises(Exception, match="400 Bad Request"):
|
||||
component.build_structured_output_base()
|
||||
|
||||
def test_structured_output_returns_dict_when_no_objects_key(self):
    """A plain-dict response without an 'objects' key is returned as that dict."""

    def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # trustcall-shaped payload whose single response is a raw dict, not a BaseModel
        plain_response = {"field": "value", "another_field": "another_value"}
        return {
            "messages": ["mock_message"],
            "responses": [plain_response],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    schema = [{"name": "field", "type": "str", "description": "A test field"}]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema,
        multiple=False,
        system_prompt="Test system prompt",
    )

    target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(target, mock_get_chat_result):
        base_output = component.build_structured_output_base()

    # No "objects" key to unwrap, so the response dict itself is expected back.
    assert isinstance(base_output, dict)
    assert base_output == {"field": "value", "another_field": "another_value"}
|
||||
|
||||
def test_structured_output_returns_direct_response_when_not_dict(self):
    """A non-dict chat result (here a string) is passed through untouched."""

    def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # Edge case: the chat call yields a bare string rather than a dict
        return "Simple string response"

    schema = [{"name": "field", "type": "str", "description": "A test field"}]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema,
        multiple=False,
        system_prompt="Test system prompt",
    )

    target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(target, mock_get_chat_result):
        base_output = component.build_structured_output_base()

    # The string must come back exactly as the mock produced it.
    assert isinstance(base_output, str)
    assert base_output == "Simple string response"
|
||||
|
||||
def test_structured_output_handles_empty_responses_array(self):
    """With an empty 'responses' list the whole result dict is returned."""

    def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # trustcall-shaped payload with nothing in "responses" plus an extra key
        payload = {
            "messages": ["mock_message"],
            "responses": [],
            "response_metadata": [],
            "attempts": 1,
            "fallback_data": {"field": "fallback_value"},
        }
        return payload

    schema = [{"name": "field", "type": "str", "description": "A test field"}]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema,
        multiple=False,
        system_prompt="Test system prompt",
    )

    target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(target, mock_get_chat_result):
        base_output = component.build_structured_output_base()

    # The full dict, including the unrelated keys, is expected back.
    assert isinstance(base_output, dict)
    assert "messages" in base_output
    assert "responses" in base_output
    assert "fallback_data" in base_output
|
||||
|
||||
def test_build_structured_output_fails_when_base_returns_non_list(self):
    """build_structured_output() raises ValueError when the base result is not a list."""

    def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # The single response is a dict with no "objects" key, so the base
        # method yields a dict rather than a list.
        return {
            "messages": ["mock_message"],
            "responses": [{"single_item": "value"}],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    schema = [{"name": "field", "type": "str", "description": "A test field"}]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema,
        multiple=False,
        system_prompt="Test system prompt",
    )

    target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(target, mock_get_chat_result):
        with pytest.raises(ValueError, match="No structured output returned"):
            component.build_structured_output()
|
||||
|
||||
def test_build_structured_output_returns_data_with_dict(self):
    """build_structured_output() wraps the single extracted object in a Data whose data is a dict."""
    from langflow.schema.data import Data

    def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        class MockBaseModel(BaseModel):
            def model_dump(self, **__):
                # Exactly one object under "objects"
                return {"objects": [{"field": "value2", "number": 24}]}

        # trustcall-shaped response structure
        return {
            "messages": ["mock_message"],
            "responses": [MockBaseModel()],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    schema = [
        {"name": "field", "type": "str", "description": "A test field"},
        {"name": "number", "type": "int", "description": "A test number"},
    ]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=schema,
        multiple=False,
        system_prompt="Test system prompt",
    )

    target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(target, mock_get_chat_result):
        wrapped = component.build_structured_output()

    # Container type, then payload type, then payload content.
    assert isinstance(wrapped, Data)
    assert isinstance(wrapped.data, dict)
    assert wrapped.data == {"field": "value2", "number": 24}

    # Key presence and individual values
    for key in ("field", "number"):
        assert key in wrapped.data
    assert wrapped.data["field"] == "value2"
    assert wrapped.data["number"] == 24
|
||||
|
||||
def test_build_structured_output_returns_data_with_single_item(self):
    """A one-element 'objects' list yields a Data object holding that element."""
    from langflow.schema.data import Data

    def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        class MockBaseModel(BaseModel):
            def model_dump(self, **__):
                return {"objects": [{"name": "John Doe", "age": 30}]}

        return {
            "messages": ["mock_message"],
            "responses": [MockBaseModel()],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    schema = [
        {"name": "name", "type": "str", "description": "Person's name"},
        {"name": "age", "type": "int", "description": "Person's age"},
    ]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Extract name and age from: John Doe is 30 years old",
        schema_name="PersonInfo",
        output_schema=schema,
        multiple=False,
        system_prompt="Extract person info",
    )

    target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(target, mock_get_chat_result):
        wrapped = component.build_structured_output()

    # Data wrapper, dict payload, exact content match.
    assert isinstance(wrapped, Data)
    assert isinstance(wrapped.data, dict)
    assert wrapped.data == {"name": "John Doe", "age": 30}
|
||||
|
||||
def test_build_structured_output_data_object_properties(self):
    """The returned Data object exposes the extracted fields with their Python types and values."""
    from langflow.schema.data import Data

    def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        class MockBaseModel(BaseModel):
            def model_dump(self, **__):
                return {"objects": [{"product": "iPhone", "price": 999.99, "available": True}]}

        return {
            "messages": ["mock_message"],
            "responses": [MockBaseModel()],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    schema = [
        {"name": "product", "type": "str", "description": "Product name"},
        {"name": "price", "type": "float", "description": "Product price"},
        {"name": "available", "type": "bool", "description": "Product availability"},
    ]
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Product info: iPhone costs $999.99 and is available",
        schema_name="ProductInfo",
        output_schema=schema,
        multiple=False,
        system_prompt="Extract product info",
    )

    target = "langflow.components.processing.structured_output.get_chat_result"
    with patch(target, mock_get_chat_result):
        wrapped = component.build_structured_output()

    assert isinstance(wrapped, Data)

    # Payload is a dict whose entries carry the expected Python types
    payload = wrapped.data
    assert isinstance(payload, dict)
    assert isinstance(payload["product"], str)
    assert isinstance(payload["price"], float)
    assert isinstance(payload["available"], bool)

    # ... and the expected values
    assert payload["product"] == "iPhone"
    assert payload["price"] == 999.99
    assert payload["available"] is True

    # get_text() is only exercised when the Data object provides it
    if hasattr(wrapped, "get_text"):
        text_repr = wrapped.get_text()
        assert isinstance(text_repr, str)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue