feat: update Structured output to handle Dataframe and inbuilt Prompt (#6642)

* Structured Output

* update

* [autofix.ci] apply automated fixes

* updates

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

* Update structured_output.py

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes

* update in Templates and added inline edit to the component table inputs

* format fix

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

* Update Financial Report Parser.json

* Update Portfolio Website Code Generator.json

* update as per review

* [autofix.ci] apply automated fixes

* update to templates

* fix breaking change

* lint and format error fix

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

* [autofix.ci] apply automated fixes

* updated file

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Edwin Jose 2025-02-20 13:11:00 -05:00 committed by GitHub
commit 18df0e8637
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 1258 additions and 1476 deletions

View file

@ -5,8 +5,10 @@ from pydantic import BaseModel, Field, create_model
from langflow.base.models.chat_result import get_chat_result
from langflow.custom import Component
from langflow.helpers.base_model import build_model_from_schema
from langflow.io import BoolInput, HandleInput, MessageTextInput, Output, StrInput, TableInput
from langflow.io import BoolInput, HandleInput, MessageTextInput, MultilineInput, Output, TableInput
from langflow.schema.data import Data
from langflow.schema.dataframe import DataFrame
from langflow.schema.table import EditMode
if TYPE_CHECKING:
from langflow.field_typing.constants import LanguageModel
@ -36,7 +38,24 @@ class StructuredOutputComponent(Component):
tool_mode=True,
required=True,
),
StrInput(
MultilineInput(
    name="system_prompt",
    display_name="Format Instructions",
    info="The instructions to the language model for formatting the output.",
    # Default prompt sent as the system message. Adjacent string literals are
    # concatenated verbatim, so every fragment except the last ends with a
    # space; without it the sentences run together (e.g. "text.Given").
    value=(
        "You are an AI system designed to extract structured information from unstructured text. "
        "Given the input_text, return a JSON object with predefined keys based on the expected structure. "
        "Extract values accurately and format them according to the specified type "
        "(e.g., string, integer, float, date). "
        "If a value is missing or cannot be determined, return a default "
        "(e.g., null, 0, or 'N/A'). "
        "If multiple instances of the expected structure exist within the input_text, "
        "stream each as a separate JSON object."
    ),
    required=True,
    advanced=True,
),
MessageTextInput(
name="schema_name",
display_name="Schema Name",
info="Provide a name for the output data schema.",
@ -47,6 +66,7 @@ class StructuredOutputComponent(Component):
display_name="Output Schema",
info="Define the structure and data types for the model's output.",
required=True,
# TODO: remove default value
table_schema=[
{
"name": "name",
@ -54,6 +74,7 @@ class StructuredOutputComponent(Component):
"type": "str",
"description": "Specify the name of the output field.",
"default": "field",
"edit_mode": EditMode.INLINE,
},
{
"name": "description",
@ -61,11 +82,13 @@ class StructuredOutputComponent(Component):
"type": "str",
"description": "Describe the purpose of the output field.",
"default": "description of field",
"edit_mode": EditMode.POPOVER,
},
{
"name": "type",
"display_name": "Type",
"type": "str",
"edit_mode": EditMode.INLINE,
"description": (
"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict)."
),
@ -77,6 +100,7 @@ class StructuredOutputComponent(Component):
"type": "boolean",
"description": "Set to True if this output field should be a list of the specified type.",
"default": "False",
"edit_mode": EditMode.INLINE,
},
],
value=[{"name": "field", "description": "description of field", "type": "text", "multiple": "False"}],
@ -85,15 +109,17 @@ class StructuredOutputComponent(Component):
name="multiple",
advanced=True,
display_name="Generate Multiple",
info="Set to True if the model should generate a list of outputs instead of a single output.",
info="[Deplrecated] Always set to True",
value=True,
),
]
outputs = [
Output(name="structured_output", display_name="Structured Output", method="build_structured_output"),
Output(name="structured_output_dataframe", display_name="DataFrame", method="as_dataframe"),
]
def build_structured_output(self) -> Data:
def build_structured_output_base(self) -> Data:
schema_name = self.schema_name or "OutputModel"
if not hasattr(self.llm, "with_structured_output"):
@ -104,13 +130,12 @@ class StructuredOutputComponent(Component):
raise ValueError(msg)
output_model_ = build_model_from_schema(self.output_schema)
if self.multiple:
output_model = create_model(
schema_name,
objects=(list[output_model_], Field(description=f"A list of {schema_name}.")), # type: ignore[valid-type]
)
else:
output_model = output_model_
output_model = create_model(
schema_name,
objects=(list[output_model_], Field(description=f"A list of {schema_name}.")), # type: ignore[valid-type]
)
try:
llm_with_structured_output = cast("LanguageModel", self.llm).with_structured_output(schema=output_model) # type: ignore[valid-type, attr-defined]
@ -122,10 +147,25 @@ class StructuredOutputComponent(Component):
"project_name": self.get_project_name(),
"callbacks": self.get_langchain_callbacks(),
}
output = get_chat_result(runnable=llm_with_structured_output, input_value=self.input_value, config=config_dict)
if isinstance(output, BaseModel):
output_dict = output.model_dump()
else:
msg = f"Output should be a Pydantic BaseModel, got {type(output)} ({output})"
raise TypeError(msg)
return Data(data=output_dict)
result = get_chat_result(
runnable=llm_with_structured_output,
system_message=self.system_prompt,
input_value=self.input_value,
config=config_dict,
)
if isinstance(result, BaseModel):
result = result.model_dump()
if "objects" in result:
return result["objects"]
return result
def build_structured_output(self) -> Data:
    """Wrap the result of ``build_structured_output_base`` in a ``Data`` object.

    The base result is passed to ``Data`` under the ``results`` keyword.
    """
    return Data(results=self.build_structured_output_base())
def as_dataframe(self) -> DataFrame:
    """Return the result of ``build_structured_output_base`` as a ``DataFrame``.

    A list result is used directly as the rows; any other result is treated
    as a single row and wrapped in a one-element list first.
    """
    rows = self.build_structured_output_base()
    if not isinstance(rows, list):
        rows = [rows]
    return DataFrame(data=rows)

File diff suppressed because one or more lines are too long

View file

@ -5,18 +5,39 @@ import pytest
from langflow.components.helpers.structured_output import StructuredOutputComponent
from langflow.helpers.base_model import build_model_from_schema
from langflow.inputs.inputs import TableInput
from langflow.schema.data import Data
from pydantic import BaseModel
from tests.base import ComponentTestBaseWithoutClient
from tests.unit.mock_language_model import MockLanguageModel
class TestStructuredOutputComponent:
class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
@pytest.fixture
def component_class(self):
    """Provide the component type exercised by this test class."""
    component_type = StructuredOutputComponent
    return component_type
@pytest.fixture
def default_kwargs(self):
    """Provide the keyword arguments used to build the component by default."""
    # Single-row output schema describing one string field.
    schema_rows = [{"name": "field", "type": "str", "description": "A test field"}]
    kwargs = {
        "llm": MockLanguageModel(),
        "input_value": "Test input",
        "schema_name": "TestSchema",
        "output_schema": schema_rows,
        "multiple": False,
        "system_prompt": "Test system prompt",
    }
    return kwargs
@pytest.fixture
def file_names_mapping(self):
"""Return the file names mapping for version-specific files."""
def test_successful_structured_output_generation_with_patch_with_config(self):
def mock_get_chat_result(runnable, input_value, config): # noqa: ARG001
def mock_get_chat_result(runnable, system_message, input_value, config): # noqa: ARG001
class MockBaseModel(BaseModel):
def model_dump(self, **kwargs): # noqa: ARG002
return {"field": "value"}
def model_dump(self, **__):
return {"objects": [{"field": "value"}]}
return MockBaseModel()
@ -26,12 +47,13 @@ class TestStructuredOutputComponent:
schema_name="TestSchema",
output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
multiple=False,
system_prompt="Test system prompt",
)
with patch("langflow.components.helpers.structured_output.get_chat_result", mock_get_chat_result):
result = component.build_structured_output()
assert isinstance(result, Data)
assert result.data == {"field": "value"}
result = component.build_structured_output_base()
assert isinstance(result, list)
assert result == [{"field": "value"}]
def test_raises_value_error_for_unsupported_language_model(self):
# Mocking an incompatible language model
@ -155,10 +177,13 @@ class TestStructuredOutputComponent:
child: str = "value"
class ParentModel(BaseModel):
parent: ChildModel = ChildModel()
objects: list[dict] = [{"parent": {"child": "value"}}]
def model_dump(self, **__):
return {"objects": self.objects}
mock_llm = MockLanguageModel()
mock_get_chat_result.return_value = ParentModel(parent=ChildModel(child="value"))
mock_get_chat_result.return_value = ParentModel()
component = StructuredOutputComponent(
llm=mock_llm,
@ -173,20 +198,24 @@ class TestStructuredOutputComponent:
}
],
multiple=False,
system_prompt="Test system prompt",
)
result = component.build_structured_output()
assert isinstance(result, Data)
assert result.data == {"parent": {"child": "value"}}
result = component.build_structured_output_base()
assert isinstance(result, list)
assert result == [{"parent": {"child": "value"}}]
@patch("langflow.components.helpers.structured_output.get_chat_result")
def test_large_input_value(self, mock_get_chat_result):
large_input = "Test input " * 1000
class MockBaseModel(BaseModel):
field: str = "value"
objects: list[dict] = [{"field": "value"}]
mock_get_chat_result.return_value = MockBaseModel(field="value")
def model_dump(self, **__):
return {"objects": self.objects}
mock_get_chat_result.return_value = MockBaseModel()
component = StructuredOutputComponent(
llm=MockLanguageModel(),
@ -194,9 +223,10 @@ class TestStructuredOutputComponent:
schema_name="LargeInputSchema",
output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
multiple=False,
system_prompt="Test system prompt",
)
result = component.build_structured_output()
assert isinstance(result, Data)
assert result.data == {"field": "value"}
result = component.build_structured_output_base()
assert isinstance(result, list)
assert result == [{"field": "value"}]
mock_get_chat_result.assert_called_once()