feat: update Structured output to handle Dataframe and inbuilt Prompt (#6642)
* Structured Output * update * [autofix.ci] apply automated fixes * updates * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Update structured_output.py * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes * update in Templates and added inline edit to the component table inputs * format fix * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * Update Financial Report Parser.json * Update Portfolio Website Code Generator.json * update as per review * [autofix.ci] apply automated fixes * update to templates * fix breaking change * lint and format error fix * [autofix.ci] apply automated fixes * [autofix.ci] apply automated fixes (attempt 2/3) * [autofix.ci] apply automated fixes * updated file --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
a4355caabe
commit
18df0e8637
4 changed files with 1258 additions and 1476 deletions
|
|
@ -5,8 +5,10 @@ from pydantic import BaseModel, Field, create_model
|
|||
from langflow.base.models.chat_result import get_chat_result
|
||||
from langflow.custom import Component
|
||||
from langflow.helpers.base_model import build_model_from_schema
|
||||
from langflow.io import BoolInput, HandleInput, MessageTextInput, Output, StrInput, TableInput
|
||||
from langflow.io import BoolInput, HandleInput, MessageTextInput, MultilineInput, Output, TableInput
|
||||
from langflow.schema.data import Data
|
||||
from langflow.schema.dataframe import DataFrame
|
||||
from langflow.schema.table import EditMode
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langflow.field_typing.constants import LanguageModel
|
||||
|
|
@ -36,7 +38,24 @@ class StructuredOutputComponent(Component):
|
|||
tool_mode=True,
|
||||
required=True,
|
||||
),
|
||||
StrInput(
|
||||
MultilineInput(
|
||||
name="system_prompt",
|
||||
display_name="Format Instructions",
|
||||
info="The instructions to the language model for formatting the output.",
|
||||
value=(
|
||||
"You are an AI system designed to extract structured information from unstructured text."
|
||||
"Given the input_text, return a JSON object with predefined keys based on the expected structure."
|
||||
"Extract values accurately and format them according to the specified type "
|
||||
"(e.g., string, integer, float, date)."
|
||||
"If a value is missing or cannot be determined, return a default "
|
||||
"(e.g., null, 0, or 'N/A')."
|
||||
"If multiple instances of the expected structure exist within the input_text, "
|
||||
"stream each as a separate JSON object."
|
||||
),
|
||||
required=True,
|
||||
advanced=True,
|
||||
),
|
||||
MessageTextInput(
|
||||
name="schema_name",
|
||||
display_name="Schema Name",
|
||||
info="Provide a name for the output data schema.",
|
||||
|
|
@ -47,6 +66,7 @@ class StructuredOutputComponent(Component):
|
|||
display_name="Output Schema",
|
||||
info="Define the structure and data types for the model's output.",
|
||||
required=True,
|
||||
# TODO: remove default value
|
||||
table_schema=[
|
||||
{
|
||||
"name": "name",
|
||||
|
|
@ -54,6 +74,7 @@ class StructuredOutputComponent(Component):
|
|||
"type": "str",
|
||||
"description": "Specify the name of the output field.",
|
||||
"default": "field",
|
||||
"edit_mode": EditMode.INLINE,
|
||||
},
|
||||
{
|
||||
"name": "description",
|
||||
|
|
@ -61,11 +82,13 @@ class StructuredOutputComponent(Component):
|
|||
"type": "str",
|
||||
"description": "Describe the purpose of the output field.",
|
||||
"default": "description of field",
|
||||
"edit_mode": EditMode.POPOVER,
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"display_name": "Type",
|
||||
"type": "str",
|
||||
"edit_mode": EditMode.INLINE,
|
||||
"description": (
|
||||
"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict)."
|
||||
),
|
||||
|
|
@ -77,6 +100,7 @@ class StructuredOutputComponent(Component):
|
|||
"type": "boolean",
|
||||
"description": "Set to True if this output field should be a list of the specified type.",
|
||||
"default": "False",
|
||||
"edit_mode": EditMode.INLINE,
|
||||
},
|
||||
],
|
||||
value=[{"name": "field", "description": "description of field", "type": "text", "multiple": "False"}],
|
||||
|
|
@ -85,15 +109,17 @@ class StructuredOutputComponent(Component):
|
|||
name="multiple",
|
||||
advanced=True,
|
||||
display_name="Generate Multiple",
|
||||
info="Set to True if the model should generate a list of outputs instead of a single output.",
|
||||
info="[Deprecated] Always set to True",
|
||||
value=True,
|
||||
),
|
||||
]
|
||||
|
||||
outputs = [
|
||||
Output(name="structured_output", display_name="Structured Output", method="build_structured_output"),
|
||||
Output(name="structured_output_dataframe", display_name="DataFrame", method="as_dataframe"),
|
||||
]
|
||||
|
||||
def build_structured_output(self) -> Data:
|
||||
def build_structured_output_base(self) -> Data:
|
||||
schema_name = self.schema_name or "OutputModel"
|
||||
|
||||
if not hasattr(self.llm, "with_structured_output"):
|
||||
|
|
@ -104,13 +130,12 @@ class StructuredOutputComponent(Component):
|
|||
raise ValueError(msg)
|
||||
|
||||
output_model_ = build_model_from_schema(self.output_schema)
|
||||
if self.multiple:
|
||||
output_model = create_model(
|
||||
schema_name,
|
||||
objects=(list[output_model_], Field(description=f"A list of {schema_name}.")), # type: ignore[valid-type]
|
||||
)
|
||||
else:
|
||||
output_model = output_model_
|
||||
|
||||
output_model = create_model(
|
||||
schema_name,
|
||||
objects=(list[output_model_], Field(description=f"A list of {schema_name}.")), # type: ignore[valid-type]
|
||||
)
|
||||
|
||||
try:
|
||||
llm_with_structured_output = cast("LanguageModel", self.llm).with_structured_output(schema=output_model) # type: ignore[valid-type, attr-defined]
|
||||
|
||||
|
|
@ -122,10 +147,25 @@ class StructuredOutputComponent(Component):
|
|||
"project_name": self.get_project_name(),
|
||||
"callbacks": self.get_langchain_callbacks(),
|
||||
}
|
||||
output = get_chat_result(runnable=llm_with_structured_output, input_value=self.input_value, config=config_dict)
|
||||
if isinstance(output, BaseModel):
|
||||
output_dict = output.model_dump()
|
||||
else:
|
||||
msg = f"Output should be a Pydantic BaseModel, got {type(output)} ({output})"
|
||||
raise TypeError(msg)
|
||||
return Data(data=output_dict)
|
||||
result = get_chat_result(
|
||||
runnable=llm_with_structured_output,
|
||||
system_message=self.system_prompt,
|
||||
input_value=self.input_value,
|
||||
config=config_dict,
|
||||
)
|
||||
if isinstance(result, BaseModel):
|
||||
result = result.model_dump()
|
||||
if "objects" in result:
|
||||
return result["objects"]
|
||||
return result
|
||||
|
||||
def build_structured_output(self) -> Data:
|
||||
output = self.build_structured_output_base()
|
||||
|
||||
return Data(results=output)
|
||||
|
||||
def as_dataframe(self) -> DataFrame:
|
||||
output = self.build_structured_output_base()
|
||||
if isinstance(output, list):
|
||||
return DataFrame(data=output)
|
||||
return DataFrame(data=[output])
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -5,18 +5,39 @@ import pytest
|
|||
from langflow.components.helpers.structured_output import StructuredOutputComponent
|
||||
from langflow.helpers.base_model import build_model_from_schema
|
||||
from langflow.inputs.inputs import TableInput
|
||||
from langflow.schema.data import Data
|
||||
from pydantic import BaseModel
|
||||
|
||||
from tests.base import ComponentTestBaseWithoutClient
|
||||
from tests.unit.mock_language_model import MockLanguageModel
|
||||
|
||||
|
||||
class TestStructuredOutputComponent:
|
||||
class TestStructuredOutputComponent(ComponentTestBaseWithoutClient):
|
||||
@pytest.fixture
|
||||
def component_class(self):
|
||||
"""Return the component class to test."""
|
||||
return StructuredOutputComponent
|
||||
|
||||
@pytest.fixture
|
||||
def default_kwargs(self):
|
||||
"""Return the default kwargs for the component."""
|
||||
return {
|
||||
"llm": MockLanguageModel(),
|
||||
"input_value": "Test input",
|
||||
"schema_name": "TestSchema",
|
||||
"output_schema": [{"name": "field", "type": "str", "description": "A test field"}],
|
||||
"multiple": False,
|
||||
"system_prompt": "Test system prompt",
|
||||
}
|
||||
|
||||
@pytest.fixture
|
||||
def file_names_mapping(self):
|
||||
"""Return the file names mapping for version-specific files."""
|
||||
|
||||
def test_successful_structured_output_generation_with_patch_with_config(self):
|
||||
def mock_get_chat_result(runnable, input_value, config): # noqa: ARG001
|
||||
def mock_get_chat_result(runnable, system_message, input_value, config): # noqa: ARG001
|
||||
class MockBaseModel(BaseModel):
|
||||
def model_dump(self, **kwargs): # noqa: ARG002
|
||||
return {"field": "value"}
|
||||
def model_dump(self, **__):
|
||||
return {"objects": [{"field": "value"}]}
|
||||
|
||||
return MockBaseModel()
|
||||
|
||||
|
|
@ -26,12 +47,13 @@ class TestStructuredOutputComponent:
|
|||
schema_name="TestSchema",
|
||||
output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
|
||||
multiple=False,
|
||||
system_prompt="Test system prompt",
|
||||
)
|
||||
|
||||
with patch("langflow.components.helpers.structured_output.get_chat_result", mock_get_chat_result):
|
||||
result = component.build_structured_output()
|
||||
assert isinstance(result, Data)
|
||||
assert result.data == {"field": "value"}
|
||||
result = component.build_structured_output_base()
|
||||
assert isinstance(result, list)
|
||||
assert result == [{"field": "value"}]
|
||||
|
||||
def test_raises_value_error_for_unsupported_language_model(self):
|
||||
# Mocking an incompatible language model
|
||||
|
|
@ -155,10 +177,13 @@ class TestStructuredOutputComponent:
|
|||
child: str = "value"
|
||||
|
||||
class ParentModel(BaseModel):
|
||||
parent: ChildModel = ChildModel()
|
||||
objects: list[dict] = [{"parent": {"child": "value"}}]
|
||||
|
||||
def model_dump(self, **__):
|
||||
return {"objects": self.objects}
|
||||
|
||||
mock_llm = MockLanguageModel()
|
||||
mock_get_chat_result.return_value = ParentModel(parent=ChildModel(child="value"))
|
||||
mock_get_chat_result.return_value = ParentModel()
|
||||
|
||||
component = StructuredOutputComponent(
|
||||
llm=mock_llm,
|
||||
|
|
@ -173,20 +198,24 @@ class TestStructuredOutputComponent:
|
|||
}
|
||||
],
|
||||
multiple=False,
|
||||
system_prompt="Test system prompt",
|
||||
)
|
||||
|
||||
result = component.build_structured_output()
|
||||
assert isinstance(result, Data)
|
||||
assert result.data == {"parent": {"child": "value"}}
|
||||
result = component.build_structured_output_base()
|
||||
assert isinstance(result, list)
|
||||
assert result == [{"parent": {"child": "value"}}]
|
||||
|
||||
@patch("langflow.components.helpers.structured_output.get_chat_result")
|
||||
def test_large_input_value(self, mock_get_chat_result):
|
||||
large_input = "Test input " * 1000
|
||||
|
||||
class MockBaseModel(BaseModel):
|
||||
field: str = "value"
|
||||
objects: list[dict] = [{"field": "value"}]
|
||||
|
||||
mock_get_chat_result.return_value = MockBaseModel(field="value")
|
||||
def model_dump(self, **__):
|
||||
return {"objects": self.objects}
|
||||
|
||||
mock_get_chat_result.return_value = MockBaseModel()
|
||||
|
||||
component = StructuredOutputComponent(
|
||||
llm=MockLanguageModel(),
|
||||
|
|
@ -194,9 +223,10 @@ class TestStructuredOutputComponent:
|
|||
schema_name="LargeInputSchema",
|
||||
output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
|
||||
multiple=False,
|
||||
system_prompt="Test system prompt",
|
||||
)
|
||||
|
||||
result = component.build_structured_output()
|
||||
assert isinstance(result, Data)
|
||||
assert result.data == {"field": "value"}
|
||||
result = component.build_structured_output_base()
|
||||
assert isinstance(result, list)
|
||||
assert result == [{"field": "value"}]
|
||||
mock_get_chat_result.assert_called_once()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue