diff --git a/src/backend/base/langflow/components/processing/structured_output.py b/src/backend/base/langflow/components/processing/structured_output.py index 27112cc52..9c3bc73fd 100644 --- a/src/backend/base/langflow/components/processing/structured_output.py +++ b/src/backend/base/langflow/components/processing/structured_output.py @@ -41,14 +41,13 @@ class StructuredOutputComponent(Component): display_name="Format Instructions", info="The instructions to the language model for formatting the output.", value=( - "You are an AI system designed to extract structured information from unstructured text." - "Given the input_text, return a JSON object with predefined keys based on the expected structure." - "Extract values accurately and format them according to the specified type " - "(e.g., string, integer, float, date)." - "If a value is missing or cannot be determined, return a default " - "(e.g., null, 0, or 'N/A')." - "If multiple instances of the expected structure exist within the input_text, " - "stream each as a separate JSON object." + "You are an AI that extracts one structured JSON object from unstructured text. " + "Use a predefined schema with expected types (str, int, float, bool, dict). " + "If multiple structures exist, extract only the first most complete one. " + "Fill missing or ambiguous values with defaults: null for missing values. " + "Ignore duplicates and partial repeats. " + "Always return one valid JSON, never throw errors or return multiple objects. " + "Output: A single well-formed JSON object, and nothing else." 
), required=True, advanced=True, diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Financial Report Parser.json b/src/backend/base/langflow/initial_setup/starter_projects/Financial Report Parser.json index 1db1b4d83..ac92da745 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Financial Report Parser.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Financial Report Parser.json @@ -1254,7 +1254,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of 
field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n 
system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts one structured JSON object from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"If multiple structures exist, extract only the first most complete one. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Ignore duplicates and partial repeats. 
\"\n \"Always return one valid JSON, never throw errors or return multiple objects.\"\n \"Output: A single well-formed JSON object, and nothing else.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, 
\"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if 
len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" }, "input_value": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json index c53da20a9..c43a639f8 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json @@ -609,7 +609,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of 
field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n 
system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts one structured JSON object from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"If multiple structures exist, extract only the first most complete one. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Ignore duplicates and partial repeats. 
\"\n \"Always return one valid JSON, never throw errors or return multiple objects.\"\n \"Output: A single well-formed JSON object, and nothing else.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, 
\"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if 
len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" }, "input_value": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Image Sentiment Analysis.json b/src/backend/base/langflow/initial_setup/starter_projects/Image Sentiment Analysis.json index 97bd390a0..940d7efda 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Image Sentiment Analysis.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Image Sentiment Analysis.json @@ -1009,7 +1009,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of 
field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n 
system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts one structured JSON object from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"If multiple structures exist, extract only the first most complete one. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Ignore duplicates and partial repeats. 
\"\n \"Always return one valid JSON, never throw errors or return multiple objects.\"\n \"Output: A single well-formed JSON object, and nothing else.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, 
\"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if 
len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" }, "input_value": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json b/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json index 3626710f3..c60372e3e 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json @@ -879,7 +879,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of 
field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n 
system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts one structured JSON object from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"If multiple structures exist, extract only the first most complete one. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Ignore duplicates and partial repeats. 
\"\n \"Always return one valid JSON, never throw errors or return multiple objects.\"\n \"Output: A single well-formed JSON object, and nothing else.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, 
\"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if 
len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" }, "input_value": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json index f0bfc7e45..924256e94 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json @@ -1509,7 +1509,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of 
field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n 
system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. 
Ideal for extraction and consistency.\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts one structured JSON object from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"If multiple structures exist, extract only the first most complete one. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Ignore duplicates and partial repeats. 
\"\n \"Always return one valid JSON, never throw errors or return multiple objects.\"\n \"Output: A single well-formed JSON object, and nothing else.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, 
\"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if 
len(output) != 1:\n msg = \"Multiple structured outputs returned\"\n raise ValueError(msg)\n return Data(data=output[0])\n" }, "input_value": { "_input_type": "MessageTextInput",