diff --git a/.github/workflows/typescript_test.yml b/.github/workflows/typescript_test.yml index ff844e9ef..68d82d9ce 100644 --- a/.github/workflows/typescript_test.yml +++ b/.github/workflows/typescript_test.yml @@ -155,7 +155,7 @@ jobs: fi echo "Fixed suites: $SUITES" fi - + TAGS=() if echo "$SUITES" | jq -e 'contains(["components"])' > /dev/null; then TAGS+=("@components") @@ -312,11 +312,11 @@ jobs: command: | cd src/frontend echo 'Running tests with pattern: ${{ needs.determine-test-suite.outputs.test_grep }}' - npx playwright test ${{ inputs.tests_folder }} ${{ needs.determine-test-suite.outputs.test_grep }} --shard ${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --list + npx playwright test ${{ inputs.tests_folder }} ${{ needs.determine-test-suite.outputs.test_grep }} --shard ${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --list --retries=3 # echo command before running - echo "npx playwright test ${{ inputs.tests_folder }} ${{ needs.determine-test-suite.outputs.test_grep }} --trace on --shard ${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --workers 2" + echo "npx playwright test ${{ inputs.tests_folder }} ${{ needs.determine-test-suite.outputs.test_grep }} --trace on --shard ${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --workers 2 --retries=3" - npx playwright test ${{ inputs.tests_folder }} ${{ needs.determine-test-suite.outputs.test_grep }} --trace on --shard ${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --workers 2 + npx playwright test ${{ inputs.tests_folder }} ${{ needs.determine-test-suite.outputs.test_grep }} --trace on --shard ${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --workers 2 --retries=3 - name: Upload Test Results if: always() diff --git a/src/backend/base/langflow/components/helpers/structured_output.py b/src/backend/base/langflow/components/helpers/structured_output.py index 36301d1fe..80adf624e 100644 --- a/src/backend/base/langflow/components/helpers/structured_output.py +++ b/src/backend/base/langflow/components/helpers/structured_output.py @@ -1,6 +1,5 @@ -from typing import TYPE_CHECKING, cast - from pydantic import BaseModel, Field, create_model +from trustcall import create_extractor from langflow.base.models.chat_result import get_chat_result from langflow.custom import Component @@ -17,9 +16,6 @@ from langflow.schema.data import Data from langflow.schema.dataframe import DataFrame from langflow.schema.table import EditMode -if TYPE_CHECKING: - from langflow.field_typing.constants import LanguageModel - class StructuredOutputComponent(Component): display_name = "Structured Output" @@ -156,12 +152,12 @@ class StructuredOutputComponent(Component): output_model = create_model( schema_name, + __doc__=f"A list of {schema_name}.", objects=(list[output_model_], Field(description=f"A list of {schema_name}.")), # type: ignore[valid-type] ) try: - llm_with_structured_output = cast("LanguageModel", self.llm).with_structured_output(schema=output_model) # type: ignore[valid-type, attr-defined] - + llm_with_structured_output = create_extractor(self.llm, tools=[output_model]) except NotImplementedError as exc: msg = f"{self.llm.__class__.__name__} does not support structured output." raise TypeError(msg) from exc @@ -178,14 +174,17 @@ class StructuredOutputComponent(Component): ) if isinstance(result, BaseModel): result = result.model_dump() - if "objects" in result: + if responses := result.get("responses"): + result = responses[0].model_dump() + if result and "objects" in result: return result["objects"] + return result def build_structured_output(self) -> Data: output = self.build_structured_output_base() - return Data(results=output) + return Data(text_key="results", data={"results": output}) def as_dataframe(self) -> DataFrame: output = self.build_structured_output_base() diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Financial Report Parser.json b/src/backend/base/langflow/initial_setup/starter_projects/Financial Report Parser.json index 22ca87e52..243627f00 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Financial Report Parser.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Financial Report Parser.json @@ -1251,7 +1251,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import TYPE_CHECKING, cast\n\nfrom pydantic import BaseModel, Field, create_model\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\nif TYPE_CHECKING:\n from langflow.field_typing.constants import LanguageModel\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = (\n \"Transforms LLM responses into **structured data formats**. Ideal for extracting specific information \"\n \"or creating consistent outputs.\"\n )\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\n \"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict).\"\n ),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"list\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"Multiple\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n BoolInput(\n name=\"multiple\",\n advanced=True,\n display_name=\"Generate Multiple\",\n info=\"[Deplrecated] Always set to True\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"structured_output_dataframe\",\n display_name=\"DataFrame\",\n method=\"as_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self) -> Data:\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = cast(\"LanguageModel\", self.llm).with_structured_output(schema=output_model) # type: ignore[valid-type, attr-defined]\n\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n if isinstance(result, BaseModel):\n result = result.model_dump()\n if \"objects\" in result:\n return result[\"objects\"]\n return result\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n\n return Data(results=output)\n\n def as_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if isinstance(output, list):\n return DataFrame(data=output)\n return DataFrame(data=[output])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = (\n \"Transforms LLM responses into **structured data formats**. Ideal for extracting specific information \"\n \"or creating consistent outputs.\"\n )\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\n \"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict).\"\n ),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"list\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"Multiple\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n BoolInput(\n name=\"multiple\",\n advanced=True,\n display_name=\"Generate Multiple\",\n info=\"[Deplrecated] Always set to True\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"structured_output_dataframe\",\n display_name=\"DataFrame\",\n method=\"as_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self) -> Data:\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n if isinstance(result, BaseModel):\n result = result.model_dump()\n if responses := result.get(\"responses\"):\n result = responses[0].model_dump()\n if result and \"objects\" in result:\n return result[\"objects\"]\n\n return result\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n\n return Data(text_key=\"results\", data={\"results\": output})\n\n def as_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if isinstance(output, list):\n return DataFrame(data=output)\n return DataFrame(data=[output])\n" }, "input_value": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Image Sentiment Analysis.json b/src/backend/base/langflow/initial_setup/starter_projects/Image Sentiment Analysis.json index c1b5a7d85..38914d9b6 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Image Sentiment Analysis.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Image Sentiment Analysis.json @@ -1406,7 +1406,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import TYPE_CHECKING, cast\n\nfrom pydantic import BaseModel, Field, create_model\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\nif TYPE_CHECKING:\n from langflow.field_typing.constants import LanguageModel\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = (\n \"Transforms LLM responses into **structured data formats**. Ideal for extracting specific information \"\n \"or creating consistent outputs.\"\n )\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\n \"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict).\"\n ),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"list\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"Multiple\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n BoolInput(\n name=\"multiple\",\n advanced=True,\n display_name=\"Generate Multiple\",\n info=\"[Deplrecated] Always set to True\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"structured_output_dataframe\",\n display_name=\"DataFrame\",\n method=\"as_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self) -> Data:\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = cast(\"LanguageModel\", self.llm).with_structured_output(schema=output_model) # type: ignore[valid-type, attr-defined]\n\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n if isinstance(result, BaseModel):\n result = result.model_dump()\n if \"objects\" in result:\n return result[\"objects\"]\n return result\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n\n return Data(results=output)\n\n def as_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if isinstance(output, list):\n return DataFrame(data=output)\n return DataFrame(data=[output])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = (\n \"Transforms LLM responses into **structured data formats**. Ideal for extracting specific information \"\n \"or creating consistent outputs.\"\n )\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\n \"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict).\"\n ),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"list\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"Multiple\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n BoolInput(\n name=\"multiple\",\n advanced=True,\n display_name=\"Generate Multiple\",\n info=\"[Deplrecated] Always set to True\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"structured_output_dataframe\",\n display_name=\"DataFrame\",\n method=\"as_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self) -> Data:\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n if isinstance(result, BaseModel):\n result = result.model_dump()\n if responses := result.get(\"responses\"):\n result = responses[0].model_dump()\n if result and \"objects\" in result:\n return result[\"objects\"]\n\n return result\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n\n return Data(text_key=\"results\", data={\"results\": output})\n\n def as_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if isinstance(output, list):\n return DataFrame(data=output)\n return DataFrame(data=[output])\n" }, "input_value": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json b/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json index 10ad5eea1..3395017c9 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Market Research.json @@ -885,7 +885,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import TYPE_CHECKING, cast\n\nfrom pydantic import BaseModel, Field, create_model\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\nif TYPE_CHECKING:\n from langflow.field_typing.constants import LanguageModel\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = (\n \"Transforms LLM responses into **structured data formats**. Ideal for extracting specific information \"\n \"or creating consistent outputs.\"\n )\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\n \"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict).\"\n ),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"list\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"Multiple\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n BoolInput(\n name=\"multiple\",\n advanced=True,\n display_name=\"Generate Multiple\",\n info=\"[Deplrecated] Always set to True\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"structured_output_dataframe\",\n display_name=\"DataFrame\",\n method=\"as_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self) -> Data:\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = cast(\"LanguageModel\", self.llm).with_structured_output(schema=output_model) # type: ignore[valid-type, attr-defined]\n\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n if isinstance(result, BaseModel):\n result = result.model_dump()\n if \"objects\" in result:\n return result[\"objects\"]\n return result\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n\n return Data(results=output)\n\n def as_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if isinstance(output, list):\n return DataFrame(data=output)\n return DataFrame(data=[output])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = (\n \"Transforms LLM responses into **structured data formats**. Ideal for extracting specific information \"\n \"or creating consistent outputs.\"\n )\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\n \"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict).\"\n ),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"list\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"Multiple\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n BoolInput(\n name=\"multiple\",\n advanced=True,\n display_name=\"Generate Multiple\",\n info=\"[Deplrecated] Always set to True\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"structured_output_dataframe\",\n display_name=\"DataFrame\",\n method=\"as_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self) -> Data:\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n if isinstance(result, BaseModel):\n result = result.model_dump()\n if responses := result.get(\"responses\"):\n result = responses[0].model_dump()\n if result and \"objects\" in result:\n return result[\"objects\"]\n\n return result\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n\n return Data(text_key=\"results\", data={\"results\": output})\n\n def as_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if isinstance(output, list):\n return DataFrame(data=output)\n return DataFrame(data=[output])\n" }, "input_value": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json index 4bf71ced4..039613d91 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json @@ -1513,7 +1513,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import TYPE_CHECKING, cast\n\nfrom pydantic import BaseModel, Field, create_model\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\nif TYPE_CHECKING:\n from langflow.field_typing.constants import LanguageModel\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = (\n \"Transforms LLM responses into **structured data formats**. Ideal for extracting specific information \"\n \"or creating consistent outputs.\"\n )\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\n \"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict).\"\n ),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"list\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"Multiple\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n BoolInput(\n name=\"multiple\",\n advanced=True,\n display_name=\"Generate Multiple\",\n info=\"[Deplrecated] Always set to True\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"structured_output_dataframe\",\n display_name=\"DataFrame\",\n method=\"as_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self) -> Data:\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = cast(\"LanguageModel\", self.llm).with_structured_output(schema=output_model) # type: ignore[valid-type, attr-defined]\n\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n if isinstance(result, BaseModel):\n result = result.model_dump()\n if \"objects\" in result:\n return result[\"objects\"]\n return result\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n\n return Data(results=output)\n\n def as_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if isinstance(output, list):\n return DataFrame(data=output)\n return DataFrame(data=[output])\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = (\n \"Transforms LLM responses into **structured data formats**. Ideal for extracting specific information \"\n \"or creating consistent outputs.\"\n )\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI system designed to extract structured information from unstructured text.\"\n \"Given the input_text, return a JSON object with predefined keys based on the expected structure.\"\n \"Extract values accurately and format them according to the specified type \"\n \"(e.g., string, integer, float, date).\"\n \"If a value is missing or cannot be determined, return a default \"\n \"(e.g., null, 0, or 'N/A').\"\n \"If multiple instances of the expected structure exist within the input_text, \"\n \"stream each as a separate JSON object.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\n \"Indicate the data type of the output field (e.g., str, int, float, bool, list, dict).\"\n ),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"list\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"Multiple\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n BoolInput(\n name=\"multiple\",\n advanced=True,\n display_name=\"Generate Multiple\",\n info=\"[Deplrecated] Always set to True\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"structured_output_dataframe\",\n display_name=\"DataFrame\",\n method=\"as_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self) -> Data:\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n if isinstance(result, BaseModel):\n result = result.model_dump()\n if responses := result.get(\"responses\"):\n result = responses[0].model_dump()\n if result and \"objects\" in result:\n return result[\"objects\"]\n\n return result\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n\n return Data(text_key=\"results\", data={\"results\": output})\n\n def as_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if isinstance(output, list):\n return DataFrame(data=output)\n return DataFrame(data=[output])\n" }, "input_value": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/pyproject.toml b/src/backend/base/pyproject.toml index 0f66556ce..77eecce64 100644 --- a/src/backend/base/pyproject.toml +++ b/src/backend/base/pyproject.toml @@ -88,6 +88,7 @@ dependencies = [ "scipy>=1.15.2", "ibm-watsonx-ai>=1.3.1", "langchain-ibm>=0.3.8", + "trustcall>=0.0.38", ] [dependency-groups] diff --git a/src/backend/tests/unit/components/helpers/test_structured_output_component.py b/src/backend/tests/unit/components/helpers/test_structured_output_component.py index 8e1d1c32d..b93535c71 100644 --- a/src/backend/tests/unit/components/helpers/test_structured_output_component.py +++ b/src/backend/tests/unit/components/helpers/test_structured_output_component.py @@ -1,7 +1,11 @@ +import os import re from unittest.mock import patch +import openai import pytest +from langchain_nvidia_ai_endpoints import ChatNVIDIA +from langchain_openai import ChatOpenAI from langflow.components.helpers.structured_output import StructuredOutputComponent from langflow.helpers.base_model import build_model_from_schema from langflow.inputs.inputs import TableInput @@ -230,3 +234,202 @@ class TestStructuredOutputComponent(ComponentTestBaseWithoutClient): assert isinstance(result, list) assert result == [{"field": "value"}] mock_get_chat_result.assert_called_once() + + @pytest.mark.skipif( + "OPENAI_API_KEY" not in os.environ, + reason="OPENAI_API_KEY environment variable not set", + ) + def test_with_real_openai_model_simple_schema(self): + # Create a real OpenAI model + llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) + + # Create a component with a simple schema + component = StructuredOutputComponent( + llm=llm, + input_value="Extract the name and age from this text: John Doe is 30 years old.", + schema_name="PersonInfo", + output_schema=[ + {"name": "name", "type": "str", "description": "The person's name"}, + {"name": "age", "type": "int", "description": "The person's age"}, + ], + multiple=False, + system_prompt="Extract structured information from the input text.", + ) + + # Get the structured output + result = component.build_structured_output_base() + + # Verify the result + assert isinstance(result, list) + assert len(result) > 0 + assert "name" in result[0] + assert "age" in result[0] + assert result[0]["name"] == "John Doe" + assert result[0]["age"] == 30 + + @pytest.mark.skipif( + "OPENAI_API_KEY" not in os.environ, + reason="OPENAI_API_KEY environment variable not set", + ) + def test_with_real_openai_model_simple_schema_fail(self): + # Create a real OpenAI model with very low max_tokens to force truncation + llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=1) + + # Create a component with a simple schema + component = StructuredOutputComponent( + llm=llm, + input_value="Extract the name and age from this text: John Doe is 30 years old.", + schema_name="PersonInfo", + output_schema=[ + {"name": "name", "type": "str", "description": "The person's name"}, + {"name": "age", "type": "int", "description": "The person's age"}, + ], + multiple=False, + system_prompt="Extract structured information from the input text.", + ) + + # Expect BadRequestError due to max_tokens being reached + with pytest.raises(openai.BadRequestError) as exc_info: + component.build_structured_output_base() + + # Verify the error message contains expected content + assert "max_tokens was reached" in str(exc_info.value) + + @pytest.mark.skipif( + "OPENAI_API_KEY" not in os.environ, + reason="OPENAI_API_KEY environment variable not set", + ) + def test_with_real_openai_model_complex_schema(self): + from langchain_openai import ChatOpenAI + + # Create a real OpenAI model + llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) + + # Create a component with a more complex schema + component = StructuredOutputComponent( + llm=llm, + input_value=""" + Product Review: + I purchased the XYZ Wireless Headphones last month. The sound quality is excellent, + and the battery lasts about 8 hours. However, they're a bit uncomfortable after + wearing them for a long time. The price was $129.99, which I think is reasonable + for the quality. Overall rating: 4/5. + """, + schema_name="ProductReview", + output_schema=[ + {"name": "product_name", "type": "str", "description": "The name of the product"}, + {"name": "sound_quality", "type": "str", "description": "Description of sound quality"}, + {"name": "comfort", "type": "str", "description": "Description of comfort"}, + {"name": "battery_life", "type": "str", "description": "Description of battery life"}, + {"name": "price", "type": "float", "description": "The price of the product"}, + {"name": "rating", "type": "float", "description": "The overall rating out of 5"}, + ], + multiple=False, + system_prompt="Extract detailed product review information from the input text.", + ) + + # Get the structured output + result = component.build_structured_output_base() + + # Verify the result + assert isinstance(result, list) + assert len(result) > 0 + assert "product_name" in result[0] + assert "sound_quality" in result[0] + assert "comfort" in result[0] + assert "battery_life" in result[0] + assert "price" in result[0] + assert "rating" in result[0] + assert result[0]["product_name"] == "XYZ Wireless Headphones" + assert result[0]["price"] == 129.99 + assert result[0]["rating"] == 4.0 + + @pytest.mark.skipif( + "OPENAI_API_KEY" not in os.environ, + reason="OPENAI_API_KEY environment variable not set", + ) + def test_with_real_openai_model_nested_schema(self): + from langchain_openai import ChatOpenAI + + # Create a real OpenAI model + llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) + + # Create a component with a flattened schema (no nested structures) + component = StructuredOutputComponent( + llm=llm, + input_value=""" + Restaurant: Bella Italia + Address: 123 Main St, Anytown, CA 12345 + Visited: June 15, 2023 + + Ordered: + - Margherita Pizza ($14.99) - Delicious with fresh basil + - Tiramisu ($8.50) - Perfect sweetness + + Service was excellent, atmosphere was cozy. + Total bill: $35.49 including tip. + Would definitely visit again! + """, + schema_name="RestaurantReview", + output_schema=[ + {"name": "restaurant_name", "type": "str", "description": "The name of the restaurant"}, + {"name": "street", "type": "str", "description": "Street address"}, + {"name": "city", "type": "str", "description": "City"}, + {"name": "state", "type": "str", "description": "State"}, + {"name": "zip", "type": "str", "description": "ZIP code"}, + {"name": "first_item_name", "type": "str", "description": "Name of first item ordered"}, + {"name": "first_item_price", "type": "float", "description": "Price of first item"}, + {"name": "second_item_name", "type": "str", "description": "Name of second item ordered"}, + {"name": "second_item_price", "type": "float", "description": "Price of second item"}, + {"name": "total_bill", "type": "float", "description": "Total bill amount"}, + {"name": "would_return", "type": "bool", "description": "Whether the reviewer would return"}, + ], + multiple=False, + system_prompt="Extract detailed restaurant review information from the input text.", + ) + + # Get the structured output + result = component.build_structured_output_base() + + # Verify the result + assert isinstance(result, list) + assert len(result) > 0 + assert "restaurant_name" in result[0] + assert "street" in result[0] + assert "city" in result[0] + assert "state" in result[0] + assert "zip" in result[0] + assert "first_item_name" in result[0] + assert "first_item_price" in result[0] + assert "total_bill" in result[0] + assert "would_return" in result[0] + + assert result[0]["restaurant_name"] == "Bella Italia" + assert result[0]["street"] == "123 Main St" + assert result[0]["total_bill"] == 35.49 + assert result[0]["would_return"] is True + + @pytest.mark.skipif( + "NVIDIA_API_KEY" not in os.environ, + reason="NVIDIA_API_KEY environment variable not set", + ) + def test_with_real_nvidia_model_simple_schema(self): + # Create a real NVIDIA model + llm = ChatNVIDIA(model="meta/llama-3.2-3b-instruct", temperature=0, max_tokens=10) + + # Create a component with a simple schema + component = StructuredOutputComponent( + llm=llm, + input_value="Extract the name and age from this text: John Doe is 30 years old.", + schema_name="PersonInfo", + output_schema=[ + {"name": "name", "type": "str", "description": "The person's name"}, + {"name": "age", "type": "int", "description": "The person's age"}, + ], + multiple=False, + system_prompt="Extract structured information from the input text.", + ) + + # The test is expected to fail with a 400 Bad Request error + with pytest.raises(Exception, match="400 Bad Request"): + component.build_structured_output_base() diff --git a/src/backend/tests/unit/mock_language_model.py b/src/backend/tests/unit/mock_language_model.py index 186d127cf..70192c654 100644 --- a/src/backend/tests/unit/mock_language_model.py +++ b/src/backend/tests/unit/mock_language_model.py @@ -66,7 +66,7 @@ class MockLanguageModel(BaseLanguageModel, BaseModel): async def apredict_messages(self, *args, **kwargs): raise NotImplementedError - def bind_tools(self, tools): + def bind_tools(self, tools, tool_choice=None): # noqa: ARG002 """Bind tools to the model for testing.""" self.tools = tools return self diff --git a/src/frontend/tests/assets/resume.txt b/src/frontend/tests/assets/resume.txt new file mode 100644 index 000000000..37682e0af --- /dev/null +++ b/src/frontend/tests/assets/resume.txt @@ -0,0 +1,44 @@ +John Smith +Software Engineer + +San Francisco, California, USA +Email: john.smith@example.com +Phone: (555) 123-4567 +LinkedIn: www.linkedin.com/in/johnsmith +GitHub: github.com/johnsmith +Portfolio: www.johnsmith.dev +Visa Status: US Citizen + +Summary +Experienced software engineer with 5+ years specializing in distributed systems and machine learning applications. Currently pursuing PhD in Computer Science while seeking flexible opportunities to apply industry expertise. + +Experience +Stanford University +Doctoral Researcher, Distributed Systems +September 2023 - Present + +TechCorp Inc. +Senior Software Engineer +June 2020 - August 2023 + +DataSystems LLC +Software Engineer +March 2018 - May 2020 + +Education +Stanford University +Ph.D. in Computer Science (Distributed Systems) +2023 - Present + +University of California, Berkeley +M.S. in Computer Science +2016 - 2018 + +Skills +Python, Rust, Go, JavaScript, TensorFlow, PyTorch, AWS, GCP, Docker, Kubernetes, PostgreSQL, MongoDB + +Projects +- Distributed Training Framework: Scalable system for training large language models +- Cloud Resource Optimizer: Reduced infrastructure costs by 30% through intelligent resource allocation +- Real-time Analytics Dashboard: Built visualization platform processing 1M+ events daily + diff --git a/src/frontend/tests/core/integrations/Portfolio Website Code Generator.spec.ts b/src/frontend/tests/core/integrations/Portfolio Website Code Generator.spec.ts index c9b8c7f07..eab90afa4 100644 --- a/src/frontend/tests/core/integrations/Portfolio Website Code Generator.spec.ts +++ b/src/frontend/tests/core/integrations/Portfolio Website Code Generator.spec.ts @@ -46,7 +46,7 @@ withEventDeliveryModes( .first() .fill(process.env.ANTHROPIC_API_KEY ?? ""); - await uploadFile(page, "test_file.txt"); + await uploadFile(page, "resume.txt"); await page.getByTestId("playground-btn-flow-io").click(); diff --git a/uv.lock b/uv.lock index 74178492f..01f35f4cf 100644 --- a/uv.lock +++ b/uv.lock @@ -1996,6 +1996,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4c/a3/ac312faeceffd2d8f86bc6dcb5c401188ba5a01bc88e69bed97578a0dfcd/durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38", size = 3461 }, ] +[[package]] +name = "dydantic" +version = "0.0.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/c5/2d097e5a4816b15186c1ae06c5cfe3c332e69a0f3556dc6cee2d370acf2a/dydantic-0.0.8.tar.gz", hash = "sha256:14a31d4cdfce314ce3e69e8f8c7c46cbc26ce3ce4485de0832260386c612942f", size = 8115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/7c/a1b120141a300853d82291faf0ba1a95133fa390e4b7d773647b69c8c0f4/dydantic-0.0.8-py3-none-any.whl", hash = "sha256:cd0a991f523bd8632699872f1c0c4278415dd04783e36adec5428defa0afb721", size = 8637 }, +] + [[package]] name = "e2b" version = "1.1.0" @@ -4931,6 +4943,7 @@ dependencies = [ { name = "spider-client" }, { name = "sqlalchemy", extra = ["aiosqlite"] }, { name = "sqlmodel" }, + { name = "trustcall" }, { name = "typer" }, { name = "uncurl" }, { name = "uvicorn" }, @@ -5081,6 +5094,7 @@ requires-dist = [ { name = "sqlalchemy", extras = ["postgresql-psycopg"], marker = "extra == 'postgresql'" }, { name = "sqlalchemy", extras = ["postgresql-psycopg2binary"], marker = "extra == 'postgresql'" }, { name = "sqlmodel", specifier = "==0.0.22" }, + { name = "trustcall", specifier = ">=0.0.38" }, { name = "typer", specifier = ">=0.13.0,<1.0.0" }, { name = "uncurl", specifier = ">=0.0.11,<1.0.0" }, { name = "uvicorn", specifier = ">=0.30.0,<1.0.0" }, @@ -5146,6 +5160,61 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0f/b4/aca347143c978ee92b3ff19edb681d3484076a4d1f16ce98032927acbb02/langfuse-2.53.9-py3-none-any.whl", hash = "sha256:04363bc323f7513621c88a997003f7b906ae8f5d096bd54221cfcb6bf7a6f16a", size = 222025 }, ] +[[package]] +name = "langgraph" +version = "0.3.19" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, + { name = "langgraph-prebuilt" }, + { name = "langgraph-sdk" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3c/4f/f9d4822bd0ab288de64b5c61d59ac50ffcfb5232acaf3749ce70e7b6911b/langgraph-0.3.19.tar.gz", hash = "sha256:b5e013e6723d10910fc2677d436c82a3e86f2874864b8498eb7018359f40c148", size = 116507 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/bc/a6aec4793df4f8033bdef364488fdbbb1355a0d666ec3764b06aeeb4d4fd/langgraph-0.3.19-py3-none-any.whl", hash = "sha256:d3cdece5bd8055a68eb76c327554cae2ccd7e5ecd6aa2e8ed26bd466b40059d3", size = 137924 }, +] + +[[package]] +name = "langgraph-checkpoint" +version = "2.0.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "msgpack" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/40/6919e8b4f773e0a94df882f99e1778b1d6be82f5fc6547adaa514201ab08/langgraph_checkpoint-2.0.21.tar.gz", hash = "sha256:52beeb6dc1bd8c487b8315466cab271093b65eb97f54a0942dfe105cd20b237f", size = 36560 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/84/a7ffbac796aea76f8536f72f640d4be4e006af4172ec08f14e125c90bd06/langgraph_checkpoint-2.0.21-py3-none-any.whl", hash = "sha256:ca89c2090cd9729f83f9782226935dc5ff9fe7756c24936f484ccb0ce367f87b", size = 41247 }, +] + +[[package]] +name = "langgraph-prebuilt" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/0b/487fbf469387216d17d2cdcc82952ddd56649ca15953ea4e13b01186976e/langgraph_prebuilt-0.1.4.tar.gz", hash = "sha256:61a5543c6d1be3d54bf53147763b4510d6ab2989347a16d1e9c366ef4dbcf0d8", size = 23409 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/bd/a1d898d5e0e3c1e01e4f1a2ee56d4ae03ff7f1d3bbc2b40831ebd8e0d24c/langgraph_prebuilt-0.1.4-py3-none-any.whl", hash = "sha256:23110997d2747cfb7ca2649ca78c6fc950a6646a5c96b7e9d8b7d19221f896a1", size = 24793 }, +] + +[[package]] +name = "langgraph-sdk" +version = "0.1.58" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "orjson" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/86/34/d924fbb53da5d6d9448f7ee3d38dee5d213a23eaba7b956cb320b0e32172/langgraph_sdk-0.1.58.tar.gz", hash = "sha256:ef8b0e4c08af8c7efd3919497879c87a3627806b51e4ba5e8b06e0717e3d44cd", size = 43438 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/b3/20c2e01ac338daca2bef4a937a93b914e51e8afc79b5537c337abc5b11e4/langgraph_sdk-0.1.58-py3-none-any.whl", hash = "sha256:65f88cf5582da0c316714dc475126fa03c5f74d72bc0b9221dd42649de8e23d4", size = 46504 }, +] + [[package]] name = "langsmith" version = "0.1.147" @@ -9893,6 +9962,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/7b/7757205dee3628f75e7991021d15cd1bd0c9b044ca9affe99b50879fc0e1/triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb", size = 209464695 }, ] +[[package]] +name = "trustcall" +version = "0.0.38" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dydantic" }, + { name = "jsonpatch" }, + { name = "langgraph" }, + { name = "langgraph-prebuilt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/aa/b9/6e3ecf9617ab778e2ca3fb1ed1dfb6a739d8ca6d8d9ce09c34a080bd6ab5/trustcall-0.0.38.tar.gz", hash = "sha256:318d451737d88188254c468ae813d16b7c7b1d19da17d402a52629a0198f4646", size = 35973 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/1e/36dafe25a5edbfab01f92561e99da14c42d4575d5a628369137098af149f/trustcall-0.0.38-py3-none-any.whl", hash = "sha256:90d5441f792059a6d5a08f90d306818363be7aa0f096002e30cfbcceee706351", size = 26554 }, +] + [[package]] name = "typer" version = "0.15.2"