diff --git a/pyproject.toml b/pyproject.toml index 674f8f510..929187ae1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -186,6 +186,7 @@ dev-dependencies = [ "blockbuster>=1.5.8,<1.6", "types-aiofiles>=24.1.0.20240626", "codeflash>=0.8.4", + "hypothesis>=6.123.17", ] diff --git a/src/backend/base/langflow/api/v1/schemas.py b/src/backend/base/langflow/api/v1/schemas.py index 20ab9c668..96b21e3e3 100644 --- a/src/backend/base/langflow/api/v1/schemas.py +++ b/src/backend/base/langflow/api/v1/schemas.py @@ -1,20 +1,13 @@ from datetime import datetime, timezone +from decimal import Decimal from enum import Enum from pathlib import Path from typing import Any from uuid import UUID -from pydantic import ( - BaseModel, - ConfigDict, - Field, - field_serializer, - field_validator, - model_serializer, -) +from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator, model_serializer from langflow.graph.schema import RunOutputs -from langflow.graph.utils import serialize_field from langflow.schema import dotdict from langflow.schema.graph import Tweaks from langflow.schema.schema import InputType, OutputType, OutputValue @@ -24,6 +17,7 @@ from langflow.services.database.models.flow import FlowCreate, FlowRead from langflow.services.database.models.user import UserRead from langflow.services.settings.feature_flags import FeatureFlags from langflow.services.tracing.schema import Log +from langflow.utils.constants import MAX_TEXT_LENGTH from langflow.utils.util_strings import truncate_long_strings @@ -275,9 +269,65 @@ class ResultDataResponse(BaseModel): @field_serializer("results") @classmethod def serialize_results(cls, v): + """Serialize results with custom handling for special types and truncation.""" if isinstance(v, dict): - return {key: serialize_field(val) for key, val in v.items()} - return serialize_field(v) + return {key: cls._serialize_and_truncate(val, max_length=MAX_TEXT_LENGTH) for key, val in v.items()} + return cls._serialize_and_truncate(v, max_length=MAX_TEXT_LENGTH) + + @staticmethod + def _serialize_and_truncate(obj: Any, max_length: int = MAX_TEXT_LENGTH) -> Any: + """Helper method to serialize and truncate values.""" + if isinstance(obj, bytes): + obj = obj.decode("utf-8", errors="ignore") + if len(obj) > max_length: + return f"{obj[:max_length]}... [truncated]" + return obj + if isinstance(obj, str): + if len(obj) > max_length: + return f"{obj[:max_length]}... [truncated]" + return obj + if isinstance(obj, datetime): + return obj.astimezone().isoformat() + if isinstance(obj, Decimal): + return float(obj) + if isinstance(obj, UUID): + return str(obj) + if isinstance(obj, OutputValue | Log): + # First serialize the model + serialized = obj.model_dump() + # Then recursively truncate all values in the serialized dict + for key, value in serialized.items(): + # Handle string values directly to ensure proper truncation + if isinstance(value, str) and len(value) > max_length: + serialized[key] = f"{value[:max_length]}... [truncated]" + else: + serialized[key] = ResultDataResponse._serialize_and_truncate(value, max_length=max_length) + return serialized + if isinstance(obj, BaseModel): + # For other BaseModel instances, serialize all fields + serialized = obj.model_dump() + return { + k: ResultDataResponse._serialize_and_truncate(v, max_length=max_length) for k, v in serialized.items() + } + if isinstance(obj, dict): + return {k: ResultDataResponse._serialize_and_truncate(v, max_length=max_length) for k, v in obj.items()} + if isinstance(obj, list | tuple): + return [ResultDataResponse._serialize_and_truncate(item, max_length=max_length) for item in obj] + return obj + + @model_serializer(mode="plain") + def serialize_model(self) -> dict: + """Custom serializer for the entire model.""" + return { + "results": self.serialize_results(self.results), + "outputs": self._serialize_and_truncate(self.outputs, max_length=MAX_TEXT_LENGTH), + "logs": self._serialize_and_truncate(self.logs, max_length=MAX_TEXT_LENGTH), + "message": self._serialize_and_truncate(self.message, max_length=MAX_TEXT_LENGTH), + "artifacts": self._serialize_and_truncate(self.artifacts, max_length=MAX_TEXT_LENGTH), + "timedelta": self.timedelta, + "duration": self.duration, + "used_frozen_result": self.used_frozen_result, + } class VertexBuildResponse(BaseModel): diff --git a/src/backend/base/langflow/utils/constants.py b/src/backend/base/langflow/utils/constants.py index 1b720652a..21e39fcc7 100644 --- a/src/backend/base/langflow/utils/constants.py +++ b/src/backend/base/langflow/utils/constants.py @@ -185,4 +185,4 @@ MESSAGE_SENDER_USER = "User" MESSAGE_SENDER_NAME_AI = "AI" MESSAGE_SENDER_NAME_USER = "User" -MAX_TEXT_LENGTH = 99999 +MAX_TEXT_LENGTH = 20000 diff --git a/src/backend/tests/unit/api/v1/test_api_schemas.py b/src/backend/tests/unit/api/v1/test_api_schemas.py new file mode 100644 index 000000000..26844eb6c --- /dev/null +++ b/src/backend/tests/unit/api/v1/test_api_schemas.py @@ -0,0 +1,316 @@ +from datetime import datetime, timezone + +from hypothesis import HealthCheck, example, given, settings +from hypothesis import strategies as st +from langflow.api.v1.schemas import ResultDataResponse, VertexBuildResponse +from langflow.schema.schema import OutputValue +from langflow.services.tracing.schema import Log +from pydantic import BaseModel + +# Use a smaller test size for hypothesis +TEST_TEXT_LENGTH = 50 + + +class SampleBaseModel(BaseModel): + name: str + value: int + + +@given(st.text(min_size=TEST_TEXT_LENGTH + 1, max_size=TEST_TEXT_LENGTH * 2)) +@settings(max_examples=10) +def test_result_data_response_truncation(long_string): + """Test that ResultDataResponse properly truncates long strings.""" + response = ResultDataResponse( + results={"long_text": long_string}, + message={"text": long_string}, + ) + + response.serialize_model() + truncated = response._serialize_and_truncate(long_string, max_length=TEST_TEXT_LENGTH) + assert len(truncated) <= TEST_TEXT_LENGTH + len("... [truncated]") + assert "... [truncated]" in truncated + + +@given( + st.uuids(), + st.datetimes(timezones=st.just(timezone.utc)), + st.decimals(min_value="-1e6", max_value="1e6"), + st.text(min_size=1), + st.integers(), +) +@settings(max_examples=10) +def test_result_data_response_special_types(uuid, dt, decimal, name, value): + """Test that ResultDataResponse properly handles special data types.""" + test_model = SampleBaseModel(name=name, value=value) + + response = ResultDataResponse( + results={ + "uuid": uuid, + "datetime": dt, + "decimal": decimal, + "model": test_model, + } + ) + + serialized = response.serialize_model() + assert serialized["results"]["uuid"] == str(uuid) + # Compare timezone-aware datetimes + assert datetime.fromisoformat(serialized["results"]["datetime"]).astimezone(timezone.utc) == dt + assert isinstance(serialized["results"]["decimal"], float) + assert serialized["results"]["model"] == {"name": name, "value": value} + + +@given( + st.lists(st.text(min_size=TEST_TEXT_LENGTH + 1, max_size=TEST_TEXT_LENGTH * 2), min_size=1, max_size=2), + st.dictionaries( + keys=st.text(min_size=1, max_size=10), + values=st.text(min_size=TEST_TEXT_LENGTH + 1, max_size=TEST_TEXT_LENGTH * 2), + min_size=1, + max_size=2, + ), +) +@settings(max_examples=5, suppress_health_check=[HealthCheck.too_slow, HealthCheck.large_base_example]) +def test_result_data_response_nested_structures(long_list, long_dict): + """Test that ResultDataResponse handles nested structures correctly.""" + nested_data = { + "list": long_list, + "dict": long_dict, + } + + response = ResultDataResponse(results=nested_data) + serialized = response._serialize_and_truncate(nested_data, max_length=TEST_TEXT_LENGTH) + + # Check list items + for item in serialized["list"]: + assert len(item) <= TEST_TEXT_LENGTH + len("... [truncated]") + if len(item) > TEST_TEXT_LENGTH: + assert "... [truncated]" in item + + # Check dict values + for val in serialized["dict"].values(): + assert len(val) <= TEST_TEXT_LENGTH + len("... [truncated]") + if len(val) > TEST_TEXT_LENGTH: + assert "... [truncated]" in val + + +@given( + st.dictionaries( + keys=st.text(min_size=1, max_size=5), + values=st.text(min_size=TEST_TEXT_LENGTH + 1, max_size=TEST_TEXT_LENGTH * 2), + min_size=1, + max_size=2, + ), +) +@settings(max_examples=10) +@example( + outputs_dict={"0": "000000000000000000000000000000000000000000000000000"}, +).via("discovered failure") +@example( + outputs_dict={"0": "000000000000000000000000000000000000000000000000000000000000000000"}, +).via("discovered failure") +def test_result_data_response_outputs(outputs_dict): + """Test that ResultDataResponse properly handles and truncates outputs.""" + # Create OutputValue objects with potentially long messages + outputs = {key: OutputValue(type="text", message=value) for key, value in outputs_dict.items()} + + response = ResultDataResponse(outputs=outputs) + serialized = ResultDataResponse._serialize_and_truncate(response, max_length=TEST_TEXT_LENGTH) + + # Check outputs are properly serialized and truncated + for key, value in outputs_dict.items(): + assert key in serialized["outputs"] + serialized_output = serialized["outputs"][key] + assert serialized_output["type"] == "text" + + # Check message truncation + message = serialized_output["message"] + assert len(message) <= TEST_TEXT_LENGTH + len("... [truncated]"), f"Message length: {len(message)}" + if len(value) > TEST_TEXT_LENGTH: + assert "... [truncated]" in message + assert message.startswith(value[:TEST_TEXT_LENGTH]) + else: + assert message == value + + +@given( + st.lists( + st.text(min_size=TEST_TEXT_LENGTH + 1, max_size=TEST_TEXT_LENGTH * 2), + min_size=1, + max_size=3, + ), +) +@settings(max_examples=10) +@example( + log_messages=["000000000000000000000000000000000000000000000000000"], +).via("discovered failure") +def test_result_data_response_logs(log_messages): + """Test that ResultDataResponse properly handles and truncates logs.""" + # Create logs with long messages + logs = { + "test_node": [ + Log( + message=msg, + name="test_log", + type="test", + ) + for msg in log_messages + ] + } + + response = ResultDataResponse(logs=logs) + serialized = ResultDataResponse._serialize_and_truncate(response, max_length=TEST_TEXT_LENGTH) + + # Check logs are properly serialized and truncated + assert "test_node" in serialized["logs"] + serialized_logs = serialized["logs"]["test_node"] + + for i, log_msg in enumerate(log_messages): + serialized_log = serialized_logs[i] + assert serialized_log["name"] == "test_log" + assert serialized_log["type"] == "test" + + # Check message truncation + message = serialized_log["message"] + assert len(message) <= TEST_TEXT_LENGTH + len("... [truncated]") + if len(log_msg) > TEST_TEXT_LENGTH: + assert "... [truncated]" in message + assert message.startswith(log_msg[:TEST_TEXT_LENGTH]) + else: + assert message == log_msg + + +@given( + st.dictionaries( + keys=st.text(min_size=1, max_size=5), + values=st.text(min_size=TEST_TEXT_LENGTH + 1, max_size=TEST_TEXT_LENGTH * 2), + min_size=1, + max_size=2, + ), + st.lists( + st.text(min_size=TEST_TEXT_LENGTH + 1, max_size=TEST_TEXT_LENGTH * 2), + min_size=1, + max_size=3, + ), +) +@settings(max_examples=10) +@example( + outputs_dict={"0": "000000000000000000000000000000000000000000000000000000000000000000"}, + log_messages=["000000000000000000000000000000000000000000000000000"], +).via("discovered failure") +@example( + outputs_dict={"0": "000000000000000000000000000000000000000000000000000"}, + log_messages=["000000000000000000000000000000000000000000000000000"], +).via("discovered failure") +def test_result_data_response_combined_fields(outputs_dict, log_messages): + """Test that ResultDataResponse properly handles all fields together.""" + # Create OutputValue objects with potentially long messages + outputs = {key: OutputValue(type="text", message=value) for key, value in outputs_dict.items()} + + # Create logs with long messages + logs = { + "test_node": [ + Log( + message=msg, + name="test_log", + type="test", + ) + for msg in log_messages + ] + } + + response = ResultDataResponse( + outputs=outputs, + logs=logs, + results={"test": "value"}, + message={"text": "test"}, + artifacts={"file": "test.txt"}, + ) + serialized = ResultDataResponse._serialize_and_truncate(response, max_length=TEST_TEXT_LENGTH) + + # Check all fields are present + assert "outputs" in serialized + assert "logs" in serialized + assert "results" in serialized + assert "message" in serialized + assert "artifacts" in serialized + + # Check outputs truncation + for key, value in outputs_dict.items(): + assert key in serialized["outputs"] + serialized_output = serialized["outputs"][key] + assert serialized_output["type"] == "text" + + # Check message truncation + message = serialized_output["message"] + if len(value) > TEST_TEXT_LENGTH: + assert len(message) <= TEST_TEXT_LENGTH + len("... [truncated]") + assert "... [truncated]" in message + else: + assert message == value + + # Check logs truncation + assert "test_node" in serialized["logs"] + serialized_logs = serialized["logs"]["test_node"] + + for i, log_msg in enumerate(log_messages): + serialized_log = serialized_logs[i] + assert serialized_log["name"] == "test_log" + assert serialized_log["type"] == "test" + + # Check message truncation + message = serialized_log["message"] + if len(log_msg) > TEST_TEXT_LENGTH: + assert len(message) <= TEST_TEXT_LENGTH + len("... [truncated]") + assert "... [truncated]" in message + else: + assert message == log_msg + + +@given( + st.text(min_size=1), # build_id + st.lists(st.text()), # logs + st.text(min_size=1), # message +) +@settings(max_examples=10) +def test_vertex_build_response_serialization(build_id, log_messages, test_message): + """Test that VertexBuildResponse properly serializes its data field.""" + logs = [Log(message=msg, name="test_log", type="test") for msg in log_messages] + + result_data = ResultDataResponse( + results={"test": test_message}, + message={"text": test_message}, + logs={"node1": logs}, + ) + + response = VertexBuildResponse( + id=build_id, + valid=True, + data=result_data, + ) + + serialized = response.model_dump() + assert serialized["id"] == build_id + assert serialized["valid"] is True + assert isinstance(serialized["data"], dict) + assert serialized["data"]["results"]["test"] == test_message + + +@given(st.text(min_size=TEST_TEXT_LENGTH + 1, max_size=TEST_TEXT_LENGTH * 2)) +@settings(max_examples=10) +def test_vertex_build_response_with_long_data(long_string): + """Test that VertexBuildResponse properly handles long data in its data field.""" + result_data = ResultDataResponse( + results={"long_text": long_string}, + message={"text": long_string}, + ) + + response = VertexBuildResponse( + id="test-id", + valid=True, + data=result_data, + ) + + response.model_dump() + truncated = result_data._serialize_and_truncate(long_string, max_length=TEST_TEXT_LENGTH) + assert len(truncated) <= TEST_TEXT_LENGTH + len("... [truncated]") + assert "... [truncated]" in truncated diff --git a/uv.lock b/uv.lock index 8d4a487c1..d50afab6e 100644 --- a/uv.lock +++ b/uv.lock @@ -532,7 +532,7 @@ name = "blessed" version = "1.20.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "jinxed", marker = "sys_platform == 'win32'" }, + { name = "jinxed", marker = "platform_system == 'Windows'" }, { name = "six" }, { name = "wcwidth" }, ] @@ -954,7 +954,7 @@ name = "click" version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ @@ -3000,6 +3000,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/de/85a784bcc4a3779d1753a7ec2dee5de90e18c7bcf402e71b51fcf150b129/hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15", size = 12389 }, ] +[[package]] +name = "hypothesis" +version = "6.123.17" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/15/a7/695b2bcb4e8438e1d4683efa6877fc95be293a11251471d4552d6dd08259/hypothesis-6.123.17.tar.gz", hash = "sha256:5850893975b4f08e893ddc10f1d468bc7e011d59703f70fe06a10161e426e602", size = 418572 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/8a/f1c166f048df4b314d0d38e9530b7af516a16160873d724bb416084d6990/hypothesis-6.123.17-py3-none-any.whl", hash = "sha256:5c949fb44935e32c61c64abfcc3975eec41f8205ade2223073ba074c1e078ead", size = 480880 }, +] + [[package]] name = "identify" version = "2.6.3" @@ -3142,7 +3156,7 @@ name = "ipykernel" version = "6.29.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "appnope", marker = "sys_platform == 'darwin'" }, + { name = "appnope", marker = "platform_system == 'Darwin'" }, { name = "comm" }, { name = "debugpy" }, { name = "ipython" }, @@ -3233,7 +3247,7 @@ name = "jinxed" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ansicon", marker = "sys_platform == 'win32'" }, + { name = "ansicon", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/20/d0/59b2b80e7a52d255f9e0ad040d2e826342d05580c4b1d7d7747cfb8db731/jinxed-1.3.0.tar.gz", hash = "sha256:1593124b18a41b7a3da3b078471442e51dbad3d77b4d4f2b0c26ab6f7d660dbf", size = 80981 } wheels = [ @@ -4040,6 +4054,7 @@ dev = [ { name = "codeflash" }, { name = "dictdiffer" }, { name = "httpx" }, + { name = "hypothesis" }, { name = "ipykernel" }, { name = "mypy" }, { name = "packaging" }, @@ -4182,6 +4197,7 @@ dev = [ { name = "codeflash", specifier = ">=0.8.4" }, { name = "dictdiffer", specifier = ">=0.9.0" }, { name = "httpx", specifier = ">=0.27.0" }, + { name = "hypothesis", specifier = ">=6.123.17" }, { name = "ipykernel", specifier = ">=6.29.0" }, { name = "mypy", specifier = ">=1.11.0" }, { name = "packaging", specifier = ">=24.1,<25.0" }, @@ -6148,7 +6164,7 @@ name = "portalocker" version = "2.10.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "pywin32", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 } wheels = [ @@ -8645,19 +8661,19 @@ dependencies = [ { name = "fsspec" }, { name = "jinja2" }, { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, { name = "sympy" }, - { name = "triton", marker = "python_full_version < '3.13' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "python_full_version < '3.13' and platform_machine == 'x86_64' and platform_system == 'Linux'" }, { name = "typing-extensions" }, ] wheels = [ @@ -8698,7 +8714,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } wheels = [