From 00dcef2bf1d6882caf8169c2b6cf587527f96022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8Dtalo=20Johnny?= Date: Mon, 20 Jan 2025 18:15:07 -0300 Subject: [PATCH] feat: implement function to truncate json (#5811) * feat: implement function to truncate json * refactor: apply JSON truncation function in models * fix: update code to use the standard field serialization format * Update src/backend/base/langflow/services/database/models/transactions/model.py Co-authored-by: Gabriel Luiz Freitas Almeida * Update src/backend/base/langflow/services/database/models/transactions/model.py Co-authored-by: Gabriel Luiz Freitas Almeida * Update src/backend/base/langflow/services/database/models/vertex_builds/model.py --------- Co-authored-by: Gabriel Luiz Freitas Almeida --- .../database/models/transactions/model.py | 10 +- .../database/models/vertex_builds/model.py | 8 +- .../base/langflow/services/database/utils.py | 45 +++++++ .../unit/services/database/test_utils.py | 110 ++++++++++++++++++ 4 files changed, 167 insertions(+), 6 deletions(-) create mode 100644 src/backend/tests/unit/services/database/test_utils.py diff --git a/src/backend/base/langflow/services/database/models/transactions/model.py b/src/backend/base/langflow/services/database/models/transactions/model.py index 30478f15c..cc12e9f70 100644 --- a/src/backend/base/langflow/services/database/models/transactions/model.py +++ b/src/backend/base/langflow/services/database/models/transactions/model.py @@ -5,11 +5,11 @@ from uuid import UUID, uuid4 from pydantic import field_serializer, field_validator from sqlmodel import JSON, Column, Field, Relationship, SQLModel +from langflow.services.database.utils import truncate_json + if TYPE_CHECKING: from langflow.services.database.models.flow.model import Flow -from langflow.utils.util_strings import truncate_long_strings - class TransactionBase(SQLModel): timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) @@ -34,9 +34,13 @@ class 
TransactionBase(SQLModel): value = UUID(value) return value + @field_serializer("inputs") + def serialize_inputs(self, data) -> dict: + return truncate_json(data) + @field_serializer("outputs") def serialize_outputs(self, data) -> dict: - return truncate_long_strings(data) + return truncate_json(data) class TransactionTable(TransactionBase, table=True): # type: ignore[call-arg] diff --git a/src/backend/base/langflow/services/database/models/vertex_builds/model.py b/src/backend/base/langflow/services/database/models/vertex_builds/model.py index 6302589c1..d37dd14ec 100644 --- a/src/backend/base/langflow/services/database/models/vertex_builds/model.py +++ b/src/backend/base/langflow/services/database/models/vertex_builds/model.py @@ -6,6 +6,8 @@ from pydantic import BaseModel, field_serializer, field_validator from sqlalchemy import Text from sqlmodel import JSON, Column, Field, Relationship, SQLModel +from langflow.services.database.utils import truncate_json + if TYPE_CHECKING: from langflow.services.database.models.flow.model import Flow @@ -42,12 +44,12 @@ class VertexBuildBase(SQLModel): return value @field_serializer("data") - def serialize_data(self, data: dict) -> dict: - return truncate_long_strings(data) + def serialize_data(self, data) -> dict: + return truncate_json(data) @field_serializer("artifacts") def serialize_artifacts(self, data) -> dict: - return truncate_long_strings(data) + return truncate_json(data) @field_serializer("params") def serialize_params(self, data) -> str: diff --git a/src/backend/base/langflow/services/database/utils.py b/src/backend/base/langflow/services/database/utils.py index 942a11cfa..c64fbab22 100644 --- a/src/backend/base/langflow/services/database/utils.py +++ b/src/backend/base/langflow/services/database/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from contextlib import asynccontextmanager from dataclasses import dataclass from typing import TYPE_CHECKING @@ -9,10 +10,54 @@ from loguru import 
def truncate_json(data, *, max_size: int = constants.MAX_TEXT_LENGTH):
    """Return ``data`` trimmed so its JSON text stays roughly within ``max_size``.

    Sizes are measured as ``len(json.dumps(...))``. Data that already fits, or
    that cannot be serialized to JSON at all, is returned unchanged. Otherwise
    dicts and lists are copied entry by entry until the budget is used up, and
    any string longer than the budget left for it is cut and suffixed with "…".
    The accounting is approximate (best effort), not a hard size guarantee.

    Args:
        data: Any value; only dicts, lists and strings are ever shortened.
        max_size: Character budget for the JSON representation of the result.

    Returns:
        ``data`` itself when no truncation is needed or possible, otherwise a
        new, shortened structure built from dicts, lists and strings.
    """

    def shrink_to_size(node, budget):
        """Copy ``node``, dropping or cutting content once ``budget`` runs out."""
        if isinstance(node, dict):
            truncated = {}
            for key, value in node.items():
                key_size = len(json.dumps(key))
                # Stop as soon as not even the key fits any more.
                if budget - key_size <= 0:
                    break
                truncated[key] = shrink_to_size(value, budget - key_size)
                # Charge the whole original entry, key included. Charging the
                # original (untruncated) value drives the budget negative once
                # a value had to be cut, so nothing is emitted after that point.
                budget -= len(json.dumps({key: value}))
            return truncated

        if isinstance(node, list):
            truncated = []
            for item in node:
                # Not even an empty string ('""') would fit: stop here.
                if budget <= len('""'):
                    break
                truncated.append(shrink_to_size(item, budget))
                # +1 accounts for the separator between list items.
                budget -= len(json.dumps(item)) + 1
            return truncated

        if isinstance(node, str):
            # Reserve two characters for the surrounding quotes.
            keep = max(budget - len('""'), 0)
            if len(node) <= keep:
                # Nothing would be cut, so do not mark the string as truncated.
                return node
            return node[:keep] + "…"

        # Numbers, booleans and None are kept as they are.
        return node

    try:
        serialized_size = len(json.dumps(data))
    except Exception:  # noqa: BLE001
        # Best effort: data that is not JSON-serializable cannot be measured,
        # so hand it back untouched instead of failing the caller.
        return data

    if serialized_size <= max_size:
        return data

    return shrink_to_size(data, max_size)
"email": "lorem.eu@hotmail.com"}, + {"name": "Deanna Lyons", "email": "neque.venenatis.lacus@outlook.couk"}, + {"name": "Ruby O'connor", "email": "lectus.justo.eu@hotmail.couk"}, + {"name": "Iona Dorsey", "email": "rutrum@yahoo.org"}, + ] + + +@pytest.fixture +def large_json(): + return [ + { + "name": "Nash Briggs", + "phone": "1-827-252-5669", + "email": "magna.ut@icloud.edu", + "address": "847-2983 Vel Rd.", + "list": 5, + "country": "South Korea", + "region": "Gilgit Baltistan", + "postalZip": "6088-8521", + "text": "ipsum. Curabitur consequat, lectus sit amet luctus vulputate, nisi sem", + "alphanumeric": "OJG47QKX4DO", + "currency": "$46.88", + "numberrange": 6, + }, + { + "name": "Keefe Cooley", + "phone": "(164) 954-5395", + "email": "congue.turpis.in@protonmail.ca", + "address": "Ap #674-3382 Egestas. St.", + "list": 3, + "country": "Spain", + "region": "Antioquia", + "postalZip": "42452", + "text": "nisl. Nulla eu neque pellentesque massa lobortis ultrices. Vivamus rhoncus.", + "alphanumeric": "FIE81ZDK2RI", + "currency": "$37.74", + "numberrange": 3, + }, + { + "name": "Randall Booth", + "phone": "(762) 778-9833", + "email": "a@icloud.edu", + "address": "Ap #116-8418 Nec Ave", + "list": 9, + "country": "Norway", + "region": "Prince Edward Island", + "postalZip": "39155", + "text": "tempor arcu. Vestibulum ut eros non enim commodo hendrerit. Donec", + "alphanumeric": "GMF33SGB4XD", + "currency": "$87.24", + "numberrange": 0, + }, + { + "name": "Aurora Mooney", + "phone": "(626) 435-3885", + "email": "morbi.sit.amet@icloud.org", + "address": "837-8038 Duis Rd.", + "list": 15, + "country": "United States", + "region": "West Sulawesi", + "postalZip": "84466-29328", + "text": "metus eu erat semper rutrum. Fusce dolor quam, elementum at,", + "alphanumeric": "CVK31QJA8GZ", + "currency": "$85.97", + "numberrange": 1, + }, + { + "name": "Irma Snider", + "phone": "1-682-186-4584", + "email": "senectus.et@hotmail.org", + "address": "718-8593 Mauris. 
def test_truncate_json__small_case(small_json):
    """Data whose JSON text already fits within max_size is returned unchanged."""
    max_size = 400

    # Check the fixture precondition before exercising the function, measuring
    # size as JSON text length, which is how truncate_json measures it.
    assert len(json.dumps(small_json)) < max_size, "small_json must be smaller than max_size"

    result = truncate_json(small_json, max_size=max_size)

    assert result == small_json, "small_json should not be truncated"


def test_truncate_json__large_case(large_json):
    """Data whose JSON text exceeds max_size comes back shorter and still serializable."""
    max_size = 1000
    original_size = len(json.dumps(large_json))

    assert original_size > max_size, "large_json must be larger than max_size"

    result = truncate_json(large_json, max_size=max_size)

    # json.dumps raises if the truncated structure is no longer valid JSON data.
    serialized = json.dumps(result)
    assert serialized, "result must be a valid JSON object"
    assert len(serialized) < original_size, "result must be smaller than large_json"