feat: implement function to truncate JSON (#5811)

* feat: implement function to truncate JSON

* refactor: apply JSON truncation function in models

* fix: update code to use the standard field serialization format

* Update src/backend/base/langflow/services/database/models/transactions/model.py

Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>

* Update src/backend/base/langflow/services/database/models/transactions/model.py

Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>

* Update src/backend/base/langflow/services/database/models/vertex_builds/model.py

---------

Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Ítalo Johnny 2025-01-20 18:15:07 -03:00 committed by GitHub
commit 00dcef2bf1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 167 additions and 6 deletions

View file

@ -5,11 +5,11 @@ from uuid import UUID, uuid4
from pydantic import field_serializer, field_validator
from sqlmodel import JSON, Column, Field, Relationship, SQLModel
from langflow.services.database.utils import truncate_json
if TYPE_CHECKING:
from langflow.services.database.models.flow.model import Flow
from langflow.utils.util_strings import truncate_long_strings
class TransactionBase(SQLModel):
timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
@ -34,9 +34,13 @@ class TransactionBase(SQLModel):
value = UUID(value)
return value
@field_serializer("inputs")
def serialize_inputs(self, data) -> dict:
    """Serialize the ``inputs`` field by passing its value through ``truncate_json``."""
    truncated_inputs = truncate_json(data)
    return truncated_inputs
@field_serializer("outputs")
def serialize_outputs(self, data) -> dict:
    """Serialize the ``outputs`` field by passing its value through ``truncate_json``.

    Uses the same helper as ``serialize_inputs`` so both fields are
    size-limited in the same way.
    """
    return truncate_json(data)
class TransactionTable(TransactionBase, table=True): # type: ignore[call-arg]

View file

@ -6,6 +6,8 @@ from pydantic import BaseModel, field_serializer, field_validator
from sqlalchemy import Text
from sqlmodel import JSON, Column, Field, Relationship, SQLModel
from langflow.services.database.utils import truncate_json
if TYPE_CHECKING:
from langflow.services.database.models.flow.model import Flow
@ -42,12 +44,12 @@ class VertexBuildBase(SQLModel):
return value
@field_serializer("data")
def serialize_data(self, data) -> dict:
    """Serialize the ``data`` field by passing its value through ``truncate_json``."""
    return truncate_json(data)
@field_serializer("artifacts")
def serialize_artifacts(self, data) -> dict:
    """Serialize the ``artifacts`` field by passing its value through ``truncate_json``."""
    return truncate_json(data)
@field_serializer("params")
def serialize_params(self, data) -> str:

View file

@ -1,5 +1,6 @@
from __future__ import annotations
import json
from contextlib import asynccontextmanager
from dataclasses import dataclass
from typing import TYPE_CHECKING
@ -9,10 +10,54 @@ from loguru import logger
from sqlmodel import text
from sqlmodel.ext.asyncio.session import AsyncSession
from langflow.utils import constants
if TYPE_CHECKING:
from langflow.services.database.service import DatabaseService
def truncate_json(data, *, max_size: int = constants.MAX_TEXT_LENGTH):
    """Shrink ``data`` so that its JSON encoding stays close to ``max_size`` characters.

    ``data`` is returned unchanged when its ``json.dumps`` encoding is at most
    ``max_size`` characters long, or when it cannot be JSON-encoded at all.
    Otherwise dicts and lists are rebuilt, dropping trailing entries/items
    once the size budget is used up, and strings are cut short.

    The limit is best-effort rather than a hard cap: the budget bookkeeping is
    approximate, and values that are not dicts, lists or strings are never
    shortened.

    Args:
        data: The value to truncate.
        max_size: Size budget, measured in characters of the ``json.dumps``
            encoding.

    Returns:
        ``data`` itself when no truncation applies, otherwise the shrunken
        value.
    """

    def shrink_to_size(node, budget):
        """Recursively rebuild ``node`` while spending at most roughly ``budget`` characters."""
        if isinstance(node, dict):
            truncated = {}
            for key, value in node.items():
                key_size = len(json.dumps(key))
                # Stop as soon as the key alone would exhaust the budget.
                if budget - key_size <= 0:
                    break
                truncated[key] = shrink_to_size(value, budget - key_size)
                # Charge the size of the original (untruncated) value plus
                # the surrounding JSON punctuation.
                budget -= len(json.dumps({key: value})) - key_size
            return truncated
        if isinstance(node, list):
            truncated = []
            for item in node:
                # Not even an empty string ('""') fits any more.
                if budget <= len('""'):
                    break
                truncated.append(shrink_to_size(item, budget))
                budget -= len(json.dumps(item)) + 1
            return truncated
        if isinstance(node, str):
            # Reserve two characters for the enclosing quotes; a non-positive
            # budget yields the empty string.
            return node[: max(budget - 2, 0)]
        # Numbers, booleans and None are kept as they are.
        return node

    try:
        serialized_size = len(json.dumps(data))
    except Exception:  # noqa: BLE001
        # Deliberately best-effort: anything json.dumps rejects is passed
        # through untouched instead of raising from the size check.
        return data

    if serialized_size <= max_size:
        return data
    return shrink_to_size(data, max_size)
async def initialize_database(*, fix_migration: bool = False) -> None:
logger.debug("Initializing database")
from langflow.services.deps import get_db_service

View file

@ -0,0 +1,110 @@
import json
import pytest
from langflow.services.database.utils import truncate_json
@pytest.fixture
def small_json():
    """Provide a short list of name/email records for the no-truncation test."""
    contacts = [
        {"name": "Cole Ramos", "email": "egestas.fusce.aliquet@google.couk"},
        {"name": "Chancellor Torres", "email": "lorem.eu@hotmail.com"},
        {"name": "Deanna Lyons", "email": "neque.venenatis.lacus@outlook.couk"},
        {"name": "Ruby O'connor", "email": "lectus.justo.eu@hotmail.couk"},
        {"name": "Iona Dorsey", "email": "rutrum@yahoo.org"},
    ]
    return contacts
@pytest.fixture
def large_json():
    """Provide a list of five multi-field records for the truncation test."""
    records = [
        {
            "name": "Nash Briggs",
            "phone": "1-827-252-5669",
            "email": "magna.ut@icloud.edu",
            "address": "847-2983 Vel Rd.",
            "list": 5,
            "country": "South Korea",
            "region": "Gilgit Baltistan",
            "postalZip": "6088-8521",
            "text": "ipsum. Curabitur consequat, lectus sit amet luctus vulputate, nisi sem",
            "alphanumeric": "OJG47QKX4DO",
            "currency": "$46.88",
            "numberrange": 6,
        },
        {
            "name": "Keefe Cooley",
            "phone": "(164) 954-5395",
            "email": "congue.turpis.in@protonmail.ca",
            "address": "Ap #674-3382 Egestas. St.",
            "list": 3,
            "country": "Spain",
            "region": "Antioquia",
            "postalZip": "42452",
            "text": "nisl. Nulla eu neque pellentesque massa lobortis ultrices. Vivamus rhoncus.",
            "alphanumeric": "FIE81ZDK2RI",
            "currency": "$37.74",
            "numberrange": 3,
        },
        {
            "name": "Randall Booth",
            "phone": "(762) 778-9833",
            "email": "a@icloud.edu",
            "address": "Ap #116-8418 Nec Ave",
            "list": 9,
            "country": "Norway",
            "region": "Prince Edward Island",
            "postalZip": "39155",
            "text": "tempor arcu. Vestibulum ut eros non enim commodo hendrerit. Donec",
            "alphanumeric": "GMF33SGB4XD",
            "currency": "$87.24",
            "numberrange": 0,
        },
        {
            "name": "Aurora Mooney",
            "phone": "(626) 435-3885",
            "email": "morbi.sit.amet@icloud.org",
            "address": "837-8038 Duis Rd.",
            "list": 15,
            "country": "United States",
            "region": "West Sulawesi",
            "postalZip": "84466-29328",
            "text": "metus eu erat semper rutrum. Fusce dolor quam, elementum at,",
            "alphanumeric": "CVK31QJA8GZ",
            "currency": "$85.97",
            "numberrange": 1,
        },
        {
            "name": "Irma Snider",
            "phone": "1-682-186-4584",
            "email": "senectus.et@hotmail.org",
            "address": "718-8593 Mauris. Avenue",
            "list": 13,
            "country": "Italy",
            "region": "East Region",
            "postalZip": "47178",
            "text": "Cras convallis convallis dolor. Quisque tincidunt pede ac urna. Ut",
            "alphanumeric": "KXR03TWX8QA",
            "currency": "$65.54",
            "numberrange": 3,
        },
    ]
    return records
def test_truncate_json__small_case(small_json):
    """A payload already within ``max_size`` must be returned unchanged."""
    max_size = 400
    # Validate the fixture first, measuring with json.dumps because that is
    # the encoding truncate_json sizes the payload with.
    assert len(json.dumps(small_json)) < max_size, "small_json must be smaller than max_size"

    result = truncate_json(small_json, max_size=max_size)

    assert result == small_json, "small_json should not be truncated"
def test_truncate_json__large_case(large_json):
    """A payload larger than ``max_size`` must come back smaller and still JSON-encodable."""
    max_size = 1000
    # Validate the fixture first, measuring with json.dumps because that is
    # the encoding truncate_json sizes the payload with.
    original_size = len(json.dumps(large_json))
    assert original_size > max_size, "large_json must be larger than max_size"

    result = truncate_json(large_json, max_size=max_size)

    assert len(json.dumps(result)) < original_size, "result must be smaller than large_json"
    assert json.dumps(result), "result must be a valid JSON object"