feat: implement function to truncate json (#5811)
* feat: implement function to truncate json * refactor: apply JSON truncation function in models * fix: update code to use the standard field serialization format * Update src/backend/base/langflow/services/database/models/transactions/model.py Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org> * Update src/backend/base/langflow/services/database/models/transactions/model.py Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org> * Update src/backend/base/langflow/services/database/models/vertex_builds/model.py --------- Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
parent
44266f593b
commit
00dcef2bf1
4 changed files with 167 additions and 6 deletions
|
|
@ -5,11 +5,11 @@ from uuid import UUID, uuid4
|
|||
from pydantic import field_serializer, field_validator
|
||||
from sqlmodel import JSON, Column, Field, Relationship, SQLModel
|
||||
|
||||
from langflow.services.database.utils import truncate_json
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langflow.services.database.models.flow.model import Flow
|
||||
|
||||
from langflow.utils.util_strings import truncate_long_strings
|
||||
|
||||
|
||||
class TransactionBase(SQLModel):
|
||||
timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
|
|
@ -34,9 +34,13 @@ class TransactionBase(SQLModel):
|
|||
value = UUID(value)
|
||||
return value
|
||||
|
||||
@field_serializer("inputs")
def serialize_inputs(self, data) -> dict:
    """Serialize the ``inputs`` field by passing its value through ``truncate_json``."""
    truncated_inputs = truncate_json(data)
    return truncated_inputs
|
||||
|
||||
@field_serializer("outputs")
def serialize_outputs(self, data) -> dict:
    """Serialize the ``outputs`` field by passing its value through ``truncate_json``.

    The earlier ``truncate_long_strings`` return has been dropped: it ran first
    and left the ``truncate_json`` call unreachable, so ``outputs`` was handled
    differently from ``inputs``.
    """
    return truncate_json(data)
|
||||
|
||||
|
||||
class TransactionTable(TransactionBase, table=True): # type: ignore[call-arg]
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ from pydantic import BaseModel, field_serializer, field_validator
|
|||
from sqlalchemy import Text
|
||||
from sqlmodel import JSON, Column, Field, Relationship, SQLModel
|
||||
|
||||
from langflow.services.database.utils import truncate_json
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langflow.services.database.models.flow.model import Flow
|
||||
|
||||
|
|
@ -42,12 +44,12 @@ class VertexBuildBase(SQLModel):
|
|||
return value
|
||||
|
||||
@field_serializer("data")
def serialize_data(self, data) -> dict:
    """Serialize the ``data`` field by passing its value through ``truncate_json``.

    Only this definition is kept; the older ``truncate_long_strings`` variant
    that preceded it was a duplicate definition of the same method.
    """
    return truncate_json(data)
|
||||
|
||||
@field_serializer("artifacts")
def serialize_artifacts(self, data) -> dict:
    """Serialize the ``artifacts`` field by passing its value through ``truncate_json``.

    The earlier ``truncate_long_strings`` return has been dropped: it ran first
    and left the ``truncate_json`` call unreachable.
    """
    return truncate_json(data)
|
||||
|
||||
@field_serializer("params")
|
||||
def serialize_params(self, data) -> str:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from contextlib import asynccontextmanager
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
|
@ -9,10 +10,54 @@ from loguru import logger
|
|||
from sqlmodel import text
|
||||
from sqlmodel.ext.asyncio.session import AsyncSession
|
||||
|
||||
from langflow.utils import constants
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langflow.services.database.service import DatabaseService
|
||||
|
||||
|
||||
def truncate_json(data, *, max_size: int = constants.MAX_TEXT_LENGTH):
|
||||
def calculate_size(data):
|
||||
return len(json.dumps(data))
|
||||
|
||||
def shrink_to_size(data, remaining_size):
|
||||
if isinstance(data, dict):
|
||||
truncated = {}
|
||||
for key, value in data.items():
|
||||
key_size = len(json.dumps(key))
|
||||
if remaining_size - key_size <= 0:
|
||||
break
|
||||
truncated[key] = shrink_to_size(value, remaining_size - key_size)
|
||||
remaining_size -= len(json.dumps({key: value})) - key_size
|
||||
return truncated
|
||||
|
||||
if isinstance(data, list):
|
||||
truncated = []
|
||||
for item in data:
|
||||
if remaining_size <= len('""'):
|
||||
break
|
||||
truncated.append(shrink_to_size(item, remaining_size))
|
||||
remaining_size -= len(json.dumps(item)) + 1
|
||||
return truncated
|
||||
|
||||
if isinstance(data, str):
|
||||
max_string_length = max(remaining_size - 2, 0)
|
||||
return data[:max_string_length] + "…" if max_string_length > 0 else "…"
|
||||
|
||||
return data
|
||||
|
||||
try:
|
||||
json.dumps(data)
|
||||
is_serialized = True
|
||||
except Exception: # noqa: BLE001
|
||||
is_serialized = False
|
||||
|
||||
if calculate_size(data) <= max_size or not is_serialized:
|
||||
return data
|
||||
|
||||
return shrink_to_size(data, max_size)
|
||||
|
||||
|
||||
async def initialize_database(*, fix_migration: bool = False) -> None:
|
||||
logger.debug("Initializing database")
|
||||
from langflow.services.deps import get_db_service
|
||||
|
|
|
|||
110
src/backend/tests/unit/services/database/test_utils.py
Normal file
110
src/backend/tests/unit/services/database/test_utils.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
import json
|
||||
|
||||
import pytest
|
||||
from langflow.services.database.utils import truncate_json
|
||||
|
||||
|
||||
@pytest.fixture
def small_json():
    """Return five small records, each holding a ``name`` and an ``email``."""
    contacts = (
        ("Cole Ramos", "egestas.fusce.aliquet@google.couk"),
        ("Chancellor Torres", "lorem.eu@hotmail.com"),
        ("Deanna Lyons", "neque.venenatis.lacus@outlook.couk"),
        ("Ruby O'connor", "lectus.justo.eu@hotmail.couk"),
        ("Iona Dorsey", "rutrum@yahoo.org"),
    )
    return [{"name": name, "email": email} for name, email in contacts]
|
||||
|
||||
|
||||
@pytest.fixture
def large_json():
    """Return five wide records (twelve fields each) used as an oversized payload."""
    # Field order matters: it defines the key order of every generated dict.
    columns = (
        "name",
        "phone",
        "email",
        "address",
        "list",
        "country",
        "region",
        "postalZip",
        "text",
        "alphanumeric",
        "currency",
        "numberrange",
    )
    rows = (
        (
            "Nash Briggs",
            "1-827-252-5669",
            "magna.ut@icloud.edu",
            "847-2983 Vel Rd.",
            5,
            "South Korea",
            "Gilgit Baltistan",
            "6088-8521",
            "ipsum. Curabitur consequat, lectus sit amet luctus vulputate, nisi sem",
            "OJG47QKX4DO",
            "$46.88",
            6,
        ),
        (
            "Keefe Cooley",
            "(164) 954-5395",
            "congue.turpis.in@protonmail.ca",
            "Ap #674-3382 Egestas. St.",
            3,
            "Spain",
            "Antioquia",
            "42452",
            "nisl. Nulla eu neque pellentesque massa lobortis ultrices. Vivamus rhoncus.",
            "FIE81ZDK2RI",
            "$37.74",
            3,
        ),
        (
            "Randall Booth",
            "(762) 778-9833",
            "a@icloud.edu",
            "Ap #116-8418 Nec Ave",
            9,
            "Norway",
            "Prince Edward Island",
            "39155",
            "tempor arcu. Vestibulum ut eros non enim commodo hendrerit. Donec",
            "GMF33SGB4XD",
            "$87.24",
            0,
        ),
        (
            "Aurora Mooney",
            "(626) 435-3885",
            "morbi.sit.amet@icloud.org",
            "837-8038 Duis Rd.",
            15,
            "United States",
            "West Sulawesi",
            "84466-29328",
            "metus eu erat semper rutrum. Fusce dolor quam, elementum at,",
            "CVK31QJA8GZ",
            "$85.97",
            1,
        ),
        (
            "Irma Snider",
            "1-682-186-4584",
            "senectus.et@hotmail.org",
            "718-8593 Mauris. Avenue",
            13,
            "Italy",
            "East Region",
            "47178",
            "Cras convallis convallis dolor. Quisque tincidunt pede ac urna. Ut",
            "KXR03TWX8QA",
            "$65.54",
            3,
        ),
    )
    return [dict(zip(columns, row)) for row in rows]
|
||||
|
||||
|
||||
def test_truncate_json__small_case(small_json):
    """A payload that is already under the limit must come back unchanged."""
    limit = 400

    truncated = truncate_json(small_json, max_size=limit)

    original_length = len(str(small_json))
    assert original_length < limit, "small_json must be smaller than max_size"
    assert truncated == small_json, "small_json should not be truncated"
|
||||
|
||||
|
||||
def test_truncate_json__large_case(large_json):
    """A payload over the limit must shrink and still be JSON-serializable."""
    limit = 1000

    truncated = truncate_json(large_json, max_size=limit)

    original_length = len(str(large_json))
    truncated_length = len(str(truncated))
    assert original_length > limit, "large_json must be larger than max_size"
    assert truncated_length < original_length, "result must be smaller than large_json"
    assert json.dumps(truncated), "result must be a valid JSON object"
|
||||
Loading…
Add table
Add a link
Reference in a new issue