fix: make messages appear in the correct order again (#5309)

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-12-17 22:45:22 -03:00 committed by GitHub
commit 5a8d73c5b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 189 additions and 26 deletions

View file

@ -22,7 +22,7 @@ from langflow.schema.content_types import ErrorContent
from langflow.schema.data import Data
from langflow.schema.image import Image, get_file_paths, is_image_file
from langflow.schema.properties import Properties, Source
from langflow.schema.validators import timestamp_to_str_validator
from langflow.schema.validators import timestamp_to_str, timestamp_to_str_validator
from langflow.utils.constants import (
MESSAGE_SENDER_AI,
MESSAGE_SENDER_NAME_AI,
@ -91,7 +91,7 @@ class Message(Data):
def serialize_timestamp(self, value):
try:
# Try parsing with timezone
return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M:%S %Z").astimezone(timezone.utc)
return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=timezone.utc)
except ValueError:
# Try parsing without timezone
return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
@ -311,6 +311,24 @@ class MessageResponse(DefaultModel):
category: str | None = None
content_blocks: list[ContentBlock] | None = None
@field_validator("content_blocks", mode="before")
@classmethod
def validate_content_blocks(cls, v):
if isinstance(v, str):
v = json.loads(v)
if isinstance(v, list):
return [cls.validate_content_blocks(block) for block in v]
if isinstance(v, dict):
return ContentBlock.model_validate(v)
return v
@field_validator("properties", mode="before")
@classmethod
def validate_properties(cls, v):
if isinstance(v, str):
v = json.loads(v)
return v
@field_validator("files", mode="before")
@classmethod
def validate_files(cls, v):
@ -321,8 +339,7 @@ class MessageResponse(DefaultModel):
@field_serializer("timestamp")
@classmethod
def serialize_timestamp(cls, v):
v = v.replace(microsecond=0)
return v.strftime("%Y-%m-%d %H:%M:%S %Z")
return timestamp_to_str(v)
@field_serializer("files")
@classmethod

View file

@ -1,35 +1,114 @@
from datetime import datetime
from datetime import datetime, timezone
from pydantic import BeforeValidator
def timestamp_to_str(timestamp: datetime | str) -> str:
"""Convert timestamp to standardized string format.
Handles multiple input formats and ensures consistent UTC timezone output.
Args:
timestamp (datetime | str): Input timestamp either as datetime object or string
Returns:
str: Formatted timestamp string in 'YYYY-MM-DD HH:MM:SS UTC' format
Raises:
ValueError: If string timestamp is in invalid format
"""
if isinstance(timestamp, str):
# Try parsing with different formats
formats = [
"%Y-%m-%dT%H:%M:%S", # ISO format
"%Y-%m-%d %H:%M:%S %Z", # Standard with timezone
"%Y-%m-%d %H:%M:%S", # Without timezone
"%Y-%m-%dT%H:%M:%S.%f", # ISO with microseconds
"%Y-%m-%dT%H:%M:%S%z", # ISO with numeric timezone
]
for fmt in formats:
try:
parsed = datetime.strptime(timestamp.strip(), fmt).replace(tzinfo=timezone.utc)
return parsed.strftime("%Y-%m-%d %H:%M:%S %Z")
except ValueError:
continue
msg = f"Invalid timestamp format: {timestamp}"
raise ValueError(msg)
# Handle datetime object
if timestamp.tzinfo is None:
timestamp = timestamp.replace(tzinfo=timezone.utc)
return timestamp.strftime("%Y-%m-%d %H:%M:%S %Z")
def str_to_timestamp(timestamp: str | datetime) -> datetime:
"""Convert timestamp to datetime object.
Handles multiple input formats and ensures consistent UTC timezone output.
Args:
timestamp (str | datetime): Input timestamp either as string or datetime object
Returns:
datetime: Datetime object with UTC timezone
Raises:
ValueError: If string timestamp is not in 'YYYY-MM-DD HH:MM:SS UTC' format
"""
if isinstance(timestamp, str):
# Just check if the string is a valid datetime
try:
datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S %Z") # noqa: DTZ007
result = timestamp
return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=timezone.utc)
except ValueError as e:
msg = f"Invalid timestamp: {timestamp}"
msg = f"Invalid timestamp format: {timestamp}. Expected format: YYYY-MM-DD HH:MM:SS UTC"
raise ValueError(msg) from e
else:
result = timestamp.strftime("%Y-%m-%d %H:%M:%S %Z")
return result
return timestamp
def timestamp_with_fractional_seconds(timestamp: datetime | str) -> str:
"""Convert timestamp to string format including fractional seconds.
Handles multiple input formats and ensures consistent UTC timezone output.
Args:
timestamp (datetime | str): Input timestamp either as datetime object or string
Returns:
str: Formatted timestamp string in 'YYYY-MM-DD HH:MM:SS.ffffff UTC' format
Raises:
ValueError: If string timestamp is in invalid format
"""
if isinstance(timestamp, str):
# Just check if the string is a valid datetime
try:
datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f %Z") # noqa: DTZ007
result = timestamp
except ValueError as e:
msg = f"Invalid timestamp: {timestamp}"
raise ValueError(msg) from e
else:
result = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f %Z")
return result
# Try parsing with different formats
formats = [
"%Y-%m-%d %H:%M:%S.%f %Z", # Standard with timezone
"%Y-%m-%d %H:%M:%S.%f", # Without timezone
"%Y-%m-%dT%H:%M:%S.%f", # ISO format
"%Y-%m-%dT%H:%M:%S.%f%z", # ISO with numeric timezone
# Also try without fractional seconds
"%Y-%m-%d %H:%M:%S %Z",
"%Y-%m-%d %H:%M:%S",
"%Y-%m-%dT%H:%M:%S",
]
for fmt in formats:
try:
parsed = datetime.strptime(timestamp.strip(), fmt).replace(tzinfo=timezone.utc)
return parsed.strftime("%Y-%m-%d %H:%M:%S.%f %Z")
except ValueError:
continue
msg = f"Invalid timestamp format: {timestamp}"
raise ValueError(msg)
# Handle datetime object
if timestamp.tzinfo is None:
timestamp = timestamp.replace(tzinfo=timezone.utc)
return timestamp.strftime("%Y-%m-%d %H:%M:%S.%f %Z")
timestamp_to_str_validator = BeforeValidator(timestamp_to_str)
timestamp_with_fractional_seconds_validator = BeforeValidator(timestamp_with_fractional_seconds)
str_to_timestamp_validator = BeforeValidator(str_to_timestamp)

View file

@ -1,6 +1,6 @@
import json
from datetime import datetime, timezone
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Annotated
from uuid import UUID, uuid4
from pydantic import field_serializer, field_validator
@ -9,6 +9,7 @@ from sqlmodel import JSON, Column, Field, Relationship, SQLModel
from langflow.schema.content_block import ContentBlock
from langflow.schema.properties import Properties
from langflow.schema.validators import str_to_timestamp_validator
if TYPE_CHECKING:
from langflow.schema.message import Message
@ -16,7 +17,9 @@ if TYPE_CHECKING:
class MessageBase(SQLModel):
timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
timestamp: Annotated[datetime, str_to_timestamp_validator] = Field(
default_factory=lambda: datetime.now(timezone.utc)
)
sender: str
sender_name: str
session_id: str
@ -29,6 +32,13 @@ class MessageBase(SQLModel):
category: str = Field(default="message")
content_blocks: list[ContentBlock] = Field(default_factory=list)
@field_validator("timestamp", mode="before")
@classmethod
def validate_timestamp(cls, value):
if isinstance(value, str):
return datetime.fromisoformat(value)
return value
@field_validator("files", mode="before")
@classmethod
def validate_files(cls, value):
@ -110,6 +120,25 @@ class MessageTable(MessageBase, table=True): # type: ignore[call-arg]
category: str = Field(sa_column=Column(Text))
content_blocks: list[ContentBlock] = Field(default_factory=list, sa_column=Column(JSON)) # type: ignore[assignment]
# We need to make sure the datetimes have timezone after running session.refresh
# because we are losing the timezone information when we save the message to the database
# and when we read it back. We use field_validator to make sure the datetimes have timezone
# after running session.refresh
@field_validator("timestamp", mode="after")
@classmethod
def validate_timestamp(cls, value):
if isinstance(value, datetime):
return value.replace(tzinfo=timezone.utc)
return value
@field_serializer("timestamp")
def serialize_timestamp(self, value, _info):
if isinstance(value, datetime):
if value.tzinfo is None:
value = value.replace(tzinfo=timezone.utc)
return value.strftime("%Y-%m-%d %H:%M:%S %Z")
return value
@field_validator("flow_id", mode="before")
@classmethod
def validate_flow_id(cls, value):
@ -136,6 +165,8 @@ class MessageTable(MessageBase, table=True): # type: ignore[call-arg]
return [self.serialize_properties_or_content_blocks(item) for item in value]
if hasattr(value, "model_dump"):
return value.model_dump()
if isinstance(value, str):
return json.loads(value)
return value
# Needed for Column(JSON)

View file

@ -1,4 +1,5 @@
import shutil
from datetime import datetime, timezone
from pathlib import Path
import pytest
@ -154,11 +155,16 @@ def test_message_without_sender():
def test_message_serialization():
"""Test message serialization to dict."""
# Create a timestamp with timezone
message = Message(text="Test message", sender=MESSAGE_SENDER_USER)
timestamp_str = message.timestamp
timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=timezone.utc)
serialized = message.model_dump()
assert serialized["text"] == "Test message"
assert serialized["sender"] == MESSAGE_SENDER_USER
assert serialized["timestamp"] == timestamp
assert serialized["timestamp"].tzinfo == timezone.utc
def test_message_to_lc_without_sender():
@ -170,6 +176,24 @@ def test_message_to_lc_without_sender():
assert lc_message.content == "Test message"
def test_timestamp_serialization():
"""Test timestamp serialization with different formats."""
# Test with timezone
msg1 = Message(text="Test message", sender=MESSAGE_SENDER_USER, timestamp="2023-12-25 15:30:45 UTC")
serialized1 = msg1.model_dump()
assert serialized1["timestamp"].tzinfo == timezone.utc
# Test without timezone
msg2 = Message(text="Test message", sender=MESSAGE_SENDER_USER, timestamp="2023-12-25 15:30:45")
serialized2 = msg2.model_dump()
assert serialized2["timestamp"].tzinfo == timezone.utc
# Test that both formats result in equivalent UTC times when appropriate
msg_with_tz = Message(text="Test message", sender=MESSAGE_SENDER_USER, timestamp="2023-12-25 15:30:45 UTC")
msg_without_tz = Message(text="Test message", sender=MESSAGE_SENDER_USER, timestamp="2023-12-25 15:30:45")
assert msg_with_tz.model_dump()["timestamp"] == msg_without_tz.model_dump()["timestamp"]
# Clean up the cache directory after all tests
@pytest.fixture(autouse=True)
def cleanup():

View file

@ -1,3 +1,4 @@
from datetime import datetime, timezone
from uuid import UUID
import pytest
@ -25,7 +26,7 @@ async def created_messages(session): # noqa: ARG001
messages = [
MessageCreate(text="Test message 1", sender="User", sender_name="User", session_id="session_id2"),
MessageCreate(text="Test message 2", sender="User", sender_name="User", session_id="session_id2"),
MessageCreate(text="Test message 3", sender="User", sender_name="User", session_id="session_id2"),
MessageCreate(text="Test message 3", sender="AI", sender_name="AI", session_id="session_id2"),
]
messagetables = [MessageTable.model_validate(message, from_attributes=True) for message in messages]
return await aadd_messagetables(messagetables, _session)
@ -99,8 +100,19 @@ async def test_successfully_update_session_id(client, logged_in_headers, created
)
assert response.status_code == 200
assert len(response.json()) == len(created_messages)
for message in response.json():
messages = response.json()
for message in messages:
assert message["session_id"] == new_session_id
response_timestamp = message["timestamp"]
timestamp = datetime.strptime(response_timestamp, "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=timezone.utc)
timestamp_str = timestamp.strftime("%Y-%m-%d %H:%M:%S %Z")
assert timestamp_str == response_timestamp
# Check if the messages ordered by timestamp are in the correct order
# User, User, AI
assert messages[0]["sender"] == "User"
assert messages[1]["sender"] == "User"
assert messages[2]["sender"] == "AI"
# No messages found with the given session ID