diff --git a/src/backend/base/langflow/schema/message.py b/src/backend/base/langflow/schema/message.py index f966fae8e..16f0fa847 100644 --- a/src/backend/base/langflow/schema/message.py +++ b/src/backend/base/langflow/schema/message.py @@ -22,7 +22,7 @@ from langflow.schema.content_types import ErrorContent from langflow.schema.data import Data from langflow.schema.image import Image, get_file_paths, is_image_file from langflow.schema.properties import Properties, Source -from langflow.schema.validators import timestamp_to_str_validator +from langflow.schema.validators import timestamp_to_str, timestamp_to_str_validator from langflow.utils.constants import ( MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, @@ -91,7 +91,7 @@ class Message(Data): def serialize_timestamp(self, value): try: # Try parsing with timezone - return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M:%S %Z").astimezone(timezone.utc) + return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=timezone.utc) except ValueError: # Try parsing without timezone return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc) @@ -311,6 +311,24 @@ class MessageResponse(DefaultModel): category: str | None = None content_blocks: list[ContentBlock] | None = None + @field_validator("content_blocks", mode="before") + @classmethod + def validate_content_blocks(cls, v): + if isinstance(v, str): + v = json.loads(v) + if isinstance(v, list): + return [cls.validate_content_blocks(block) for block in v] + if isinstance(v, dict): + return ContentBlock.model_validate(v) + return v + + @field_validator("properties", mode="before") + @classmethod + def validate_properties(cls, v): + if isinstance(v, str): + v = json.loads(v) + return v + @field_validator("files", mode="before") @classmethod def validate_files(cls, v): @@ -321,8 +339,7 @@ class MessageResponse(DefaultModel): @field_serializer("timestamp") @classmethod def serialize_timestamp(cls, v): - v = v.replace(microsecond=0) - return v.strftime("%Y-%m-%d %H:%M:%S %Z") + return timestamp_to_str(v) @field_serializer("files") @classmethod diff --git a/src/backend/base/langflow/schema/validators.py b/src/backend/base/langflow/schema/validators.py index 3c95bde51..b53ce86df 100644 --- a/src/backend/base/langflow/schema/validators.py +++ b/src/backend/base/langflow/schema/validators.py @@ -1,35 +1,114 @@ -from datetime import datetime +from datetime import datetime, timezone from pydantic import BeforeValidator def timestamp_to_str(timestamp: datetime | str) -> str: + """Convert timestamp to standardized string format. + + Handles multiple input formats and ensures consistent UTC timezone output. + + Args: + timestamp (datetime | str): Input timestamp either as datetime object or string + + Returns: + str: Formatted timestamp string in 'YYYY-MM-DD HH:MM:SS UTC' format + + Raises: + ValueError: If string timestamp is in invalid format + """ + if isinstance(timestamp, str): + # Try parsing with different formats + formats = [ + "%Y-%m-%dT%H:%M:%S", # ISO format + "%Y-%m-%d %H:%M:%S %Z", # Standard with timezone + "%Y-%m-%d %H:%M:%S", # Without timezone + "%Y-%m-%dT%H:%M:%S.%f", # ISO with microseconds + "%Y-%m-%dT%H:%M:%S%z", # ISO with numeric timezone + ] + + for fmt in formats: + try: + parsed = datetime.strptime(timestamp.strip(), fmt).replace(tzinfo=timezone.utc) + return parsed.strftime("%Y-%m-%d %H:%M:%S %Z") + except ValueError: + continue + + msg = f"Invalid timestamp format: {timestamp}" + raise ValueError(msg) + + # Handle datetime object + if timestamp.tzinfo is None: + timestamp = timestamp.replace(tzinfo=timezone.utc) + return timestamp.strftime("%Y-%m-%d %H:%M:%S %Z") + + +def str_to_timestamp(timestamp: str | datetime) -> datetime: + """Convert timestamp to datetime object. + + Handles multiple input formats and ensures consistent UTC timezone output. + + Args: + timestamp (str | datetime): Input timestamp either as string or datetime object + + Returns: + datetime: Datetime object with UTC timezone + + Raises: + ValueError: If string timestamp is not in 'YYYY-MM-DD HH:MM:SS UTC' format + """ if isinstance(timestamp, str): - # Just check if the string is a valid datetime try: - datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S %Z") # noqa: DTZ007 - result = timestamp + return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=timezone.utc) except ValueError as e: - msg = f"Invalid timestamp: {timestamp}" + msg = f"Invalid timestamp format: {timestamp}. Expected format: YYYY-MM-DD HH:MM:SS UTC" raise ValueError(msg) from e - else: - result = timestamp.strftime("%Y-%m-%d %H:%M:%S %Z") - return result + return timestamp def timestamp_with_fractional_seconds(timestamp: datetime | str) -> str: + """Convert timestamp to string format including fractional seconds. + + Handles multiple input formats and ensures consistent UTC timezone output. + + Args: + timestamp (datetime | str): Input timestamp either as datetime object or string + + Returns: + str: Formatted timestamp string in 'YYYY-MM-DD HH:MM:SS.ffffff UTC' format + + Raises: + ValueError: If string timestamp is in invalid format + """ if isinstance(timestamp, str): - # Just check if the string is a valid datetime - try: - datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f %Z") # noqa: DTZ007 - result = timestamp - except ValueError as e: - msg = f"Invalid timestamp: {timestamp}" - raise ValueError(msg) from e - else: - result = timestamp.strftime("%Y-%m-%d %H:%M:%S.%f %Z") - return result + # Try parsing with different formats + formats = [ + "%Y-%m-%d %H:%M:%S.%f %Z", # Standard with timezone + "%Y-%m-%d %H:%M:%S.%f", # Without timezone + "%Y-%m-%dT%H:%M:%S.%f", # ISO format + "%Y-%m-%dT%H:%M:%S.%f%z", # ISO with numeric timezone + # Also try without fractional seconds + "%Y-%m-%d %H:%M:%S %Z", + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%dT%H:%M:%S", + ] + + for fmt in formats: + try: + parsed = datetime.strptime(timestamp.strip(), fmt).replace(tzinfo=timezone.utc) + return parsed.strftime("%Y-%m-%d %H:%M:%S.%f %Z") + except ValueError: + continue + + msg = f"Invalid timestamp format: {timestamp}" + raise ValueError(msg) + + # Handle datetime object + if timestamp.tzinfo is None: + timestamp = timestamp.replace(tzinfo=timezone.utc) + return timestamp.strftime("%Y-%m-%d %H:%M:%S.%f %Z") timestamp_to_str_validator = BeforeValidator(timestamp_to_str) timestamp_with_fractional_seconds_validator = BeforeValidator(timestamp_with_fractional_seconds) +str_to_timestamp_validator = BeforeValidator(str_to_timestamp) diff --git a/src/backend/base/langflow/services/database/models/message/model.py b/src/backend/base/langflow/services/database/models/message/model.py index 7313f6a11..15f49e24f 100644 --- a/src/backend/base/langflow/services/database/models/message/model.py +++ b/src/backend/base/langflow/services/database/models/message/model.py @@ -1,6 +1,6 @@ import json from datetime import datetime, timezone -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Annotated from uuid import UUID, uuid4 from pydantic import field_serializer, field_validator @@ -9,6 +9,7 @@ from sqlmodel import JSON, Column, Field, Relationship, SQLModel from langflow.schema.content_block import ContentBlock from langflow.schema.properties import Properties +from langflow.schema.validators import str_to_timestamp_validator if TYPE_CHECKING: from langflow.schema.message import Message @@ -16,7 +17,9 @@ if TYPE_CHECKING: class MessageBase(SQLModel): - timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) + timestamp: Annotated[datetime, str_to_timestamp_validator] = Field( + default_factory=lambda: datetime.now(timezone.utc) + ) sender: str sender_name: str session_id: str @@ -29,6 +32,13 @@ class MessageBase(SQLModel): category: str = Field(default="message") content_blocks: list[ContentBlock] = Field(default_factory=list) + @field_validator("timestamp", mode="before") + @classmethod + def validate_timestamp(cls, value): + if isinstance(value, str): + return datetime.fromisoformat(value) + return value + @field_validator("files", mode="before") @classmethod def validate_files(cls, value): @@ -110,6 +120,25 @@ class MessageTable(MessageBase, table=True): # type: ignore[call-arg] category: str = Field(sa_column=Column(Text)) content_blocks: list[ContentBlock] = Field(default_factory=list, sa_column=Column(JSON)) # type: ignore[assignment] + # We need to make sure the datetimes have timezone after running session.refresh + # because we are losing the timezone information when we save the message to the database + # and when we read it back. We use field_validator to make sure the datetimes have timezone + # after running session.refresh + @field_validator("timestamp", mode="after") + @classmethod + def validate_timestamp(cls, value): + if isinstance(value, datetime): + return value.replace(tzinfo=timezone.utc) + return value + + @field_serializer("timestamp") + def serialize_timestamp(self, value, _info): + if isinstance(value, datetime): + if value.tzinfo is None: + value = value.replace(tzinfo=timezone.utc) + return value.strftime("%Y-%m-%d %H:%M:%S %Z") + return value + @field_validator("flow_id", mode="before") @classmethod def validate_flow_id(cls, value): @@ -136,6 +165,8 @@ class MessageTable(MessageBase, table=True): # type: ignore[call-arg] return [self.serialize_properties_or_content_blocks(item) for item in value] if hasattr(value, "model_dump"): return value.model_dump() + if isinstance(value, str): + return json.loads(value) return value # Needed for Column(JSON) diff --git a/src/backend/tests/unit/schema/test_schema_message.py b/src/backend/tests/unit/schema/test_schema_message.py index cdf47d670..94d181cb9 100644 --- a/src/backend/tests/unit/schema/test_schema_message.py +++ b/src/backend/tests/unit/schema/test_schema_message.py @@ -1,4 +1,5 @@ import shutil +from datetime import datetime, timezone from pathlib import Path import pytest @@ -154,11 +155,16 @@ def test_message_without_sender(): def test_message_serialization(): """Test message serialization to dict.""" + # Create a timestamp with timezone message = Message(text="Test message", sender=MESSAGE_SENDER_USER) + timestamp_str = message.timestamp + timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=timezone.utc) serialized = message.model_dump() assert serialized["text"] == "Test message" assert serialized["sender"] == MESSAGE_SENDER_USER + assert serialized["timestamp"] == timestamp + assert serialized["timestamp"].tzinfo == timezone.utc def test_message_to_lc_without_sender(): @@ -170,6 +176,24 @@ def test_message_to_lc_without_sender(): assert lc_message.content == "Test message" +def test_timestamp_serialization(): + """Test timestamp serialization with different formats.""" + # Test with timezone + msg1 = Message(text="Test message", sender=MESSAGE_SENDER_USER, timestamp="2023-12-25 15:30:45 UTC") + serialized1 = msg1.model_dump() + assert serialized1["timestamp"].tzinfo == timezone.utc + + # Test without timezone + msg2 = Message(text="Test message", sender=MESSAGE_SENDER_USER, timestamp="2023-12-25 15:30:45") + serialized2 = msg2.model_dump() + assert serialized2["timestamp"].tzinfo == timezone.utc + + # Test that both formats result in equivalent UTC times when appropriate + msg_with_tz = Message(text="Test message", sender=MESSAGE_SENDER_USER, timestamp="2023-12-25 15:30:45 UTC") + msg_without_tz = Message(text="Test message", sender=MESSAGE_SENDER_USER, timestamp="2023-12-25 15:30:45") + assert msg_with_tz.model_dump()["timestamp"] == msg_without_tz.model_dump()["timestamp"] + + # Clean up the cache directory after all tests @pytest.fixture(autouse=True) def cleanup(): diff --git a/src/backend/tests/unit/test_messages_endpoints.py b/src/backend/tests/unit/test_messages_endpoints.py index 0c155de71..2031cb5ef 100644 --- a/src/backend/tests/unit/test_messages_endpoints.py +++ b/src/backend/tests/unit/test_messages_endpoints.py @@ -1,3 +1,4 @@ +from datetime import datetime, timezone from uuid import UUID import pytest @@ -25,7 +26,7 @@ async def created_messages(session): # noqa: ARG001 messages = [ MessageCreate(text="Test message 1", sender="User", sender_name="User", session_id="session_id2"), MessageCreate(text="Test message 2", sender="User", sender_name="User", session_id="session_id2"), - MessageCreate(text="Test message 3", sender="User", sender_name="User", session_id="session_id2"), + MessageCreate(text="Test message 3", sender="AI", sender_name="AI", session_id="session_id2"), ] messagetables = [MessageTable.model_validate(message, from_attributes=True) for message in messages] return await aadd_messagetables(messagetables, _session) @@ -99,8 +100,19 @@ async def test_successfully_update_session_id(client, logged_in_headers, created ) assert response.status_code == 200 assert len(response.json()) == len(created_messages) - for message in response.json(): + messages = response.json() + for message in messages: assert message["session_id"] == new_session_id + response_timestamp = message["timestamp"] + timestamp = datetime.strptime(response_timestamp, "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=timezone.utc) + timestamp_str = timestamp.strftime("%Y-%m-%d %H:%M:%S %Z") + assert timestamp_str == response_timestamp + + # Check if the messages ordered by timestamp are in the correct order + # User, User, AI + assert messages[0]["sender"] == "User" + assert messages[1]["sender"] == "User" + assert messages[2]["sender"] == "AI" # No messages found with the given session ID