Admission control, entropy-based micro-faulting, phantom tool injection for backing store queries, and xMemory session hierarchy for long conversations (50+ turns). Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
130 lines
5.4 KiB
Python
130 lines
5.4 KiB
Python
"""Tests for goal-aware retrieval integration in the gateway.
|
|
|
|
Tests the Session attributes for goal tracking, cosine similarity-based
|
|
topic shift detection, and graceful degradation when helper_llm is absent.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from mnemosyne.gateway import Session
|
|
from mnemosyne.helper_llm import GoalClassification
|
|
from mnemosyne.object_store import DummyEmbedder, _cosine_similarity
|
|
|
|
|
|
# ── Fixtures ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture
def tmp_log_dir():
    """Yield a temporary directory as a ``Path`` for the duration of a test.

    The directory is created by ``TemporaryDirectory`` and cleaned up when
    the context manager exits, i.e. after the requesting test finishes.
    """
    with TemporaryDirectory() as scratch_name:
        log_dir = Path(scratch_name)
        yield log_dir
|
|
|
|
|
|
@pytest.fixture
def session(tmp_log_dir):
    """Return a new ``Session`` built from the id ``"goal01"`` and *tmp_log_dir*."""
    session_id = "goal01"
    return Session(session_id, tmp_log_dir)
|
|
|
|
|
|
# ── Session attributes ───────────────────────────────────────────────────
|
|
|
|
|
|
class TestGoalSessionAttributes:
    """Check the goal-tracking attributes present on a freshly built Session."""

    def test_goal_session_attributes_exist(self, session):
        """Goal state starts out as None and an entropy detector is attached."""
        # Both goal-tracking attributes must exist and begin unset.
        for unset_attr in ("_last_user_embedding", "_current_goal"):
            assert hasattr(session, unset_attr)
            assert getattr(session, unset_attr) is None

        # The entropy detector, by contrast, must already hold a value.
        assert hasattr(session, "entropy_detector")
        assert session.entropy_detector is not None
|
|
|
|
|
|
# ── Cosine similarity topic shift ────────────────────────────────────────
|
|
|
|
|
|
class TestCosineTopicShift:
    """Exercise cosine-similarity topic-shift detection and goal bookkeeping."""

    def test_goal_cosine_similarity_topic_shift(self):
        """Unrelated prompts score a cosine similarity below 0.5."""
        dummy = DummyEmbedder()
        db_question = "How do I configure the database connection pool?"
        ui_question = "What color should the login button be?"

        similarity = _cosine_similarity(
            dummy.embed(db_question),
            dummy.embed(ui_question),
        )

        # Per the original test notes: DummyEmbedder derives its vectors from
        # a hash of the text, so distinct texts behave like random vectors;
        # for 384-dim random unit vectors the expected |cos| is about 0.05.
        assert similarity < 0.5

    def test_goal_cosine_similarity_same_topic(self):
        """Embedding the same text twice gives a similarity of (nearly) 1.0."""
        dummy = DummyEmbedder()
        prompt = "How do I configure the database connection pool?"

        first = dummy.embed(prompt)
        second = dummy.embed(prompt)

        # Same text -> same hash -> same embedding.
        assert _cosine_similarity(first, second) > 0.99

    def test_goal_first_message_always_classifies(self, session):
        """With no previous embedding, the goal is treated as changed."""
        assert session._last_user_embedding is None

        # Mirrors the branch described for gateway._preprocess step 1c:
        # a stored embedding means "compare similarity"; none means "changed".
        if session._last_user_embedding is not None:
            goal_changed = False
        else:
            goal_changed = True

        assert goal_changed is True

    def test_goal_classification_fallback_no_helper(self, session):
        """Without a helper LLM, no goal is classified and none is stored.

        The gateway is described as guarding classification with
        `if goal_changed and helper_llm is not None` inside a try/except.
        This test replays that guard with helper_llm set to None and checks
        that _current_goal is left as None.
        """
        helper_llm = None  # simulate a gateway running without a helper
        goal_changed = True  # first message of the session

        # De Morgan form of the gateway guard: skip unless both conditions hold.
        classification_skipped = (not goal_changed) or helper_llm is None
        if not classification_skipped:
            # The gateway would call helper_llm.classify_goal(...) here.
            session._current_goal = GoalClassification(goal="test")

        assert session._current_goal is None

    def test_goal_embedding_updated_after_message(self, session):
        """Storing the current embedding on the session leaves a 384-long vector."""
        user_text = "Tell me about the authentication system"

        # Simulate the gateway recording the latest user embedding.
        session._last_user_embedding = DummyEmbedder().embed(user_text)

        assert session._last_user_embedding is not None
        assert len(session._last_user_embedding) == 384

    def test_goal_classification_stored_on_session(self, session):
        """A GoalClassification assigned to _current_goal is readable back."""
        goal_fields = {
            "goal": "Implement authentication",
            "relevant_types": ["file_context", "design_decision"],
            "relevant_tags": ["auth", "security"],
            "predicted_needs": ["auth.py", "middleware.py"],
        }
        session._current_goal = GoalClassification(**goal_fields)

        stored_goal = session._current_goal
        assert stored_goal.goal == "Implement authentication"
        assert "file_context" in stored_goal.relevant_types
        assert "auth" in stored_goal.relevant_tags
|