5-level fidelity manager (L0-Full to L4-Evicted) with helper LLM (Haiku 4.5) for intelligent summarization during degradation. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
572 lines
21 KiB
Python
572 lines
21 KiB
Python
"""Tests for the Helper LLM client.

All tests use mocked Anthropic API responses — no real API calls.
"""
|
|
|
|
from __future__ import annotations

import json
from unittest.mock import AsyncMock, MagicMock, patch

import anthropic
import pytest

from mnemosyne.helper_llm import (
    GoalClassification,
    HelperLLM,
    SummaryResult,
    _L0_TO_L1_PROMPT,
    _L1_TO_L2_PROMPT,
    _GOAL_CLASSIFICATION_PROMPT,
    _MICRO_FAULT_PROMPT,
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _make_api_response(text: str) -> MagicMock:
|
|
"""Build a mock Anthropic Messages response with a single text block."""
|
|
block = MagicMock()
|
|
block.type = "text"
|
|
block.text = text
|
|
response = MagicMock()
|
|
response.content = [block]
|
|
return response
|
|
|
|
|
|
@pytest.fixture()
def helper() -> HelperLLM:
    """Return a HelperLLM with a mocked async client.

    The instance is constructed while ``ANTHROPIC_API_KEY`` is patched into
    the environment, then its ``_client`` is replaced by a mock whose
    ``messages.create`` is an ``AsyncMock`` so tests can await it and
    inspect the call arguments.
    """
    # Only construction needs the patched environment; the client is swapped
    # for a mock immediately afterwards.
    with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key-000"}):
        h = HelperLLM()
    h._client = MagicMock()
    h._client.messages = MagicMock()
    h._client.messages.create = AsyncMock()
    return h
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Dataclass tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDataclasses:
    """Field defaults and explicit values of the result dataclasses."""

    def test_summary_result_defaults(self) -> None:
        """Only ``summary`` is supplied; the list fields compare equal to []."""
        r = SummaryResult(summary="hello")
        assert r.summary == "hello"
        assert r.losses == []
        assert r.can_answer == []
        assert r.key_entities == []

    def test_summary_result_with_fields(self) -> None:
        """Explicitly passed list fields are stored as given."""
        r = SummaryResult(
            summary="s",
            losses=["a"],
            can_answer=["b"],
            key_entities=["c"],
        )
        assert r.losses == ["a"]
        assert r.can_answer == ["b"]
        assert r.key_entities == ["c"]

    def test_goal_classification_defaults(self) -> None:
        """Only ``goal`` is supplied; the list fields compare equal to []."""
        g = GoalClassification(goal="write tests")
        assert g.goal == "write tests"
        assert g.relevant_types == []
        assert g.relevant_tags == []
        assert g.predicted_needs == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# JSON parsing tests (static methods, no API calls)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestParseSummaryJson:
    """``HelperLLM._parse_summary_json`` on well-formed and malformed input.

    The parser is called directly on the class, so no API mock is involved.
    """

    def test_valid_json(self) -> None:
        """A plain JSON object maps field-for-field onto the result."""
        raw = json.dumps(
            {
                "summary": "Auth middleware uses JWT.",
                "losses": ["exact error codes"],
                "can_answer": ["auth approach"],
                "key_entities": ["src/auth/middleware.ts"],
            }
        )
        result = HelperLLM._parse_summary_json(raw)
        assert result is not None
        assert result.summary == "Auth middleware uses JWT."
        assert result.losses == ["exact error codes"]
        assert result.can_answer == ["auth approach"]
        assert result.key_entities == ["src/auth/middleware.ts"]

    def test_json_in_code_fence(self) -> None:
        """JSON wrapped in a ```json fenced block is still parsed."""
        raw = (
            '```json\n{"summary": "test", "losses": [], "can_answer": [], "key_entities": []}\n```'
        )
        result = HelperLLM._parse_summary_json(raw)
        assert result is not None
        assert result.summary == "test"

    def test_json_with_surrounding_text(self) -> None:
        """Prose before and after the JSON object does not prevent parsing."""
        raw = 'Here is the result:\n{"summary": "ok", "losses": ["x"]}\nDone.'
        result = HelperLLM._parse_summary_json(raw)
        assert result is not None
        assert result.summary == "ok"
        assert result.losses == ["x"]

    def test_invalid_json_returns_none(self) -> None:
        """Text with no JSON at all yields ``None``."""
        assert HelperLLM._parse_summary_json("not json at all") is None

    def test_empty_string_returns_none(self) -> None:
        """The empty string yields ``None``."""
        assert HelperLLM._parse_summary_json("") is None

    def test_non_dict_json_returns_none(self) -> None:
        """Valid JSON that is not an object (here a list) yields ``None``."""
        assert HelperLLM._parse_summary_json("[1, 2, 3]") is None

    def test_missing_fields_default_to_empty(self) -> None:
        """Keys absent from the JSON object come back as empty lists."""
        raw = '{"summary": "minimal"}'
        result = HelperLLM._parse_summary_json(raw)
        assert result is not None
        assert result.summary == "minimal"
        assert result.losses == []
        assert result.can_answer == []
        assert result.key_entities == []
|
|
|
|
|
|
class TestParseGoalJson:
    """``HelperLLM._parse_goal_json`` on well-formed and malformed input."""

    def test_valid_json(self) -> None:
        """A plain JSON object populates goal, types and tags."""
        raw = json.dumps(
            {
                "goal": "write auth tests",
                "relevant_types": ["file_context", "design_decision"],
                "relevant_tags": ["auth", "testing"],
                "predicted_needs": ["auth middleware impl"],
            }
        )
        result = HelperLLM._parse_goal_json(raw)
        assert result is not None
        assert result.goal == "write auth tests"
        assert "file_context" in result.relevant_types
        assert "auth" in result.relevant_tags

    def test_invalid_json_returns_none(self) -> None:
        """Non-JSON text yields ``None``."""
        assert HelperLLM._parse_goal_json("garbage") is None

    def test_json_in_code_fence(self) -> None:
        """JSON inside a bare ``` fence (no language tag) is still parsed."""
        raw = (
            '```\n{"goal": "deploy", "relevant_types": [], "relevant_tags": [], "predicted_needs": []}\n```'
        )
        result = HelperLLM._parse_goal_json(raw)
        assert result is not None
        assert result.goal == "deploy"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Prompt construction tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestPromptConstruction:
    """The prompt templates embed every value passed to ``str.format``."""

    def test_l0_to_l1_prompt_contains_content_and_type(self) -> None:
        """L0->L1 prompt carries the inputs plus its fixed section headings."""
        prompt = _L0_TO_L1_PROMPT.format(
            object_type="file_context",
            content="def hello(): pass",
        )
        assert "file_context" in prompt
        assert "def hello(): pass" in prompt
        assert "DECLARED LOSSES" in prompt
        assert "CAN_ANSWER" in prompt
        assert "OUTPUT FORMAT (JSON)" in prompt

    def test_l1_to_l2_prompt_includes_losses(self) -> None:
        """L1->L2 prompt carries the type, the L1 summary and the L1 losses."""
        prompt = _L1_TO_L2_PROMPT.format(
            object_type="debugging_session",
            l1_summary="Fixed race condition",
            l1_losses="- exact error codes\n- line numbers",
        )
        assert "debugging_session" in prompt
        assert "Fixed race condition" in prompt
        assert "exact error codes" in prompt

    def test_goal_prompt_includes_message_and_context(self) -> None:
        """Goal prompt carries the user message and the recent context."""
        prompt = _GOAL_CLASSIFICATION_PROMPT.format(
            user_message="now write tests",
            recent_context="We just implemented auth.",
        )
        assert "now write tests" in prompt
        assert "We just implemented auth." in prompt

    def test_micro_fault_prompt_includes_question_and_context(self) -> None:
        """Micro-fault prompt carries the question and the context."""
        prompt = _MICRO_FAULT_PROMPT.format(
            question="What error code?",
            context="Error 401 unauthorized",
        )
        assert "What error code?" in prompt
        assert "Error 401 unauthorized" in prompt
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# API call tests (mocked)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestSummarizeL0ToL1:
    """``HelperLLM.summarize_l0_to_l1`` against a mocked Messages API.

    NOTE(review): these coroutine tests carry no explicit asyncio marker, so
    they presumably rely on the project's pytest async configuration — confirm.
    """

    async def test_successful_summarization(self, helper: HelperLLM) -> None:
        """A valid JSON reply is parsed and the request carries model and content."""
        api_response = _make_api_response(
            json.dumps(
                {
                    "summary": "Auth middleware validates JWT tokens.",
                    "losses": ["exact error codes for token expiry"],
                    "can_answer": ["auth approach used"],
                    "key_entities": ["src/auth/middleware.ts", "jsonwebtoken"],
                }
            )
        )
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.summarize_l0_to_l1(
            content="Full auth middleware source code here...",
            object_type="file_context",
        )

        assert isinstance(result, SummaryResult)
        assert result.summary == "Auth middleware validates JWT tokens."
        assert "exact error codes for token expiry" in result.losses
        assert "src/auth/middleware.ts" in result.key_entities

        # Verify the API was called with correct model
        call_kwargs = helper._client.messages.create.call_args.kwargs
        assert call_kwargs["model"] == "claude-haiku-4-5-20251001"
        # Verify prompt contains the content
        user_msg = call_kwargs["messages"][0]["content"]
        assert "file_context" in user_msg
        assert "Full auth middleware source code here..." in user_msg

    async def test_json_parse_failure_returns_fallback(self, helper: HelperLLM) -> None:
        """An unparseable reply falls back to a prefix of the original content."""
        api_response = _make_api_response("This is not valid JSON at all.")
        helper._client.messages.create = AsyncMock(return_value=api_response)

        content = "A" * 1000
        result = await helper.summarize_l0_to_l1(
            content=content,
            object_type="file_context",
        )

        assert isinstance(result, SummaryResult)
        # Fallback: first 30% of content
        assert len(result.summary) == 300
        assert result.summary == "A" * 300
        assert result.losses == []

    async def test_respects_max_summary_tokens(self, helper: HelperLLM) -> None:
        """``max_summary_tokens`` is forwarded as the request's ``max_tokens``."""
        api_response = _make_api_response('{"summary": "ok"}')
        helper._client.messages.create = AsyncMock(return_value=api_response)

        await helper.summarize_l0_to_l1(
            content="test",
            object_type="tool_result",
            max_summary_tokens=512,
        )

        call_kwargs = helper._client.messages.create.call_args.kwargs
        assert call_kwargs["max_tokens"] == 512
|
|
|
|
|
|
class TestCompressL1ToL2:
    """``HelperLLM.compress_l1_to_l2`` against a mocked Messages API.

    NOTE(review): these coroutine tests carry no explicit asyncio marker, so
    they presumably rely on the project's pytest async configuration — confirm.
    """

    async def test_successful_compression(self, helper: HelperLLM) -> None:
        """The new summary is returned and L1 losses lead the loss list."""
        api_response = _make_api_response(
            json.dumps(
                {
                    "summary": "Auth uses JWT with refresh tokens.",
                    "losses": ["function signatures"],
                    "can_answer": ["what was decided"],
                    "key_entities": ["middleware.ts"],
                }
            )
        )
        helper._client.messages.create = AsyncMock(return_value=api_response)

        l1_losses = ["exact error codes", "line-by-line implementation"]
        result = await helper.compress_l1_to_l2(
            l1_summary="Detailed auth summary...",
            l1_losses=l1_losses,
            object_type="file_context",
        )

        assert isinstance(result, SummaryResult)
        assert result.summary == "Auth uses JWT with refresh tokens."
        # L1 losses should be accumulated (prepended)
        assert result.losses[0] == "exact error codes"
        assert result.losses[1] == "line-by-line implementation"
        assert "function signatures" in result.losses

    async def test_loss_accumulation(self, helper: HelperLLM) -> None:
        """Resulting losses are exactly the L1 losses followed by the new ones."""
        api_response = _make_api_response(
            json.dumps(
                {
                    "summary": "compact",
                    "losses": ["new_loss"],
                    "can_answer": [],
                    "key_entities": [],
                }
            )
        )
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.compress_l1_to_l2(
            l1_summary="summary",
            l1_losses=["old_loss_1", "old_loss_2"],
            object_type="design_decision",
        )

        assert result.losses == ["old_loss_1", "old_loss_2", "new_loss"]

    async def test_fallback_on_parse_failure(self, helper: HelperLLM) -> None:
        """An unparseable reply truncates the L1 summary and keeps L1 losses."""
        api_response = _make_api_response("broken response")
        helper._client.messages.create = AsyncMock(return_value=api_response)

        l1_losses = ["loss_a"]
        result = await helper.compress_l1_to_l2(
            l1_summary="A" * 100,
            l1_losses=l1_losses,
            object_type="file_context",
        )

        assert isinstance(result, SummaryResult)
        # Fallback: 30% of l1_summary
        assert len(result.summary) == 30
        # L1 losses preserved in fallback
        assert result.losses == ["loss_a"]
|
|
|
|
|
|
class TestGenerateStub:
    """``HelperLLM.generate_stub`` against a mocked Messages API.

    NOTE(review): these coroutine tests carry no explicit asyncio marker, so
    they presumably rely on the project's pytest async configuration — confirm.
    """

    async def test_successful_stub(self, helper: HelperLLM) -> None:
        """A single-line reply is returned unchanged."""
        stub_text = "[debugging_session | 2026-03-13 14:30 | Fixed race condition in auth | 12 related objects]"
        api_response = _make_api_response(stub_text)
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.generate_stub(
            l2_summary="Fixed race condition in auth token refresh.",
            object_type="debugging_session",
            timestamp="2026-03-13 14:30",
        )

        assert result == stub_text
        assert "debugging_session" in result
        assert "2026-03-13 14:30" in result

    async def test_multiline_response_takes_first_line(self, helper: HelperLLM) -> None:
        """Only the first line of a multi-line reply is kept."""
        api_response = _make_api_response(
            "[type | ts | desc | 0 related objects]\nExtra line\nAnother"
        )
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.generate_stub("summary", "type", "ts")
        assert "\n" not in result
        assert result == "[type | ts | desc | 0 related objects]"

    async def test_empty_response_fallback(self, helper: HelperLLM) -> None:
        """An empty reply yields a stub naming the type, timestamp and a placeholder."""
        api_response = _make_api_response("")
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.generate_stub("summary", "file_context", "2026-01-01")
        assert "file_context" in result
        assert "2026-01-01" in result
        assert "summary unavailable" in result
|
|
|
|
|
|
class TestAnswerMicroFault:
    """``HelperLLM.answer_micro_fault`` against a mocked Messages API.

    NOTE(review): these coroutine tests carry no explicit asyncio marker, so
    they presumably rely on the project's pytest async configuration — confirm.
    """

    async def test_successful_answer(self, helper: HelperLLM) -> None:
        """The reply text is returned and the prompt holds question and contents."""
        api_response = _make_api_response("The error code is 401 UNAUTHORIZED.")
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.answer_micro_fault(
            question="What error code does auth return for expired tokens?",
            relevant_contents=[
                "Auth middleware returns 401 for expired tokens.",
                "Token refresh logic in refresh.ts.",
            ],
        )

        assert result == "The error code is 401 UNAUTHORIZED."

        # Verify context was joined with separator
        call_kwargs = helper._client.messages.create.call_args.kwargs
        prompt = call_kwargs["messages"][0]["content"]
        assert "What error code" in prompt
        assert "Auth middleware returns 401" in prompt
        assert "---" in prompt  # separator between contents

    async def test_empty_response_fallback(self, helper: HelperLLM) -> None:
        """A whitespace-only reply yields the fixed "unable to answer" message."""
        api_response = _make_api_response("   ")
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.answer_micro_fault(
            question="anything",
            relevant_contents=["content"],
        )

        assert result == "Unable to answer from available context."

    async def test_respects_max_tokens(self, helper: HelperLLM) -> None:
        """``max_tokens`` is forwarded unchanged to the API request."""
        api_response = _make_api_response("answer")
        helper._client.messages.create = AsyncMock(return_value=api_response)

        await helper.answer_micro_fault("q", ["c"], max_tokens=150)

        call_kwargs = helper._client.messages.create.call_args.kwargs
        assert call_kwargs["max_tokens"] == 150
|
|
|
|
|
|
class TestClassifyGoal:
    """``HelperLLM.classify_goal`` against a mocked Messages API.

    NOTE(review): these coroutine tests carry no explicit asyncio marker, so
    they presumably rely on the project's pytest async configuration — confirm.
    """

    async def test_successful_classification(self, helper: HelperLLM) -> None:
        """A valid JSON reply populates every field of the classification."""
        api_response = _make_api_response(
            json.dumps(
                {
                    "goal": "write unit tests for auth module",
                    "relevant_types": ["file_context", "design_decision"],
                    "relevant_tags": ["auth", "testing"],
                    "predicted_needs": ["auth middleware implementation", "test patterns"],
                }
            )
        )
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.classify_goal(
            user_message="Now write tests for the auth middleware",
            recent_context="We just finished implementing JWT auth.",
        )

        assert isinstance(result, GoalClassification)
        assert result.goal == "write unit tests for auth module"
        assert "file_context" in result.relevant_types
        assert "auth" in result.relevant_tags
        assert "auth middleware implementation" in result.predicted_needs

    async def test_fallback_on_parse_failure(self, helper: HelperLLM) -> None:
        """An unparseable reply falls back to the raw user message as the goal."""
        api_response = _make_api_response("I don't understand the format")
        helper._client.messages.create = AsyncMock(return_value=api_response)

        result = await helper.classify_goal(
            user_message="deploy to production",
            recent_context="context",
        )

        assert isinstance(result, GoalClassification)
        assert result.goal == "deploy to production"
        assert result.relevant_types == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Error handling tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestErrorHandling:
    """API failures degrade to each method's fallback value instead of raising.

    Every test makes the mocked ``messages.create`` raise an Anthropic SDK
    error and checks the value the helper returns.

    NOTE(review): these coroutine tests carry no explicit asyncio marker, so
    they presumably rely on the project's pytest async configuration — confirm.
    """

    async def test_timeout_returns_fallback(self, helper: HelperLLM) -> None:
        """A timeout during summarization yields the truncated-content fallback."""
        helper._client.messages.create = AsyncMock(
            side_effect=anthropic.APITimeoutError(request=MagicMock())
        )

        result = await helper.summarize_l0_to_l1(
            content="some content here",
            object_type="file_context",
        )

        # Should get fallback (30% of content)
        assert isinstance(result, SummaryResult)
        assert len(result.summary) == 5  # 30% of 17 chars ≈ 5

    async def test_api_error_returns_fallback(self, helper: HelperLLM) -> None:
        """A generic API error during summarization yields the same fallback."""
        helper._client.messages.create = AsyncMock(
            side_effect=anthropic.APIError(
                message="Internal server error",
                request=MagicMock(),
                body=None,
            )
        )

        result = await helper.summarize_l0_to_l1(
            content="test content",
            object_type="tool_result",
        )

        assert isinstance(result, SummaryResult)
        # Fallback summary
        assert result.summary == "tes"  # 30% of 12 chars = 3

    async def test_timeout_on_micro_fault(self, helper: HelperLLM) -> None:
        """A timeout while answering yields the fixed "unable to answer" message."""
        helper._client.messages.create = AsyncMock(
            side_effect=anthropic.APITimeoutError(request=MagicMock())
        )

        result = await helper.answer_micro_fault("question", ["content"])
        assert result == "Unable to answer from available context."

    async def test_timeout_on_goal_classification(self, helper: HelperLLM) -> None:
        """A timeout while classifying falls back to the raw user message."""
        helper._client.messages.create = AsyncMock(
            side_effect=anthropic.APITimeoutError(request=MagicMock())
        )

        result = await helper.classify_goal("do something", "context")
        assert isinstance(result, GoalClassification)
        assert result.goal == "do something"

    async def test_timeout_on_generate_stub(self, helper: HelperLLM) -> None:
        """A timeout while generating a stub yields the placeholder stub."""
        helper._client.messages.create = AsyncMock(
            side_effect=anthropic.APITimeoutError(request=MagicMock())
        )

        result = await helper.generate_stub("summary", "file_context", "2026-01-01")
        assert "file_context" in result
        assert "summary unavailable" in result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Constructor tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestConstructor:
    """Arguments ``HelperLLM.__init__`` passes to ``anthropic.AsyncAnthropic``.

    The client class is patched in ``mnemosyne.helper_llm`` so that no real
    client is built and the constructor keyword arguments can be inspected.
    """

    def test_uses_provided_api_key(self) -> None:
        """An explicit ``api_key`` argument is forwarded to the client."""
        with patch("mnemosyne.helper_llm.anthropic.AsyncAnthropic") as mock_cls:
            HelperLLM(api_key="sk-test-123")
        call_kwargs = mock_cls.call_args.kwargs
        assert call_kwargs["api_key"] == "sk-test-123"

    def test_falls_back_to_env_var(self) -> None:
        """Without ``api_key``, the ``ANTHROPIC_API_KEY`` variable is used."""
        with (
            patch.dict("os.environ", {"ANTHROPIC_API_KEY": "sk-env-456"}),
            patch("mnemosyne.helper_llm.anthropic.AsyncAnthropic") as mock_cls,
        ):
            HelperLLM()
        call_kwargs = mock_cls.call_args.kwargs
        assert call_kwargs["api_key"] == "sk-env-456"

    def test_custom_model_and_base_url(self) -> None:
        """``base_url`` reaches the client and ``model`` is kept on ``_model``."""
        with patch("mnemosyne.helper_llm.anthropic.AsyncAnthropic") as mock_cls:
            h = HelperLLM(
                api_key="key",
                model="claude-3-haiku-20240307",
                base_url="http://localhost:8080",
            )
        call_kwargs = mock_cls.call_args.kwargs
        assert call_kwargs["base_url"] == "http://localhost:8080"
        assert h._model == "claude-3-haiku-20240307"

    def test_default_timeout_and_retries(self) -> None:
        """By default the client gets a 10.0 timeout and 2 max retries."""
        with patch("mnemosyne.helper_llm.anthropic.AsyncAnthropic") as mock_cls:
            HelperLLM(api_key="key")
        call_kwargs = mock_cls.call_args.kwargs
        assert call_kwargs["timeout"] == 10.0
        assert call_kwargs["max_retries"] == 2
|