commit ed0361f97c5a8fcc2913fbfda722f600a6c2001e Author: Joey Yakimowich-Payne Date: Fri Mar 13 11:40:35 2026 -0600 chore: initialize project scaffold and config Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d7604ce --- /dev/null +++ b/.gitignore @@ -0,0 +1,48 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.egg-info/ +*.egg +dist/ +build/ +.eggs/ + +# Virtual environment +.venv/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# uv +uv.lock + +# Logs +logs/ +*.jsonl + +# Database +*.db +*.sqlite + +# Docker +pgdata/ + +# Node +node_modules/ +package-lock.json + +# Linters/caches +.ruff_cache/ +.pytest_cache/ +.mypy_cache/ + +# Temporary files +*.png + +# OS +.DS_Store +Thumbs.db diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..6324d40 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.14 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..f669e0c --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,20 @@ +services: + postgres: + image: pgvector/pgvector:pg16 + environment: + POSTGRES_DB: mnemosyne + POSTGRES_USER: mnemosyne + POSTGRES_PASSWORD: mnemosyne_dev + ports: + - "5433:5432" + volumes: + - pgdata:/var/lib/postgresql/data + - ./sql/init.sql:/docker-entrypoint-initdb.d/01-init.sql + healthcheck: + test: ["CMD-SHELL", "pg_isready -U mnemosyne"] + interval: 5s + timeout: 5s + retries: 5 + +volumes: + pgdata: diff --git a/mnemosyne.toml b/mnemosyne.toml new file mode 100644 index 0000000..0e26bf5 --- /dev/null +++ b/mnemosyne.toml @@ -0,0 +1,78 @@ +# Mnemosyne configuration +# Copy this to your working directory and customize. 
+ +[proxy] +host = "127.0.0.1" +port = 0 # 0 = random free port +upstream = "https://api.anthropic.com" + +[paging] +window_size = 200000 +enable_paging = true +enable_trim = true +age_threshold = 4 +min_evict_size = 500 + +[fidelity] +# Pressure zone thresholds (as fraction of window_size) +normal_max = 0.50 +caution_max = 0.70 +warning_max = 0.85 +critical_max = 0.95 +# Above critical_max = emergency zone + +# Fidelity degradation settings +pin_duration_turns = 5 # How long a faulted object stays pinned +min_age_for_degrade = 3 # Don't degrade objects from the last N turns + +[helper_llm] +model = "claude-haiku-4-5-20251001" +# If empty, uses ANTHROPIC_API_KEY env var +api_key = "" +base_url = "https://api.anthropic.com" +timeout_seconds = 10 +max_retries = 2 +# Max tokens for summary generation +max_summary_tokens = 1024 +max_compact_tokens = 256 +max_stub_tokens = 64 +max_micro_fault_tokens = 200 +max_goal_tokens = 128 + +[object_store] +# "memory" for in-memory only (Phase 1-2) +# "sqlite" for local file (Phase 2+) +# "postgresql" for full backing store (Phase 3+) +backend = "memory" + +[object_store.sqlite] +path = "mnemosyne.db" + +[object_store.postgresql] +host = "localhost" +port = 5433 +database = "mnemosyne" +user = "mnemosyne" +password = "mnemosyne_dev" + +[embeddings] +model = "all-MiniLM-L6-v2" +dimension = 384 +# Use ONNX runtime for faster inference +use_onnx = true + +[admission] +enabled = false # Enable in Phase 4d +threshold = 0.4 +weight_type = 0.35 +weight_novelty = 0.25 +weight_utility = 0.25 +weight_recency = 0.15 + +[entropy] +enabled = false # Enable in Phase 4e +normal_max = 1.5 +elevated_max = 2.2 +# Above elevated_max = high entropy +window_size = 20 # Rolling window of tokens for entropy calculation +debounce_count = 3 # Require N consecutive high-entropy tokens diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..efb8c43 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,63 @@ +[project] +name = "mnemosyne" 
+version = "0.1.0" +description = "Object-addressed context memory for LLM agents — multi-fidelity compression with goal-aware retrieval" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + # --- Inherited from Pichay (proxy foundation) --- + "httpx>=0.27", + "fastapi>=0.116.0", + "uvicorn>=0.35.0", + "anthropic>=0.84.0", + "numpy>=2.2.0", + + # --- Mnemosyne additions --- + # PostgreSQL + vector search (backing store) + "asyncpg>=0.30.0", + "pgvector>=0.3.6", + + # Embeddings (semantic search, goal detection) + "sentence-transformers>=3.4.0", + + # Config + "tomli>=2.0.0; python_version < '3.11'", + + # Telemetry (optional, from Pichay) + "prometheus-client>=0.23.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0", + "pytest-asyncio>=0.25.0", + "ruff>=0.9.0", +] +# Pichay analysis tools (not needed for proxy operation) +analysis = [ + "matplotlib>=3.10.0", + "polars>=1.38.0", + "dask>=2024.1.0", + "flask>=3.0", +] + +[project.scripts] +mnemosyne = "mnemosyne.gateway:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/mnemosyne"] + +[tool.ruff] +target-version = "py312" +line-length = 100 + +[tool.ruff.lint] +select = ["E", "F", "I", "N", "W", "UP"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/sql/init.sql b/sql/init.sql new file mode 100644 index 0000000..e7726be --- /dev/null +++ b/sql/init.sql @@ -0,0 +1,161 @@ +-- Mnemosyne Object Store Schema +-- PostgreSQL 16 + pgvector +-- See SCHEMA.md for full documentation. 
-- ---------------------------------------------------------------------------
-- Schema body.
--
-- This script is mounted into the Postgres container as
-- /docker-entrypoint-initdb.d/01-init.sql (see docker-compose.yml), so it
-- runs against an EMPTY database. Every statement is written with
-- IF NOT EXISTS so the script can also be re-applied by hand to an existing
-- database without failing on the first object that is already there.
-- ---------------------------------------------------------------------------

CREATE EXTENSION IF NOT EXISTS vector;

-- 1.1 Sessions
-- One row per session. external_id is the caller-supplied identifier and is
-- enforced unique.
CREATE TABLE IF NOT EXISTS sessions (
    id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    external_id         TEXT UNIQUE NOT NULL,
    model               TEXT NOT NULL,
    created_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
    last_active_at      TIMESTAMPTZ NOT NULL DEFAULT now(),
    total_turns         INTEGER NOT NULL DEFAULT 0,
    total_objects       INTEGER NOT NULL DEFAULT 0,
    total_faults        INTEGER NOT NULL DEFAULT 0,
    total_micro_faults  INTEGER NOT NULL DEFAULT 0,
    status              TEXT NOT NULL DEFAULT 'active',
    config              JSONB NOT NULL DEFAULT '{}'
);

-- NOTE: no explicit index on sessions(external_id). The UNIQUE constraint
-- above already creates a unique b-tree index on that column; a second
-- plain index (formerly idx_sessions_external) would only duplicate it and
-- add write overhead.

-- Partial index: only rows currently marked 'active' are indexed.
CREATE INDEX IF NOT EXISTS idx_sessions_active
    ON sessions(status)
    WHERE status = 'active';

-- 1.2 Semantic Objects
-- Objects belong to a session and are removed with it (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS semantic_objects (
    id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    session_id          UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,

    -- Identity
    object_type         TEXT NOT NULL,
    source_tool         TEXT,
    source_key          TEXT,

    -- Multi-fidelity content
    content_full        TEXT NOT NULL,
    summary_detailed    TEXT,
    summary_compact     TEXT,
    stub                TEXT NOT NULL,

    -- Declared losses
    -- NOTE(review): these JSONB/array columns have defaults but are nullable,
    -- so an explicit NULL can still be stored; readers should tolerate NULL.
    losses_l1           JSONB DEFAULT '[]',
    losses_l2           JSONB DEFAULT '[]',
    can_answer_l1       JSONB DEFAULT '[]',
    can_answer_l2       JSONB DEFAULT '[]',
    fault_when          JSONB DEFAULT '[]',

    -- Entities and tags
    key_entities        JSONB DEFAULT '[]',
    tags                TEXT[] DEFAULT '{}',

    -- State
    -- NOTE(review): presumably 0..3, mirroring tokens_l0..tokens_l3 below;
    -- confirm against SCHEMA.md before adding a CHECK constraint.
    current_fidelity    INTEGER NOT NULL DEFAULT 0,
    pinned              BOOLEAN NOT NULL DEFAULT false,
    pin_reason          TEXT,

    -- Metrics
    created_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
    last_accessed       TIMESTAMPTZ NOT NULL DEFAULT now(),
    access_count        INTEGER NOT NULL DEFAULT 0,
    fault_count         INTEGER NOT NULL DEFAULT 0,
    micro_fault_count   INTEGER NOT NULL DEFAULT 0,

    -- Token counts
    tokens_l0           INTEGER NOT NULL DEFAULT 0,
    tokens_l1           INTEGER,
    tokens_l2           INTEGER,
    tokens_l3           INTEGER NOT NULL DEFAULT 0,

    -- Source range
    source_turn_start   INTEGER,
    source_turn_end     INTEGER,

    -- Embedding
    -- 384 dimensions; must match [embeddings].dimension in mnemosyne.toml.
    embedding           vector(384) NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_objects_session
    ON semantic_objects(session_id);
CREATE INDEX IF NOT EXISTS idx_objects_session_fidelity
    ON semantic_objects(session_id, current_fidelity);
CREATE INDEX IF NOT EXISTS idx_objects_session_type
    ON semantic_objects(session_id, object_type);
CREATE INDEX IF NOT EXISTS idx_objects_source_key
    ON semantic_objects(session_id, source_key)
    WHERE source_key IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_objects_created
    ON semantic_objects(session_id, created_at);
CREATE INDEX IF NOT EXISTS idx_objects_last_accessed
    ON semantic_objects(session_id, last_accessed);
CREATE INDEX IF NOT EXISTS idx_objects_tags
    ON semantic_objects USING GIN(tags);

-- Approximate nearest-neighbour index for cosine distance.
-- HNSW instead of IVFFlat: this script runs before any rows exist, and an
-- IVFFlat index computes its lists from the rows present at build time, so
-- building it on an empty table yields low recall (pgvector emits a NOTICE
-- saying so). HNSW has no training step and can be created up front.
-- Queries are unchanged; requires pgvector >= 0.5.0.
CREATE INDEX IF NOT EXISTS idx_objects_embedding
    ON semantic_objects
    USING hnsw (embedding vector_cosine_ops);

-- Full-text search over the full content. Queries must use the same
-- expression, to_tsvector('english', content_full), for this index to apply.
CREATE INDEX IF NOT EXISTS idx_objects_content_fts
    ON semantic_objects
    USING GIN(to_tsvector('english', content_full));

-- 1.3 Object Relationships
-- Directed, typed edge between two semantic objects; at most one edge per
-- (source_id, target_id, relationship) triple.
CREATE TABLE IF NOT EXISTS object_relationships (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    session_id      UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
    source_id       UUID NOT NULL REFERENCES semantic_objects(id) ON DELETE CASCADE,
    target_id       UUID NOT NULL REFERENCES semantic_objects(id) ON DELETE CASCADE,
    relationship    TEXT NOT NULL,
    metadata        JSONB DEFAULT '{}',
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now(),

    UNIQUE(source_id, target_id, relationship)
);

CREATE INDEX IF NOT EXISTS idx_rels_source
    ON object_relationships(source_id);
CREATE INDEX IF NOT EXISTS idx_rels_target
    ON object_relationships(target_id);
CREATE INDEX IF NOT EXISTS idx_rels_session
    ON object_relationships(session_id);

-- 1.4 Fault History
CREATE TABLE IF NOT EXISTS fault_history (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    session_id      UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
    object_id       UUID NOT NULL REFERENCES semantic_objects(id) ON DELETE CASCADE,
    fault_type      TEXT NOT NULL,
    turn_number     INTEGER NOT NULL,
    content_hash    TEXT NOT NULL,
    question        TEXT,
    answer          TEXT,
    answer_tokens   INTEGER,
    avoided_tokens  INTEGER,
    latency_ms      INTEGER,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_faults_session
    ON fault_history(session_id);
CREATE INDEX IF NOT EXISTS idx_faults_object
    ON fault_history(object_id);
CREATE INDEX IF NOT EXISTS idx_faults_content_hash
    ON fault_history(content_hash);

-- 1.5 Fidelity Transitions
CREATE TABLE IF NOT EXISTS fidelity_transitions (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    session_id      UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
    object_id       UUID NOT NULL REFERENCES semantic_objects(id) ON DELETE CASCADE,
    from_fidelity   INTEGER NOT NULL,
    to_fidelity     INTEGER NOT NULL,
    trigger         TEXT NOT NULL,
    turn_number     INTEGER NOT NULL,
    pressure_zone   TEXT,
    token_count     INTEGER,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_transitions_session
    ON fidelity_transitions(session_id);
CREATE INDEX IF NOT EXISTS idx_transitions_object
    ON fidelity_transitions(object_id);

-- 1.6 Admission Scores
-- object_id is nullable and is set to NULL when the referenced object is
-- deleted (ON DELETE SET NULL), so score rows outlive their object.
CREATE TABLE IF NOT EXISTS admission_scores (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    session_id      UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
    object_id       UUID REFERENCES semantic_objects(id) ON DELETE SET NULL,
    admitted        BOOLEAN NOT NULL,
    score_total     REAL NOT NULL,
    score_type      REAL NOT NULL,
    score_novelty   REAL NOT NULL,
    score_utility   REAL NOT NULL,
    score_recency   REAL NOT NULL,
    threshold       REAL NOT NULL,
    content_preview TEXT,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_admission_session
    ON admission_scores(session_id);

-- Index the referencing column of the semantic_objects foreign key, as the
-- other child tables do (idx_faults_object, idx_transitions_object). Without
-- it, every delete of a semantic object scans admission_scores to apply
-- ON DELETE SET NULL.
CREATE INDEX IF NOT EXISTS idx_admission_object
    ON admission_scores(object_id);