fix: assign conversation turn numbers to blocks and add /api/blocks debug endpoint

Blocks were all getting turn=1 because label_messages used a single
global counter. Now derives turn from message position (each user msg
increments the turn). Also updates turn on already-labeled blocks.
Adds /api/blocks endpoint to inspect BlockStore state per session.
This enables collapse_range(1,72) to correctly target early turns.
This commit is contained in:
Joey Yakimowich-Payne 2026-03-13 21:35:19 -06:00
commit 2c42f9b52a
2 changed files with 61 additions and 3 deletions

View file

@ -67,11 +67,22 @@ class BlockStore:
its content hash. Labels are stable across turns as long as
the content doesn't change.
Turn numbers are derived from message position: each user/assistant
pair is one conversation turn (user msg at index i turn i//2 + 1).
This ensures ``collapse_range(1, 72)`` targets the right messages.
Only labels user and assistant text messages. Tool_use and
tool_result blocks are managed by the PageStore, not here.
"""
for msg in messages:
# Compute per-message turn based on position in conversation
turn_counter = 0
for i, msg in enumerate(messages):
role = msg.get("role", "")
# Increment turn on each user message (user+assistant = 1 turn)
if role == "user":
turn_counter += 1
msg_turn = turn_counter if turn_counter > 0 else 1
if role not in ("user", "assistant"):
continue
@ -81,12 +92,14 @@ class BlockStore:
if isinstance(content, str):
# Skip if already labeled by us (validated against known IDs)
if self._has_our_label(content):
# Update turn on existing block if it changed
self._update_turn(content, msg_turn)
continue
# Skip very short messages (not worth labeling)
if len(content) < 200:
continue
entry = self._get_or_create(content, role, current_turn)
entry = self._get_or_create(content, role, msg_turn)
if entry and entry.status == "resident":
size_k = entry.size / 1024
msg["content"] = f"[tensor:{entry.block_id} ({size_k:.1f}KB)]\n{content}"
@ -101,16 +114,27 @@ class BlockStore:
text = block.get("text", "")
# Skip if already labeled by us (validated against known IDs)
if self._has_our_label(text):
self._update_turn(text, msg_turn)
continue
# Skip short blocks
if len(text) < 200:
continue
entry = self._get_or_create(text, role, current_turn)
entry = self._get_or_create(text, role, msg_turn)
if entry and entry.status == "resident":
size_k = entry.size / 1024
block["text"] = f"[tensor:{entry.block_id} ({size_k:.1f}KB)]\n{text}"
def _update_turn(self, labeled_content: str, turn: int) -> None:
"""Update the turn number on an already-labeled block."""
import re
m = re.match(r"\[tensor:([a-f0-9]{8,12})", labeled_content)
if m:
entry = self._by_id.get(m.group(1))
if entry:
entry.turn = turn
def _get_or_create(self, content: str, role: str, turn: int) -> BlockEntry | None:
"""Get existing entry by content hash, or create a new one."""
content_hash = hashlib.sha256(content.encode()).hexdigest()

View file

@ -1636,6 +1636,40 @@ def create_app(
"session_tokens": total_tokens,
}
@app.get("/api/blocks")
async def api_blocks(session_id: str | None = None) -> dict[str, Any]:
"""Debug endpoint: expose BlockStore state per session."""
all_sessions = sessions.all()
out: dict[str, Any] = {}
for sid, sess in all_sessions.items():
if session_id and sid != session_id:
continue
bs = sess.block_store
blocks = []
for bid, entry in bs._by_id.items():
blocks.append(
{
"id": bid,
"status": entry.status,
"turn": entry.turn,
"role": entry.role,
"size": entry.size,
"preview": entry.preview[:80] if entry.preview else "",
"summary": entry.summary,
"collapse_start": getattr(entry, "collapse_start_turn", None),
"collapse_end": getattr(entry, "collapse_end_turn", None),
}
)
out[sid] = {
"total_blocks": len(blocks),
"by_status": {},
"blocks": blocks,
}
for b in blocks:
s = b["status"]
out[sid]["by_status"][s] = out[sid]["by_status"].get(s, 0) + 1
return out
@app.get("/api/compaction-context")
async def api_compaction_context(
session_id: str,