feat: add challenge and red-blue competitions across API and web

This commit is contained in:
Joey Yakimowich-Payne 2025-10-01 06:49:09 -06:00
commit 8fd3c4bb64
No known key found for this signature in database
GPG key ID: 6BFE655FA5ABD1E1
77 changed files with 5355 additions and 24 deletions

View file

@ -0,0 +1,56 @@
"""
Challenge scorer protocol and type definitions.
Defines the interface for custom scoring plugins that compute
numeric scores from attempt metrics for leaderboard ranking.
"""
from __future__ import annotations
from typing import Any, Protocol, TypedDict
class ScoringContext(TypedDict, total=False):
    """Context provided to scorer plugins.

    Declared with ``total=False``, so every key is optional from the type
    checker's point of view; scorers should read values defensively
    (for example with ``ctx.get(...)``).
    """

    tenant_id: str  # tenant identifier
    app_id: str  # application identifier
    workflow_id: str  # workflow identifier
    challenge_id: str  # challenge identifier
    end_user_id: str | None  # end user identifier; may be None
    timeout_ms: int  # intended maximum execution time for the scorer, in milliseconds
class AttemptMetrics(TypedDict, total=False):
    """Metrics from a challenge attempt.

    Declared with ``total=False``: any key may be absent, and most values
    may also be present but ``None``, so scorers should handle both cases.
    """

    succeeded: bool  # whether the attempt passed the challenge
    tokens_total: int | None  # total tokens used, if known
    elapsed_ms: int | None  # elapsed time in milliseconds, if known
    rating: int | None  # rating for the attempt, if one was produced
    created_at: int | None  # attempt creation time, epoch milliseconds
class _ScoringResultRequired(TypedDict):
    """Keys every scorer result must contain."""

    score: float  # computed numeric score


class ScoringResult(_ScoringResultRequired, total=False):
    """Result returned by scorer plugin.

    ``score`` is required (inherited from the total base class), matching the
    runtime check performed on scorer output; ``details`` is optional and may
    be omitted or set to ``None``.
    """

    details: dict[str, Any] | None  # optional free-form breakdown of the score
class ScorerProtocol(Protocol):
    """Protocol that all scorer plugins must implement.

    This is a structural ``typing.Protocol``: a plugin class satisfies it by
    providing a compatible ``score`` method and does not need to inherit
    from this class.
    """

    def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
        """
        Compute a numeric score from attempt metrics.

        Args:
            metrics: Attempt metrics (tokens, time, rating, etc.)
            config: Plugin-specific configuration (from challenge.scoring_config)
            ctx: Context with tenant_id, app_id, etc.

        Returns:
            ScoringResult with computed score and optional details
        """
        ...

View file

@ -0,0 +1,112 @@
"""
Challenge scorer service.
Loads and invokes custom scorer plugins to compute scores from attempt metrics.
"""
from __future__ import annotations
import logging
from typing import Any
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
logger = logging.getLogger(__name__)
class ChallengeScorerService:
    """Service for loading and invoking custom scorer plugins.

    Scorer instances are cached on the class, keyed by
    ``"<plugin_id>:<entrypoint>"``, so a given plugin class is imported and
    instantiated once until :meth:`clear_cache` is called.
    """

    # Intentionally shared class-level state: cache key -> scorer instance.
    _plugin_cache: dict[str, Any] = {}

    @classmethod
    def score_with_plugin(
        cls,
        *,
        scorer_plugin_id: str | None,
        scorer_entrypoint: str | None,
        metrics: AttemptMetrics,
        config: dict[str, Any] | None,
        ctx: ScoringContext,
    ) -> ScoringResult:
        """
        Compute score using a custom scorer plugin.

        Args:
            scorer_plugin_id: Plugin identifier (e.g., 'builtin.weighted_scorer')
            scorer_entrypoint: Entrypoint path (e.g., 'services.scorers.weighted:WeightedScorer')
            metrics: Attempt metrics to score
            config: Plugin-specific configuration; ``None`` is passed to the
                scorer as an empty dict
            ctx: Scoring context

        Returns:
            ScoringResult with computed score

        Raises:
            ValueError: If an identifier is missing, the plugin cannot be
                loaded, the scorer raises, or the scorer returns something
                other than a dict containing a 'score' key. When the scorer
                itself failed, the original exception is attached as
                ``__cause__``.
        """
        if not scorer_plugin_id or not scorer_entrypoint:
            raise ValueError("scorer_plugin_id and scorer_entrypoint are required for custom scoring")

        # Load plugin. _load_plugin reports failure with None, so compare by
        # identity: a scorer object that happens to be falsy (for example one
        # defining __len__ or __bool__) is still a successfully loaded plugin.
        scorer = cls._load_plugin(scorer_plugin_id, scorer_entrypoint)
        if scorer is None:
            raise ValueError(f"Failed to load scorer plugin: {scorer_plugin_id}:{scorer_entrypoint}")

        try:
            # TODO: Add timeout enforcement (ctx["timeout_ms"], default 5000)
            # using threading.Timer or signal.alarm. Until then the scorer
            # runs synchronously with no time limit.
            result = scorer.score(metrics, config or {}, ctx)
            if not isinstance(result, dict) or "score" not in result:
                raise ValueError("Scorer must return a dict with 'score' key")
            return result
        except Exception as e:
            # logger.exception records the traceback; lazy %-args avoid
            # formatting the message when the record is filtered out.
            logger.exception("Scorer plugin %s failed: %s", scorer_plugin_id, e)
            raise ValueError(f"Scorer plugin execution failed: {e}") from e

    @classmethod
    def _load_plugin(cls, plugin_id: str, entrypoint: str) -> Any:
        """
        Load a scorer plugin by entrypoint.

        The entrypoint's module is imported, the named attribute is looked up
        on it and called with no arguments, and the resulting object is
        cached. Failed loads are logged and not cached, so they are retried
        on the next call.

        Args:
            plugin_id: Plugin identifier for caching
            entrypoint: Python path like 'pkg.module:ClassName'

        Returns:
            Scorer instance or None if loading fails
        """
        cache_key = f"{plugin_id}:{entrypoint}"
        if cache_key in cls._plugin_cache:
            return cls._plugin_cache[cache_key]

        try:
            # Parse entrypoint: 'pkg.module:ClassName'
            if ":" not in entrypoint:
                raise ValueError(f"Invalid entrypoint format: {entrypoint}. Expected 'module:ClassName'")
            module_path, class_name = entrypoint.split(":", 1)

            # Dynamic import
            # NOTE(review): the entrypoint is imported and instantiated exactly
            # as given. If this value can be set by API clients, consider
            # restricting it to an allowlist of trusted module prefixes.
            import importlib

            module = importlib.import_module(module_path)
            scorer_class = getattr(module, class_name)

            # Instantiate
            scorer = scorer_class()

            # Cache it
            cls._plugin_cache[cache_key] = scorer
            logger.info("Loaded scorer plugin: %s from %s", plugin_id, entrypoint)
            return scorer
        except Exception as e:
            logger.exception("Failed to load scorer plugin %s:%s: %s", plugin_id, entrypoint, e)
            return None

    @classmethod
    def clear_cache(cls) -> None:
        """Clear the plugin cache (useful for testing)."""
        cls._plugin_cache.clear()

View file

@ -0,0 +1,64 @@
from __future__ import annotations
import re
from collections.abc import Mapping
from typing import Any
from sqlalchemy.orm import Session
from extensions.ext_database import db
from models.challenge import Challenge, ChallengeAttempt
class ChallengeService:
    """Evaluate challenge outcomes and persist challenge attempts."""

    @staticmethod
    def evaluate_outcome(output_text: str, cfg: Mapping[str, Any]) -> tuple[bool, dict[str, Any]]:
        """Decide whether ``output_text`` satisfies the challenge's success rule.

        Args:
            output_text: Text to test. ``None`` is treated as empty text so
                that a missing output fails to match instead of raising.
            cfg: Challenge configuration. ``success_type`` selects the mode
                (defaults to ``"regex"``) and ``success_pattern`` is the
                pattern or substring to look for.

        Returns:
            ``(succeeded, details)``. ``details["mode"]`` echoes the mode.
            For ``regex`` and ``contains`` it also carries ``matched``; an
            invalid regex yields ``error`` instead. A missing/empty pattern
            or an unknown mode returns ``False`` with
            ``info == "no_pattern_or_unsupported"``.
        """
        success_type = cfg.get("success_type", "regex")
        pattern = cfg.get("success_pattern")
        text = output_text if output_text is not None else ""

        if success_type == "regex" and pattern:
            try:
                # search (not fullmatch): the pattern may match anywhere,
                # case-insensitively, with ^/$ anchoring at each line.
                matched = re.search(pattern, text, flags=re.IGNORECASE | re.MULTILINE) is not None
            except re.error as e:
                return False, {"mode": "regex", "error": f"invalid_regex: {e}"}
            return matched, {"mode": "regex", "matched": matched}

        if success_type == "contains" and pattern:
            # Case-insensitive substring test; report `matched` the same way
            # regex mode does so callers can read details uniformly.
            matched = pattern.lower() in text.lower()
            return matched, {"mode": "contains", "matched": matched}

        return False, {"mode": success_type, "info": "no_pattern_or_unsupported"}

    @staticmethod
    def record_attempt(
        *,
        tenant_id: str,
        challenge_id: str,
        end_user_id: str | None,
        account_id: str | None,
        workflow_run_id: str | None,
        succeeded: bool,
        score: float | None = None,
        judge_rating: int | None = None,
        judge_feedback: str | None = None,
        judge_output_raw: dict[str, Any] | None = None,
        tokens_total: int | None = None,
        elapsed_ms: int | None = None,
        session: Session | None = None,
    ) -> ChallengeAttempt:
        """Create a ``ChallengeAttempt`` row from the given values and commit it.

        Every keyword argument other than ``session`` is copied unchanged onto
        the attribute of the same name on the new attempt.

        Args:
            session: Session to add the attempt to and commit. Falls back to
                ``db.session`` when not provided. Note that the commit is
                issued on this session even when the caller supplied it.

        Returns:
            The attempt object that was added and committed.
        """
        sess = session or db.session
        attempt = ChallengeAttempt()
        attempt.tenant_id = tenant_id
        attempt.challenge_id = challenge_id
        attempt.end_user_id = end_user_id
        attempt.account_id = account_id
        attempt.workflow_run_id = workflow_run_id
        attempt.succeeded = succeeded
        attempt.score = score
        attempt.judge_rating = judge_rating
        attempt.judge_feedback = judge_feedback
        attempt.judge_output_raw = judge_output_raw
        attempt.tokens_total = tokens_total
        attempt.elapsed_ms = elapsed_ms
        sess.add(attempt)
        sess.commit()
        return attempt

View file

@ -0,0 +1,91 @@
from __future__ import annotations
from typing import Any
from sqlalchemy.orm import Session
from extensions.ext_database import db
from models.red_blue import RedBlueChallenge, TeamPairing, TeamSubmission
class RedBlueService:
    """Persistence helpers for red-vs-blue team challenges."""

    @staticmethod
    def submit_prompt(
        *,
        challenge_id: str,
        tenant_id: str,
        team: str,
        prompt: str,
        account_id: str | None,
        end_user_id: str | None,
        session: Session | None = None,
    ) -> TeamSubmission:
        """Store a team's prompt as a new ``TeamSubmission`` and commit it.

        Uses ``session`` when given, otherwise ``db.session``. Returns the
        submission that was added and committed.
        """
        active_session = session if session else db.session

        submission = TeamSubmission()
        submission.red_blue_challenge_id = challenge_id
        submission.tenant_id = tenant_id
        submission.team = team
        submission.prompt = prompt
        submission.account_id = account_id
        submission.end_user_id = end_user_id

        active_session.add(submission)
        active_session.commit()
        return submission

    @staticmethod
    def select_counterparty_submission(
        *,
        challenge: RedBlueChallenge,
        team: str,
        session: Session | None = None,
    ) -> TeamSubmission | None:
        """Pick the opposing team's submission to pair against.

        Policy: the most recently created active submission of the opposite
        team for this challenge. Any ``team`` value other than ``"red"`` is
        paired against ``"red"``. Returns ``None`` when no such submission
        exists.
        """
        active_session = session if session else db.session

        if team == "red":
            opposing_team = "blue"
        else:
            opposing_team = "red"

        candidates = active_session.query(TeamSubmission).filter(
            TeamSubmission.red_blue_challenge_id == challenge.id,
            TeamSubmission.team == opposing_team,
            TeamSubmission.active.is_(True),
        )
        newest_first = candidates.order_by(TeamSubmission.created_at.desc())
        return newest_first.first()

    @staticmethod
    def record_pairing(
        *,
        challenge_id: str,
        tenant_id: str,
        attack_submission_id: str | None,
        defense_submission_id: str | None,
        judge_output_raw: dict[str, Any] | None,
        categories: dict[str, Any] | None,
        judge_rating: int | None,
        judge_feedback: str | None,
        red_points: float,
        blue_points: float,
        tokens_total: int | None,
        elapsed_ms: int | None,
        session: Session | None = None,
    ) -> TeamPairing:
        """Store the outcome of one attack/defense pairing and commit it.

        Each argument other than ``session`` is copied onto the new
        ``TeamPairing`` (``challenge_id`` is stored as
        ``red_blue_challenge_id``). Uses ``session`` when given, otherwise
        ``db.session``. Returns the pairing that was added and committed.
        """
        active_session = session if session else db.session

        result = TeamPairing()
        result.red_blue_challenge_id = challenge_id
        result.tenant_id = tenant_id
        result.attack_submission_id = attack_submission_id
        result.defense_submission_id = defense_submission_id
        result.judge_output_raw = judge_output_raw
        result.categories = categories
        result.judge_rating = judge_rating
        result.judge_feedback = judge_feedback
        result.red_points = red_points
        result.blue_points = blue_points
        result.tokens_total = tokens_total
        result.elapsed_ms = elapsed_ms

        active_session.add(result)
        active_session.commit()
        return result

View file

@ -0,0 +1,144 @@
# Custom Scorer Plugins
This directory contains custom scorer plugins for challenge leaderboards.
## Overview
Scorers compute numeric scores from challenge attempt metrics (tokens, time, rating, success) for ranking on leaderboards when `scoring_strategy = 'custom'`.
## Built-in Scorers
### WeightedScorer
**Entrypoint:** `services.scorers.weighted:WeightedScorer`
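Computes a weighted score combining multiple metrics with configurable bonuses and penalties. The success bonus is added only when the attempt succeeded, and the final score is clamped so it is never below 0.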
**Formula:**
```
score = success_bonus
+ (rating × rating_weight)
- (elapsed_seconds × time_penalty)
- (tokens × token_penalty)
```
**Configuration:**
- `success_bonus` (float, default: 100): Base points for successful attempts
- `rating_weight` (float, default: 10): Multiplier for judge rating (0-10)
- `time_penalty` (float, default: 1.0): Penalty per second elapsed
- `token_penalty` (float, default: 0.01): Penalty per token used
**Example Configuration:**
```json
{
"success_bonus": 100.0,
"rating_weight": 10.0,
"time_penalty": 1.0,
"token_penalty": 0.01
}
```
**Example Challenge Setup (via API):**
```python
{
"name": "Advanced Prompt Challenge",
"scoring_strategy": "custom",
"scoring_plugin_id": "builtin.weighted_scorer",
"scoring_entrypoint": "services.scorers.weighted:WeightedScorer",
"scoring_config": {
"success_bonus": 100.0,
"rating_weight": 15.0,
"time_penalty": 0.5,
"token_penalty": 0.02
}
}
```
## Creating Custom Scorers
### 1. Implement the ScorerProtocol
Create a new file in this directory (e.g., `custom.py`):
```python
from typing import Any
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
class MyCustomScorer:
def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
# Access metrics
succeeded = metrics.get('succeeded', False)
tokens = metrics.get('tokens_total') or 0  # values may be present but None
elapsed_ms = metrics.get('elapsed_ms') or 0
rating = metrics.get('rating') or 0
# Access configuration
multiplier = config.get('multiplier', 1.0)
# Compute score
score = (rating * multiplier) if succeeded else 0.0
return {
'score': score,
'details': { # optional
'multiplier_used': multiplier
}
}
```
### 2. Register in Challenge
Set the challenge's scoring fields:
```python
challenge.scoring_strategy = 'custom'
challenge.scoring_plugin_id = 'my_custom_scorer'
challenge.scoring_entrypoint = 'services.scorers.custom:MyCustomScorer'
challenge.scoring_config = {
'multiplier': 2.0
}
```
### 3. Testing
Create tests in `api/tests/unit_tests/services/` following the pattern in `test_challenge_scorer_service.py`.
## Protocol Reference
### Input Types
**AttemptMetrics:**
- `succeeded` (bool): Whether the challenge was passed
- `tokens_total` (int | None): Total tokens used
- `elapsed_ms` (int | None): Time taken in milliseconds
- `rating` (int | None): Judge rating (0-10)
- `created_at` (int | None): Timestamp in epoch milliseconds
**ScoringContext:**
- `tenant_id` (str): Tenant identifier
- `app_id` (str): Application identifier
- `workflow_id` (str): Workflow identifier
- `challenge_id` (str): Challenge identifier
- `end_user_id` (str | None): End user identifier (if available)
- `timeout_ms` (int): Maximum execution time
### Output Type
**ScoringResult:**
- `score` (float, required): Computed numeric score
- `details` (dict[str, Any] | None, optional): Additional scoring details
## Error Handling
- Scorers must return a dict with a `score` key
- Exceptions are caught and logged; the attempt is recorded with `score=None`
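- `timeout_ms` (default: 5000 ms) is reserved in the scoring context as the scorer's execution budget, but the service does not enforce it yet, so keep scorers fast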
- Scorers should never return negative scores; use `max(score, 0.0)` to clamp
## Best Practices
1. **Keep it simple**: Scoring should be fast and deterministic
2. **Validate config**: Check configuration values and provide defaults
3. **Clamp scores**: Ensure scores are non-negative
4. **Document formula**: Clearly explain how your scorer works
5. **Test edge cases**: Test with missing metrics, zeros, nulls

View file

@ -0,0 +1 @@
"""Built-in scorer plugins."""

View file

@ -0,0 +1,66 @@
"""
Weighted scorer plugin.
Computes a weighted score based on success bonus, rating, elapsed time, and token usage.
"""
from __future__ import annotations
from typing import Any
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
class WeightedScorer:
    """
    Example weighted scorer that combines multiple metrics.

    Configuration options:
    - success_bonus (float): Base points for successful attempt (default: 100)
    - rating_weight (float): Multiplier for judge rating (default: 10)
    - time_penalty (float): Penalty per second elapsed (default: 1.0)
    - token_penalty (float): Penalty per token used (default: 0.01)

    A configuration value that is missing or ``None`` falls back to its
    default; a non-numeric value such as ``"2.5"`` is converted with
    ``float()``.

    Formula:
        score = success_bonus          (only when the attempt succeeded)
              + (rating * rating_weight)
              - (elapsed_seconds * time_penalty)
              - (tokens * token_penalty)

    The result is clamped so it is never negative.
    """

    @staticmethod
    def _config_number(config: dict[str, Any], key: str, default: float) -> float:
        """Read a numeric option, tolerating null and string-encoded values."""
        value = config.get(key)
        if value is None:
            # Covers both a missing key and an explicit null in the config.
            return default
        if isinstance(value, (int, float)):
            return value
        # e.g. "10" from a loosely-typed config; without this, int * str would
        # silently repeat the string. Raises ValueError/TypeError if the value
        # is not convertible.
        return float(value)

    def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
        """Compute weighted score from metrics.

        Args:
            metrics: Attempt metrics; missing or ``None`` values count as 0
                (or as "not succeeded" for ``succeeded``).
            config: Weights described in the class docstring; ``None`` is
                treated as an empty configuration.
            ctx: Scoring context (unused by this scorer).

        Returns:
            Dict with the clamped ``score`` and a ``details`` breakdown of
            each component before clamping.

        Raises:
            ValueError, TypeError: If a configuration value is neither a
                number nor convertible with ``float()``.
        """
        config = config or {}

        # Base score for success
        base = 0.0
        if metrics.get("succeeded"):
            base += self._config_number(config, "success_bonus", 100.0)

        # Add rating contribution
        rating = metrics.get("rating") or 0
        rating_score = rating * self._config_number(config, "rating_weight", 10.0)

        # Subtract time penalty (metric is in milliseconds, penalty is per second)
        elapsed_seconds = (metrics.get("elapsed_ms") or 0) / 1000.0
        time_score = elapsed_seconds * self._config_number(config, "time_penalty", 1.0)

        # Subtract token penalty
        tokens = metrics.get("tokens_total") or 0
        token_score = tokens * self._config_number(config, "token_penalty", 0.01)

        # Compute final score (never negative)
        final_score = max(base + rating_score - time_score - token_score, 0.0)

        return {
            "score": final_score,
            "details": {
                "base": base,
                "rating_contribution": rating_score,
                "time_penalty": time_score,
                "token_penalty": token_score,
            },
        }