feat: add challenge and red-blue competitions across API and web
This commit is contained in:
parent
f5161d9add
commit
8fd3c4bb64
77 changed files with 5355 additions and 24 deletions
56
api/services/challenge_scorer_protocol.py
Normal file
56
api/services/challenge_scorer_protocol.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""
|
||||
Challenge scorer protocol and type definitions.
|
||||
|
||||
Defines the interface for custom scoring plugins that compute
|
||||
numeric scores from attempt metrics for leaderboard ranking.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Protocol, TypedDict
|
||||
|
||||
|
||||
class ScoringContext(TypedDict, total=False):
    """Ambient information handed to a scorer plugin next to the metrics.

    The dict is declared with ``total=False``: every key may be absent, so
    scorers should read values with ``ctx.get(...)``.
    """

    # Identifiers describing where the scored attempt belongs.
    tenant_id: str
    app_id: str
    workflow_id: str
    challenge_id: str

    # May be None when no end user is associated with the attempt.
    end_user_id: str | None

    # Time budget for the scorer call, in milliseconds.
    timeout_ms: int
|
||||
|
||||
|
||||
class AttemptMetrics(TypedDict, total=False):
    """Measurements of a single challenge attempt, as seen by a scorer.

    Declared with ``total=False``; any key may be missing, and most values
    may additionally be ``None``, so scorers must handle both cases.
    """

    # Whether the attempt passed the challenge.
    succeeded: bool

    # Total tokens consumed by the attempt.
    tokens_total: int | None

    # Duration of the attempt in milliseconds.
    elapsed_ms: int | None

    # Rating given by the judge.
    rating: int | None

    # Creation time of the attempt, epoch milliseconds.
    created_at: int | None
|
||||
|
||||
|
||||
class _ScoringResultRequired(TypedDict):
    """Keys that every scorer result must contain."""

    # Numeric score computed by the scorer.
    score: float


class ScoringResult(_ScoringResultRequired, total=False):
    """Result returned by a scorer plugin.

    ``score`` is required (inherited from the total base class), matching
    the runtime check performed by the scorer service; ``details`` is an
    optional free-form mapping with scorer-specific diagnostics.
    """

    # Optional breakdown of how the score was computed.
    details: dict[str, Any] | None
|
||||
|
||||
|
||||
class ScorerProtocol(Protocol):
    """Structural interface every scorer plugin has to satisfy.

    A plugin does not need to inherit from this class; exposing a
    compatible ``score`` method is enough.
    """

    def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
        """Turn the metrics of one attempt into a numeric score.

        Args:
            metrics: Attempt metrics (tokens, time, rating, etc.).
            config: Plugin-specific configuration (from challenge.scoring_config).
            ctx: Context with tenant_id, app_id, etc.

        Returns:
            A ScoringResult holding the computed score and, optionally,
            additional details.
        """
        ...
|
||||
112
api/services/challenge_scorer_service.py
Normal file
112
api/services/challenge_scorer_service.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
"""
|
||||
Challenge scorer service.
|
||||
|
||||
Loads and invokes custom scorer plugins to compute scores from attempt metrics.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
|
||||
|
||||
logger = logging.getLogger(__name__)


class ChallengeScorerService:
    """Load scorer plugins by entrypoint and run them against attempt metrics.

    Successfully loaded scorer instances are kept in a class-level cache that
    is shared by all callers and keyed by ``"<plugin_id>:<entrypoint>"``.
    """

    _plugin_cache: dict[str, Any] = {}

    @classmethod
    def score_with_plugin(
        cls,
        *,
        scorer_plugin_id: str | None,
        scorer_entrypoint: str | None,
        metrics: AttemptMetrics,
        config: dict[str, Any] | None,
        ctx: ScoringContext,
    ) -> ScoringResult:
        """
        Compute score using a custom scorer plugin.

        Args:
            scorer_plugin_id: Plugin identifier (e.g., 'builtin.weighted_scorer')
            scorer_entrypoint: Entrypoint path (e.g., 'services.scorers.weighted:WeightedScorer')
            metrics: Attempt metrics to score
            config: Plugin-specific configuration; ``None`` is passed to the
                plugin as an empty dict
            ctx: Scoring context

        Returns:
            ScoringResult with computed score

        Raises:
            ValueError: If an identifier is missing, the plugin cannot be
                loaded, the plugin raises, or it returns something other than
                a dict containing a ``score`` key. When the plugin itself
                failed, the original exception is attached as ``__cause__``.
        """
        if not scorer_plugin_id or not scorer_entrypoint:
            raise ValueError("scorer_plugin_id and scorer_entrypoint are required for custom scoring")

        scorer = cls._load_plugin(scorer_plugin_id, scorer_entrypoint)
        # Compare against None explicitly: a valid scorer object could be
        # falsy (e.g. define __len__) and must not be mistaken for a failure.
        if scorer is None:
            raise ValueError(f"Failed to load scorer plugin: {scorer_plugin_id}:{scorer_entrypoint}")

        try:
            # TODO: Enforce the ctx["timeout_ms"] budget (intended default: 5000 ms)
            # using threading.Timer or signal.alarm. Until then the plugin runs
            # without any time limit.
            result = scorer.score(metrics, config or {}, ctx)
            if not isinstance(result, dict) or "score" not in result:
                raise ValueError("Scorer must return a dict with 'score' key")
            return result
        except Exception as e:
            # Boundary around third-party plugin code: log with traceback and
            # surface a single exception type to callers, keeping the cause.
            logger.exception("Scorer plugin %s failed: %s", scorer_plugin_id, e)
            raise ValueError(f"Scorer plugin execution failed: {e}") from e

    @classmethod
    def _load_plugin(cls, plugin_id: str, entrypoint: str) -> Any:
        """
        Load a scorer plugin by entrypoint.

        The entrypoint's module is imported, the named attribute is called
        without arguments to build the scorer, and the instance is cached.
        Objects that do not expose a callable ``score`` attribute are rejected
        and never cached.

        Args:
            plugin_id: Plugin identifier for caching
            entrypoint: Python path like 'pkg.module:ClassName'

        Returns:
            Scorer instance or None if loading fails (the failure is logged)
        """
        import importlib

        cache_key = f"{plugin_id}:{entrypoint}"
        cached = cls._plugin_cache.get(cache_key)
        if cached is not None:
            return cached

        try:
            # Parse entrypoint: 'pkg.module:ClassName'
            module_path, separator, class_name = entrypoint.partition(":")
            if not separator:
                raise ValueError(f"Invalid entrypoint format: {entrypoint}. Expected 'module:ClassName'")

            # NOTE(security): this imports and instantiates whatever the
            # entrypoint names. If entrypoints can be supplied by untrusted
            # users, restrict them to an allow-list before reaching this point.
            module = importlib.import_module(module_path)
            scorer_class = getattr(module, class_name)
            scorer = scorer_class()

            # Fail at load time rather than on the first scoring call, and
            # avoid caching an object that can never score anything.
            if not callable(getattr(scorer, "score", None)):
                raise TypeError(f"{entrypoint} does not provide a callable 'score' method")

            cls._plugin_cache[cache_key] = scorer
            logger.info("Loaded scorer plugin: %s from %s", plugin_id, entrypoint)
            return scorer
        except Exception as e:
            logger.exception("Failed to load scorer plugin %s:%s: %s", plugin_id, entrypoint, e)
            return None

    @classmethod
    def clear_cache(cls) -> None:
        """Clear the plugin cache (useful for testing)."""
        cls._plugin_cache.clear()
|
||||
64
api/services/challenge_service.py
Normal file
64
api/services/challenge_service.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.challenge import Challenge, ChallengeAttempt
|
||||
|
||||
|
||||
class ChallengeService:
    """Helpers for judging challenge output and persisting attempts."""

    @staticmethod
    def evaluate_outcome(output_text: str, cfg: Mapping[str, Any]) -> tuple[bool, dict[str, Any]]:
        """Decide whether ``output_text`` satisfies the challenge's success rule.

        Args:
            output_text: Text to check. ``None`` is treated as empty text.
            cfg: Challenge configuration. Reads ``success_type`` (``"regex"``
                by default, or ``"contains"``) and ``success_pattern``.

        Returns:
            ``(succeeded, details)``. ``details`` always carries ``mode``;
            for both supported modes it carries ``matched``; an invalid
            regular expression yields ``error`` instead; a missing,
            empty or non-string pattern, or an unknown ``success_type``,
            yields ``info = "no_pattern_or_unsupported"``.
        """
        success_type = cfg.get("success_type", "regex")
        pattern = cfg.get("success_pattern")
        text = "" if output_text is None else output_text

        # Only non-empty strings are usable patterns; anything else would make
        # re.search / str.lower raise, so it is reported as unsupported.
        has_pattern = isinstance(pattern, str) and bool(pattern)

        if has_pattern and success_type == "regex":
            # NOTE(review): the pattern comes from configuration; if that can be
            # supplied by untrusted users, a pathological regex could make this
            # call very slow — confirm whether that needs guarding.
            try:
                matched = re.search(pattern, text, flags=re.IGNORECASE | re.MULTILINE) is not None
            except re.error as e:
                return False, {"mode": "regex", "error": f"invalid_regex: {e}"}
            return matched, {"mode": "regex", "matched": matched}

        if has_pattern and success_type == "contains":
            # Case-insensitive substring test.
            matched = pattern.lower() in text.lower()
            return matched, {"mode": "contains", "matched": matched}

        return False, {"mode": success_type, "info": "no_pattern_or_unsupported"}

    @staticmethod
    def record_attempt(
        *,
        tenant_id: str,
        challenge_id: str,
        end_user_id: str | None,
        account_id: str | None,
        workflow_run_id: str | None,
        succeeded: bool,
        score: float | None = None,
        judge_rating: int | None = None,
        judge_feedback: str | None = None,
        judge_output_raw: dict[str, Any] | None = None,
        tokens_total: int | None = None,
        elapsed_ms: int | None = None,
        session: Session | None = None,
    ) -> ChallengeAttempt:
        """Create a ``ChallengeAttempt`` row from the given values and commit it.

        Args:
            tenant_id, challenge_id, end_user_id, account_id, workflow_run_id:
                Stored on the attempt as-is.
            succeeded: Whether the attempt passed.
            score, judge_rating, judge_feedback, judge_output_raw,
            tokens_total, elapsed_ms: Optional values stored as-is.
            session: Session to use; falls back to ``db.session`` when omitted.

        Returns:
            The attempt object that was added to the session.

        Note:
            The session is committed here, including when the caller passed
            its own session.
        """
        sess = session or db.session

        attempt = ChallengeAttempt()
        attempt.tenant_id = tenant_id
        attempt.challenge_id = challenge_id
        attempt.end_user_id = end_user_id
        attempt.account_id = account_id
        attempt.workflow_run_id = workflow_run_id
        attempt.succeeded = succeeded
        attempt.score = score
        attempt.judge_rating = judge_rating
        attempt.judge_feedback = judge_feedback
        attempt.judge_output_raw = judge_output_raw
        attempt.tokens_total = tokens_total
        attempt.elapsed_ms = elapsed_ms

        sess.add(attempt)
        sess.commit()
        return attempt
|
||||
|
||||
|
||||
91
api/services/red_blue_service.py
Normal file
91
api/services/red_blue_service.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.red_blue import RedBlueChallenge, TeamPairing, TeamSubmission
|
||||
|
||||
|
||||
class RedBlueService:
    """Persistence helpers for red-vs-blue team competitions."""

    @staticmethod
    def submit_prompt(
        *,
        challenge_id: str,
        tenant_id: str,
        team: str,
        prompt: str,
        account_id: str | None,
        end_user_id: str | None,
        session: Session | None = None,
    ) -> TeamSubmission:
        """Store a team's prompt as a new ``TeamSubmission`` and commit it.

        Uses ``session`` when given, otherwise ``db.session``. The values are
        stored as passed in; ``team`` is not validated here.
        """
        active_session = session or db.session

        submission = TeamSubmission()
        submission.red_blue_challenge_id = challenge_id
        submission.tenant_id = tenant_id
        submission.team = team
        submission.prompt = prompt
        submission.account_id = account_id
        submission.end_user_id = end_user_id

        active_session.add(submission)
        active_session.commit()
        return submission

    @staticmethod
    def select_counterparty_submission(
        *,
        challenge: RedBlueChallenge,
        team: str,
        session: Session | None = None,
    ) -> TeamSubmission | None:
        """Return the newest active submission of the opposing team, if any.

        The opposing team of ``"red"`` is ``"blue"``; every other ``team``
        value is answered with a ``"red"`` submission.
        """
        active_session = session or db.session

        if team == "red":
            opposing_team = "blue"
        else:
            opposing_team = "red"

        # Simplest policy: latest active from opposite team
        candidates = active_session.query(TeamSubmission).filter(
            TeamSubmission.red_blue_challenge_id == challenge.id,
            TeamSubmission.team == opposing_team,
            TeamSubmission.active.is_(True),
        )
        newest_first = candidates.order_by(TeamSubmission.created_at.desc())
        return newest_first.first()

    @staticmethod
    def record_pairing(
        *,
        challenge_id: str,
        tenant_id: str,
        attack_submission_id: str | None,
        defense_submission_id: str | None,
        judge_output_raw: dict[str, Any] | None,
        categories: dict[str, Any] | None,
        judge_rating: int | None,
        judge_feedback: str | None,
        red_points: float,
        blue_points: float,
        tokens_total: int | None,
        elapsed_ms: int | None,
        session: Session | None = None,
    ) -> TeamPairing:
        """Store the outcome of one attack/defense match-up and commit it.

        Uses ``session`` when given, otherwise ``db.session``. All values are
        copied onto a new ``TeamPairing`` unchanged.
        """
        active_session = session or db.session

        record = TeamPairing()
        record.red_blue_challenge_id = challenge_id
        record.tenant_id = tenant_id
        record.attack_submission_id = attack_submission_id
        record.defense_submission_id = defense_submission_id
        record.judge_output_raw = judge_output_raw
        record.categories = categories
        record.judge_rating = judge_rating
        record.judge_feedback = judge_feedback
        record.red_points = red_points
        record.blue_points = blue_points
        record.tokens_total = tokens_total
        record.elapsed_ms = elapsed_ms

        active_session.add(record)
        active_session.commit()
        return record
|
||||
|
||||
|
||||
144
api/services/scorers/README.md
Normal file
144
api/services/scorers/README.md
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
# Custom Scorer Plugins
|
||||
|
||||
This directory contains custom scorer plugins for challenge leaderboards.
|
||||
|
||||
## Overview
|
||||
|
||||
Scorers compute numeric scores from challenge attempt metrics (tokens, time, rating, success) for ranking on leaderboards when `scoring_strategy = 'custom'`.
|
||||
|
||||
## Built-in Scorers
|
||||
|
||||
### WeightedScorer
|
||||
|
||||
**Entrypoint:** `services.scorers.weighted:WeightedScorer`
|
||||
|
||||
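Computes a weighted score combining multiple metrics with configurable bonuses and penalties. The success bonus is added only when the attempt succeeded, and the final score is clamped so it is never negative.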
|
||||
|
||||
**Formula:**
|
||||
```
|
||||
score = success_bonus
|
||||
+ (rating × rating_weight)
|
||||
- (elapsed_seconds × time_penalty)
|
||||
- (tokens × token_penalty)
|
||||
```
|
||||
|
||||
**Configuration:**
|
||||
- `success_bonus` (float, default: 100): Base points for successful attempts
|
||||
- `rating_weight` (float, default: 10): Multiplier for judge rating (0-10)
|
||||
- `time_penalty` (float, default: 1.0): Penalty per second elapsed
|
||||
- `token_penalty` (float, default: 0.01): Penalty per token used
|
||||
|
||||
**Example Configuration:**
|
||||
```json
|
||||
{
|
||||
"success_bonus": 100.0,
|
||||
"rating_weight": 10.0,
|
||||
"time_penalty": 1.0,
|
||||
"token_penalty": 0.01
|
||||
}
|
||||
```
|
||||
|
||||
**Example Challenge Setup (via API):**
|
||||
```json
|
||||
{
|
||||
"name": "Advanced Prompt Challenge",
|
||||
"scoring_strategy": "custom",
|
||||
"scoring_plugin_id": "builtin.weighted_scorer",
|
||||
"scoring_entrypoint": "services.scorers.weighted:WeightedScorer",
|
||||
"scoring_config": {
|
||||
"success_bonus": 100.0,
|
||||
"rating_weight": 15.0,
|
||||
"time_penalty": 0.5,
|
||||
"token_penalty": 0.02
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Creating Custom Scorers
|
||||
|
||||
### 1. Implement the ScorerProtocol
|
||||
|
||||
Create a new file in this directory (e.g., `custom.py`):
|
||||
|
||||
```python
|
||||
from typing import Any
|
||||
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
|
||||
|
||||
class MyCustomScorer:
|
||||
def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
|
||||
# Access metrics
|
||||
succeeded = metrics.get('succeeded', False)
|
||||
tokens = metrics.get('tokens_total') or 0  # value may be present but None
|
||||
elapsed_ms = metrics.get('elapsed_ms') or 0  # value may be present but None
|
||||
rating = metrics.get('rating') or 0  # value may be present but None
|
||||
|
||||
# Access configuration
|
||||
multiplier = config.get('multiplier', 1.0)
|
||||
|
||||
# Compute score
|
||||
score = (rating * multiplier) if succeeded else 0.0
|
||||
|
||||
return {
|
||||
'score': score,
|
||||
'details': { # optional
|
||||
'multiplier_used': multiplier
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Register in Challenge
|
||||
|
||||
Set the challenge's scoring fields:
|
||||
|
||||
```python
|
||||
challenge.scoring_strategy = 'custom'
|
||||
challenge.scoring_plugin_id = 'my_custom_scorer'
|
||||
challenge.scoring_entrypoint = 'services.scorers.custom:MyCustomScorer'
|
||||
challenge.scoring_config = {
|
||||
'multiplier': 2.0
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Testing
|
||||
|
||||
Create tests in `api/tests/unit_tests/services/` following the pattern in `test_challenge_scorer_service.py`.
|
||||
|
||||
## Protocol Reference
|
||||
|
||||
### Input Types
|
||||
|
||||
**AttemptMetrics:**
|
||||
- `succeeded` (bool): Whether the challenge was passed
|
||||
- `tokens_total` (int | None): Total tokens used
|
||||
- `elapsed_ms` (int | None): Time taken in milliseconds
|
||||
- `rating` (int | None): Judge rating (0-10)
|
||||
- `created_at` (int | None): Timestamp in epoch milliseconds
|
||||
|
||||
**ScoringContext:**
|
||||
- `tenant_id` (str): Tenant identifier
|
||||
- `app_id` (str): Application identifier
|
||||
- `workflow_id` (str): Workflow identifier
|
||||
- `challenge_id` (str): Challenge identifier
|
||||
- `end_user_id` (str | None): End user identifier (if available)
|
||||
- `timeout_ms` (int): Maximum execution time
|
||||
|
||||
### Output Type
|
||||
|
||||
**ScoringResult:**
|
||||
- `score` (float, required): Computed numeric score
|
||||
- `details` (dict[str, Any] | None, optional): Additional scoring details
|
||||
|
||||
## Error Handling
|
||||
|
||||
- Scorers must return a dict with a `score` key
|
||||
- Exceptions are caught and logged; the attempt is recorded with `score=None`
|
||||
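- Scorers are meant to run under a timeout (default: 5s, see `timeout_ms` in the scoring context), but the scorer service does not enforce it yet, so scorers must return promptly on their own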
|
||||
- Scorers should never return negative scores; use `max(score, 0.0)` to clamp
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Keep it simple**: Scoring should be fast and deterministic
|
||||
2. **Validate config**: Check configuration values and provide defaults
|
||||
3. **Clamp scores**: Ensure scores are non-negative
|
||||
4. **Document formula**: Clearly explain how your scorer works
|
||||
5. **Test edge cases**: Test with missing metrics, zeros, nulls
|
||||
1
api/services/scorers/__init__.py
Normal file
1
api/services/scorers/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Built-in scorer plugins."""
|
||||
66
api/services/scorers/weighted.py
Normal file
66
api/services/scorers/weighted.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""
|
||||
Weighted scorer plugin.
|
||||
|
||||
Computes a weighted score based on success bonus, rating, elapsed time, and token usage.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
|
||||
|
||||
|
||||
class WeightedScorer:
    """
    Example weighted scorer that combines multiple metrics.

    Configuration options:
    - success_bonus (float): Base points for successful attempt (default: 100)
    - rating_weight (float): Multiplier for judge rating (default: 10)
    - time_penalty (float): Penalty per second elapsed (default: 1.0)
    - token_penalty (float): Penalty per token used (default: 0.01)

    A configuration value that is missing, ``None``, not convertible to a
    number, or not finite falls back to its default.

    Formula:
        score = (success_bonus if succeeded else 0)
                + (rating * rating_weight)
                - (elapsed_seconds * time_penalty)
                - (tokens * token_penalty)

    The result is clamped so it is never negative.
    """

    def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
        """Compute weighted score from metrics.

        Args:
            metrics: Attempt metrics; missing or ``None`` values count as 0
                (``succeeded`` as false).
            config: Weights as described on the class; ``None`` is treated as
                an empty configuration.
            ctx: Scoring context (not used by this scorer).

        Returns:
            Dict with the clamped ``score`` and a ``details`` breakdown
            (``base``, ``rating_contribution``, ``time_penalty``,
            ``token_penalty``) holding the unclamped components.
        """
        metrics = metrics or {}
        settings = config or {}

        # Base score: the bonus is granted only for successful attempts.
        base = self._number(settings, "success_bonus", 100.0) if metrics.get("succeeded") else 0.0

        # Rating contribution.
        rating = metrics.get("rating") or 0
        rating_score = rating * self._number(settings, "rating_weight", 10.0)

        # Time penalty is configured per second, metrics are in milliseconds.
        elapsed_seconds = (metrics.get("elapsed_ms") or 0) / 1000.0
        time_score = elapsed_seconds * self._number(settings, "time_penalty", 1.0)

        # Token penalty.
        tokens = metrics.get("tokens_total") or 0
        token_score = tokens * self._number(settings, "token_penalty", 0.01)

        # Compute final score (never negative).
        final_score = max(base + rating_score - time_score - token_score, 0.0)

        return {
            "score": final_score,
            "details": {
                "base": base,
                "rating_contribution": rating_score,
                "time_penalty": time_score,
                "token_penalty": token_score,
            },
        }

    @staticmethod
    def _number(config: dict[str, Any], key: str, default: float) -> float:
        """Read ``config[key]`` as a finite float, falling back to ``default``."""
        import math

        raw = config.get(key)
        if raw is None:
            return default
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return default
        # NaN would slip through max(..., 0.0) and infinities make the score
        # meaningless, so neither is accepted as a weight.
        return value if math.isfinite(value) else default
|
||||
Loading…
Add table
Add a link
Reference in a new issue