feat: add challenge and red-blue competitions across API and web

This commit is contained in:
Joey Yakimowich-Payne 2025-10-01 06:49:09 -06:00
commit 8fd3c4bb64
No known key found for this signature in database
GPG key ID: 6BFE655FA5ABD1E1
77 changed files with 5355 additions and 24 deletions

View file

@ -129,6 +129,10 @@ from .workspace import (
workspace,
)
# Import custom challenge controllers
from . import challenges as challenges
from . import red_blue_challenges as red_blue_challenges
api.add_namespace(console_ns)
__all__ = [
@ -204,4 +208,6 @@ __all__ = [
"workflow_run",
"workflow_statistic",
"workspace",
"challenges",
"red_blue_challenges",
]

View file

@ -12,7 +12,6 @@ from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import (
account_initialization_required,
cloud_edition_billing_resource_check,
enterprise_license_required,
setup_required,
)
from core.ops.ops_trace_manager import OpsTraceManager
@ -53,7 +52,6 @@ class AppListApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def get(self):
"""Get app list"""
@ -166,7 +164,6 @@ class AppApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
@get_app_model
@marshal_with(app_detail_fields_with_site)
def get(self, app_model):

View file

@ -0,0 +1,154 @@
from __future__ import annotations
from flask_restx import Resource, reqparse
from controllers.console import console_ns as api
from controllers.console.wraps import (
account_initialization_required,
setup_required,
)
from libs.login import login_required
from extensions.ext_database import db
from libs.login import current_user
from models.challenge import Challenge
@api.route("/challenges")
class ChallengeListCreateApi(Resource):
    """Console endpoints for listing and creating challenges in the current workspace."""

    @api.doc("list_challenges")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Return every challenge owned by the caller's workspace, newest first."""
        tenant_id = current_user.current_tenant_id
        if not tenant_id:
            # no active workspace selected; return empty list to avoid leaking data
            return {"result": "success", "data": []}
        rows = (
            db.session.query(Challenge)
            .filter(Challenge.tenant_id == tenant_id)
            .order_by(Challenge.created_at.desc())
            .all()
        )
        return {
            "result": "success",
            "data": [
                {
                    "id": r.id,
                    "name": r.name,
                    "description": r.description,
                    "goal": r.goal,
                    "is_active": r.is_active,
                    "success_type": r.success_type,
                    "success_pattern": r.success_pattern,
                    "scoring_strategy": r.scoring_strategy,
                    "app_id": r.app_id,
                    "workflow_id": r.workflow_id,
                }
                for r in rows
            ],
        }

    @api.doc("create_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create a challenge scoped to the caller's workspace; returns its id (201)."""
        parser = reqparse.RequestParser()
        parser.add_argument("tenant_id", type=str, required=False, location="json")
        parser.add_argument("app_id", type=str, required=True, location="json")
        parser.add_argument("workflow_id", type=str, required=False, location="json")
        parser.add_argument("name", type=str, required=True, location="json")
        parser.add_argument("description", type=str, required=False, location="json")
        parser.add_argument("goal", type=str, required=False, location="json")
        parser.add_argument("success_type", type=str, required=False, location="json")
        parser.add_argument("success_pattern", type=str, required=False, location="json")
        parser.add_argument("scoring_strategy", type=str, required=False, location="json")
        parser.add_argument("is_active", type=bool, required=False, location="json")
        args = parser.parse_args()
        c = Challenge()
        # SECURITY FIX: prefer the authenticated user's workspace over a
        # client-supplied tenant_id so a caller cannot create challenges in
        # another tenant. The legacy "tenant_id" argument is only used as a
        # fallback when no workspace is active.
        c.tenant_id = current_user.current_tenant_id or args.get("tenant_id")
        c.app_id = args["app_id"]
        # Convert empty string to None for UUID field
        workflow_id = args.get("workflow_id")
        c.workflow_id = workflow_id if workflow_id else None
        c.name = args["name"]
        c.description = args.get("description")
        c.goal = args.get("goal")
        # Only override model defaults when the client supplied a value.
        if args.get("success_type"):
            c.success_type = args["success_type"]
        c.success_pattern = args.get("success_pattern")
        if args.get("scoring_strategy"):
            c.scoring_strategy = args["scoring_strategy"]
        if args.get("is_active") is not None:
            c.is_active = args["is_active"]
        db.session.add(c)
        db.session.commit()
        return {"result": "success", "data": {"id": c.id}}, 201
@api.route("/challenges/<uuid:challenge_id>")
class ChallengeDetailApi(Resource):
    """Console endpoints for reading, updating and deleting a single challenge."""

    @staticmethod
    def _get_owned(challenge_id):
        """Load the challenge only if it belongs to the caller's workspace.

        SECURITY FIX: without this tenant check, any authenticated console user
        could read or mutate another tenant's challenge by guessing its UUID.
        Cross-tenant ids now behave exactly like unknown ids (404).
        """
        c = db.session.get(Challenge, str(challenge_id))
        if not c or c.tenant_id != current_user.current_tenant_id:
            return None
        return c

    @api.doc("get_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, challenge_id):
        """Return the full challenge record, or 404 if unknown/not owned."""
        c = self._get_owned(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        return {
            "result": "success",
            "data": {
                "id": c.id,
                "name": c.name,
                "description": c.description,
                "goal": c.goal,
                "is_active": c.is_active,
                "success_type": c.success_type,
                "success_pattern": c.success_pattern,
                "scoring_strategy": c.scoring_strategy,
                "app_id": c.app_id,
                "workflow_id": c.workflow_id,
            },
        }

    @api.doc("update_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, challenge_id):
        """Partially update name/description/goal/is_active; 404 if unknown/not owned."""
        c = self._get_owned(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        parser = reqparse.RequestParser()
        parser.add_argument("name", type=str, required=False, location="json")
        parser.add_argument("description", type=str, required=False, location="json")
        parser.add_argument("goal", type=str, required=False, location="json")
        parser.add_argument("is_active", type=bool, required=False, location="json")
        args = parser.parse_args()
        # name must stay non-empty, hence the truthiness check; the other
        # fields only require presence in the payload.
        if args.get("name"):
            c.name = args["name"]
        if args.get("description") is not None:
            c.description = args["description"]
        if args.get("goal") is not None:
            c.goal = args["goal"]
        if args.get("is_active") is not None:
            c.is_active = bool(args["is_active"])
        db.session.commit()
        return {"result": "success"}

    @api.doc("delete_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, challenge_id):
        """Delete the challenge; 204 on success, 404 if unknown/not owned."""
        c = self._get_owned(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        db.session.delete(c)
        db.session.commit()
        return {"result": "success"}, 204

View file

@ -5,7 +5,7 @@ from flask_restx import Resource, marshal_with, reqparse
from werkzeug.exceptions import NotFound
from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, enterprise_license_required, setup_required
from controllers.console.wraps import account_initialization_required, setup_required
from fields.dataset_fields import dataset_metadata_fields
from libs.login import login_required
from services.dataset_service import DatasetService
@ -21,7 +21,6 @@ class DatasetMetadataCreateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
@marshal_with(dataset_metadata_fields)
def post(self, dataset_id):
parser = reqparse.RequestParser()
@ -42,7 +41,6 @@ class DatasetMetadataCreateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def get(self, dataset_id):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
@ -56,7 +54,6 @@ class DatasetMetadataApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
@marshal_with(dataset_metadata_fields)
def patch(self, dataset_id, metadata_id):
parser = reqparse.RequestParser()
@ -77,7 +74,6 @@ class DatasetMetadataApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def delete(self, dataset_id, metadata_id):
dataset_id_str = str(dataset_id)
metadata_id_str = str(metadata_id)
@ -95,7 +91,6 @@ class DatasetMetadataBuiltInFieldApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def get(self):
built_in_fields = MetadataService.get_built_in_fields()
return {"fields": built_in_fields}, 200
@ -106,7 +101,6 @@ class DatasetMetadataBuiltInFieldActionApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def post(self, dataset_id, action: Literal["enable", "disable"]):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
@ -126,7 +120,6 @@ class DocumentMetadataEditApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def post(self, dataset_id):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)

View file

@ -7,7 +7,6 @@ from sqlalchemy.orm import Session
from controllers.console import console_ns
from controllers.console.wraps import (
account_initialization_required,
enterprise_license_required,
knowledge_pipeline_publish_enabled,
setup_required,
)
@ -37,7 +36,6 @@ class PipelineTemplateListApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def get(self):
type = request.args.get("type", default="built-in", type=str)
language = request.args.get("language", default="en-US", type=str)
@ -51,7 +49,6 @@ class PipelineTemplateDetailApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def get(self, template_id: str):
type = request.args.get("type", default="built-in", type=str)
rag_pipeline_service = RagPipelineService()
@ -64,7 +61,6 @@ class CustomizedPipelineTemplateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def patch(self, template_id: str):
parser = reqparse.RequestParser()
parser.add_argument(
@ -95,7 +91,6 @@ class CustomizedPipelineTemplateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def delete(self, template_id: str):
RagPipelineService.delete_customized_pipeline_template(template_id)
return 200
@ -103,7 +98,6 @@ class CustomizedPipelineTemplateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def post(self, template_id: str):
with Session(db.engine) as session:
template = (
@ -120,7 +114,6 @@ class PublishCustomizedPipelineTemplateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
@knowledge_pipeline_publish_enabled
def post(self, pipeline_id: str):
parser = reqparse.RequestParser()

View file

@ -0,0 +1,145 @@
from __future__ import annotations
from flask_restx import Resource, reqparse
from controllers.console import console_ns as api
from controllers.console.wraps import (
account_initialization_required,
setup_required,
)
from extensions.ext_database import db
from libs.login import current_user, login_required
from models.red_blue import RedBlueChallenge, TeamPairing
@api.route("/red-blue-challenges")
class RedBlueListCreateApi(Resource):
    """Console endpoints for listing and creating red/blue-team challenges."""

    @api.doc("list_red_blue_challenges")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Return all red/blue challenges in the caller's workspace, newest first."""
        tenant_id = current_user.current_tenant_id
        if not tenant_id:
            # no active workspace selected; return empty list to avoid leaking data
            return {"result": "success", "data": []}
        rows = (
            db.session.query(RedBlueChallenge)
            .filter(RedBlueChallenge.tenant_id == tenant_id)
            .order_by(RedBlueChallenge.created_at.desc())
            .all()
        )
        return {
            "result": "success",
            "data": [
                {
                    "id": r.id,
                    "name": r.name,
                    "description": r.description,
                    "is_active": r.is_active,
                }
                for r in rows
            ],
        }

    @api.doc("create_red_blue_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create a red/blue challenge scoped to the caller's workspace (201)."""
        parser = reqparse.RequestParser()
        # SECURITY FIX: tenant_id was required=True and fully client-controlled,
        # letting a caller create challenges in an arbitrary tenant. It is now
        # optional (matching the challenges controller) and the authenticated
        # workspace always takes precedence below.
        parser.add_argument("tenant_id", type=str, required=False, location="json")
        parser.add_argument("app_id", type=str, required=True, location="json")
        parser.add_argument("name", type=str, required=True, location="json")
        parser.add_argument("description", type=str, required=False, location="json")
        parser.add_argument("judge_suite", type=dict, required=True, location="json")
        args = parser.parse_args()
        c = RedBlueChallenge()
        c.tenant_id = current_user.current_tenant_id or args.get("tenant_id")
        c.app_id = args["app_id"]
        c.name = args["name"]
        c.description = args.get("description")
        c.judge_suite = args["judge_suite"]
        db.session.add(c)
        db.session.commit()
        return {"result": "success", "data": {"id": c.id}}, 201
@api.route("/red-blue-challenges/<uuid:challenge_id>")
class RedBlueDetailApi(Resource):
    """Console endpoints for reading, updating and deleting one red/blue challenge."""

    @staticmethod
    def _get_owned(challenge_id):
        """Load the challenge only if it belongs to the caller's workspace.

        SECURITY FIX: without this tenant check, any authenticated console user
        could read or mutate another tenant's challenge by guessing its UUID.
        Cross-tenant ids now behave exactly like unknown ids (404).
        """
        c = db.session.get(RedBlueChallenge, str(challenge_id))
        if not c or c.tenant_id != current_user.current_tenant_id:
            return None
        return c

    @api.doc("get_red_blue_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, challenge_id):
        """Return basic challenge info, or 404 if unknown/not owned."""
        c = self._get_owned(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        return {
            "result": "success",
            "data": {"id": c.id, "name": c.name, "description": c.description, "is_active": c.is_active},
        }

    @api.doc("update_red_blue_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, challenge_id):
        """Partially update name/description/is_active; 404 if unknown/not owned."""
        c = self._get_owned(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        parser = reqparse.RequestParser()
        parser.add_argument("name", type=str, required=False, location="json")
        parser.add_argument("description", type=str, required=False, location="json")
        parser.add_argument("is_active", type=bool, required=False, location="json")
        args = parser.parse_args()
        # name must stay non-empty, hence the truthiness check.
        if args.get("name"):
            c.name = args["name"]
        if args.get("description") is not None:
            c.description = args["description"]
        if args.get("is_active") is not None:
            c.is_active = bool(args["is_active"])
        db.session.commit()
        return {"result": "success"}

    @api.doc("delete_red_blue_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, challenge_id):
        """Delete the challenge; 204 on success, 404 if unknown/not owned."""
        c = self._get_owned(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        db.session.delete(c)
        db.session.commit()
        return {"result": "success"}, 204
@api.route("/red-blue-challenges/<uuid:challenge_id>/pairings")
class RedBluePairingsApi(Resource):
    """Console endpoint listing the most recent team pairings of a challenge."""

    @api.doc("list_red_blue_pairings")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, challenge_id):
        """Return up to 100 newest pairings for a challenge the caller's workspace owns."""
        # SECURITY FIX: verify the challenge belongs to the caller's tenant before
        # exposing its pairings; previously any authenticated console user could
        # read another tenant's competition data by guessing the UUID.
        c = db.session.get(RedBlueChallenge, str(challenge_id))
        if not c or c.tenant_id != current_user.current_tenant_id:
            return {"result": "not_found"}, 404
        rows = (
            db.session.query(TeamPairing)
            .filter(TeamPairing.red_blue_challenge_id == str(challenge_id))
            .order_by(TeamPairing.created_at.desc())
            .limit(100)
            .all()
        )
        return {
            "result": "success",
            "data": [
                {
                    "id": r.id,
                    "red_points": r.red_points,
                    "blue_points": r.blue_points,
                    "judge_rating": r.judge_rating,
                    # created_at may be a datetime or a raw value; hasattr keeps this safe
                    "created_at": r.created_at.isoformat() if hasattr(r.created_at, "isoformat") else None,
                }
                for r in rows
            ],
        }

View file

@ -29,7 +29,6 @@ from controllers.console.wraps import (
account_initialization_required,
cloud_edition_billing_enabled,
enable_change_email,
enterprise_license_required,
only_edition_cloud,
setup_required,
)
@ -102,7 +101,6 @@ class AccountProfileApi(Resource):
@login_required
@account_initialization_required
@marshal_with(account_fields)
@enterprise_license_required
def get(self):
if not isinstance(current_user, Account):
raise ValueError("Invalid user account")

View file

@ -13,7 +13,6 @@ from configs import dify_config
from controllers.console import api
from controllers.console.wraps import (
account_initialization_required,
enterprise_license_required,
setup_required,
)
from core.mcp.auth.auth_flow import auth, handle_callback
@ -667,7 +666,6 @@ class ToolLabelsApi(Resource):
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def get(self):
return jsonable_encoder(ToolLabelsService.list_tool_labels())

View file

@ -24,12 +24,15 @@ from . import (
files,
forgot_password,
login,
register,
message,
passport,
remote_files,
saved_message,
site,
workflow,
challenges,
red_blue_challenges,
)
api.add_namespace(web_ns)
@ -45,6 +48,7 @@ __all__ = [
"files",
"forgot_password",
"login",
"register",
"message",
"passport",
"remote_files",
@ -52,4 +56,6 @@ __all__ = [
"site",
"web_ns",
"workflow",
"challenges",
"red_blue_challenges",
]

View file

@ -0,0 +1,121 @@
from __future__ import annotations
from flask_restx import Resource
from controllers.web import web_ns
from extensions.ext_database import db
from sqlalchemy import select
from models.challenge import Challenge, ChallengeAttempt
from models.model import App, Site
@web_ns.route("/challenges")
class ChallengeListApi(Resource):
    """Public (unauthenticated) listing of all active challenges, newest first."""

    def get(self):
        active = (
            db.session.query(Challenge)
            .filter(Challenge.is_active.is_(True))
            .order_by(Challenge.created_at.desc())
            .all()
        )
        payload = []
        for challenge in active:
            linked_app = db.session.get(App, challenge.app_id) if challenge.app_id else None
            code = None
            if challenge.app_id:
                # Resolve the public site code of the app serving this challenge.
                site_row = db.session.execute(
                    select(Site).where(Site.app_id == challenge.app_id, Site.status == "normal")
                ).scalar_one_or_none()
                code = site_row.code if site_row else None
            payload.append(
                {
                    "id": challenge.id,
                    "name": challenge.name,
                    "description": challenge.description,
                    "goal": challenge.goal,
                    "app_id": challenge.app_id,
                    "workflow_id": challenge.workflow_id,
                    "app_mode": linked_app.mode if linked_app else None,
                    "app_site_code": code,
                }
            )
        return {"result": "success", "data": payload}
@web_ns.route("/challenges/<uuid:challenge_id>")
class ChallengeDetailApi(Resource):
    """Public detail view of a single challenge, including linked-app info."""

    def get(self, challenge_id):
        challenge = db.session.get(Challenge, str(challenge_id))
        if challenge is None:
            return {"result": "not_found"}, 404
        linked_app = db.session.get(App, challenge.app_id) if challenge.app_id else None
        code = None
        if challenge.app_id:
            # Resolve the public site code of the app serving this challenge.
            site_row = db.session.execute(
                select(Site).where(Site.app_id == challenge.app_id, Site.status == "normal")
            ).scalar_one_or_none()
            code = site_row.code if site_row else None
        return {
            "result": "success",
            "data": {
                "id": challenge.id,
                "name": challenge.name,
                "description": challenge.description,
                "goal": challenge.goal,
                "is_active": challenge.is_active,
                "app_id": challenge.app_id,
                "workflow_id": challenge.workflow_id,
                "app_mode": linked_app.mode if linked_app else None,
                "app_site_code": code,
            },
        }
@web_ns.route("/challenges/<uuid:challenge_id>/leaderboard")
class ChallengeLeaderboardApi(Resource):
    """Public leaderboard of successful attempts, ranked by the challenge's scoring strategy."""

    # Maximum number of leaderboard entries returned.
    _LIMIT = 20

    def get(self, challenge_id):
        challenge = db.session.get(Challenge, str(challenge_id))
        if not challenge:
            return {"result": "not_found"}, 404
        strategy = challenge.scoring_strategy or 'highest_rating'
        base = db.session.query(ChallengeAttempt).filter(
            ChallengeAttempt.challenge_id == str(challenge_id),
            ChallengeAttempt.succeeded.is_(True),
        )
        # Earliest attempt is always the tie-breaker (and the whole ordering
        # for the 'first' strategy).
        tiebreak = ChallengeAttempt.created_at.asc()
        # One ordering per strategy; unknown strategy values fall back to
        # 'highest_rating'. NULL metrics sort last so unmeasured attempts
        # never outrank measured ones.
        orderings = {
            'first': (tiebreak,),
            'fastest': (ChallengeAttempt.elapsed_ms.asc().nullslast(), tiebreak),
            'fewest_tokens': (ChallengeAttempt.tokens_total.asc().nullslast(), tiebreak),
            'highest_rating': (ChallengeAttempt.judge_rating.desc().nullslast(), tiebreak),
            # 'custom': score column is computed by a scoring plugin.
            'custom': (ChallengeAttempt.score.desc().nullslast(), tiebreak),
        }
        order = orderings.get(strategy, orderings['highest_rating'])
        top_attempts = base.order_by(*order).limit(self._LIMIT).all()
        entries = [
            {
                "attempt_id": attempt.id,
                "account_id": attempt.account_id,
                "end_user_id": attempt.end_user_id,
                "score": attempt.score,
                "judge_rating": attempt.judge_rating,
                "tokens_total": attempt.tokens_total,
                "elapsed_ms": attempt.elapsed_ms,
                "created_at": attempt.created_at.isoformat() if hasattr(attempt.created_at, "isoformat") else None,
            }
            for attempt in top_attempts
        ]
        return {"result": "success", "data": entries}

View file

@ -0,0 +1,85 @@
from __future__ import annotations
from flask import request
from flask_restx import Resource
from controllers.web import web_ns
from extensions.ext_database import db
from models.red_blue import RedBlueChallenge, TeamPairing
from services.red_blue_service import RedBlueService
@web_ns.route("/red-blue-challenges")
class RedBlueListApi(Resource):
    """Public listing of all active red/blue challenges."""

    def get(self):
        active = (
            db.session.query(RedBlueChallenge)
            .filter(RedBlueChallenge.is_active.is_(True))
            .all()
        )
        data = [
            {"id": challenge.id, "name": challenge.name, "description": challenge.description}
            for challenge in active
        ]
        return {"result": "success", "data": data}
@web_ns.route("/red-blue-challenges/<uuid:challenge_id>")
class RedBlueDetailApi(Resource):
    """Public detail view of a single red/blue challenge."""

    def get(self, challenge_id):
        challenge = db.session.get(RedBlueChallenge, str(challenge_id))
        if challenge is None:
            return {"result": "not_found"}, 404
        return {
            "result": "success",
            "data": {
                "id": challenge.id,
                "name": challenge.name,
                "description": challenge.description,
            },
        }
@web_ns.route("/red-blue-challenges/<uuid:challenge_id>/submit")
class RedBlueSubmitApi(Resource):
    """Public endpoint for submitting a red- or blue-team prompt."""

    def post(self, challenge_id):
        """Validate and persist a team prompt submission; returns its id (201)."""
        # FIX: silent=True makes malformed JSON parse to None (then {}) so this
        # endpoint answers with its own JSON 400 below, instead of the HTML
        # BadRequest page that get_json(force=True) raises on a parse error.
        payload = request.get_json(force=True, silent=True) or {}
        team = payload.get("team")
        prompt = payload.get("prompt")
        if team not in ("red", "blue") or not prompt:
            return {"result": "bad_request"}, 400
        c = db.session.get(RedBlueChallenge, str(challenge_id))
        if not c:
            return {"result": "not_found"}, 404
        # Anonymous submission: account/end-user attribution is not wired up yet.
        sub = RedBlueService.submit_prompt(
            challenge_id=str(challenge_id),
            tenant_id=c.tenant_id,
            team=team,
            prompt=prompt,
            account_id=None,
            end_user_id=None,
        )
        return {"result": "success", "data": {"id": sub.id}}, 201
@web_ns.route("/red-blue-challenges/<uuid:challenge_id>/leaderboard")
class RedBlueLeaderboardApi(Resource):
    """Public aggregate scoreboard: total and relative points per team."""

    def get(self, challenge_id):
        def _team_total(points_column):
            # Sum one team's points across every pairing of this challenge.
            return (
                db.session.query(db.func.coalesce(db.func.sum(points_column), 0.0))
                .filter(TeamPairing.red_blue_challenge_id == str(challenge_id))
                .scalar()
            )

        red_total = float(_team_total(TeamPairing.red_points) or 0.0)
        blue_total = float(_team_total(TeamPairing.blue_points) or 0.0)
        combined = red_total + blue_total
        data = {
            "red_points": red_total,
            "blue_points": blue_total,
            # Ratios are 0.0 when no points exist yet (avoid division by zero).
            "red_ratio": red_total / combined if combined else 0.0,
            "blue_ratio": blue_total / combined if combined else 0.0,
        }
        return {"result": "success", "data": data}

View file

@ -0,0 +1,30 @@
from __future__ import annotations
from flask import request
from flask_restx import Resource
from controllers.web import web_ns
from extensions.ext_database import db
from services.account_service import RegisterService
@web_ns.route("/register")
class WebRegisterApi(Resource):
    """Public self-service account registration for the web app."""

    def post(self):
        """Register a new account from email/name/password; returns the account id (201)."""
        # FIX: silent=True makes malformed JSON parse to None (then {}) so this
        # endpoint answers with its own JSON 400 below, instead of the HTML
        # BadRequest page that get_json(force=True) raises on a parse error.
        payload = request.get_json(force=True, silent=True) or {}
        email = payload.get("email")
        name = payload.get("name") or "Player"
        password = payload.get("password")
        if not email or not password:
            return {"result": "bad_request"}, 400
        # No workspace is created here; the account joins one later.
        account = RegisterService.register(
            email=email,
            name=name,
            password=password,
            is_setup=False,
            create_workspace_required=False,
        )
        db.session.commit()
        return {"result": "success", "data": {"account_id": account.id}}, 201

View file

@ -58,6 +58,9 @@ class NodeType(StrEnum):
DOCUMENT_EXTRACTOR = "document-extractor"
LIST_OPERATOR = "list-operator"
AGENT = "agent"
CHALLENGE_EVALUATOR = "challenge-evaluator"
JUDGING_LLM = "judging-llm"
TEAM_CHALLENGE = "team-challenge"
class NodeExecutionType(StrEnum):

View file

@ -0,0 +1,3 @@
"""Challenge-evaluator workflow node package: re-exports the node class."""
from .node import ChallengeEvaluatorNode

__all__ = ['ChallengeEvaluatorNode']

View file

@ -0,0 +1,258 @@
# pyright: reportImplicitRelativeImport=none
from __future__ import annotations
import logging
import time
from collections.abc import Mapping
from typing import Any
from core.variables.segments import Segment
from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType, WorkflowNodeExecutionStatus
from core.workflow.node_events import NodeRunResult
from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig
from core.workflow.nodes.base.node import Node
from extensions.ext_database import db
from models.challenge import Challenge
from services.challenge_scorer_service import ChallengeScorerService
from services.challenge_service import ChallengeService
logger = logging.getLogger(__name__)
class ChallengeEvaluatorNode(Node):
    """Workflow node that decides whether a challenge attempt succeeded.

    Two evaluation modes are supported, selected by ``evaluation_mode`` in the
    node config:

    * ``rules`` (default) - pattern/rule matching on the referenced text via
      ``ChallengeService.evaluate_outcome``.
    * ``llm-judge`` - reads ``judge_passed`` / ``judge_rating`` /
      ``judge_feedback`` variables produced by an upstream Judging LLM node.

    When the config carries a ``challenge_id``, the attempt is also persisted
    through ``ChallengeService.record_attempt`` (best effort: persistence
    failures never crash the workflow).
    """

    node_type = NodeType.CHALLENGE_EVALUATOR
    execution_type = NodeExecutionType.EXECUTABLE
    # Validated title/desc/error-strategy container; raw config is kept separately.
    _node_data: BaseNodeData

    def init_node_data(self, data: Mapping[str, Any]):
        # Using BaseNodeData to carry title/desc; node data is accessed directly
        self._node_data = BaseNodeData.model_validate(data)
        self._config: dict[str, Any] = data

    def _get_error_strategy(self) -> ErrorStrategy | None:
        return getattr(self._node_data, 'error_strategy', None)

    def _get_retry_config(self) -> RetryConfig:
        return getattr(self._node_data, 'retry_config', RetryConfig())

    def _get_title(self) -> str:
        return getattr(self._node_data, 'title', 'Challenge Evaluator')

    def _get_description(self) -> str | None:
        return getattr(self._node_data, 'desc', None)

    def _get_default_value_dict(self) -> dict[str, Any]:
        return getattr(self._node_data, 'default_value_dict', {})

    def get_base_node_data(self) -> BaseNodeData:
        return self._node_data

    @classmethod
    def version(cls) -> str:
        return "1"

    def _run(self) -> NodeRunResult:
        """Evaluate the attempt, optionally record it, and emit FE-compatible outputs."""
        # Resolve response text from selector in config.inputs.response (frontend schema)
        output_text = ''
        source_selector = None
        inputs_cfg = self._config.get('inputs') or {}
        if isinstance(inputs_cfg, dict):
            source_selector = inputs_cfg.get('response')
        # fallback to older key if any
        source_selector = source_selector or self._config.get('value_selector')
        # Check evaluation mode from config
        evaluation_mode = self._config.get('evaluation_mode', 'rules')
        logger.info("ChallengeEvaluator - evaluation_mode: %s, source_selector: %s", evaluation_mode, source_selector)
        # Initialize judge variables
        is_judge_input = False
        judge_passed = False
        judge_rating = 0
        judge_feedback_from_input = ''
        output_text = ''

        def _segment_to_value(segment: Segment | None) -> Any:
            # Unwrap a variable-pool Segment into a plain Python value.
            if segment is None:
                return None
            if hasattr(segment, "to_object"):
                try:
                    return segment.to_object()
                except Exception:  # pragma: no cover - defensive
                    pass
            return getattr(segment, "value", segment)

        # If evaluation_mode is 'llm-judge', try to read from upstream Judging LLM node
        if evaluation_mode == 'llm-judge' and source_selector and len(source_selector) >= 1:
            try:
                # The first selector element names the upstream node whose
                # judge_* output variables we look up.
                node_id = source_selector[0]
                # Retrieve judge outputs as Segments and convert to primitive values
                passed_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_passed'])
                rating_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_rating'])
                feedback_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_feedback'])
                potential_judge_passed = _segment_to_value(passed_segment)
                potential_judge_rating = _segment_to_value(rating_segment)
                potential_judge_feedback = _segment_to_value(feedback_segment)
                logger.info(
                    "ChallengeEvaluator - Reading judge outputs: passed=%s, rating=%s, feedback=%s",
                    potential_judge_passed,
                    potential_judge_rating,
                    potential_judge_feedback,
                )
                # If judge_passed exists, we successfully read from a Judging LLM node
                if potential_judge_passed is not None:
                    is_judge_input = True
                    judge_passed = bool(potential_judge_passed)
                    judge_rating = int(potential_judge_rating or 0)
                    judge_feedback_from_input = str(potential_judge_feedback or '')
                    logger.info(
                        "ChallengeEvaluator - Judge input successfully read! passed=%s, rating=%s, feedback=%s",
                        judge_passed,
                        judge_rating,
                        judge_feedback_from_input,
                    )
            except Exception as e:
                logger.error("ChallengeEvaluator - Error reading judge outputs: %s", e, exc_info=True)
                is_judge_input = False
        # If not using judge input, get text output for rules-based evaluation
        if not is_judge_input and source_selector:
            try:
                segment = self.graph_runtime_state.variable_pool.get(source_selector)
                if segment is None:
                    output_text = ''
                elif hasattr(segment, 'text'):
                    output_text = segment.text
                else:
                    output_text = str(_segment_to_value(segment) or '')
            except Exception:
                # Missing/unreadable variable -> evaluate against empty text.
                output_text = ''
        # Evaluate based on mode
        if is_judge_input:
            ok = judge_passed
            details = {
                'mode': 'llm-judge',
                'rating': judge_rating,
                'feedback': judge_feedback_from_input,
            }
        else:
            # Rules-based evaluation (only if not using judge input)
            ok, details = ChallengeService.evaluate_outcome(output_text, self._config)
        # optional persistence if config carries challenge_id
        challenge_id = self._config.get('challenge_id')
        if challenge_id:
            try:
                # Calculate elapsed time in milliseconds
                # NOTE(review): assumes graph_runtime_state.start_at is a
                # time.time()-style epoch timestamp - confirm against the runtime.
                elapsed_ms = int((time.time() - self.graph_runtime_state.start_at) * 1000)
                # Get total tokens used in the workflow so far
                tokens_total = self.graph_runtime_state.total_tokens
                # Extract judge_rating from details if available (for highest_rating strategy)
                # (intentionally rebinds the earlier judge_rating local)
                judge_rating = None
                judge_feedback = None
                if isinstance(details, dict):
                    judge_rating = details.get('rating')
                    judge_feedback = details.get('feedback')
                # Load challenge to check scoring strategy
                challenge = db.session.get(Challenge, str(challenge_id))
                # Score field is reserved for custom scoring plugins.
                # For built-in strategies (first, fastest, fewest_tokens, highest_rating),
                # the leaderboard sorts by specific columns (created_at, elapsed_ms, tokens_total, judge_rating).
                score = None
                # If custom scoring is configured, compute score using plugin
                if challenge and challenge.scoring_strategy == 'custom':
                    try:
                        metrics = {
                            'succeeded': ok,
                            'tokens_total': tokens_total,
                            'elapsed_ms': elapsed_ms,
                            'rating': judge_rating,
                            'created_at': int(time.time() * 1000),
                        }
                        ctx = {
                            'tenant_id': self.tenant_id,
                            'app_id': self.app_id,
                            'workflow_id': self.workflow_id,
                            'challenge_id': str(challenge_id),
                            'end_user_id': None,
                            'timeout_ms': 5000,
                        }
                        result = ChallengeScorerService.score_with_plugin(
                            scorer_plugin_id=challenge.scoring_plugin_id,
                            scorer_entrypoint=challenge.scoring_entrypoint,
                            metrics=metrics,
                            config=challenge.scoring_config or {},
                            ctx=ctx,
                        )
                        score = result.get('score')
                        logger.info(
                            "Custom scorer computed score: %s (details: %s)",
                            score,
                            result.get('details'),
                        )
                    except Exception as e:
                        logger.error("Custom scorer failed: %s", e, exc_info=True)
                        # Continue with score=None on error
                ChallengeService.record_attempt(
                    tenant_id=self.tenant_id,
                    challenge_id=challenge_id,
                    end_user_id=None,
                    account_id=None,
                    workflow_run_id=None,
                    succeeded=ok,
                    score=score,
                    judge_rating=judge_rating,
                    judge_feedback=judge_feedback,
                    tokens_total=tokens_total,
                    elapsed_ms=elapsed_ms,
                    session=db.session,
                )
            except Exception:
                # do not crash the workflow if recording fails
                pass
        # Always provide all output variables to match frontend getOutputVars
        outputs: dict[str, Any] = {
            'challenge_succeeded': ok,
            'judge_rating': 0,
            'judge_feedback': '',
            'message': '',
        }
        # Override with actual values if evaluator provides them
        if isinstance(details, dict):
            logger.debug("ChallengeEvaluator - details: %s", details)
            if 'rating' in details:
                outputs['judge_rating'] = details.get('rating')
            if 'feedback' in details:
                outputs['judge_feedback'] = details.get('feedback')
            if 'message' in details:
                outputs['message'] = details.get('message')
            # If no explicit message, create one from evaluation details
            if not outputs['message']:
                if ok:
                    outputs['message'] = f"Success: {details.get('mode', 'evaluation')} matched"
                else:
                    outputs['message'] = f"Failed: {details.get('mode', 'evaluation')} did not match"
        return NodeRunResult(
            status=WorkflowNodeExecutionStatus.SUCCEEDED,
            outputs=outputs,
        )

View file

@ -0,0 +1,188 @@
from __future__ import annotations
import json
import re
from collections.abc import Mapping
from typing import Any
from core.model_manager import ModelManager
from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import (
PromptMessageContentType,
SystemPromptMessage,
UserPromptMessage,
)
from core.model_runtime.entities.model_entities import ModelType
from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType, WorkflowNodeExecutionStatus
from core.workflow.node_events import NodeRunResult
from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig
from core.workflow.nodes.base.node import Node
from services.challenge_service import ChallengeService
class JudgingLLMNode(Node):
    """Workflow node that asks a judge LLM to grade a response against a goal.

    Reads ``inputs.goal`` / ``inputs.response`` variable selectors plus a
    ``judge_model`` config and ``rubric_prompt_template`` from the node
    config, invokes the model synchronously, and parses a JSON verdict
    from the reply.

    Outputs (FE-compatible keys): ``judge_passed`` (bool), ``judge_rating``
    (int), ``judge_feedback`` (str), and ``judge_raw`` (JSON string, only
    present when the model returned a parseable verdict).
    """

    node_type = NodeType.JUDGING_LLM
    execution_type = NodeExecutionType.EXECUTABLE

    # Populated by init_node_data(); holds the validated base node fields.
    _node_data: BaseNodeData

    def init_node_data(self, data: Mapping[str, Any]):
        """Validate the base fields and keep the raw mapping for custom keys."""
        self._node_data = BaseNodeData.model_validate(data)
        # Access data directly from node_data, not from a 'config' key
        self._config: dict[str, Any] = data

    def _get_error_strategy(self) -> ErrorStrategy | None:
        return getattr(self._node_data, 'error_strategy', None)

    def _get_retry_config(self) -> RetryConfig:
        return getattr(self._node_data, 'retry_config', RetryConfig())

    def _get_title(self) -> str:
        return getattr(self._node_data, 'title', 'Judging LLM')

    def _get_description(self) -> str | None:
        return getattr(self._node_data, 'desc', None)

    def _get_default_value_dict(self) -> dict[str, Any]:
        return getattr(self._node_data, 'default_value_dict', {})

    def get_base_node_data(self) -> BaseNodeData:
        return self._node_data

    @classmethod
    def version(cls) -> str:
        return "1"

    def _run(self) -> NodeRunResult:
        """Judge the configured response; never fails the workflow.

        Always returns SUCCEEDED — judging errors are logged and the
        default (failing) outputs are kept.
        """
        # Placeholder with FE-compatible keys. Extract inputs for future wiring.
        inputs_cfg = self._config.get('inputs') or {}
        goal_selector = None
        response_selector = None
        if isinstance(inputs_cfg, dict):
            goal_selector = inputs_cfg.get('goal')
            response_selector = inputs_cfg.get('response')
        # Attempt to read variables (not used in placeholder decision)
        _ = None
        try:
            if goal_selector:
                _ = self.graph_runtime_state.variable_pool.get(goal_selector)
            if response_selector:
                _ = self.graph_runtime_state.variable_pool.get(response_selector)
        except Exception:
            # Best-effort probe only; a missing variable is not an error here.
            pass
        # Defaults returned when judging is skipped or fails.
        outputs = {
            'judge_passed': False,
            'judge_rating': 0,
            'judge_feedback': '',
        }
        # If model config and rubric provided, invoke LLM synchronously to judge
        judge_model = self._config.get('judge_model') or {}
        rubric = self._config.get('rubric_prompt_template') or ''
        provider = (judge_model or {}).get('provider')
        model_name = (judge_model or {}).get('name')
        completion_params = (judge_model or {}).get('completion_params') or {}

        def _segment_to_text(seg: Any) -> str:
            # Coerce a variable-pool segment to plain text for prompting.
            try:
                # Many variable types expose .text
                if hasattr(seg, 'text'):
                    return str(seg.text)
                if isinstance(seg, (dict, list)):
                    return json.dumps(seg, ensure_ascii=False)
                return str(seg)
            except Exception:
                return ''

        # Debug: log what we're checking
        import logging
        logger = logging.getLogger(__name__)
        logger.info(
            "JudgingLLM check - provider: %s, model: %s, rubric_len: %s, response_selector: %s",
            provider,
            model_name,
            len(rubric) if rubric else 0,
            response_selector,
        )
        if provider and model_name and rubric and response_selector:
            logger.info("JudgingLLM: All conditions met, invoking LLM...")
            try:
                goal_val = self.graph_runtime_state.variable_pool.get(goal_selector) if goal_selector else None
                response_val = self.graph_runtime_state.variable_pool.get(response_selector)
                goal_text = _segment_to_text(goal_val)
                response_text = _segment_to_text(response_val)
                # Rubric is the system prompt; goal/response go in the user turn.
                json_template = '{"passed": boolean, "rating": number (0-10), "feedback": string}'
                prompt_body = (
                    f"Goal:\n{goal_text}\n\n"
                    f"Response:\n{response_text}\n\n"
                    f"Return JSON with rating 0-10: {json_template}"
                )
                prompt_messages = [
                    SystemPromptMessage(content=rubric),
                    UserPromptMessage(content=prompt_body),
                ]
                model_instance = ModelManager().get_model_instance(
                    tenant_id=self.tenant_id,
                    model_type=ModelType.LLM,
                    provider=provider,
                    model=model_name,
                )
                result: LLMResult = model_instance.invoke_llm(
                    prompt_messages=prompt_messages,
                    model_parameters=completion_params,
                    stop=[],
                    stream=False,
                    user=self.user_id,
                )  # type: ignore
                # Extract text from result
                text_out = ''
                content = getattr(result.message, 'content', '')
                if isinstance(content, str):
                    text_out = content
                elif isinstance(content, list):
                    # Multi-part content: concatenate only the text parts.
                    for item in content:
                        if getattr(item, 'type', None) == PromptMessageContentType.TEXT:
                            text_out += str(getattr(item, 'data', ''))
                else:
                    text_out = str(content)
                # Parse last JSON object in output
                verdict: dict[str, Any] | None = None
                try:
                    # Greedy brace match: grabs the outermost {...} span of the reply.
                    matches = re.findall(r"\{[\s\S]*\}", text_out)
                    if matches:
                        verdict = json.loads(matches[-1])
                except Exception:
                    verdict = None
                if isinstance(verdict, dict):
                    outputs['judge_passed'] = bool(verdict.get('passed'))
                    # NOTE(review): int(...) raises on a non-numeric 'rating'
                    # (caught by the outer except, reverting to defaults) — confirm acceptable.
                    outputs['judge_rating'] = int(verdict.get('rating') or 0)
                    outputs['judge_feedback'] = str(verdict.get('feedback') or '')
                    outputs['judge_raw'] = json.dumps(verdict)
                else:
                    # Fallback to simple rules if configured
                    success_type = self._config.get('success_type')
                    success_pattern = self._config.get('success_pattern')
                    if success_type and success_pattern:
                        ok, _ = ChallengeService.evaluate_outcome(response_text, {
                            'success_type': success_type,
                            'success_pattern': success_pattern,
                        })
                        outputs['judge_passed'] = ok
                        outputs['judge_rating'] = 10 if ok else 0
                        outputs['judge_feedback'] = 'passed by rules' if ok else 'failed by rules'
            except Exception as e:
                # keep default outputs on error
                logger.error("JudgingLLM error: %s", e, exc_info=True)
                pass
        else:
            logger.warning("JudgingLLM skipped - missing required fields")
        return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, outputs=outputs)

View file

@ -24,6 +24,9 @@ from core.workflow.nodes.tool import ToolNode
from core.workflow.nodes.variable_aggregator import VariableAggregatorNode
from core.workflow.nodes.variable_assigner.v1 import VariableAssignerNode as VariableAssignerNodeV1
from core.workflow.nodes.variable_assigner.v2 import VariableAssignerNode as VariableAssignerNodeV2
from core.workflow.nodes.challenge_evaluator.node import ChallengeEvaluatorNode
from core.workflow.nodes.judging_llm.node import JudgingLLMNode
from core.workflow.nodes.team_challenge.node import TeamChallengeNode
LATEST_VERSION = "latest"
@ -142,4 +145,16 @@ NODE_TYPE_CLASSES_MAPPING: Mapping[NodeType, Mapping[str, type[Node]]] = {
LATEST_VERSION: KnowledgeIndexNode,
"1": KnowledgeIndexNode,
},
NodeType.CHALLENGE_EVALUATOR: {
LATEST_VERSION: ChallengeEvaluatorNode,
"1": ChallengeEvaluatorNode,
},
NodeType.JUDGING_LLM: {
LATEST_VERSION: JudgingLLMNode,
"1": JudgingLLMNode,
},
NodeType.TEAM_CHALLENGE: {
LATEST_VERSION: TeamChallengeNode,
"1": TeamChallengeNode,
},
}

View file

@ -0,0 +1,68 @@
from __future__ import annotations
from collections.abc import Mapping
from typing import Any
from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType, WorkflowNodeExecutionStatus
from core.workflow.node_events import NodeRunResult
from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig
from core.workflow.nodes.base.node import Node
class TeamChallengeNode(Node):
    """Placeholder node for red/blue team challenges.

    Echoes the team selected via the ``inputs.team_choice`` selector and
    emits zeroed judging/scoring outputs for the frontend to consume.
    """

    node_type = NodeType.TEAM_CHALLENGE
    execution_type = NodeExecutionType.EXECUTABLE

    # Populated by init_node_data(); validated base node fields.
    _node_data: BaseNodeData

    def init_node_data(self, data: Mapping[str, Any]):
        """Validate base fields and keep the raw mapping for custom keys."""
        self._node_data = BaseNodeData.model_validate(data)
        self._config: dict[str, Any] = data

    def _get_error_strategy(self) -> ErrorStrategy | None:
        return getattr(self._node_data, 'error_strategy', None)

    def _get_retry_config(self) -> RetryConfig:
        return getattr(self._node_data, 'retry_config', RetryConfig())

    def _get_title(self) -> str:
        return getattr(self._node_data, 'title', 'Team Challenge')

    def _get_description(self) -> str | None:
        return getattr(self._node_data, 'desc', None)

    def _get_default_value_dict(self) -> dict[str, Any]:
        return getattr(self._node_data, 'default_value_dict', {})

    def get_base_node_data(self) -> BaseNodeData:
        return self._node_data

    @classmethod
    def version(cls) -> str:
        return "1"

    def _run(self) -> NodeRunResult:
        """Resolve ``inputs.team_choice`` and return placeholder outputs."""
        # Read inputs.team_choice for consistency with the FE schema.
        config_inputs = self._config.get('inputs') or {}
        selected_team = ''
        if isinstance(config_inputs, dict):
            selector = config_inputs.get('team_choice')
            if selector:
                try:
                    raw_value = self.graph_runtime_state.variable_pool.get_value_by_selector(selector)
                    selected_team = str(raw_value or '')
                except Exception:
                    # Best-effort: an unreadable variable simply means no team.
                    selected_team = ''
        return NodeRunResult(
            status=WorkflowNodeExecutionStatus.SUCCEEDED,
            outputs={
                'team': selected_team,
                'judge_passed': False,
                'judge_rating': 0,
                'judge_feedback': '',
                'categories': {},
                'team_points': 0.0,
                'total_points': 0.0,
            },
        )

View file

@ -0,0 +1,170 @@
"""add challenge & red/blue tables
Revision ID: 183e2d30fb4e
Revises: 68519ad5cd18
Create Date: 2025-09-30 08:22:31.223257
"""
from alembic import op
import models as models
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '183e2d30fb4e'       # this migration
down_revision = '68519ad5cd18'  # parent migration in the chain
branch_labels = None
depends_on = None
def upgrade():
    """Create the challenge and red/blue competition tables (with their
    tenant/app indexes) and drop providers.credential_status."""
    # ### commands auto generated by Alembic - please adjust! ###
    # One row per user attempt at a single-player challenge.
    op.create_table('challenge_attempts',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('challenge_id', models.types.StringUUID(), nullable=False),
        sa.Column('end_user_id', models.types.StringUUID(), nullable=True),
        sa.Column('account_id', models.types.StringUUID(), nullable=True),
        sa.Column('workflow_run_id', models.types.StringUUID(), nullable=True),
        sa.Column('succeeded', sa.Boolean(), server_default=sa.text('false'), nullable=False),
        sa.Column('score', sa.Float(), nullable=True),
        sa.Column('judge_rating', sa.Integer(), nullable=True),
        sa.Column('judge_feedback', sa.Text(), nullable=True),
        sa.Column('judge_output_raw', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('tokens_total', sa.Integer(), nullable=True),
        sa.Column('elapsed_ms', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='challenge_attempts_pkey')
    )
    with op.batch_alter_table('challenge_attempts', schema=None) as batch_op:
        batch_op.create_index('challenge_attempts_challenge_id_idx', ['challenge_id'], unique=False)
        batch_op.create_index('challenge_attempts_tenant_id_idx', ['tenant_id'], unique=False)
    # Challenge definitions: success rule plus optional evaluator/scorer plugins.
    op.create_table('challenges',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('app_id', models.types.StringUUID(), nullable=False),
        sa.Column('workflow_id', models.types.StringUUID(), nullable=True),
        sa.Column('name', sa.Text(), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('goal', sa.Text(), nullable=True),
        sa.Column('success_type', sa.String(length=64), server_default=sa.text("'regex'"), nullable=False),
        sa.Column('success_pattern', sa.Text(), nullable=True),
        sa.Column('secret_ref', sa.Text(), nullable=True),
        sa.Column('evaluator_type', sa.String(length=32), server_default=sa.text("'rules'"), nullable=False),
        sa.Column('evaluator_plugin_id', sa.Text(), nullable=True),
        sa.Column('evaluator_entrypoint', sa.Text(), nullable=True),
        sa.Column('evaluator_config', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('scoring_strategy', sa.String(length=64), server_default=sa.text("'first'"), nullable=False),
        sa.Column('scoring_plugin_id', sa.Text(), nullable=True),
        sa.Column('scoring_entrypoint', sa.Text(), nullable=True),
        sa.Column('scoring_config', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('is_active', sa.Boolean(), server_default=sa.text('true'), nullable=False),
        sa.Column('created_by', models.types.StringUUID(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.Column('updated_by', models.types.StringUUID(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='challenges_pkey')
    )
    with op.batch_alter_table('challenges', schema=None) as batch_op:
        batch_op.create_index('challenges_app_id_idx', ['app_id'], unique=False)
        batch_op.create_index('challenges_tenant_id_idx', ['tenant_id'], unique=False)
    # Red vs. blue competition definitions (judge suite + selection/scoring policies).
    op.create_table('red_blue_challenges',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('app_id', models.types.StringUUID(), nullable=False),
        sa.Column('workflow_id', models.types.StringUUID(), nullable=True),
        sa.Column('name', sa.Text(), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('judge_suite', postgresql.JSONB(astext_type=sa.Text()), nullable=False),
        sa.Column('defense_selection_policy', sa.String(length=64), server_default=sa.text("'latest_best'"), nullable=False),
        sa.Column('attack_selection_policy', sa.String(length=64), server_default=sa.text("'latest_best'"), nullable=False),
        sa.Column('scoring_strategy', sa.String(length=64), server_default=sa.text("'red_blue_ratio'"), nullable=False),
        sa.Column('theme', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('instructions_md', sa.Text(), nullable=True),
        sa.Column('is_active', sa.Boolean(), server_default=sa.text('true'), nullable=False),
        sa.Column('created_by', models.types.StringUUID(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.Column('updated_by', models.types.StringUUID(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='red_blue_challenges_pkey')
    )
    with op.batch_alter_table('red_blue_challenges', schema=None) as batch_op:
        batch_op.create_index('red_blue_challenges_app_id_idx', ['app_id'], unique=False)
        batch_op.create_index('red_blue_challenges_tenant_id_idx', ['tenant_id'], unique=False)
    # Attack/defense pairings with judge output and per-team points.
    op.create_table('team_pairings',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('red_blue_challenge_id', models.types.StringUUID(), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('attack_submission_id', models.types.StringUUID(), nullable=True),
        sa.Column('defense_submission_id', models.types.StringUUID(), nullable=True),
        sa.Column('judge_output_raw', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('categories', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('judge_rating', sa.Integer(), nullable=True),
        sa.Column('judge_feedback', sa.Text(), nullable=True),
        sa.Column('red_points', sa.Float(), server_default=sa.text('0'), nullable=False),
        sa.Column('blue_points', sa.Float(), server_default=sa.text('0'), nullable=False),
        sa.Column('tokens_total', sa.Integer(), nullable=True),
        sa.Column('elapsed_ms', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='team_pairings_pkey')
    )
    with op.batch_alter_table('team_pairings', schema=None) as batch_op:
        batch_op.create_index('team_pairings_challenge_id_idx', ['red_blue_challenge_id'], unique=False)
        batch_op.create_index('team_pairings_tenant_id_idx', ['tenant_id'], unique=False)
    # Prompts submitted by red/blue team members.
    op.create_table('team_submissions',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('red_blue_challenge_id', models.types.StringUUID(), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('account_id', models.types.StringUUID(), nullable=True),
        sa.Column('end_user_id', models.types.StringUUID(), nullable=True),
        sa.Column('team', sa.String(length=16), nullable=False),
        sa.Column('prompt', sa.Text(), nullable=False),
        sa.Column('active', sa.Boolean(), server_default=sa.text('true'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='team_submissions_pkey')
    )
    with op.batch_alter_table('team_submissions', schema=None) as batch_op:
        batch_op.create_index('team_submissions_challenge_id_idx', ['red_blue_challenge_id'], unique=False)
        batch_op.create_index('team_submissions_tenant_id_idx', ['tenant_id'], unique=False)
    # NOTE(review): unrelated to the challenge feature — this drops
    # providers.credential_status (likely autogenerate drift). Confirm it
    # is intentional before running in production.
    with op.batch_alter_table('providers', schema=None) as batch_op:
        batch_op.drop_column('credential_status')
    # ### end Alembic commands ###
def downgrade():
    """Reverse upgrade(): restore providers.credential_status, then drop the
    challenge / red-blue tables (indexes first, reverse creation order)."""
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('providers', schema=None) as batch_op:
        batch_op.add_column(sa.Column('credential_status', sa.VARCHAR(length=20), server_default=sa.text("'active'::character varying"), autoincrement=False, nullable=True))
    with op.batch_alter_table('team_submissions', schema=None) as batch_op:
        batch_op.drop_index('team_submissions_tenant_id_idx')
        batch_op.drop_index('team_submissions_challenge_id_idx')
    op.drop_table('team_submissions')
    with op.batch_alter_table('team_pairings', schema=None) as batch_op:
        batch_op.drop_index('team_pairings_tenant_id_idx')
        batch_op.drop_index('team_pairings_challenge_id_idx')
    op.drop_table('team_pairings')
    with op.batch_alter_table('red_blue_challenges', schema=None) as batch_op:
        batch_op.drop_index('red_blue_challenges_tenant_id_idx')
        batch_op.drop_index('red_blue_challenges_app_id_idx')
    op.drop_table('red_blue_challenges')
    with op.batch_alter_table('challenges', schema=None) as batch_op:
        batch_op.drop_index('challenges_tenant_id_idx')
        batch_op.drop_index('challenges_app_id_idx')
    op.drop_table('challenges')
    with op.batch_alter_table('challenge_attempts', schema=None) as batch_op:
        batch_op.drop_index('challenge_attempts_tenant_id_idx')
        batch_op.drop_index('challenge_attempts_challenge_id_idx')
    op.drop_table('challenge_attempts')
    # ### end Alembic commands ###

View file

@ -91,6 +91,8 @@ from .workflow import (
WorkflowRun,
WorkflowType,
)
from .challenge import Challenge, ChallengeAttempt
from .red_blue import RedBlueChallenge, TeamSubmission, TeamPairing
__all__ = [
"APIBasedExtension",
@ -181,4 +183,9 @@ __all__ = [
"WorkflowRunTriggeredFrom",
"WorkflowToolProvider",
"WorkflowType",
"Challenge",
"ChallengeAttempt",
"RedBlueChallenge",
"TeamSubmission",
"TeamPairing",
]

91
api/models/challenge.py Normal file
View file

@ -0,0 +1,91 @@
from __future__ import annotations
from datetime import datetime
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column
from .base import Base
from .types import StringUUID
class Challenge(Base):
    """A single-player challenge attached to an app (and optionally a workflow).

    Stores the success rule (regex/contains pattern), optional custom
    evaluator and scorer plugin references, and audit columns.
    """

    __tablename__ = "challenges"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="challenges_pkey"),
        sa.Index("challenges_tenant_id_idx", "tenant_id"),
        sa.Index("challenges_app_id_idx", "app_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    # Ownership / scoping
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    workflow_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    name: Mapped[str] = mapped_column(sa.Text, nullable=False)
    description: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    goal: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    # Built-in success rule: mode ('regex' by default) plus the pattern to match.
    success_type: Mapped[str] = mapped_column(sa.String(64), nullable=False, server_default=sa.text("'regex'"))
    success_pattern: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    secret_ref: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    # Optional custom evaluator plugin ('rules' default means built-in rules).
    evaluator_type: Mapped[str] = mapped_column(sa.String(32), nullable=False, server_default=sa.text("'rules'"))
    evaluator_plugin_id: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    evaluator_entrypoint: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    evaluator_config = mapped_column(JSONB, nullable=True)
    # Optional custom scorer plugin ('first' is the default strategy).
    scoring_strategy: Mapped[str] = mapped_column(sa.String(64), nullable=False, server_default=sa.text("'first'"))
    scoring_plugin_id: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    scoring_entrypoint: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    scoring_config = mapped_column(JSONB, nullable=True)
    is_active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))
    # Audit columns; timestamps are DB-side defaults.
    created_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
    updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    # NOTE(review): no onupdate is configured — updated_at only changes if
    # application code sets it explicitly; confirm that is intended.
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
class ChallengeAttempt(Base):
    """One user attempt at a Challenge, with outcome and usage metrics.

    Either end_user_id (web app user) or account_id (console user) may be
    set; both are nullable.
    """

    __tablename__ = "challenge_attempts"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="challenge_attempts_pkey"),
        sa.Index("challenge_attempts_tenant_id_idx", "tenant_id"),
        sa.Index("challenge_attempts_challenge_id_idx", "challenge_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    challenge_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    end_user_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    account_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    workflow_run_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    # Outcome: pass/fail flag plus optional score and judge verdict.
    succeeded: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"))
    score: Mapped[float | None] = mapped_column(sa.Float, nullable=True)
    judge_rating: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    judge_feedback: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    judge_output_raw = mapped_column(JSONB, nullable=True)
    # Usage metrics for the attempt.
    tokens_total: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    elapsed_ms: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )

114
api/models/red_blue.py Normal file
View file

@ -0,0 +1,114 @@
from __future__ import annotations
from datetime import datetime
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column
from .base import Base
from .types import StringUUID
class RedBlueChallenge(Base):
    """A red-vs-blue competition attached to an app (and optionally a workflow).

    judge_suite holds the JSON judging configuration; selection policies
    pick which submissions are paired, and scoring_strategy controls how
    points are split between teams.
    """

    __tablename__ = "red_blue_challenges"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="red_blue_challenges_pkey"),
        sa.Index("red_blue_challenges_tenant_id_idx", "tenant_id"),
        sa.Index("red_blue_challenges_app_id_idx", "app_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    workflow_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    name: Mapped[str] = mapped_column(sa.Text, nullable=False)
    description: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    # JSON judging configuration; required.
    judge_suite = mapped_column(JSONB, nullable=False)
    # How opposing submissions are selected for pairing ('latest_best' default).
    defense_selection_policy: Mapped[str] = mapped_column(
        sa.String(64), nullable=False, server_default=sa.text("'latest_best'")
    )
    attack_selection_policy: Mapped[str] = mapped_column(
        sa.String(64), nullable=False, server_default=sa.text("'latest_best'")
    )
    scoring_strategy: Mapped[str] = mapped_column(
        sa.String(64), nullable=False, server_default=sa.text("'red_blue_ratio'")
    )
    # Optional UI theme (JSON) and markdown instructions shown to players.
    theme = mapped_column(JSONB, nullable=True)
    instructions_md: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    is_active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))
    # Audit columns; timestamps are DB-side defaults.
    created_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
    updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    # NOTE(review): no onupdate configured — updated_at only changes when set
    # explicitly by application code; confirm intended.
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
class TeamSubmission(Base):
    """A prompt submitted by a red or blue team member for a competition."""

    __tablename__ = "team_submissions"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="team_submissions_pkey"),
        sa.Index("team_submissions_challenge_id_idx", "red_blue_challenge_id"),
        sa.Index("team_submissions_tenant_id_idx", "tenant_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    red_blue_challenge_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    # Submitter: console account or web-app end user (both nullable).
    account_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    end_user_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    team: Mapped[str] = mapped_column(sa.String(16), nullable=False)  # 'red' | 'blue'
    prompt: Mapped[str] = mapped_column(sa.Text, nullable=False)
    # Whether this submission is eligible for pairing.
    active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
class TeamPairing(Base):
    """The judged outcome of pairing one attack submission against one defense.

    Stores the raw judge output, per-category results, and the points
    awarded to each team, plus usage metrics for the run.
    """

    __tablename__ = "team_pairings"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="team_pairings_pkey"),
        sa.Index("team_pairings_challenge_id_idx", "red_blue_challenge_id"),
        sa.Index("team_pairings_tenant_id_idx", "tenant_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    red_blue_challenge_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    # The two submissions that were paired (nullable to tolerate missing sides).
    attack_submission_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    defense_submission_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    # Judge verdict: raw JSON, per-category breakdown, rating, and feedback.
    judge_output_raw = mapped_column(JSONB, nullable=True)
    categories = mapped_column(JSONB, nullable=True)
    judge_rating: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    judge_feedback: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    # Points awarded to each team for this pairing (default 0).
    red_points: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0"))
    blue_points: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0"))
    # Usage metrics for the judging run.
    tokens_total: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    elapsed_ms: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )

View file

@ -223,3 +223,6 @@ vdb = [
"xinference-client~=1.2.2",
"mo-vector~=0.1.13",
]
[tool.pyright]
typeCheckingMode = "basic"
reportImplicitRelativeImport = "none"

View file

@ -8,7 +8,8 @@
"extensions",
"core/app/app_config/easy_ui_based_app/dataset"
],
"typeCheckingMode": "strict",
"typeCheckingMode": "basic",
"reportImplicitRelativeImport": "none",
"allowedUntypedLibraries": [
"flask_restx",
"flask_login",

1
api/run.sh Normal file
View file

@ -0,0 +1 @@
#!/usr/bin/env bash
# Dev convenience launcher: run the Flask API (app.py) via uv with dev
# dependencies, listening on all interfaces at port 5001 with the
# debugger/reloader enabled. Not for production use.
uv run --dev flask --app app run --host 0.0.0.0 --port 5001 --debug

View file

@ -0,0 +1,56 @@
"""
Challenge scorer protocol and type definitions.
Defines the interface for custom scoring plugins that compute
numeric scores from attempt metrics for leaderboard ranking.
"""
from __future__ import annotations
from typing import Any, Protocol, TypedDict
class ScoringContext(TypedDict, total=False):
    """Context provided to scorer plugins (all keys optional)."""

    tenant_id: str
    app_id: str
    workflow_id: str
    challenge_id: str
    end_user_id: str | None
    # Execution budget in milliseconds; enforcement is the caller's job.
    timeout_ms: int
class AttemptMetrics(TypedDict, total=False):
    """Metrics from a challenge attempt (all keys optional)."""

    succeeded: bool
    tokens_total: int | None
    elapsed_ms: int | None
    rating: int | None
    created_at: int | None  # epoch ms
class ScoringResult(TypedDict, total=False):
    """Result returned by a scorer plugin."""

    # Numeric score used for leaderboard ranking; required in practice.
    score: float
    # Optional plugin-specific breakdown of how the score was computed.
    details: dict[str, Any] | None
class ScorerProtocol(Protocol):
    """Structural protocol that all scorer plugins must implement."""

    def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
        """
        Compute a numeric score from attempt metrics.

        Args:
            metrics: Attempt metrics (tokens, time, rating, etc.)
            config: Plugin-specific configuration (from challenge.scoring_config)
            ctx: Context with tenant_id, app_id, etc.

        Returns:
            ScoringResult with computed score and optional details
        """
        ...

View file

@ -0,0 +1,112 @@
"""
Challenge scorer service.
Loads and invokes custom scorer plugins to compute scores from attempt metrics.
"""
from __future__ import annotations
import logging
from typing import Any
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
logger = logging.getLogger(__name__)


class ChallengeScorerService:
    """Service for loading and invoking custom scorer plugins.

    Plugins are addressed by an entrypoint string of the form
    ``'pkg.module:ClassName'``; instantiated plugins are cached per
    (plugin_id, entrypoint) pair for the life of the process.
    """

    # Process-wide cache of instantiated scorer plugins, keyed by
    # "plugin_id:entrypoint".
    _plugin_cache: dict[str, Any] = {}

    @classmethod
    def score_with_plugin(
        cls,
        *,
        scorer_plugin_id: str | None,
        scorer_entrypoint: str | None,
        metrics: AttemptMetrics,
        config: dict[str, Any] | None,
        ctx: ScoringContext,
    ) -> ScoringResult:
        """
        Compute score using a custom scorer plugin.

        Args:
            scorer_plugin_id: Plugin identifier (e.g., 'builtin.weighted_scorer')
            scorer_entrypoint: Entrypoint path (e.g., 'services.scorers.weighted:WeightedScorer')
            metrics: Attempt metrics to score
            config: Plugin-specific configuration
            ctx: Scoring context

        Returns:
            ScoringResult with computed score

        Raises:
            ValueError: If the plugin cannot be loaded, raises during
                scoring, or returns a malformed result.
        """
        if not scorer_plugin_id or not scorer_entrypoint:
            raise ValueError("scorer_plugin_id and scorer_entrypoint are required for custom scoring")

        # Load plugin
        scorer = cls._load_plugin(scorer_plugin_id, scorer_entrypoint)
        if not scorer:
            raise ValueError(f"Failed to load scorer plugin: {scorer_plugin_id}:{scorer_entrypoint}")

        # TODO: enforce ctx["timeout_ms"] (default 5000) via a watchdog —
        # plugin code currently runs unbounded in-process.
        try:
            result = scorer.score(metrics, config or {}, ctx)
        except Exception as e:
            # Lazy %-style args keep formatting off the happy path; chain the
            # original exception so callers can see the root cause.
            logger.error("Scorer plugin %s failed: %s", scorer_plugin_id, e, exc_info=True)
            raise ValueError(f"Scorer plugin execution failed: {e}") from e
        # Validate outside the try block so this error is not re-wrapped
        # as an "execution failed" error.
        if not isinstance(result, dict) or "score" not in result:
            raise ValueError("Scorer must return a dict with 'score' key")
        return result

    @classmethod
    def _load_plugin(cls, plugin_id: str, entrypoint: str) -> Any:
        """
        Load (and cache) a scorer plugin instance by entrypoint.

        Args:
            plugin_id: Plugin identifier for caching
            entrypoint: Python path like 'pkg.module:ClassName'

        Returns:
            Scorer instance or None if loading fails
        """
        cache_key = f"{plugin_id}:{entrypoint}"
        if cache_key in cls._plugin_cache:
            return cls._plugin_cache[cache_key]
        try:
            # Parse entrypoint: 'pkg.module:ClassName'
            if ":" not in entrypoint:
                raise ValueError(f"Invalid entrypoint format: {entrypoint}. Expected 'module:ClassName'")
            module_path, class_name = entrypoint.split(":", 1)
            # Dynamic import
            import importlib
            module = importlib.import_module(module_path)
            scorer_class = getattr(module, class_name)
            # Instantiate and cache
            scorer = scorer_class()
            cls._plugin_cache[cache_key] = scorer
            logger.info("Loaded scorer plugin: %s from %s", plugin_id, entrypoint)
            return scorer
        except Exception as e:
            # Loading is best-effort: callers treat None as "unavailable".
            logger.error("Failed to load scorer plugin %s:%s: %s", plugin_id, entrypoint, e, exc_info=True)
            return None

    @classmethod
    def clear_cache(cls) -> None:
        """Clear the plugin cache (useful for testing)."""
        cls._plugin_cache.clear()

View file

@ -0,0 +1,64 @@
from __future__ import annotations
import re
from collections.abc import Mapping
from typing import Any
from sqlalchemy.orm import Session
from extensions.ext_database import db
from models.challenge import Challenge, ChallengeAttempt
class ChallengeService:
    """Evaluation and persistence helpers for single-player challenges."""

    @staticmethod
    def evaluate_outcome(output_text: str, cfg: Mapping[str, Any]) -> tuple[bool, dict[str, Any]]:
        """Decide whether *output_text* satisfies the challenge success rule.

        Supported modes: ``regex`` (case-insensitive, multiline search) and
        ``contains`` (case-insensitive substring). Returns a
        ``(succeeded, details)`` tuple; unknown modes or a missing pattern
        yield ``(False, ...)``.
        """
        mode = cfg.get("success_type", "regex")
        pattern = cfg.get("success_pattern")
        if mode == "regex" and pattern:
            try:
                matched = re.search(pattern, output_text, flags=re.IGNORECASE | re.MULTILINE) is not None
            except re.error as exc:
                # Invalid pattern counts as a failed evaluation, not a crash.
                return False, {"mode": "regex", "error": f"invalid_regex: {exc}"}
            return matched, {"mode": "regex", "matched": matched}
        if mode == "contains" and pattern:
            hit = pattern.lower() in output_text.lower()
            return hit, {"mode": "contains"}
        return False, {"mode": mode, "info": "no_pattern_or_unsupported"}

    @staticmethod
    def record_attempt(
        *,
        tenant_id: str,
        challenge_id: str,
        end_user_id: str | None,
        account_id: str | None,
        workflow_run_id: str | None,
        succeeded: bool,
        score: float | None = None,
        judge_rating: int | None = None,
        judge_feedback: str | None = None,
        judge_output_raw: dict[str, Any] | None = None,
        tokens_total: int | None = None,
        elapsed_ms: int | None = None,
        session: Session | None = None,
    ) -> ChallengeAttempt:
        """Persist one ChallengeAttempt row and commit it immediately.

        Uses *session* when provided, otherwise the global ``db.session``.
        """
        target_session = session or db.session
        attempt = ChallengeAttempt()
        field_values = {
            "tenant_id": tenant_id,
            "challenge_id": challenge_id,
            "end_user_id": end_user_id,
            "account_id": account_id,
            "workflow_run_id": workflow_run_id,
            "succeeded": succeeded,
            "score": score,
            "judge_rating": judge_rating,
            "judge_feedback": judge_feedback,
            "judge_output_raw": judge_output_raw,
            "tokens_total": tokens_total,
            "elapsed_ms": elapsed_ms,
        }
        for field, value in field_values.items():
            setattr(attempt, field, value)
        target_session.add(attempt)
        target_session.commit()
        return attempt

View file

@ -0,0 +1,91 @@
from __future__ import annotations
from typing import Any
from sqlalchemy.orm import Session
from extensions.ext_database import db
from models.red_blue import RedBlueChallenge, TeamPairing, TeamSubmission
class RedBlueService:
    """Persistence helpers for red/blue adversarial challenges."""

    @staticmethod
    def submit_prompt(
        *,
        challenge_id: str,
        tenant_id: str,
        team: str,
        prompt: str,
        account_id: str | None,
        end_user_id: str | None,
        session: Session | None = None,
    ) -> TeamSubmission:
        """Store a team's prompt submission and commit it immediately."""
        sess = session or db.session
        submission = TeamSubmission()
        for attr_name, attr_value in (
            ("red_blue_challenge_id", challenge_id),
            ("tenant_id", tenant_id),
            ("team", team),
            ("prompt", prompt),
            ("account_id", account_id),
            ("end_user_id", end_user_id),
        ):
            setattr(submission, attr_name, attr_value)
        sess.add(submission)
        sess.commit()
        return submission

    @staticmethod
    def select_counterparty_submission(
        *,
        challenge: RedBlueChallenge,
        team: str,
        session: Session | None = None,
    ) -> TeamSubmission | None:
        """Return the newest active submission from the opposing team, if any.

        Selection policy is deliberately simple: most recently created active
        submission on the other side of the pairing.
        """
        sess = session or db.session
        opposing_team = "blue" if team == "red" else "red"
        query = sess.query(TeamSubmission).filter(
            TeamSubmission.red_blue_challenge_id == challenge.id,
            TeamSubmission.team == opposing_team,
            TeamSubmission.active.is_(True),
        )
        return query.order_by(TeamSubmission.created_at.desc()).first()

    @staticmethod
    def record_pairing(
        *,
        challenge_id: str,
        tenant_id: str,
        attack_submission_id: str | None,
        defense_submission_id: str | None,
        judge_output_raw: dict[str, Any] | None,
        categories: dict[str, Any] | None,
        judge_rating: int | None,
        judge_feedback: str | None,
        red_points: float,
        blue_points: float,
        tokens_total: int | None,
        elapsed_ms: int | None,
        session: Session | None = None,
    ) -> TeamPairing:
        """Persist the judged outcome of one attack/defense pairing (commits)."""
        sess = session or db.session
        pairing_row = TeamPairing()
        field_values = {
            "red_blue_challenge_id": challenge_id,
            "tenant_id": tenant_id,
            "attack_submission_id": attack_submission_id,
            "defense_submission_id": defense_submission_id,
            "judge_output_raw": judge_output_raw,
            "categories": categories,
            "judge_rating": judge_rating,
            "judge_feedback": judge_feedback,
            "red_points": red_points,
            "blue_points": blue_points,
            "tokens_total": tokens_total,
            "elapsed_ms": elapsed_ms,
        }
        for attr_name, attr_value in field_values.items():
            setattr(pairing_row, attr_name, attr_value)
        sess.add(pairing_row)
        sess.commit()
        return pairing_row

View file

@ -0,0 +1,144 @@
# Custom Scorer Plugins
This directory contains custom scorer plugins for challenge leaderboards.
## Overview
Scorers compute numeric scores from challenge attempt metrics (tokens, time, rating, success) for ranking on leaderboards when `scoring_strategy = 'custom'`.
## Built-in Scorers
### WeightedScorer
**Entrypoint:** `services.scorers.weighted:WeightedScorer`
Computes a weighted score combining multiple metrics with configurable bonuses and penalties.
**Formula:**
```
score = success_bonus
+ (rating × rating_weight)
- (elapsed_seconds × time_penalty)
- (tokens × token_penalty)
```
**Configuration:**
- `success_bonus` (float, default: 100): Base points for successful attempts
- `rating_weight` (float, default: 10): Multiplier for judge rating (0-10)
- `time_penalty` (float, default: 1.0): Penalty per second elapsed
- `token_penalty` (float, default: 0.01): Penalty per token used
**Example Configuration:**
```json
{
"success_bonus": 100.0,
"rating_weight": 10.0,
"time_penalty": 1.0,
"token_penalty": 0.01
}
```
**Example Challenge Setup (via API):**
```python
{
"name": "Advanced Prompt Challenge",
"scoring_strategy": "custom",
"scoring_plugin_id": "builtin.weighted_scorer",
"scoring_entrypoint": "services.scorers.weighted:WeightedScorer",
"scoring_config": {
"success_bonus": 100.0,
"rating_weight": 15.0,
"time_penalty": 0.5,
"token_penalty": 0.02
}
}
```
## Creating Custom Scorers
### 1. Implement the ScorerProtocol
Create a new file in this directory (e.g., `custom.py`):
```python
from typing import Any
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
class MyCustomScorer:
def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
# Access metrics
succeeded = metrics.get('succeeded', False)
tokens = metrics.get('tokens_total', 0)
elapsed_ms = metrics.get('elapsed_ms', 0)
rating = metrics.get('rating', 0)
# Access configuration
multiplier = config.get('multiplier', 1.0)
# Compute score
score = (rating * multiplier) if succeeded else 0.0
return {
'score': score,
'details': { # optional
'multiplier_used': multiplier
}
}
```
### 2. Register in Challenge
Set the challenge's scoring fields:
```python
challenge.scoring_strategy = 'custom'
challenge.scoring_plugin_id = 'my_custom_scorer'
challenge.scoring_entrypoint = 'services.scorers.custom:MyCustomScorer'
challenge.scoring_config = {
'multiplier': 2.0
}
```
### 3. Testing
Create tests in `api/tests/unit_tests/services/` following the pattern in `test_challenge_scorer_service.py`.
## Protocol Reference
### Input Types
**AttemptMetrics:**
- `succeeded` (bool): Whether the challenge was passed
- `tokens_total` (int | None): Total tokens used
- `elapsed_ms` (int | None): Time taken in milliseconds
- `rating` (int | None): Judge rating (0-10)
- `created_at` (int | None): Timestamp in epoch milliseconds
**ScoringContext:**
- `tenant_id` (str): Tenant identifier
- `app_id` (str): Application identifier
- `workflow_id` (str): Workflow identifier
- `challenge_id` (str): Challenge identifier
- `end_user_id` (str | None): End user identifier (if available)
- `timeout_ms` (int): Maximum execution time
### Output Type
**ScoringResult:**
- `score` (float, required): Computed numeric score
- `details` (dict[str, Any] | None, optional): Additional scoring details
## Error Handling
- Scorers must return a dict with a `score` key
- Exceptions are caught and logged; the attempt is recorded with `score=None`
- Scorers are executed with a timeout (default: 5s)
- Scorers should never return negative scores; use `max(score, 0.0)` to clamp
## Best Practices
1. **Keep it simple**: Scoring should be fast and deterministic
2. **Validate config**: Check configuration values and provide defaults
3. **Clamp scores**: Ensure scores are non-negative
4. **Document formula**: Clearly explain how your scorer works
5. **Test edge cases**: Test with missing metrics, zeros, nulls

View file

@ -0,0 +1 @@
"""Built-in scorer plugins."""

View file

@ -0,0 +1,66 @@
"""
Weighted scorer plugin.
Computes a weighted score based on success bonus, rating, elapsed time, and token usage.
"""
from __future__ import annotations
from typing import Any
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
class WeightedScorer:
    """Weighted scorer: success bonus plus rating reward, minus time/token costs.

    Configuration keys (all optional):
        success_bonus (float): base points for a successful attempt (default 100).
        rating_weight (float): multiplier applied to the judge rating (default 10).
        time_penalty (float): points deducted per elapsed second (default 1.0).
        token_penalty (float): points deducted per token used (default 0.01).

    Formula:
        score = success_bonus
              + (rating * rating_weight)
              - (elapsed_seconds * time_penalty)
              - (tokens * token_penalty)

    The final score is clamped so it is never negative.
    """

    def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
        """Return the weighted score together with a per-component breakdown."""
        # Success bonus applies only to passing attempts.
        success_points = config.get("success_bonus", 100.0) if metrics.get("succeeded") else 0.0

        # Judge rating contribution; missing/None rating counts as zero.
        rating_points = (metrics.get("rating") or 0) * config.get("rating_weight", 10.0)

        # Elapsed-time cost, converted from milliseconds to seconds.
        elapsed_seconds = (metrics.get("elapsed_ms") or 0) / 1000.0
        time_cost = elapsed_seconds * config.get("time_penalty", 1.0)

        # Token-usage cost.
        token_cost = (metrics.get("tokens_total") or 0) * config.get("token_penalty", 0.01)

        raw_total = success_points + rating_points - time_cost - token_cost
        return {
            "score": max(raw_total, 0.0),  # clamp: never negative
            "details": {
                "base": success_points,
                "rating_contribution": rating_points,
                "time_penalty": time_cost,
                "token_penalty": token_cost,
            },
        }

View file

@ -0,0 +1,14 @@
from __future__ import annotations
from flask.testing import FlaskClient
class TestWebChallenges:
    """Web API smoke tests for the public challenge endpoints."""

    def test_list_and_detail(self, test_client_with_containers: FlaskClient):
        """Listing challenges returns HTTP 200 with a success envelope."""
        response = test_client_with_containers.get("/api/web/challenges")
        assert response.status_code == 200
        payload = response.get_json()
        assert payload["result"] == "success"

View file

@ -0,0 +1,13 @@
from __future__ import annotations
from flask.testing import FlaskClient
class TestWebRedBlueChallenges:
    """Web API smoke tests for the public red/blue challenge endpoints."""

    def test_list(self, test_client_with_containers: FlaskClient):
        """Listing red/blue challenges returns HTTP 200 with a success envelope."""
        response = test_client_with_containers.get("/api/web/red-blue-challenges")
        assert response.status_code == 200
        payload = response.get_json()
        assert payload["result"] == "success"

View file

@ -0,0 +1,144 @@
"""Tests for ChallengeScorerService."""
from __future__ import annotations
import pytest
from services.challenge_scorer_service import ChallengeScorerService
class TestChallengeScorerService:
    """Exercise custom scorer plugin loading, scoring, and error paths."""

    # Shared identity of the built-in weighted scorer plugin under test.
    PLUGIN_ID = "builtin.weighted_scorer"
    ENTRYPOINT = "services.scorers.weighted:WeightedScorer"
    STANDARD_CONFIG = {
        "success_bonus": 100.0,
        "rating_weight": 10.0,
        "time_penalty": 1.0,
        "token_penalty": 0.01,
    }

    def _score(self, metrics, ctx):
        """Run the weighted scorer plugin with the standard config."""
        return ChallengeScorerService.score_with_plugin(
            scorer_plugin_id=self.PLUGIN_ID,
            scorer_entrypoint=self.ENTRYPOINT,
            metrics=metrics,
            config=dict(self.STANDARD_CONFIG),
            ctx=ctx,
        )

    def test_weighted_scorer_success(self):
        """A passing attempt earns the success bonus plus rating, minus penalties."""
        metrics = {
            "succeeded": True,
            "tokens_total": 1000,
            "elapsed_ms": 5000,  # 5 seconds
            "rating": 8,
            "created_at": 1730000000000,
        }
        ctx = {
            "tenant_id": "test-tenant",
            "app_id": "test-app",
            "workflow_id": "test-workflow",
            "challenge_id": "test-challenge",
            "timeout_ms": 5000,
        }
        result = self._score(metrics, ctx)
        # 100 (success) + 80 (8 * 10 rating) - 5 (5s * 1.0) - 10 (1000 * 0.01) = 165
        assert result["score"] == 165.0
        assert "details" in result
        breakdown = result["details"]
        assert breakdown["base"] == 100.0
        assert breakdown["rating_contribution"] == 80.0
        assert breakdown["time_penalty"] == 5.0
        assert breakdown["token_penalty"] == 10.0

    def test_weighted_scorer_failure(self):
        """A failing attempt earns no success bonus, only the rating minus penalties."""
        metrics = {
            "succeeded": False,
            "tokens_total": 500,
            "elapsed_ms": 2000,  # 2 seconds
            "rating": 3,
            "created_at": 1730000000000,
        }
        ctx = {
            "tenant_id": "test-tenant",
            "app_id": "test-app",
            "challenge_id": "test-challenge",
            "timeout_ms": 5000,
        }
        result = self._score(metrics, ctx)
        # 0 (no success bonus) + 30 (3 * 10) - 2 (2s * 1.0) - 5 (500 * 0.01) = 23
        assert result["score"] == 23.0

    def test_weighted_scorer_minimum_zero(self):
        """Heavy penalties clamp the score at zero rather than going negative."""
        metrics = {
            "succeeded": False,
            "tokens_total": 10000,  # high token count
            "elapsed_ms": 30000,  # 30 seconds
            "rating": 1,
            "created_at": 1730000000000,
        }
        ctx = {
            "tenant_id": "test-tenant",
            "app_id": "test-app",
            "challenge_id": "test-challenge",
            "timeout_ms": 5000,
        }
        result = self._score(metrics, ctx)
        # 0 + 10 - 30 - 100 = -120, clamped to 0
        assert result["score"] == 0.0

    def test_scorer_with_missing_plugin(self):
        """A non-importable entrypoint raises a ValueError mentioning the failure."""
        with pytest.raises(ValueError, match="Failed to load scorer plugin"):
            ChallengeScorerService.score_with_plugin(
                scorer_plugin_id="nonexistent",
                scorer_entrypoint="nonexistent.module:NonexistentScorer",
                metrics={},
                config={},
                ctx={"timeout_ms": 5000},
            )

    def test_scorer_with_invalid_entrypoint(self):
        """Missing plugin id and entrypoint raise a ValueError up front."""
        with pytest.raises(ValueError, match="scorer_plugin_id and scorer_entrypoint are required"):
            ChallengeScorerService.score_with_plugin(
                scorer_plugin_id=None,
                scorer_entrypoint=None,
                metrics={},
                config={},
                ctx={"timeout_ms": 5000},
            )

View file

@ -0,0 +1,40 @@
from __future__ import annotations
from models.challenge import ChallengeAttempt
from services.challenge_service import ChallengeService
def test_evaluate_outcome_regex_match():
    """A case-insensitive regex hit counts as success and reports regex mode."""
    succeeded, details = ChallengeService.evaluate_outcome(
        "Hello SECRET",
        {"success_type": "regex", "success_pattern": "secret"},
    )
    assert succeeded is True
    assert details.get("mode") == "regex"
def test_evaluate_outcome_contains():
    """Plain substring containment counts as success in 'contains' mode."""
    succeeded, _details = ChallengeService.evaluate_outcome(
        "hello world",
        {"success_type": "contains", "success_pattern": "world"},
    )
    assert succeeded is True
def test_record_attempt_creates_row(mocker):
    """record_attempt adds and commits exactly one row on the given session."""
    fake_session = mocker.MagicMock()  # stand-in for a SQLAlchemy session
    attempt = ChallengeService.record_attempt(
        tenant_id="t1",
        challenge_id="c1",
        end_user_id=None,
        account_id=None,
        workflow_run_id=None,
        succeeded=True,
        score=10.0,
        session=fake_session,
    )
    assert isinstance(attempt, ChallengeAttempt)
    fake_session.add.assert_called_once()
    fake_session.commit.assert_called_once()

View file

@ -0,0 +1,34 @@
from __future__ import annotations
from types import SimpleNamespace
from services.red_blue_service import RedBlueService
def test_submit_prompt_creates_submission(mocker):
    """submit_prompt persists the submission via the provided session."""
    fake_session = mocker.MagicMock()  # stand-in for a SQLAlchemy session
    submission = RedBlueService.submit_prompt(
        challenge_id="cid",
        tenant_id="tid",
        team="red",
        prompt="attack",
        account_id="aid",
        end_user_id="eid",
        session=fake_session,
    )
    assert submission.team == "red"
    fake_session.add.assert_called_once()
    fake_session.commit.assert_called_once()
def test_select_counterparty_submission_latest_active(mocker):
c = SimpleNamespace(id="cid")
session = mocker.MagicMock()
qs = (
session.query.return_value.filter.return_value.order_by.return_value
)
qs.first.return_value = SimpleNamespace(id="subid", team="blue")
sub = RedBlueService.select_counterparty_submission(challenge=c, team="red", session=session)
assert sub.team == "blue"