feat: add challenge and red-blue competitions across API and web
This commit is contained in:
parent
f5161d9add
commit
8fd3c4bb64
77 changed files with 5355 additions and 24 deletions
|
|
@ -129,6 +129,10 @@ from .workspace import (
|
|||
workspace,
|
||||
)
|
||||
|
||||
# Import custom challenge controllers
|
||||
from . import challenges as challenges
|
||||
from . import red_blue_challenges as red_blue_challenges
|
||||
|
||||
api.add_namespace(console_ns)
|
||||
|
||||
__all__ = [
|
||||
|
|
@ -204,4 +208,6 @@ __all__ = [
|
|||
"workflow_run",
|
||||
"workflow_statistic",
|
||||
"workspace",
|
||||
"challenges",
|
||||
"red_blue_challenges",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ from controllers.console.app.wraps import get_app_model
|
|||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
cloud_edition_billing_resource_check,
|
||||
enterprise_license_required,
|
||||
setup_required,
|
||||
)
|
||||
from core.ops.ops_trace_manager import OpsTraceManager
|
||||
|
|
@ -53,7 +52,6 @@ class AppListApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def get(self):
|
||||
"""Get app list"""
|
||||
|
||||
|
|
@ -166,7 +164,6 @@ class AppApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
@get_app_model
|
||||
@marshal_with(app_detail_fields_with_site)
|
||||
def get(self, app_model):
|
||||
|
|
|
|||
154
api/controllers/console/challenges.py
Normal file
154
api/controllers/console/challenges.py
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from flask_restx import Resource, reqparse
|
||||
|
||||
from controllers.console import console_ns as api
|
||||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
setup_required,
|
||||
)
|
||||
from libs.login import login_required
|
||||
from extensions.ext_database import db
|
||||
from libs.login import current_user
|
||||
from models.challenge import Challenge
|
||||
|
||||
|
||||
@api.route("/challenges")
class ChallengeListCreateApi(Resource):
    """Console list/create endpoints for workspace-scoped challenges."""

    @api.doc("list_challenges")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """List all challenges of the caller's current workspace, newest first."""
        tenant_id = current_user.current_tenant_id
        if not tenant_id:
            # no active workspace selected; return empty list to avoid leaking data
            return {"result": "success", "data": []}
        rows = (
            db.session.query(Challenge)
            .filter(Challenge.tenant_id == tenant_id)
            .order_by(Challenge.created_at.desc())
            .all()
        )
        return {
            "result": "success",
            "data": [
                {
                    "id": r.id,
                    "name": r.name,
                    "description": r.description,
                    "goal": r.goal,
                    "is_active": r.is_active,
                    "success_type": r.success_type,
                    "success_pattern": r.success_pattern,
                    "scoring_strategy": r.scoring_strategy,
                    "app_id": r.app_id,
                    "workflow_id": r.workflow_id,
                }
                for r in rows
            ],
        }

    @api.doc("create_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create a challenge in the caller's current workspace.

        The ``tenant_id`` body field is still accepted for backward
        compatibility but is ignored: trusting a client-supplied tenant id
        would let a caller create records in another workspace.
        """
        parser = reqparse.RequestParser()
        parser.add_argument("tenant_id", type=str, required=False, location="json")
        parser.add_argument("app_id", type=str, required=True, location="json")
        parser.add_argument("workflow_id", type=str, required=False, location="json")
        parser.add_argument("name", type=str, required=True, location="json")
        parser.add_argument("description", type=str, required=False, location="json")
        parser.add_argument("goal", type=str, required=False, location="json")
        parser.add_argument("success_type", type=str, required=False, location="json")
        parser.add_argument("success_pattern", type=str, required=False, location="json")
        parser.add_argument("scoring_strategy", type=str, required=False, location="json")
        parser.add_argument("is_active", type=bool, required=False, location="json")
        args = parser.parse_args()

        tenant_id = current_user.current_tenant_id
        if not tenant_id:
            # Creating without a workspace would produce an orphaned row.
            return {"result": "bad_request", "message": "no active workspace"}, 400

        c = Challenge()
        c.tenant_id = tenant_id
        c.app_id = args["app_id"]
        # Convert empty string to None for the optional UUID column
        c.workflow_id = args.get("workflow_id") or None
        c.name = args["name"]
        c.description = args.get("description")
        c.goal = args.get("goal")
        if args.get("success_type"):
            c.success_type = args["success_type"]
        c.success_pattern = args.get("success_pattern")
        if args.get("scoring_strategy"):
            c.scoring_strategy = args["scoring_strategy"]
        if args.get("is_active") is not None:
            c.is_active = args["is_active"]
        db.session.add(c)
        db.session.commit()
        return {"result": "success", "data": {"id": c.id}}, 201
|
||||
|
||||
|
||||
@api.route("/challenges/<uuid:challenge_id>")
class ChallengeDetailApi(Resource):
    """Console read/update/delete for a single challenge.

    All handlers are scoped to the caller's current workspace: a challenge
    that exists but belongs to another tenant is reported as 404 so that
    record ids are not disclosed across tenants.
    """

    @staticmethod
    def _get_owned_challenge(challenge_id):
        """Fetch the challenge only if it belongs to the caller's workspace.

        The original lookup trusted the UUID alone, which let any console
        user read or mutate other tenants' challenges (IDOR).
        """
        c = db.session.get(Challenge, str(challenge_id))
        if not c or c.tenant_id != current_user.current_tenant_id:
            return None
        return c

    @api.doc("get_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, challenge_id):
        """Return full detail of one challenge, or 404 if absent / not owned."""
        c = self._get_owned_challenge(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        return {
            "result": "success",
            "data": {
                "id": c.id,
                "name": c.name,
                "description": c.description,
                "goal": c.goal,
                "is_active": c.is_active,
                "success_type": c.success_type,
                "success_pattern": c.success_pattern,
                "scoring_strategy": c.scoring_strategy,
                "app_id": c.app_id,
                "workflow_id": c.workflow_id,
            },
        }

    @api.doc("update_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, challenge_id):
        """Partially update a challenge (name/description/goal/is_active)."""
        c = self._get_owned_challenge(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        parser = reqparse.RequestParser()
        parser.add_argument("name", type=str, required=False, location="json")
        parser.add_argument("description", type=str, required=False, location="json")
        parser.add_argument("goal", type=str, required=False, location="json")
        parser.add_argument("is_active", type=bool, required=False, location="json")
        args = parser.parse_args()
        if args.get("name"):
            c.name = args["name"]
        if args.get("description") is not None:
            c.description = args["description"]
        if args.get("goal") is not None:
            c.goal = args["goal"]
        if args.get("is_active") is not None:
            c.is_active = bool(args["is_active"])
        db.session.commit()
        return {"result": "success"}

    @api.doc("delete_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, challenge_id):
        """Delete a challenge owned by the caller's workspace."""
        c = self._get_owned_challenge(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        db.session.delete(c)
        db.session.commit()
        # NOTE(review): most HTTP stacks drop the body on a 204 response;
        # kept as-is for compatibility with existing clients.
        return {"result": "success"}, 204
|
||||
|
||||
|
||||
|
|
@ -5,7 +5,7 @@ from flask_restx import Resource, marshal_with, reqparse
|
|||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.console import console_ns
|
||||
from controllers.console.wraps import account_initialization_required, enterprise_license_required, setup_required
|
||||
from controllers.console.wraps import account_initialization_required, setup_required
|
||||
from fields.dataset_fields import dataset_metadata_fields
|
||||
from libs.login import login_required
|
||||
from services.dataset_service import DatasetService
|
||||
|
|
@ -21,7 +21,6 @@ class DatasetMetadataCreateApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
@marshal_with(dataset_metadata_fields)
|
||||
def post(self, dataset_id):
|
||||
parser = reqparse.RequestParser()
|
||||
|
|
@ -42,7 +41,6 @@ class DatasetMetadataCreateApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def get(self, dataset_id):
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
|
|
@ -56,7 +54,6 @@ class DatasetMetadataApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
@marshal_with(dataset_metadata_fields)
|
||||
def patch(self, dataset_id, metadata_id):
|
||||
parser = reqparse.RequestParser()
|
||||
|
|
@ -77,7 +74,6 @@ class DatasetMetadataApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def delete(self, dataset_id, metadata_id):
|
||||
dataset_id_str = str(dataset_id)
|
||||
metadata_id_str = str(metadata_id)
|
||||
|
|
@ -95,7 +91,6 @@ class DatasetMetadataBuiltInFieldApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def get(self):
|
||||
built_in_fields = MetadataService.get_built_in_fields()
|
||||
return {"fields": built_in_fields}, 200
|
||||
|
|
@ -106,7 +101,6 @@ class DatasetMetadataBuiltInFieldActionApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def post(self, dataset_id, action: Literal["enable", "disable"]):
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
|
|
@ -126,7 +120,6 @@ class DocumentMetadataEditApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def post(self, dataset_id):
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ from sqlalchemy.orm import Session
|
|||
from controllers.console import console_ns
|
||||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
enterprise_license_required,
|
||||
knowledge_pipeline_publish_enabled,
|
||||
setup_required,
|
||||
)
|
||||
|
|
@ -37,7 +36,6 @@ class PipelineTemplateListApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def get(self):
|
||||
type = request.args.get("type", default="built-in", type=str)
|
||||
language = request.args.get("language", default="en-US", type=str)
|
||||
|
|
@ -51,7 +49,6 @@ class PipelineTemplateDetailApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def get(self, template_id: str):
|
||||
type = request.args.get("type", default="built-in", type=str)
|
||||
rag_pipeline_service = RagPipelineService()
|
||||
|
|
@ -64,7 +61,6 @@ class CustomizedPipelineTemplateApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def patch(self, template_id: str):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument(
|
||||
|
|
@ -95,7 +91,6 @@ class CustomizedPipelineTemplateApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def delete(self, template_id: str):
|
||||
RagPipelineService.delete_customized_pipeline_template(template_id)
|
||||
return 200
|
||||
|
|
@ -103,7 +98,6 @@ class CustomizedPipelineTemplateApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def post(self, template_id: str):
|
||||
with Session(db.engine) as session:
|
||||
template = (
|
||||
|
|
@ -120,7 +114,6 @@ class PublishCustomizedPipelineTemplateApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
@knowledge_pipeline_publish_enabled
|
||||
def post(self, pipeline_id: str):
|
||||
parser = reqparse.RequestParser()
|
||||
|
|
|
|||
145
api/controllers/console/red_blue_challenges.py
Normal file
145
api/controllers/console/red_blue_challenges.py
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from flask_restx import Resource, reqparse
|
||||
|
||||
from controllers.console import console_ns as api
|
||||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
setup_required,
|
||||
)
|
||||
from extensions.ext_database import db
|
||||
from libs.login import current_user, login_required
|
||||
from models.red_blue import RedBlueChallenge, TeamPairing
|
||||
|
||||
|
||||
@api.route("/red-blue-challenges")
class RedBlueListCreateApi(Resource):
    """Console list/create endpoints for red-vs-blue team challenges."""

    @api.doc("list_red_blue_challenges")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """List red/blue challenges of the current workspace, newest first."""
        tenant_id = current_user.current_tenant_id
        if not tenant_id:
            # no active workspace selected; return empty list to avoid leaking data
            return {"result": "success", "data": []}
        rows = (
            db.session.query(RedBlueChallenge)
            .filter(RedBlueChallenge.tenant_id == tenant_id)
            .order_by(RedBlueChallenge.created_at.desc())
            .all()
        )
        return {
            "result": "success",
            "data": [
                {
                    "id": r.id,
                    "name": r.name,
                    "description": r.description,
                    "is_active": r.is_active,
                }
                for r in rows
            ],
        }

    @api.doc("create_red_blue_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create a red/blue challenge in the caller's current workspace.

        ``tenant_id`` used to be required and was taken from the request
        body; it is now optional and ignored, because trusting a
        client-supplied tenant id would let a caller create records in
        another workspace. Old clients that still send it keep working.
        """
        parser = reqparse.RequestParser()
        parser.add_argument("tenant_id", type=str, required=False, location="json")
        parser.add_argument("app_id", type=str, required=True, location="json")
        parser.add_argument("name", type=str, required=True, location="json")
        parser.add_argument("description", type=str, required=False, location="json")
        parser.add_argument("judge_suite", type=dict, required=True, location="json")
        args = parser.parse_args()

        tenant_id = current_user.current_tenant_id
        if not tenant_id:
            return {"result": "bad_request", "message": "no active workspace"}, 400

        c = RedBlueChallenge()
        c.tenant_id = tenant_id
        c.app_id = args["app_id"]
        c.name = args["name"]
        c.description = args.get("description")
        c.judge_suite = args["judge_suite"]
        db.session.add(c)
        db.session.commit()
        return {"result": "success", "data": {"id": c.id}}, 201
|
||||
|
||||
|
||||
@api.route("/red-blue-challenges/<uuid:challenge_id>")
class RedBlueDetailApi(Resource):
    """Console read/update/delete for a single red/blue challenge.

    Handlers report 404 both for missing records and for records owned by
    another workspace, so ids are not disclosed across tenants.
    """

    @staticmethod
    def _get_owned_challenge(challenge_id):
        """Fetch the challenge only if it belongs to the caller's workspace.

        The original lookup trusted the UUID alone, which let any console
        user read or mutate other tenants' challenges (IDOR).
        """
        c = db.session.get(RedBlueChallenge, str(challenge_id))
        if not c or c.tenant_id != current_user.current_tenant_id:
            return None
        return c

    @api.doc("get_red_blue_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, challenge_id):
        """Return a single red/blue challenge owned by the caller's workspace."""
        c = self._get_owned_challenge(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        return {
            "result": "success",
            "data": {"id": c.id, "name": c.name, "description": c.description, "is_active": c.is_active},
        }

    @api.doc("update_red_blue_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, challenge_id):
        """Partially update a red/blue challenge (name/description/is_active)."""
        c = self._get_owned_challenge(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        parser = reqparse.RequestParser()
        parser.add_argument("name", type=str, required=False, location="json")
        parser.add_argument("description", type=str, required=False, location="json")
        parser.add_argument("is_active", type=bool, required=False, location="json")
        args = parser.parse_args()
        if args.get("name"):
            c.name = args["name"]
        if args.get("description") is not None:
            c.description = args["description"]
        if args.get("is_active") is not None:
            c.is_active = bool(args["is_active"])
        db.session.commit()
        return {"result": "success"}

    @api.doc("delete_red_blue_challenge")
    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, challenge_id):
        """Delete a red/blue challenge owned by the caller's workspace."""
        c = self._get_owned_challenge(challenge_id)
        if not c:
            return {"result": "not_found"}, 404
        db.session.delete(c)
        db.session.commit()
        # NOTE(review): most HTTP stacks drop the body on a 204 response;
        # kept as-is for compatibility with existing clients.
        return {"result": "success"}, 204
|
||||
|
||||
|
||||
@api.route("/red-blue-challenges/<uuid:challenge_id>/pairings")
class RedBluePairingsApi(Resource):
    """Console listing of recent team pairings for one red/blue challenge."""

    @api.doc("list_red_blue_pairings")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, challenge_id):
        """Return up to 100 most recent pairings for the given challenge.

        The challenge must exist and belong to the caller's workspace;
        the original returned pairings for any UUID, letting console users
        read other tenants' results (IDOR).
        """
        challenge = db.session.get(RedBlueChallenge, str(challenge_id))
        if not challenge or challenge.tenant_id != current_user.current_tenant_id:
            return {"result": "not_found"}, 404
        rows = (
            db.session.query(TeamPairing)
            .filter(TeamPairing.red_blue_challenge_id == str(challenge_id))
            .order_by(TeamPairing.created_at.desc())
            .limit(100)
            .all()
        )
        return {
            "result": "success",
            "data": [
                {
                    "id": r.id,
                    "red_points": r.red_points,
                    "blue_points": r.blue_points,
                    "judge_rating": r.judge_rating,
                    "created_at": r.created_at.isoformat() if hasattr(r.created_at, "isoformat") else None,
                }
                for r in rows
            ],
        }
|
||||
|
||||
|
|
@ -29,7 +29,6 @@ from controllers.console.wraps import (
|
|||
account_initialization_required,
|
||||
cloud_edition_billing_enabled,
|
||||
enable_change_email,
|
||||
enterprise_license_required,
|
||||
only_edition_cloud,
|
||||
setup_required,
|
||||
)
|
||||
|
|
@ -102,7 +101,6 @@ class AccountProfileApi(Resource):
|
|||
@login_required
|
||||
@account_initialization_required
|
||||
@marshal_with(account_fields)
|
||||
@enterprise_license_required
|
||||
def get(self):
|
||||
if not isinstance(current_user, Account):
|
||||
raise ValueError("Invalid user account")
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from configs import dify_config
|
|||
from controllers.console import api
|
||||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
enterprise_license_required,
|
||||
setup_required,
|
||||
)
|
||||
from core.mcp.auth.auth_flow import auth, handle_callback
|
||||
|
|
@ -667,7 +666,6 @@ class ToolLabelsApi(Resource):
|
|||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@enterprise_license_required
|
||||
def get(self):
|
||||
return jsonable_encoder(ToolLabelsService.list_tool_labels())
|
||||
|
||||
|
|
|
|||
|
|
@ -24,12 +24,15 @@ from . import (
|
|||
files,
|
||||
forgot_password,
|
||||
login,
|
||||
register,
|
||||
message,
|
||||
passport,
|
||||
remote_files,
|
||||
saved_message,
|
||||
site,
|
||||
workflow,
|
||||
challenges,
|
||||
red_blue_challenges,
|
||||
)
|
||||
|
||||
api.add_namespace(web_ns)
|
||||
|
|
@ -45,6 +48,7 @@ __all__ = [
|
|||
"files",
|
||||
"forgot_password",
|
||||
"login",
|
||||
"register",
|
||||
"message",
|
||||
"passport",
|
||||
"remote_files",
|
||||
|
|
@ -52,4 +56,6 @@ __all__ = [
|
|||
"site",
|
||||
"web_ns",
|
||||
"workflow",
|
||||
"challenges",
|
||||
"red_blue_challenges",
|
||||
]
|
||||
|
|
|
|||
121
api/controllers/web/challenges.py
Normal file
121
api/controllers/web/challenges.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from flask_restx import Resource
|
||||
|
||||
from controllers.web import web_ns
|
||||
from extensions.ext_database import db
|
||||
from sqlalchemy import select
|
||||
|
||||
from models.challenge import Challenge, ChallengeAttempt
|
||||
from models.model import App, Site
|
||||
|
||||
|
||||
@web_ns.route("/challenges")
class ChallengeListApi(Resource):
    """Public listing of active challenges, newest first."""

    def get(self):
        """Return every active challenge with its linked app mode and site code."""
        active_challenges = (
            db.session.query(Challenge)
            .filter(Challenge.is_active.is_(True))
            .order_by(Challenge.created_at.desc())
            .all()
        )
        data = []
        for challenge in active_challenges:
            linked_app = None
            site_code = None
            if challenge.app_id:
                linked_app = db.session.get(App, challenge.app_id)
                site = db.session.execute(
                    select(Site).where(Site.app_id == challenge.app_id, Site.status == "normal")
                ).scalar_one_or_none()
                if site is not None:
                    site_code = site.code
            data.append(
                {
                    "id": challenge.id,
                    "name": challenge.name,
                    "description": challenge.description,
                    "goal": challenge.goal,
                    "app_id": challenge.app_id,
                    "workflow_id": challenge.workflow_id,
                    "app_mode": linked_app.mode if linked_app else None,
                    "app_site_code": site_code,
                }
            )
        return {"result": "success", "data": data}
|
||||
|
||||
|
||||
@web_ns.route("/challenges/<uuid:challenge_id>")
class ChallengeDetailApi(Resource):
    """Public detail view of a single challenge."""

    def get(self, challenge_id):
        """Return one challenge plus its linked app mode and site code, or 404."""
        challenge = db.session.get(Challenge, str(challenge_id))
        if challenge is None:
            return {"result": "not_found"}, 404

        linked_app = None
        site_code = None
        if challenge.app_id:
            linked_app = db.session.get(App, challenge.app_id)
            site = db.session.execute(
                select(Site).where(Site.app_id == challenge.app_id, Site.status == "normal")
            ).scalar_one_or_none()
            if site is not None:
                site_code = site.code

        return {
            "result": "success",
            "data": {
                "id": challenge.id,
                "name": challenge.name,
                "description": challenge.description,
                "goal": challenge.goal,
                "is_active": challenge.is_active,
                "app_id": challenge.app_id,
                "workflow_id": challenge.workflow_id,
                "app_mode": linked_app.mode if linked_app else None,
                "app_site_code": site_code,
            },
        }
|
||||
|
||||
|
||||
@web_ns.route("/challenges/<uuid:challenge_id>/leaderboard")
class ChallengeLeaderboardApi(Resource):
    """Public leaderboard of successful attempts for one challenge."""

    def get(self, challenge_id):
        """Return the top 20 successful attempts, ordered per the challenge's scoring strategy."""
        limit = 20

        challenge = db.session.get(Challenge, str(challenge_id))
        if challenge is None:
            return {"result": "not_found"}, 404

        strategy = challenge.scoring_strategy or 'highest_rating'

        # Ordering per strategy; ties (and unknown strategies) fall back to
        # highest judge rating with earliest attempt winning.
        tiebreak = ChallengeAttempt.created_at.asc()
        orderings = {
            # earliest successful attempt wins
            'first': (ChallengeAttempt.created_at.asc(),),
            # lowest elapsed_ms wins
            'fastest': (ChallengeAttempt.elapsed_ms.asc().nullslast(), tiebreak),
            # lowest tokens_total wins
            'fewest_tokens': (ChallengeAttempt.tokens_total.asc().nullslast(), tiebreak),
            # highest judge_rating wins
            'highest_rating': (ChallengeAttempt.judge_rating.desc().nullslast(), tiebreak),
            # custom score field (computed by plugin)
            'custom': (ChallengeAttempt.score.desc().nullslast(), tiebreak),
        }
        order_clauses = orderings.get(strategy, orderings['highest_rating'])

        rows = (
            db.session.query(ChallengeAttempt)
            .filter(
                ChallengeAttempt.challenge_id == str(challenge_id),
                ChallengeAttempt.succeeded.is_(True),
            )
            .order_by(*order_clauses)
            .limit(limit)
            .all()
        )
        data = [
            {
                "attempt_id": r.id,
                "account_id": r.account_id,
                "end_user_id": r.end_user_id,
                "score": r.score,
                "judge_rating": r.judge_rating,
                "tokens_total": r.tokens_total,
                "elapsed_ms": r.elapsed_ms,
                "created_at": r.created_at.isoformat() if hasattr(r.created_at, "isoformat") else None,
            }
            for r in rows
        ]
        return {"result": "success", "data": data}
|
||||
|
||||
|
||||
85
api/controllers/web/red_blue_challenges.py
Normal file
85
api/controllers/web/red_blue_challenges.py
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from flask import request
|
||||
from flask_restx import Resource
|
||||
|
||||
from controllers.web import web_ns
|
||||
from extensions.ext_database import db
|
||||
from models.red_blue import RedBlueChallenge, TeamPairing
|
||||
from services.red_blue_service import RedBlueService
|
||||
|
||||
|
||||
@web_ns.route("/red-blue-challenges")
class RedBlueListApi(Resource):
    """Public listing of active red/blue challenges."""

    def get(self):
        """Return id/name/description for every active red/blue challenge."""
        active_challenges = (
            db.session.query(RedBlueChallenge)
            .filter(RedBlueChallenge.is_active.is_(True))
            .all()
        )
        data = []
        for challenge in active_challenges:
            data.append(
                {
                    "id": challenge.id,
                    "name": challenge.name,
                    "description": challenge.description,
                }
            )
        return {"result": "success", "data": data}
|
||||
|
||||
|
||||
@web_ns.route("/red-blue-challenges/<uuid:challenge_id>")
class RedBlueDetailApi(Resource):
    """Public detail view of a single red/blue challenge."""

    def get(self, challenge_id):
        """Return one red/blue challenge, or 404 when the id is unknown."""
        challenge = db.session.get(RedBlueChallenge, str(challenge_id))
        if challenge is None:
            return {"result": "not_found"}, 404
        return {
            "result": "success",
            "data": {
                "id": challenge.id,
                "name": challenge.name,
                "description": challenge.description,
            },
        }
|
||||
|
||||
|
||||
@web_ns.route("/red-blue-challenges/<uuid:challenge_id>/submit")
class RedBlueSubmitApi(Resource):
    """Public endpoint for submitting a team prompt to a red/blue challenge.

    NOTE(review): there is no authentication or rate limiting here —
    confirm anonymous, unthrottled submissions are intended.
    """

    def post(self, challenge_id):
        """Record a prompt submission for the red or blue team.

        Returns 400 for malformed payloads and 404 for unknown challenges.
        """
        # silent=True: malformed JSON yields our clean 400 below instead of
        # a framework-generated error response.
        payload = request.get_json(force=True, silent=True) or {}
        team = payload.get("team")
        prompt = payload.get("prompt")
        # Reject non-string prompts (numbers, objects, ...) up front so the
        # service layer always receives text.
        if team not in ("red", "blue") or not isinstance(prompt, str) or not prompt:
            return {"result": "bad_request"}, 400
        c = db.session.get(RedBlueChallenge, str(challenge_id))
        if not c:
            return {"result": "not_found"}, 404
        sub = RedBlueService.submit_prompt(
            challenge_id=str(challenge_id),
            tenant_id=c.tenant_id,
            team=team,
            prompt=prompt,
            account_id=None,
            end_user_id=None,
        )
        return {"result": "success", "data": {"id": sub.id}}, 201
|
||||
|
||||
|
||||
@web_ns.route("/red-blue-challenges/<uuid:challenge_id>/leaderboard")
class RedBlueLeaderboardApi(Resource):
    """Public aggregate score view for a red/blue challenge."""

    def get(self, challenge_id):
        """Return total and relative points for each team.

        Both sums are fetched in a single query; the original issued two
        round-trips scanning the same filtered rows.
        """
        red, blue = (
            db.session.query(
                db.func.coalesce(db.func.sum(TeamPairing.red_points), 0.0),
                db.func.coalesce(db.func.sum(TeamPairing.blue_points), 0.0),
            )
            .filter(TeamPairing.red_blue_challenge_id == str(challenge_id))
            .one()
        )
        red = float(red or 0.0)
        blue = float(blue or 0.0)
        total = red + blue
        data = {
            "red_points": red,
            "blue_points": blue,
            # ratios are 0.0 when no points exist, avoiding division by zero
            "red_ratio": (red / total) if total else 0.0,
            "blue_ratio": (blue / total) if total else 0.0,
        }
        return {"result": "success", "data": data}
|
||||
|
||||
30
api/controllers/web/register.py
Normal file
30
api/controllers/web/register.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from flask import request
|
||||
from flask_restx import Resource
|
||||
|
||||
from controllers.web import web_ns
|
||||
from extensions.ext_database import db
|
||||
from services.account_service import RegisterService
|
||||
|
||||
|
||||
@web_ns.route('/register')
class WebRegisterApi(Resource):
    """Public self-service account registration for web players.

    NOTE(review): this endpoint is unauthenticated and has no rate limiting
    or email verification — confirm abuse protection (proxy throttling,
    captcha) exists before exposing it publicly.
    """

    def post(self):
        """Create an account from email/name/password; 400 on bad input.

        Full validation (password policy, duplicate emails) is left to
        RegisterService; only cheap shape checks happen here.
        """
        # silent=True: malformed JSON becomes our clean 400 instead of a
        # framework error response.
        payload = request.get_json(force=True, silent=True) or {}
        email = payload.get('email')
        name = payload.get('name') or 'Player'
        password = payload.get('password')
        if not email or not password:
            return {'result': 'bad_request'}, 400
        # Cheap sanity check on the email shape; anything stricter belongs
        # to the registration service.
        if not isinstance(email, str) or '@' not in email or not isinstance(password, str):
            return {'result': 'bad_request'}, 400
        account = RegisterService.register(
            email=email,
            name=name,
            password=password,
            is_setup=False,
            create_workspace_required=False,
        )
        db.session.commit()
        return {'result': 'success', 'data': {'account_id': account.id}}, 201
|
||||
|
||||
|
||||
|
|
@ -58,6 +58,9 @@ class NodeType(StrEnum):
|
|||
DOCUMENT_EXTRACTOR = "document-extractor"
|
||||
LIST_OPERATOR = "list-operator"
|
||||
AGENT = "agent"
|
||||
CHALLENGE_EVALUATOR = "challenge-evaluator"
|
||||
JUDGING_LLM = "judging-llm"
|
||||
TEAM_CHALLENGE = "team-challenge"
|
||||
|
||||
|
||||
class NodeExecutionType(StrEnum):
|
||||
|
|
|
|||
3
api/core/workflow/nodes/challenge_evaluator/__init__.py
Normal file
3
api/core/workflow/nodes/challenge_evaluator/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .node import ChallengeEvaluatorNode
|
||||
|
||||
__all__ = ['ChallengeEvaluatorNode']
|
||||
258
api/core/workflow/nodes/challenge_evaluator/node.py
Normal file
258
api/core/workflow/nodes/challenge_evaluator/node.py
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
# pyright: reportImplicitRelativeImport=none
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from core.variables.segments import Segment
|
||||
from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType, WorkflowNodeExecutionStatus
|
||||
from core.workflow.node_events import NodeRunResult
|
||||
from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig
|
||||
from core.workflow.nodes.base.node import Node
|
||||
from extensions.ext_database import db
|
||||
from models.challenge import Challenge
|
||||
from services.challenge_scorer_service import ChallengeScorerService
|
||||
from services.challenge_service import ChallengeService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ChallengeEvaluatorNode(Node):
|
||||
node_type = NodeType.CHALLENGE_EVALUATOR
|
||||
execution_type = NodeExecutionType.EXECUTABLE
|
||||
|
||||
_node_data: BaseNodeData
|
||||
|
||||
def init_node_data(self, data: Mapping[str, Any]) -> None:
    """Validate and store this node's configuration.

    BaseNodeData only models the common fields (title/desc/error handling);
    the raw configuration is kept as well so handlers can read custom keys
    such as ``inputs`` and ``evaluation_mode`` directly.
    """
    self._node_data = BaseNodeData.model_validate(data)
    # Copy into a real dict: the original stored the Mapping as-is, which
    # contradicted the dict[str, Any] annotation and left the stored config
    # aliased to (and mutable through) the caller's mapping.
    self._config: dict[str, Any] = dict(data)
|
||||
|
||||
def _get_error_strategy(self) -> ErrorStrategy | None:
    """Error strategy declared on the node data, or None when absent."""
    node_data = self._node_data
    if hasattr(node_data, 'error_strategy'):
        return node_data.error_strategy
    return None
|
||||
|
||||
def _get_retry_config(self) -> RetryConfig:
    """Retry configuration from the node data; a default config when absent."""
    node_data = self._node_data
    if hasattr(node_data, 'retry_config'):
        return node_data.retry_config
    return RetryConfig()
|
||||
|
||||
def _get_title(self) -> str:
    """Display title of the node; a generic label when none is configured."""
    node_data = self._node_data
    if hasattr(node_data, 'title'):
        return node_data.title
    return 'Challenge Evaluator'
|
||||
|
||||
def _get_description(self) -> str | None:
    """Optional description carried by the node data."""
    node_data = self._node_data
    if hasattr(node_data, 'desc'):
        return node_data.desc
    return None
|
||||
|
||||
def _get_default_value_dict(self) -> dict[str, Any]:
    """Default values declared on the node data; empty dict when absent."""
    node_data = self._node_data
    if hasattr(node_data, 'default_value_dict'):
        return node_data.default_value_dict
    return {}
|
||||
|
||||
def get_base_node_data(self) -> BaseNodeData:
    """Return the validated BaseNodeData stored by init_node_data."""
    return self._node_data
|
||||
|
||||
@classmethod
def version(cls) -> str:
    """Version tag of this node implementation."""
    return "1"
|
||||
|
||||
def _run(self) -> NodeRunResult:
    """Evaluate the challenge outcome and emit frontend-compatible outputs.

    Two modes, chosen by config ``evaluation_mode``:

    - ``'llm-judge'``: read ``judge_passed``/``judge_rating``/``judge_feedback``
      from the upstream Judging LLM node referenced by the response selector.
    - rules (default, and fallback when judge outputs are missing): resolve the
      response text from the selector and delegate to
      ``ChallengeService.evaluate_outcome``.

    When config carries ``challenge_id``, best-effort persistence of the
    attempt (and optional plugin scoring) happens before outputs are built;
    persistence failures never fail the node.
    """
    # Resolve response text from selector in config.inputs.response (frontend schema)
    output_text = ''
    source_selector = None
    inputs_cfg = self._config.get('inputs') or {}
    if isinstance(inputs_cfg, dict):
        source_selector = inputs_cfg.get('response')
    # fallback to older key if any
    source_selector = source_selector or self._config.get('value_selector')

    # Check evaluation mode from config
    evaluation_mode = self._config.get('evaluation_mode', 'rules')

    logger.info("ChallengeEvaluator - evaluation_mode: %s, source_selector: %s", evaluation_mode, source_selector)

    # Initialize judge variables
    is_judge_input = False
    judge_passed = False
    judge_rating = 0
    judge_feedback_from_input = ''
    # NOTE(review): redundant re-initialisation — output_text is already '' above.
    output_text = ''

    def _segment_to_value(segment: Segment | None) -> Any:
        # Convert a variable-pool Segment to a plain Python value;
        # falls back to .value, then to the segment itself.
        if segment is None:
            return None
        if hasattr(segment, "to_object"):
            try:
                return segment.to_object()
            except Exception:  # pragma: no cover - defensive
                pass
        return getattr(segment, "value", segment)

    # If evaluation_mode is 'llm-judge', try to read from upstream Judging LLM node
    if evaluation_mode == 'llm-judge' and source_selector and len(source_selector) >= 1:
        try:
            # First element of the selector is the upstream node id.
            node_id = source_selector[0]
            # Retrieve judge outputs as Segments and convert to primitive values
            passed_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_passed'])
            rating_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_rating'])
            feedback_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_feedback'])

            potential_judge_passed = _segment_to_value(passed_segment)
            potential_judge_rating = _segment_to_value(rating_segment)
            potential_judge_feedback = _segment_to_value(feedback_segment)

            logger.info(
                "ChallengeEvaluator - Reading judge outputs: passed=%s, rating=%s, feedback=%s",
                potential_judge_passed,
                potential_judge_rating,
                potential_judge_feedback,
            )

            # If judge_passed exists, we successfully read from a Judging LLM node
            if potential_judge_passed is not None:
                is_judge_input = True
                judge_passed = bool(potential_judge_passed)
                judge_rating = int(potential_judge_rating or 0)
                judge_feedback_from_input = str(potential_judge_feedback or '')
                logger.info(
                    "ChallengeEvaluator - Judge input successfully read! passed=%s, rating=%s, feedback=%s",
                    judge_passed,
                    judge_rating,
                    judge_feedback_from_input,
                )
        except Exception as e:
            logger.error("ChallengeEvaluator - Error reading judge outputs: %s", e, exc_info=True)
            is_judge_input = False

    # If not using judge input, get text output for rules-based evaluation
    if not is_judge_input and source_selector:
        try:
            segment = self.graph_runtime_state.variable_pool.get(source_selector)
            if segment is None:
                output_text = ''
            elif hasattr(segment, 'text'):
                output_text = segment.text
            else:
                output_text = str(_segment_to_value(segment) or '')
        except Exception:
            # Any lookup failure degrades to an empty response text.
            output_text = ''

    # Evaluate based on mode
    if is_judge_input:
        ok = judge_passed
        details = {
            'mode': 'llm-judge',
            'rating': judge_rating,
            'feedback': judge_feedback_from_input,
        }
    else:
        # Rules-based evaluation (only if not using judge input)
        ok, details = ChallengeService.evaluate_outcome(output_text, self._config)

    # optional persistence if config carries challenge_id
    challenge_id = self._config.get('challenge_id')
    if challenge_id:
        try:
            # Calculate elapsed time in milliseconds
            elapsed_ms = int((time.time() - self.graph_runtime_state.start_at) * 1000)

            # Get total tokens used in the workflow so far
            tokens_total = self.graph_runtime_state.total_tokens

            # Extract judge_rating from details if available (for highest_rating strategy)
            # NOTE(review): `judge_rating` is deliberately re-bound here (shadows the
            # earlier int) so None means "no rating" in the persisted row.
            judge_rating = None
            judge_feedback = None
            if isinstance(details, dict):
                judge_rating = details.get('rating')
                judge_feedback = details.get('feedback')

            # Load challenge to check scoring strategy
            challenge = db.session.get(Challenge, str(challenge_id))

            # Score field is reserved for custom scoring plugins.
            # For built-in strategies (first, fastest, fewest_tokens, highest_rating),
            # the leaderboard sorts by specific columns (created_at, elapsed_ms, tokens_total, judge_rating).
            score = None

            # If custom scoring is configured, compute score using plugin
            if challenge and challenge.scoring_strategy == 'custom':
                try:
                    metrics = {
                        'succeeded': ok,
                        'tokens_total': tokens_total,
                        'elapsed_ms': elapsed_ms,
                        'rating': judge_rating,
                        'created_at': int(time.time() * 1000),
                    }

                    ctx = {
                        'tenant_id': self.tenant_id,
                        'app_id': self.app_id,
                        'workflow_id': self.workflow_id,
                        'challenge_id': str(challenge_id),
                        'end_user_id': None,
                        'timeout_ms': 5000,
                    }

                    result = ChallengeScorerService.score_with_plugin(
                        scorer_plugin_id=challenge.scoring_plugin_id,
                        scorer_entrypoint=challenge.scoring_entrypoint,
                        metrics=metrics,
                        config=challenge.scoring_config or {},
                        ctx=ctx,
                    )

                    score = result.get('score')
                    logger.info(
                        "Custom scorer computed score: %s (details: %s)",
                        score,
                        result.get('details'),
                    )
                except Exception as e:
                    logger.error("Custom scorer failed: %s", e, exc_info=True)
                    # Continue with score=None on error

            ChallengeService.record_attempt(
                tenant_id=self.tenant_id,
                challenge_id=challenge_id,
                end_user_id=None,
                account_id=None,
                workflow_run_id=None,
                succeeded=ok,
                score=score,
                judge_rating=judge_rating,
                judge_feedback=judge_feedback,
                tokens_total=tokens_total,
                elapsed_ms=elapsed_ms,
                session=db.session,
            )
        except Exception:
            # do not crash the workflow if recording fails
            pass

    # Always provide all output variables to match frontend getOutputVars
    outputs: dict[str, Any] = {
        'challenge_succeeded': ok,
        'judge_rating': 0,
        'judge_feedback': '',
        'message': '',
    }

    # Override with actual values if evaluator provides them
    if isinstance(details, dict):
        logger.debug("ChallengeEvaluator - details: %s", details)
        if 'rating' in details:
            outputs['judge_rating'] = details.get('rating')
        if 'feedback' in details:
            outputs['judge_feedback'] = details.get('feedback')
        if 'message' in details:
            outputs['message'] = details.get('message')
    # If no explicit message, create one from evaluation details
    # NOTE(review): assumes `details` is dict-like here; a non-dict return
    # from evaluate_outcome would raise on .get below — confirm its contract.
    if not outputs['message']:
        if ok:
            outputs['message'] = f"Success: {details.get('mode', 'evaluation')} matched"
        else:
            outputs['message'] = f"Failed: {details.get('mode', 'evaluation')} did not match"

    return NodeRunResult(
        status=WorkflowNodeExecutionStatus.SUCCEEDED,
        outputs=outputs,
    )
|
||||
|
||||
188
api/core/workflow/nodes/judging_llm/node.py
Normal file
188
api/core/workflow/nodes/judging_llm/node.py
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from core.model_manager import ModelManager
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
PromptMessageContentType,
|
||||
SystemPromptMessage,
|
||||
UserPromptMessage,
|
||||
)
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType, WorkflowNodeExecutionStatus
|
||||
from core.workflow.node_events import NodeRunResult
|
||||
from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig
|
||||
from core.workflow.nodes.base.node import Node
|
||||
from services.challenge_service import ChallengeService
|
||||
|
||||
|
||||
class JudgingLLMNode(Node):
    """Workflow node that asks a judge LLM to rate a response against a rubric.

    Reads ``goal``/``response`` selectors from config ``inputs``, invokes the
    configured judge model synchronously, and parses a JSON verdict of the
    shape ``{"passed", "rating", "feedback"}`` into FE-compatible outputs.
    Falls back to rules-based evaluation when the verdict cannot be parsed,
    and to default (failing) outputs on any error.
    """

    node_type = NodeType.JUDGING_LLM
    execution_type = NodeExecutionType.EXECUTABLE

    # Validated title/desc/error-strategy container; raw config lives in _config.
    _node_data: BaseNodeData

    def init_node_data(self, data: Mapping[str, Any]):
        """Validate the payload and keep the raw mapping for ``_run``."""
        self._node_data = BaseNodeData.model_validate(data)
        # Access data directly from node_data, not from a 'config' key
        self._config: dict[str, Any] = data

    def _get_error_strategy(self) -> ErrorStrategy | None:
        """Return the configured error strategy, or None when absent."""
        return getattr(self._node_data, 'error_strategy', None)

    def _get_retry_config(self) -> RetryConfig:
        """Return the retry configuration, defaulting to a fresh RetryConfig."""
        return getattr(self._node_data, 'retry_config', RetryConfig())

    def _get_title(self) -> str:
        """Return the node title, falling back to 'Judging LLM'."""
        return getattr(self._node_data, 'title', 'Judging LLM')

    def _get_description(self) -> str | None:
        """Return the optional node description."""
        return getattr(self._node_data, 'desc', None)

    def _get_default_value_dict(self) -> dict[str, Any]:
        """Return the default-value mapping, or an empty dict when unset."""
        return getattr(self._node_data, 'default_value_dict', {})

    def get_base_node_data(self) -> BaseNodeData:
        """Expose the validated BaseNodeData for framework consumers."""
        return self._node_data

    @classmethod
    def version(cls) -> str:
        """Node implementation version, used for registry lookup."""
        return "1"

    def _run(self) -> NodeRunResult:
        """Invoke the judge model (when fully configured) and emit the verdict."""
        # Placeholder with FE-compatible keys. Extract inputs for future wiring.
        inputs_cfg = self._config.get('inputs') or {}
        goal_selector = None
        response_selector = None
        if isinstance(inputs_cfg, dict):
            goal_selector = inputs_cfg.get('goal')
            response_selector = inputs_cfg.get('response')

        # Attempt to read variables (not used in placeholder decision)
        _ = None
        try:
            if goal_selector:
                _ = self.graph_runtime_state.variable_pool.get(goal_selector)
            if response_selector:
                _ = self.graph_runtime_state.variable_pool.get(response_selector)
        except Exception:
            pass

        # Defaults returned whenever the LLM path is skipped or fails.
        outputs = {
            'judge_passed': False,
            'judge_rating': 0,
            'judge_feedback': '',
        }

        # If model config and rubric provided, invoke LLM synchronously to judge
        judge_model = self._config.get('judge_model') or {}
        rubric = self._config.get('rubric_prompt_template') or ''
        provider = (judge_model or {}).get('provider')
        model_name = (judge_model or {}).get('name')
        completion_params = (judge_model or {}).get('completion_params') or {}

        def _segment_to_text(seg: Any) -> str:
            # Best-effort stringification of a variable-pool value.
            try:
                # Many variable types expose .text
                if hasattr(seg, 'text'):
                    return str(seg.text)
                if isinstance(seg, (dict, list)):
                    return json.dumps(seg, ensure_ascii=False)
                return str(seg)
            except Exception:
                return ''

        # Debug: log what we're checking
        # NOTE(review): function-local `import logging` — consider a
        # module-level `logger = logging.getLogger(__name__)` instead.
        import logging
        logger = logging.getLogger(__name__)
        logger.info(
            "JudgingLLM check - provider: %s, model: %s, rubric_len: %s, response_selector: %s",
            provider,
            model_name,
            len(rubric) if rubric else 0,
            response_selector,
        )

        if provider and model_name and rubric and response_selector:
            logger.info("JudgingLLM: All conditions met, invoking LLM...")
            try:
                goal_val = self.graph_runtime_state.variable_pool.get(goal_selector) if goal_selector else None
                response_val = self.graph_runtime_state.variable_pool.get(response_selector)
                goal_text = _segment_to_text(goal_val)
                response_text = _segment_to_text(response_val)
                json_template = '{"passed": boolean, "rating": number (0-10), "feedback": string}'

                prompt_body = (
                    f"Goal:\n{goal_text}\n\n"
                    f"Response:\n{response_text}\n\n"
                    f"Return JSON with rating 0-10: {json_template}"
                )

                # Rubric is the system prompt; goal/response go in the user turn.
                prompt_messages = [
                    SystemPromptMessage(content=rubric),
                    UserPromptMessage(content=prompt_body),
                ]

                model_instance = ModelManager().get_model_instance(
                    tenant_id=self.tenant_id,
                    model_type=ModelType.LLM,
                    provider=provider,
                    model=model_name,
                )
                # Blocking (non-streaming) call so the verdict is available here.
                result: LLMResult = model_instance.invoke_llm(
                    prompt_messages=prompt_messages,
                    model_parameters=completion_params,
                    stop=[],
                    stream=False,
                    user=self.user_id,
                )  # type: ignore
                # Extract text from result
                text_out = ''
                content = getattr(result.message, 'content', '')
                if isinstance(content, str):
                    text_out = content
                elif isinstance(content, list):
                    for item in content:
                        if getattr(item, 'type', None) == PromptMessageContentType.TEXT:
                            text_out += str(getattr(item, 'data', ''))
                else:
                    text_out = str(content)

                # Parse last JSON object in output
                # NOTE(review): the greedy pattern spans from the first '{' to
                # the last '}', so findall yields at most one match here.
                verdict: dict[str, Any] | None = None
                try:
                    matches = re.findall(r"\{[\s\S]*\}", text_out)
                    if matches:
                        verdict = json.loads(matches[-1])
                except Exception:
                    verdict = None

                if isinstance(verdict, dict):
                    outputs['judge_passed'] = bool(verdict.get('passed'))
                    outputs['judge_rating'] = int(verdict.get('rating') or 0)
                    outputs['judge_feedback'] = str(verdict.get('feedback') or '')
                    outputs['judge_raw'] = json.dumps(verdict)
                else:
                    # Fallback to simple rules if configured
                    success_type = self._config.get('success_type')
                    success_pattern = self._config.get('success_pattern')
                    if success_type and success_pattern:
                        ok, _ = ChallengeService.evaluate_outcome(response_text, {
                            'success_type': success_type,
                            'success_pattern': success_pattern,
                        })
                        outputs['judge_passed'] = ok
                        outputs['judge_rating'] = 10 if ok else 0
                        outputs['judge_feedback'] = 'passed by rules' if ok else 'failed by rules'
            except Exception as e:
                # keep default outputs on error
                logger.error("JudgingLLM error: %s", e, exc_info=True)
                pass
        else:
            logger.warning("JudgingLLM skipped - missing required fields")
        return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, outputs=outputs)
|
||||
|
||||
|
|
@ -24,6 +24,9 @@ from core.workflow.nodes.tool import ToolNode
|
|||
from core.workflow.nodes.variable_aggregator import VariableAggregatorNode
|
||||
from core.workflow.nodes.variable_assigner.v1 import VariableAssignerNode as VariableAssignerNodeV1
|
||||
from core.workflow.nodes.variable_assigner.v2 import VariableAssignerNode as VariableAssignerNodeV2
|
||||
from core.workflow.nodes.challenge_evaluator.node import ChallengeEvaluatorNode
|
||||
from core.workflow.nodes.judging_llm.node import JudgingLLMNode
|
||||
from core.workflow.nodes.team_challenge.node import TeamChallengeNode
|
||||
|
||||
LATEST_VERSION = "latest"
|
||||
|
||||
|
|
@ -142,4 +145,16 @@ NODE_TYPE_CLASSES_MAPPING: Mapping[NodeType, Mapping[str, type[Node]]] = {
|
|||
LATEST_VERSION: KnowledgeIndexNode,
|
||||
"1": KnowledgeIndexNode,
|
||||
},
|
||||
NodeType.CHALLENGE_EVALUATOR: {
|
||||
LATEST_VERSION: ChallengeEvaluatorNode,
|
||||
"1": ChallengeEvaluatorNode,
|
||||
},
|
||||
NodeType.JUDGING_LLM: {
|
||||
LATEST_VERSION: JudgingLLMNode,
|
||||
"1": JudgingLLMNode,
|
||||
},
|
||||
NodeType.TEAM_CHALLENGE: {
|
||||
LATEST_VERSION: TeamChallengeNode,
|
||||
"1": TeamChallengeNode,
|
||||
},
|
||||
}
|
||||
|
|
|
|||
68
api/core/workflow/nodes/team_challenge/node.py
Normal file
68
api/core/workflow/nodes/team_challenge/node.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType, WorkflowNodeExecutionStatus
|
||||
from core.workflow.node_events import NodeRunResult
|
||||
from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig
|
||||
from core.workflow.nodes.base.node import Node
|
||||
|
||||
|
||||
class TeamChallengeNode(Node):
    """Placeholder node for red/blue team challenges.

    Echoes the team selected in config ``inputs.team_choice`` and emits
    zeroed scoring outputs under the keys the frontend expects.
    """

    node_type = NodeType.TEAM_CHALLENGE
    execution_type = NodeExecutionType.EXECUTABLE

    # Validated title/desc container; raw config lives in _config.
    _node_data: BaseNodeData

    def init_node_data(self, data: Mapping[str, Any]):
        """Validate the payload and keep the raw mapping for ``_run``."""
        self._node_data = BaseNodeData.model_validate(data)
        self._config: dict[str, Any] = data

    def _get_error_strategy(self) -> ErrorStrategy | None:
        """Return the configured error strategy, or None when absent."""
        try:
            return self._node_data.error_strategy
        except AttributeError:
            return None

    def _get_retry_config(self) -> RetryConfig:
        """Return the retry configuration, defaulting to a fresh RetryConfig."""
        try:
            return self._node_data.retry_config
        except AttributeError:
            return RetryConfig()

    def _get_title(self) -> str:
        """Return the node title, falling back to 'Team Challenge'."""
        try:
            return self._node_data.title
        except AttributeError:
            return 'Team Challenge'

    def _get_description(self) -> str | None:
        """Return the optional node description."""
        try:
            return self._node_data.desc
        except AttributeError:
            return None

    def _get_default_value_dict(self) -> dict[str, Any]:
        """Return the default-value mapping, or an empty dict when unset."""
        try:
            return self._node_data.default_value_dict
        except AttributeError:
            return {}

    def get_base_node_data(self) -> BaseNodeData:
        """Expose the validated BaseNodeData for framework consumers."""
        return self._node_data

    @classmethod
    def version(cls) -> str:
        """Node implementation version, used for registry lookup."""
        return "1"

    def _run(self) -> NodeRunResult:
        """Resolve the team choice and return placeholder scoring outputs."""
        # Read inputs.team_choice for consistency with FE
        chosen_team = ''
        cfg_inputs = self._config.get('inputs') or {}
        if isinstance(cfg_inputs, dict):
            selector = cfg_inputs.get('team_choice')
            if selector:
                try:
                    raw_value = self.graph_runtime_state.variable_pool.get_value_by_selector(selector)
                    chosen_team = str(raw_value or '')
                except Exception:
                    chosen_team = ''

        return NodeRunResult(
            status=WorkflowNodeExecutionStatus.SUCCEEDED,
            outputs={
                'team': chosen_team,
                'judge_passed': False,
                'judge_rating': 0,
                'judge_feedback': '',
                'categories': {},
                'team_points': 0.0,
                'total_points': 0.0,
            },
        )
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
"""add challenge & red/blue tables
|
||||
|
||||
Revision ID: 183e2d30fb4e
|
||||
Revises: 68519ad5cd18
|
||||
Create Date: 2025-09-30 08:22:31.223257
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import models as models
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '183e2d30fb4e'
|
||||
down_revision = '68519ad5cd18'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Create the challenge / red-blue competition tables.

    Adds five tables (challenge_attempts, challenges, red_blue_challenges,
    team_pairings, team_submissions), each with tenant and challenge/app
    lookup indexes, then drops ``providers.credential_status``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # One row per workflow run that evaluated a challenge.
    op.create_table('challenge_attempts',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('challenge_id', models.types.StringUUID(), nullable=False),
        sa.Column('end_user_id', models.types.StringUUID(), nullable=True),
        sa.Column('account_id', models.types.StringUUID(), nullable=True),
        sa.Column('workflow_run_id', models.types.StringUUID(), nullable=True),
        sa.Column('succeeded', sa.Boolean(), server_default=sa.text('false'), nullable=False),
        sa.Column('score', sa.Float(), nullable=True),
        sa.Column('judge_rating', sa.Integer(), nullable=True),
        sa.Column('judge_feedback', sa.Text(), nullable=True),
        sa.Column('judge_output_raw', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('tokens_total', sa.Integer(), nullable=True),
        sa.Column('elapsed_ms', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='challenge_attempts_pkey')
    )
    with op.batch_alter_table('challenge_attempts', schema=None) as batch_op:
        batch_op.create_index('challenge_attempts_challenge_id_idx', ['challenge_id'], unique=False)
        batch_op.create_index('challenge_attempts_tenant_id_idx', ['tenant_id'], unique=False)

    # Challenge definitions: evaluation rules plus pluggable scoring config.
    op.create_table('challenges',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('app_id', models.types.StringUUID(), nullable=False),
        sa.Column('workflow_id', models.types.StringUUID(), nullable=True),
        sa.Column('name', sa.Text(), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('goal', sa.Text(), nullable=True),
        sa.Column('success_type', sa.String(length=64), server_default=sa.text("'regex'"), nullable=False),
        sa.Column('success_pattern', sa.Text(), nullable=True),
        sa.Column('secret_ref', sa.Text(), nullable=True),
        sa.Column('evaluator_type', sa.String(length=32), server_default=sa.text("'rules'"), nullable=False),
        sa.Column('evaluator_plugin_id', sa.Text(), nullable=True),
        sa.Column('evaluator_entrypoint', sa.Text(), nullable=True),
        sa.Column('evaluator_config', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('scoring_strategy', sa.String(length=64), server_default=sa.text("'first'"), nullable=False),
        sa.Column('scoring_plugin_id', sa.Text(), nullable=True),
        sa.Column('scoring_entrypoint', sa.Text(), nullable=True),
        sa.Column('scoring_config', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('is_active', sa.Boolean(), server_default=sa.text('true'), nullable=False),
        sa.Column('created_by', models.types.StringUUID(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.Column('updated_by', models.types.StringUUID(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='challenges_pkey')
    )
    with op.batch_alter_table('challenges', schema=None) as batch_op:
        batch_op.create_index('challenges_app_id_idx', ['app_id'], unique=False)
        batch_op.create_index('challenges_tenant_id_idx', ['tenant_id'], unique=False)

    # Red/blue competition definitions (judge suite + selection policies).
    op.create_table('red_blue_challenges',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('app_id', models.types.StringUUID(), nullable=False),
        sa.Column('workflow_id', models.types.StringUUID(), nullable=True),
        sa.Column('name', sa.Text(), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('judge_suite', postgresql.JSONB(astext_type=sa.Text()), nullable=False),
        sa.Column('defense_selection_policy', sa.String(length=64), server_default=sa.text("'latest_best'"), nullable=False),
        sa.Column('attack_selection_policy', sa.String(length=64), server_default=sa.text("'latest_best'"), nullable=False),
        sa.Column('scoring_strategy', sa.String(length=64), server_default=sa.text("'red_blue_ratio'"), nullable=False),
        sa.Column('theme', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('instructions_md', sa.Text(), nullable=True),
        sa.Column('is_active', sa.Boolean(), server_default=sa.text('true'), nullable=False),
        sa.Column('created_by', models.types.StringUUID(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.Column('updated_by', models.types.StringUUID(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='red_blue_challenges_pkey')
    )
    with op.batch_alter_table('red_blue_challenges', schema=None) as batch_op:
        batch_op.create_index('red_blue_challenges_app_id_idx', ['app_id'], unique=False)
        batch_op.create_index('red_blue_challenges_tenant_id_idx', ['tenant_id'], unique=False)

    # One attack/defense submission pairing plus its judged result.
    op.create_table('team_pairings',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('red_blue_challenge_id', models.types.StringUUID(), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('attack_submission_id', models.types.StringUUID(), nullable=True),
        sa.Column('defense_submission_id', models.types.StringUUID(), nullable=True),
        sa.Column('judge_output_raw', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('categories', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('judge_rating', sa.Integer(), nullable=True),
        sa.Column('judge_feedback', sa.Text(), nullable=True),
        sa.Column('red_points', sa.Float(), server_default=sa.text('0'), nullable=False),
        sa.Column('blue_points', sa.Float(), server_default=sa.text('0'), nullable=False),
        sa.Column('tokens_total', sa.Integer(), nullable=True),
        sa.Column('elapsed_ms', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='team_pairings_pkey')
    )
    with op.batch_alter_table('team_pairings', schema=None) as batch_op:
        batch_op.create_index('team_pairings_challenge_id_idx', ['red_blue_challenge_id'], unique=False)
        batch_op.create_index('team_pairings_tenant_id_idx', ['tenant_id'], unique=False)

    # A team member's submitted prompt for a red/blue challenge.
    op.create_table('team_submissions',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('red_blue_challenge_id', models.types.StringUUID(), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('account_id', models.types.StringUUID(), nullable=True),
        sa.Column('end_user_id', models.types.StringUUID(), nullable=True),
        sa.Column('team', sa.String(length=16), nullable=False),
        sa.Column('prompt', sa.Text(), nullable=False),
        sa.Column('active', sa.Boolean(), server_default=sa.text('true'), nullable=False),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='team_submissions_pkey')
    )
    with op.batch_alter_table('team_submissions', schema=None) as batch_op:
        batch_op.create_index('team_submissions_challenge_id_idx', ['red_blue_challenge_id'], unique=False)
        batch_op.create_index('team_submissions_tenant_id_idx', ['tenant_id'], unique=False)

    # NOTE(review): dropping providers.credential_status is unrelated to the
    # challenge tables above — confirm this autogenerated change belongs in
    # this revision before deploying.
    with op.batch_alter_table('providers', schema=None) as batch_op:
        batch_op.drop_column('credential_status')

    # ### end Alembic commands ###
||||
|
||||
|
||||
def downgrade():
    """Reverse the upgrade: restore providers.credential_status and drop
    the five challenge / red-blue tables (indexes first, then tables),
    in the opposite order of creation.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('providers', schema=None) as batch_op:
        batch_op.add_column(sa.Column('credential_status', sa.VARCHAR(length=20), server_default=sa.text("'active'::character varying"), autoincrement=False, nullable=True))

    with op.batch_alter_table('team_submissions', schema=None) as batch_op:
        batch_op.drop_index('team_submissions_tenant_id_idx')
        batch_op.drop_index('team_submissions_challenge_id_idx')

    op.drop_table('team_submissions')
    with op.batch_alter_table('team_pairings', schema=None) as batch_op:
        batch_op.drop_index('team_pairings_tenant_id_idx')
        batch_op.drop_index('team_pairings_challenge_id_idx')

    op.drop_table('team_pairings')
    with op.batch_alter_table('red_blue_challenges', schema=None) as batch_op:
        batch_op.drop_index('red_blue_challenges_tenant_id_idx')
        batch_op.drop_index('red_blue_challenges_app_id_idx')

    op.drop_table('red_blue_challenges')
    with op.batch_alter_table('challenges', schema=None) as batch_op:
        batch_op.drop_index('challenges_tenant_id_idx')
        batch_op.drop_index('challenges_app_id_idx')

    op.drop_table('challenges')
    with op.batch_alter_table('challenge_attempts', schema=None) as batch_op:
        batch_op.drop_index('challenge_attempts_tenant_id_idx')
        batch_op.drop_index('challenge_attempts_challenge_id_idx')

    op.drop_table('challenge_attempts')
    # ### end Alembic commands ###
|
||||
|
|
@ -91,6 +91,8 @@ from .workflow import (
|
|||
WorkflowRun,
|
||||
WorkflowType,
|
||||
)
|
||||
from .challenge import Challenge, ChallengeAttempt
|
||||
from .red_blue import RedBlueChallenge, TeamSubmission, TeamPairing
|
||||
|
||||
__all__ = [
|
||||
"APIBasedExtension",
|
||||
|
|
@ -181,4 +183,9 @@ __all__ = [
|
|||
"WorkflowRunTriggeredFrom",
|
||||
"WorkflowToolProvider",
|
||||
"WorkflowType",
|
||||
"Challenge",
|
||||
"ChallengeAttempt",
|
||||
"RedBlueChallenge",
|
||||
"TeamSubmission",
|
||||
"TeamPairing",
|
||||
]
|
||||
|
|
|
|||
91
api/models/challenge.py
Normal file
91
api/models/challenge.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from .base import Base
|
||||
from .types import StringUUID
|
||||
|
||||
|
||||
class Challenge(Base):
    """Challenge definition: what to evaluate and how to score attempts.

    Mirrors the 'challenges' table created in the companion migration.
    """

    __tablename__ = "challenges"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="challenges_pkey"),
        sa.Index("challenges_tenant_id_idx", "tenant_id"),
        sa.Index("challenges_app_id_idx", "app_id"),
    )

    # Identity and ownership.
    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    workflow_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)

    # Display metadata.
    name: Mapped[str] = mapped_column(sa.Text, nullable=False)
    description: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    goal: Mapped[str | None] = mapped_column(sa.Text, nullable=True)

    # Rules-based success criterion (success_type defaults to 'regex').
    success_type: Mapped[str] = mapped_column(sa.String(64), nullable=False, server_default=sa.text("'regex'"))
    success_pattern: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    secret_ref: Mapped[str | None] = mapped_column(sa.Text, nullable=True)

    # Evaluator selection: built-in 'rules' by default, or a plugin.
    evaluator_type: Mapped[str] = mapped_column(sa.String(32), nullable=False, server_default=sa.text("'rules'"))
    evaluator_plugin_id: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    evaluator_entrypoint: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    evaluator_config = mapped_column(JSONB, nullable=True)

    # Scoring strategy ('first' by default) and optional scoring plugin.
    scoring_strategy: Mapped[str] = mapped_column(sa.String(64), nullable=False, server_default=sa.text("'first'"))
    scoring_plugin_id: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    scoring_entrypoint: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    scoring_config = mapped_column(JSONB, nullable=True)

    is_active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))

    # Audit fields; timestamps are DB-side defaults.
    created_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
    updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
|
||||
|
||||
|
||||
class ChallengeAttempt(Base):
    """One user attempt at a challenge, with its outcome and judge metrics."""

    __tablename__ = "challenge_attempts"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="challenge_attempts_pkey"),
        sa.Index("challenge_attempts_tenant_id_idx", "tenant_id"),
        sa.Index("challenge_attempts_challenge_id_idx", "challenge_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    challenge_id: Mapped[str] = mapped_column(StringUUID, nullable=False)

    # Who made the attempt (either may be absent) and the workflow run it produced.
    end_user_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    account_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    workflow_run_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)

    # Outcome: pass/fail plus an optional numeric score for ranking.
    succeeded: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"))
    score: Mapped[float | None] = mapped_column(sa.Float, nullable=True)

    # Judge output, when a judge ran: rating, textual feedback, raw JSON payload.
    judge_rating: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    judge_feedback: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    judge_output_raw = mapped_column(JSONB, nullable=True)

    # Resource usage of the attempt.
    tokens_total: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    elapsed_ms: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)

    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
|
||||
|
||||
114
api/models/red_blue.py
Normal file
114
api/models/red_blue.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from .base import Base
|
||||
from .types import StringUUID
|
||||
|
||||
|
||||
class RedBlueChallenge(Base):
    """A red-team vs blue-team competition attached to an app (and optionally a workflow)."""

    __tablename__ = "red_blue_challenges"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="red_blue_challenges_pkey"),
        sa.Index("red_blue_challenges_tenant_id_idx", "tenant_id"),
        sa.Index("red_blue_challenges_app_id_idx", "app_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    workflow_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)

    name: Mapped[str] = mapped_column(sa.Text, nullable=False)
    description: Mapped[str | None] = mapped_column(sa.Text, nullable=True)

    # Judge configuration (JSON) plus policies for picking each side's
    # submission when forming a pairing, and the point-award strategy.
    judge_suite = mapped_column(JSONB, nullable=False)
    defense_selection_policy: Mapped[str] = mapped_column(
        sa.String(64), nullable=False, server_default=sa.text("'latest_best'")
    )
    attack_selection_policy: Mapped[str] = mapped_column(
        sa.String(64), nullable=False, server_default=sa.text("'latest_best'")
    )
    scoring_strategy: Mapped[str] = mapped_column(
        sa.String(64), nullable=False, server_default=sa.text("'red_blue_ratio'")
    )

    # Presentation: optional theme JSON and markdown instructions.
    theme = mapped_column(JSONB, nullable=True)
    instructions_md: Mapped[str | None] = mapped_column(sa.Text, nullable=True)

    is_active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))

    # Audit columns.
    created_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
    updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
|
||||
|
||||
|
||||
class TeamSubmission(Base):
    """A prompt submitted by the red or blue team for a red/blue challenge."""

    __tablename__ = "team_submissions"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="team_submissions_pkey"),
        sa.Index("team_submissions_challenge_id_idx", "red_blue_challenge_id"),
        sa.Index("team_submissions_tenant_id_idx", "tenant_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    red_blue_challenge_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    # Submitter identity: console account and/or web end user (either may be absent).
    account_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    end_user_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)

    team: Mapped[str] = mapped_column(sa.String(16), nullable=False)  # 'red' | 'blue'
    prompt: Mapped[str] = mapped_column(sa.Text, nullable=False)
    # Inactive submissions are excluded from counterparty selection.
    active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))

    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
|
||||
|
||||
|
||||
class TeamPairing(Base):
    """One judged attack-vs-defense match-up, with points awarded to each team."""

    __tablename__ = "team_pairings"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="team_pairings_pkey"),
        sa.Index("team_pairings_challenge_id_idx", "red_blue_challenge_id"),
        sa.Index("team_pairings_tenant_id_idx", "tenant_id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    red_blue_challenge_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)

    # The two submissions that were paired (red attacks, blue defends).
    attack_submission_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    defense_submission_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)

    # Judge output and the resulting per-team points.
    judge_output_raw = mapped_column(JSONB, nullable=True)
    categories = mapped_column(JSONB, nullable=True)
    judge_rating: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    judge_feedback: Mapped[str | None] = mapped_column(sa.Text, nullable=True)
    red_points: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0"))
    blue_points: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0"))

    # Resource usage of evaluating the pairing.
    tokens_total: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
    elapsed_ms: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)

    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime,
        nullable=False,
        server_default=sa.func.current_timestamp(),
    )
|
||||
|
||||
|
|
@ -223,3 +223,6 @@ vdb = [
|
|||
"xinference-client~=1.2.2",
|
||||
"mo-vector~=0.1.13",
|
||||
]
|
||||
[tool.pyright]
|
||||
typeCheckingMode = "basic"
|
||||
reportImplicitRelativeImport = "none"
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@
|
|||
"extensions",
|
||||
"core/app/app_config/easy_ui_based_app/dataset"
|
||||
],
|
||||
"typeCheckingMode": "strict",
|
||||
"typeCheckingMode": "basic",
|
||||
"reportImplicitRelativeImport": "none",
|
||||
"allowedUntypedLibraries": [
|
||||
"flask_restx",
|
||||
"flask_login",
|
||||
|
|
|
|||
1
api/run.sh
Normal file
1
api/run.sh
Normal file
|
|
@ -0,0 +1 @@
|
|||
# Start the API's Flask dev server (debug mode) on 0.0.0.0:5001 via uv.
uv run --dev flask --app app run --host 0.0.0.0 --port 5001 --debug
|
||||
56
api/services/challenge_scorer_protocol.py
Normal file
56
api/services/challenge_scorer_protocol.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""
|
||||
Challenge scorer protocol and type definitions.
|
||||
|
||||
Defines the interface for custom scoring plugins that compute
|
||||
numeric scores from attempt metrics for leaderboard ranking.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Protocol, TypedDict
|
||||
|
||||
|
||||
class ScoringContext(TypedDict, total=False):
    """Context provided to scorer plugins.

    All keys are optional (``total=False``); plugins should read them with ``.get()``.
    """

    tenant_id: str  # tenant that owns the challenge
    app_id: str  # app the challenge is attached to
    workflow_id: str  # workflow backing the app
    challenge_id: str  # challenge being scored
    end_user_id: str | None  # end user identifier, when available
    timeout_ms: int  # execution-time budget for the plugin
|
||||
|
||||
|
||||
class AttemptMetrics(TypedDict, total=False):
    """Metrics from a challenge attempt.

    All keys are optional (``total=False``); scorers must tolerate missing values.
    """

    succeeded: bool  # whether the attempt passed the challenge
    tokens_total: int | None  # total tokens used by the attempt
    elapsed_ms: int | None  # elapsed time in milliseconds
    rating: int | None  # judge rating, when a judge ran
    created_at: int | None  # epoch ms
|
||||
|
||||
|
||||
class ScoringResult(TypedDict, total=False):
    """Result returned by scorer plugin."""

    score: float  # computed numeric score used for ranking (required in practice)
    details: dict[str, Any] | None  # optional breakdown of how the score was computed
|
||||
|
||||
|
||||
class ScorerProtocol(Protocol):
    """Protocol that all scorer plugins must implement (structural typing —
    plugins need not inherit from this class)."""

    def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
        """
        Compute a numeric score from attempt metrics.

        Args:
            metrics: Attempt metrics (tokens, time, rating, etc.)
            config: Plugin-specific configuration (from challenge.scoring_config)
            ctx: Context with tenant_id, app_id, etc.

        Returns:
            ScoringResult with computed score and optional details
        """
        ...
|
||||
112
api/services/challenge_scorer_service.py
Normal file
112
api/services/challenge_scorer_service.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
"""
|
||||
Challenge scorer service.
|
||||
|
||||
Loads and invokes custom scorer plugins to compute scores from attempt metrics.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
|
||||
|
||||
logger = logging.getLogger(__name__)


class ChallengeScorerService:
    """Service for loading and invoking custom scorer plugins.

    Plugins are plain classes implementing ``ScorerProtocol`` (a ``score``
    method), addressed by an entrypoint string ``'module:ClassName'``.
    Loaded instances are cached process-wide per (plugin_id, entrypoint).
    """

    # Process-wide cache of instantiated scorers, keyed by "plugin_id:entrypoint".
    _plugin_cache: dict[str, Any] = {}

    @classmethod
    def score_with_plugin(
        cls,
        *,
        scorer_plugin_id: str | None,
        scorer_entrypoint: str | None,
        metrics: AttemptMetrics,
        config: dict[str, Any] | None,
        ctx: ScoringContext,
    ) -> ScoringResult:
        """
        Compute score using a custom scorer plugin.

        Args:
            scorer_plugin_id: Plugin identifier (e.g., 'builtin.weighted_scorer')
            scorer_entrypoint: Entrypoint path (e.g., 'services.scorers.weighted:WeightedScorer')
            metrics: Attempt metrics to score
            config: Plugin-specific configuration
            ctx: Scoring context

        Returns:
            ScoringResult with computed score

        Raises:
            ValueError: If the plugin cannot be loaded, raises during scoring,
                or returns a malformed result.
        """
        if not scorer_plugin_id or not scorer_entrypoint:
            raise ValueError("scorer_plugin_id and scorer_entrypoint are required for custom scoring")

        scorer = cls._load_plugin(scorer_plugin_id, scorer_entrypoint)
        if not scorer:
            raise ValueError(f"Failed to load scorer plugin: {scorer_plugin_id}:{scorer_entrypoint}")

        # TODO: enforce ctx["timeout_ms"] (default 5000) around the plugin call,
        # e.g. via a worker thread; currently the plugin runs unbounded.
        try:
            result = scorer.score(metrics, config or {}, ctx)
        except Exception as e:
            logger.error("Scorer plugin %s failed: %s", scorer_plugin_id, e, exc_info=True)
            raise ValueError(f"Scorer plugin execution failed: {e}") from e

        # Validate outside the try so a malformed result is reported directly
        # instead of being double-wrapped as an "execution failed" error.
        if not isinstance(result, dict) or "score" not in result:
            raise ValueError("Scorer must return a dict with 'score' key")
        return result

    @classmethod
    def _load_plugin(cls, plugin_id: str, entrypoint: str) -> Any:
        """
        Load (and cache) a scorer plugin instance by entrypoint.

        Args:
            plugin_id: Plugin identifier for caching/logging
            entrypoint: Python path like 'pkg.module:ClassName'

        Returns:
            Scorer instance, or None if import/instantiation fails (non-fatal
            here; callers treat None as "plugin unavailable").
        """
        cache_key = f"{plugin_id}:{entrypoint}"
        if cache_key in cls._plugin_cache:
            return cls._plugin_cache[cache_key]

        try:
            # Parse entrypoint: 'pkg.module:ClassName'
            if ":" not in entrypoint:
                raise ValueError(f"Invalid entrypoint format: {entrypoint}. Expected 'module:ClassName'")
            module_path, class_name = entrypoint.split(":", 1)

            # Dynamic import; local import keeps the module dependency contained.
            import importlib

            module = importlib.import_module(module_path)
            scorer_class = getattr(module, class_name)
            scorer = scorer_class()

            cls._plugin_cache[cache_key] = scorer
            logger.info("Loaded scorer plugin: %s from %s", plugin_id, entrypoint)
            return scorer
        except Exception as e:
            logger.error("Failed to load scorer plugin %s:%s: %s", plugin_id, entrypoint, e, exc_info=True)
            return None

    @classmethod
    def clear_cache(cls) -> None:
        """Clear the plugin cache (useful for testing)."""
        cls._plugin_cache.clear()
|
||||
64
api/services/challenge_service.py
Normal file
64
api/services/challenge_service.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.challenge import Challenge, ChallengeAttempt
|
||||
|
||||
|
||||
class ChallengeService:
    """Domain helpers for challenge pass/fail evaluation and attempt persistence."""

    @staticmethod
    def evaluate_outcome(output_text: str, cfg: Mapping[str, Any]) -> tuple[bool, dict[str, Any]]:
        """Decide whether *output_text* passes the challenge described by *cfg*.

        Supported ``success_type`` values are ``'regex'`` (case-insensitive,
        multi-line search) and ``'contains'`` (case-insensitive substring).
        Returns a (passed, details) pair; details always carries a ``mode`` key.
        """
        mode = cfg.get("success_type", "regex")
        needle = cfg.get("success_pattern")

        if mode == "regex" and needle:
            try:
                matched = re.search(needle, output_text, flags=re.IGNORECASE | re.MULTILINE) is not None
            except re.error as exc:
                # An unparsable pattern counts as a failure, with diagnostics.
                return False, {"mode": "regex", "error": f"invalid_regex: {exc}"}
            return matched, {"mode": "regex", "matched": matched}

        if mode == "contains" and needle:
            return needle.lower() in output_text.lower(), {"mode": "contains"}

        # No pattern configured, or an unknown success_type: never passes.
        return False, {"mode": mode, "info": "no_pattern_or_unsupported"}

    @staticmethod
    def record_attempt(
        *,
        tenant_id: str,
        challenge_id: str,
        end_user_id: str | None,
        account_id: str | None,
        workflow_run_id: str | None,
        succeeded: bool,
        score: float | None = None,
        judge_rating: int | None = None,
        judge_feedback: str | None = None,
        judge_output_raw: dict[str, Any] | None = None,
        tokens_total: int | None = None,
        elapsed_ms: int | None = None,
        session: Session | None = None,
    ) -> ChallengeAttempt:
        """Persist and return a ChallengeAttempt row (committed immediately).

        Uses the provided *session* when given, otherwise the global db session.
        """
        db_session = session if session is not None else db.session
        row = ChallengeAttempt()
        for attr, value in (
            ("tenant_id", tenant_id),
            ("challenge_id", challenge_id),
            ("end_user_id", end_user_id),
            ("account_id", account_id),
            ("workflow_run_id", workflow_run_id),
            ("succeeded", succeeded),
            ("score", score),
            ("judge_rating", judge_rating),
            ("judge_feedback", judge_feedback),
            ("judge_output_raw", judge_output_raw),
            ("tokens_total", tokens_total),
            ("elapsed_ms", elapsed_ms),
        ):
            setattr(row, attr, value)
        db_session.add(row)
        db_session.commit()
        return row
|
||||
|
||||
|
||||
91
api/services/red_blue_service.py
Normal file
91
api/services/red_blue_service.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.red_blue import RedBlueChallenge, TeamPairing, TeamSubmission
|
||||
|
||||
|
||||
class RedBlueService:
    """Persistence helpers for red/blue team competitions."""

    @staticmethod
    def submit_prompt(
        *,
        challenge_id: str,
        tenant_id: str,
        team: str,
        prompt: str,
        account_id: str | None,
        end_user_id: str | None,
        session: Session | None = None,
    ) -> TeamSubmission:
        """Persist a team prompt submission (committed immediately)."""
        db_session = session if session is not None else db.session
        submission = TeamSubmission()
        for attr, value in (
            ("red_blue_challenge_id", challenge_id),
            ("tenant_id", tenant_id),
            ("team", team),
            ("prompt", prompt),
            ("account_id", account_id),
            ("end_user_id", end_user_id),
        ):
            setattr(submission, attr, value)
        db_session.add(submission)
        db_session.commit()
        return submission

    @staticmethod
    def select_counterparty_submission(
        *,
        challenge: RedBlueChallenge,
        team: str,
        session: Session | None = None,
    ) -> TeamSubmission | None:
        """Pick the submission to pair against, or None when the rival team has none.

        Simplest policy: the most recent active submission from the opposite team.
        """
        db_session = session if session is not None else db.session
        rival = "blue" if team == "red" else "red"
        query = db_session.query(TeamSubmission).filter(
            TeamSubmission.red_blue_challenge_id == challenge.id,
            TeamSubmission.team == rival,
            TeamSubmission.active.is_(True),
        )
        return query.order_by(TeamSubmission.created_at.desc()).first()

    @staticmethod
    def record_pairing(
        *,
        challenge_id: str,
        tenant_id: str,
        attack_submission_id: str | None,
        defense_submission_id: str | None,
        judge_output_raw: dict[str, Any] | None,
        categories: dict[str, Any] | None,
        judge_rating: int | None,
        judge_feedback: str | None,
        red_points: float,
        blue_points: float,
        tokens_total: int | None,
        elapsed_ms: int | None,
        session: Session | None = None,
    ) -> TeamPairing:
        """Persist the judged outcome of one attack-vs-defense pairing."""
        db_session = session if session is not None else db.session
        pairing = TeamPairing()
        for attr, value in (
            ("red_blue_challenge_id", challenge_id),
            ("tenant_id", tenant_id),
            ("attack_submission_id", attack_submission_id),
            ("defense_submission_id", defense_submission_id),
            ("judge_output_raw", judge_output_raw),
            ("categories", categories),
            ("judge_rating", judge_rating),
            ("judge_feedback", judge_feedback),
            ("red_points", red_points),
            ("blue_points", blue_points),
            ("tokens_total", tokens_total),
            ("elapsed_ms", elapsed_ms),
        ):
            setattr(pairing, attr, value)
        db_session.add(pairing)
        db_session.commit()
        return pairing
|
||||
|
||||
|
||||
144
api/services/scorers/README.md
Normal file
144
api/services/scorers/README.md
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
# Custom Scorer Plugins
|
||||
|
||||
This directory contains custom scorer plugins for challenge leaderboards.
|
||||
|
||||
## Overview
|
||||
|
||||
Scorers compute numeric scores from challenge attempt metrics (tokens, time, rating, success) for ranking on leaderboards when `scoring_strategy = 'custom'`.
|
||||
|
||||
## Built-in Scorers
|
||||
|
||||
### WeightedScorer
|
||||
|
||||
**Entrypoint:** `services.scorers.weighted:WeightedScorer`
|
||||
|
||||
Computes a weighted score combining multiple metrics with configurable bonuses and penalties.
|
||||
|
||||
**Formula:**
|
||||
```
|
||||
score = success_bonus
|
||||
+ (rating × rating_weight)
|
||||
- (elapsed_seconds × time_penalty)
|
||||
- (tokens × token_penalty)
|
||||
```
|
||||
|
||||
**Configuration:**
|
||||
- `success_bonus` (float, default: 100): Base points for successful attempts
|
||||
- `rating_weight` (float, default: 10): Multiplier for judge rating (0-10)
|
||||
- `time_penalty` (float, default: 1.0): Penalty per second elapsed
|
||||
- `token_penalty` (float, default: 0.01): Penalty per token used
|
||||
|
||||
**Example Configuration:**
|
||||
```json
|
||||
{
|
||||
"success_bonus": 100.0,
|
||||
"rating_weight": 10.0,
|
||||
"time_penalty": 1.0,
|
||||
"token_penalty": 0.01
|
||||
}
|
||||
```
|
||||
|
||||
**Example Challenge Setup (via API):**
|
||||
```python
|
||||
{
|
||||
"name": "Advanced Prompt Challenge",
|
||||
"scoring_strategy": "custom",
|
||||
"scoring_plugin_id": "builtin.weighted_scorer",
|
||||
"scoring_entrypoint": "services.scorers.weighted:WeightedScorer",
|
||||
"scoring_config": {
|
||||
"success_bonus": 100.0,
|
||||
"rating_weight": 15.0,
|
||||
"time_penalty": 0.5,
|
||||
"token_penalty": 0.02
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Creating Custom Scorers
|
||||
|
||||
### 1. Implement the ScorerProtocol
|
||||
|
||||
Create a new file in this directory (e.g., `custom.py`):
|
||||
|
||||
```python
|
||||
from typing import Any
|
||||
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
|
||||
|
||||
class MyCustomScorer:
|
||||
def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
|
||||
# Access metrics
|
||||
succeeded = metrics.get('succeeded', False)
|
||||
tokens = metrics.get('tokens_total', 0)
|
||||
elapsed_ms = metrics.get('elapsed_ms', 0)
|
||||
rating = metrics.get('rating', 0)
|
||||
|
||||
# Access configuration
|
||||
multiplier = config.get('multiplier', 1.0)
|
||||
|
||||
# Compute score
|
||||
score = (rating * multiplier) if succeeded else 0.0
|
||||
|
||||
return {
|
||||
'score': score,
|
||||
'details': { # optional
|
||||
'multiplier_used': multiplier
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Register in Challenge
|
||||
|
||||
Set the challenge's scoring fields:
|
||||
|
||||
```python
|
||||
challenge.scoring_strategy = 'custom'
|
||||
challenge.scoring_plugin_id = 'my_custom_scorer'
|
||||
challenge.scoring_entrypoint = 'services.scorers.custom:MyCustomScorer'
|
||||
challenge.scoring_config = {
|
||||
'multiplier': 2.0
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Testing
|
||||
|
||||
Create tests in `api/tests/unit_tests/services/` following the pattern in `test_challenge_scorer_service.py`.
|
||||
|
||||
## Protocol Reference
|
||||
|
||||
### Input Types
|
||||
|
||||
**AttemptMetrics:**
|
||||
- `succeeded` (bool): Whether the challenge was passed
|
||||
- `tokens_total` (int | None): Total tokens used
|
||||
- `elapsed_ms` (int | None): Time taken in milliseconds
|
||||
- `rating` (int | None): Judge rating (0-10)
|
||||
- `created_at` (int | None): Timestamp in epoch milliseconds
|
||||
|
||||
**ScoringContext:**
|
||||
- `tenant_id` (str): Tenant identifier
|
||||
- `app_id` (str): Application identifier
|
||||
- `workflow_id` (str): Workflow identifier
|
||||
- `challenge_id` (str): Challenge identifier
|
||||
- `end_user_id` (str | None): End user identifier (if available)
|
||||
- `timeout_ms` (int): Maximum execution time
|
||||
|
||||
### Output Type
|
||||
|
||||
**ScoringResult:**
|
||||
- `score` (float, required): Computed numeric score
|
||||
- `details` (dict[str, Any] | None, optional): Additional scoring details
|
||||
|
||||
## Error Handling
|
||||
|
||||
- Scorers must return a dict with a `score` key
|
||||
- Exceptions are caught and logged; the attempt is recorded with `score=None`
|
||||
- Scorers are intended to run under a timeout (default: 5s); enforcement is not yet implemented (see the TODO in `ChallengeScorerService`)
|
||||
- Scorers should never return negative scores; use `max(score, 0.0)` to clamp
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Keep it simple**: Scoring should be fast and deterministic
|
||||
2. **Validate config**: Check configuration values and provide defaults
|
||||
3. **Clamp scores**: Ensure scores are non-negative
|
||||
4. **Document formula**: Clearly explain how your scorer works
|
||||
5. **Test edge cases**: Test with missing metrics, zeros, nulls
|
||||
1
api/services/scorers/__init__.py
Normal file
1
api/services/scorers/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Built-in scorer plugins."""
|
||||
66
api/services/scorers/weighted.py
Normal file
66
api/services/scorers/weighted.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""
|
||||
Weighted scorer plugin.
|
||||
|
||||
Computes a weighted score based on success bonus, rating, elapsed time, and token usage.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from services.challenge_scorer_protocol import AttemptMetrics, ScoringContext, ScoringResult
|
||||
|
||||
|
||||
class WeightedScorer:
    """
    Example weighted scorer that blends success, rating, time, and token usage.

    Configuration options:
    - success_bonus (float): Base points for successful attempt (default: 100)
    - rating_weight (float): Multiplier for judge rating (default: 10)
    - time_penalty (float): Penalty per second elapsed (default: 1.0)
    - token_penalty (float): Penalty per token used (default: 0.01)

    Formula (clamped at zero):
        score = success_bonus
              + (rating * rating_weight)
              - (elapsed_seconds * time_penalty)
              - (tokens * token_penalty)
    """

    def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult:
        """Compute the weighted score; missing metrics count as zero."""
        # Success bonus applies only when the attempt passed.
        bonus = config.get("success_bonus", 100.0) if metrics.get("succeeded") else 0.0

        # Rating contribution (judge rating scaled by its weight).
        rating_term = (metrics.get("rating") or 0) * config.get("rating_weight", 10.0)

        # Elapsed-time penalty, expressed per second.
        seconds = (metrics.get("elapsed_ms") or 0) / 1000.0
        time_term = seconds * config.get("time_penalty", 1.0)

        # Token-usage penalty.
        token_term = (metrics.get("tokens_total") or 0) * config.get("token_penalty", 0.01)

        # Final score is never negative.
        total = max(bonus + rating_term - time_term - token_term, 0.0)

        return {
            "score": total,
            "details": {
                "base": bonus,
                "rating_contribution": rating_term,
                "time_penalty": time_term,
                "token_penalty": token_term,
            },
        }
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from flask.testing import FlaskClient
|
||||
|
||||
|
||||
class TestWebChallenges:
    """Integration tests for the public web challenges API."""

    def test_list_and_detail(self, test_client_with_containers: FlaskClient):
        """Listing challenges returns HTTP 200 with a success envelope."""
        # list
        resp = test_client_with_containers.get("/api/web/challenges")
        assert resp.status_code == 200
        data = resp.get_json()
        assert data["result"] == "success"
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from flask.testing import FlaskClient
|
||||
|
||||
|
||||
class TestWebRedBlueChallenges:
    """Integration tests for the public web red/blue challenges API."""

    def test_list(self, test_client_with_containers: FlaskClient):
        """Listing red/blue challenges returns HTTP 200 with a success envelope."""
        resp = test_client_with_containers.get("/api/web/red-blue-challenges")
        assert resp.status_code == 200
        data = resp.get_json()
        assert data["result"] == "success"
|
||||
|
||||
|
||||
144
api/tests/unit_tests/services/test_challenge_scorer_service.py
Normal file
144
api/tests/unit_tests/services/test_challenge_scorer_service.py
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
"""Tests for ChallengeScorerService."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from services.challenge_scorer_service import ChallengeScorerService
|
||||
|
||||
|
||||
class TestChallengeScorerService:
    """Test custom scorer plugin loading and execution."""

    def test_weighted_scorer_success(self):
        """Test WeightedScorer with successful attempt."""
        metrics = {
            "succeeded": True,
            "tokens_total": 1000,
            "elapsed_ms": 5000,  # 5 seconds
            "rating": 8,
            "created_at": 1730000000000,
        }

        config = {
            "success_bonus": 100.0,
            "rating_weight": 10.0,
            "time_penalty": 1.0,
            "token_penalty": 0.01,
        }

        ctx = {
            "tenant_id": "test-tenant",
            "app_id": "test-app",
            "workflow_id": "test-workflow",
            "challenge_id": "test-challenge",
            "timeout_ms": 5000,
        }

        result = ChallengeScorerService.score_with_plugin(
            scorer_plugin_id="builtin.weighted_scorer",
            scorer_entrypoint="services.scorers.weighted:WeightedScorer",
            metrics=metrics,
            config=config,
            ctx=ctx,
        )

        # Expected: 100 (success) + 80 (8*10 rating) - 5 (5s*1.0) - 10 (1000*0.01) = 165
        assert result["score"] == 165.0
        assert "details" in result
        assert result["details"]["base"] == 100.0
        assert result["details"]["rating_contribution"] == 80.0
        assert result["details"]["time_penalty"] == 5.0
        assert result["details"]["token_penalty"] == 10.0

    def test_weighted_scorer_failure(self):
        """Test WeightedScorer with failed attempt (no success bonus)."""
        metrics = {
            "succeeded": False,
            "tokens_total": 500,
            "elapsed_ms": 2000,  # 2 seconds
            "rating": 3,
            "created_at": 1730000000000,
        }

        config = {
            "success_bonus": 100.0,
            "rating_weight": 10.0,
            "time_penalty": 1.0,
            "token_penalty": 0.01,
        }

        ctx = {
            "tenant_id": "test-tenant",
            "app_id": "test-app",
            "challenge_id": "test-challenge",
            "timeout_ms": 5000,
        }

        result = ChallengeScorerService.score_with_plugin(
            scorer_plugin_id="builtin.weighted_scorer",
            scorer_entrypoint="services.scorers.weighted:WeightedScorer",
            metrics=metrics,
            config=config,
            ctx=ctx,
        )

        # Expected: 0 (no success bonus) + 30 (3*10) - 2 (2s*1.0) - 5 (500*0.01) = 23
        assert result["score"] == 23.0

    def test_weighted_scorer_minimum_zero(self):
        """Test WeightedScorer never returns negative scores (clamped at 0)."""
        metrics = {
            "succeeded": False,
            "tokens_total": 10000,  # High token count
            "elapsed_ms": 30000,  # 30 seconds
            "rating": 1,
            "created_at": 1730000000000,
        }

        config = {
            "success_bonus": 100.0,
            "rating_weight": 10.0,
            "time_penalty": 1.0,
            "token_penalty": 0.01,
        }

        ctx = {
            "tenant_id": "test-tenant",
            "app_id": "test-app",
            "challenge_id": "test-challenge",
            "timeout_ms": 5000,
        }

        result = ChallengeScorerService.score_with_plugin(
            scorer_plugin_id="builtin.weighted_scorer",
            scorer_entrypoint="services.scorers.weighted:WeightedScorer",
            metrics=metrics,
            config=config,
            ctx=ctx,
        )

        # Expected: 0 + 10 - 30 - 100 = -120, but clamped to 0
        assert result["score"] == 0.0

    def test_scorer_with_missing_plugin(self):
        """Test error handling for missing plugin."""
        with pytest.raises(ValueError, match="Failed to load scorer plugin"):
            ChallengeScorerService.score_with_plugin(
                scorer_plugin_id="nonexistent",
                scorer_entrypoint="nonexistent.module:NonexistentScorer",
                metrics={},
                config={},
                ctx={"timeout_ms": 5000},
            )

    def test_scorer_with_invalid_entrypoint(self):
        """Test error handling for invalid entrypoint format."""
        # Passing None for both identifiers fails the precondition check.
        with pytest.raises(ValueError, match="scorer_plugin_id and scorer_entrypoint are required"):
            ChallengeScorerService.score_with_plugin(
                scorer_plugin_id=None,
                scorer_entrypoint=None,
                metrics={},
                config={},
                ctx={"timeout_ms": 5000},
            )
|
||||
40
api/tests/unit_tests/services/test_challenge_service.py
Normal file
40
api/tests/unit_tests/services/test_challenge_service.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from models.challenge import ChallengeAttempt
|
||||
from services.challenge_service import ChallengeService
|
||||
|
||||
|
||||
def test_evaluate_outcome_regex_match():
    """Regex evaluation matches case-insensitively and reports mode 'regex' in details."""
    config = {"success_type": "regex", "success_pattern": "secret"}
    succeeded, details = ChallengeService.evaluate_outcome("Hello SECRET", config)
    assert succeeded is True
    assert details.get("mode") == "regex"
|
||||
|
||||
def test_evaluate_outcome_contains():
    """Substring ('contains') evaluation succeeds when the pattern appears in the output."""
    config = {"success_type": "contains", "success_pattern": "world"}
    succeeded, _ = ChallengeService.evaluate_outcome("hello world", config)
    assert succeeded is True
|
||||
|
||||
|
||||
def test_record_attempt_creates_row(mocker):
    """record_attempt builds a ChallengeAttempt and persists it via the injected session."""
    # Stand-in for db.session — we only verify add/commit interactions.
    fake_session = mocker.MagicMock()

    attempt = ChallengeService.record_attempt(
        tenant_id="t1",
        challenge_id="c1",
        end_user_id=None,
        account_id=None,
        workflow_run_id=None,
        succeeded=True,
        score=10.0,
        session=fake_session,
    )

    assert isinstance(attempt, ChallengeAttempt)
    fake_session.add.assert_called_once()
    fake_session.commit.assert_called_once()
|
||||
|
||||
34
api/tests/unit_tests/services/test_red_blue_service.py
Normal file
34
api/tests/unit_tests/services/test_red_blue_service.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
from services.red_blue_service import RedBlueService
|
||||
|
||||
|
||||
def test_submit_prompt_creates_submission(mocker):
    """submit_prompt records the team on the submission and persists it through the session."""
    fake_session = mocker.MagicMock()

    submission = RedBlueService.submit_prompt(
        challenge_id="cid",
        tenant_id="tid",
        team="red",
        prompt="attack",
        account_id="aid",
        end_user_id="eid",
        session=fake_session,
    )

    assert submission.team == "red"
    fake_session.add.assert_called_once()
    fake_session.commit.assert_called_once()
|
||||
|
||||
|
||||
def test_select_counterparty_submission_latest_active(mocker):
    """For a red player, the service selects the latest active submission from the blue side."""
    challenge = SimpleNamespace(id="cid")
    fake_session = mocker.MagicMock()

    # Stub the query chain so .first() yields an opposing (blue) submission.
    ordered_query = fake_session.query.return_value.filter.return_value.order_by.return_value
    ordered_query.first.return_value = SimpleNamespace(id="subid", team="blue")

    counterparty = RedBlueService.select_counterparty_submission(
        challenge=challenge, team="red", session=fake_session
    )
    assert counterparty.team == "blue"
|
||||
|
||||
|
||||
633
docs/design/prompt-challenges.md
Normal file
633
docs/design/prompt-challenges.md
Normal file
|
|
@ -0,0 +1,633 @@
|
|||
## Prompt Hacking Challenges — Design
|
||||
|
||||
### Overview
|
||||
|
||||
Enable developer-authored prompt hacking challenges inside Dify’s workflow builder via a new workflow node. Players can register/login using the existing web auth and compete on challenges. Attempts are recorded server-side and leaderboards are exposed via public web APIs.
|
||||
|
||||
### Goals
|
||||
|
||||
- Add a first-class workflow node that evaluates success/failure against developer-specified criteria.
|
||||
- Add a Judging LLM node that compares model outputs to the challenge goal and produces pass/fail, textual feedback, and a 0–10 rating.
|
||||
- Persist attempts with metadata for scoring and leaderboards.
|
||||
- Reuse existing account/web auth for players.
|
||||
- Fit Dify’s DDD/Clean Architecture: models, services, controllers, workflow nodes, and frontend builder integration.
|
||||
|
||||
### Non-Goals
|
||||
|
||||
- Anti-cheat measures beyond simple rate limiting.
|
||||
- Complex custom scoring plugins (design leaves a hook for future work).
|
||||
|
||||
## Architecture Summary
|
||||
|
||||
### Backend components
|
||||
|
||||
- Models (SQLAlchemy)
|
||||
- Challenge
|
||||
- id, tenant_id, app_id, workflow_id
|
||||
- name, description, goal (plain text shown to players)
|
||||
- success_type: one of ['regex', 'contains', 'custom']
|
||||
- success_pattern: string (regex or substring depending on type)
|
||||
- secret_ref: reference to server-side secret (never exposed to clients)
|
||||
- scoring_strategy: one of ['first', 'fastest', 'fewest_tokens', 'custom']
|
||||
- is_active: bool
|
||||
- created_by, created_at, updated_by, updated_at
|
||||
- ChallengeAttempt
|
||||
- id, tenant_id, challenge_id (FK), end_user_id (FK), workflow_run_id (optional FK)
|
||||
- succeeded: bool
|
||||
- score: numeric (meaning depends on strategy)
|
||||
- judge_rating: int (0–10)
|
||||
- judge_feedback: text
|
||||
- judge_output_raw: jsonb (optional; structured judgement payload)
|
||||
- tokens_total: int (when available from run metrics)
|
||||
- elapsed_ms: int (when available)
|
||||
- created_at
|
||||
|
||||
- Service layer (e.g., `ChallengeService`, `ChallengeJudgeService`)
|
||||
- evaluate_outcome(output, cfg) -> (succeeded: bool, details: dict)
|
||||
- judge_with_llm(goal, response, cfg) -> { passed: bool, rating: int, feedback: str, raw?: dict }
|
||||
- evaluate_with_plugin(evaluator_ref, goal, response, ctx) -> { passed: bool, rating?: int, feedback?: str, raw?: dict }
|
||||
- score_with_plugin(scorer_ref, attempt_metrics, ctx) -> { score: number, details?: dict }
|
||||
- record_attempt(tenant_id, challenge_id, end_user_id, run_meta, succeeded) -> ChallengeAttempt
|
||||
- get_leaderboard(challenge_id, limit, strategy) -> list
|
||||
- get_challenge_public(challenge_id) -> dict
|
||||
|
||||
- Controllers
|
||||
- Console (for creators): CRUD on challenges under the workspace (`/console/api/challenges`)
|
||||
- Web (for players): public endpoints under `/web/api/challenges`
|
||||
- List active challenges, fetch details, fetch leaderboard
|
||||
- Optional auth via existing web login for personalization, otherwise anonymous read
|
||||
|
||||
- Workflow nodes
|
||||
- NodeType: `challenge-evaluator`
|
||||
- Config
|
||||
- `challenge_id`: reference to a stored Challenge (preferred)
|
||||
- or inline config: `success_type`, `success_pattern`, `scoring_strategy`
|
||||
- `mask_variables`: string[] — variable names to redact in logs
|
||||
- Execution
|
||||
- Consumes upstream content (typically latest assistant output)
|
||||
- Evaluates success with `ChallengeService.evaluate_outcome`
|
||||
- If an `EndUser` context exists and a `challenge_id` is present, writes `ChallengeAttempt`
|
||||
- Outputs `{ challenge_succeeded: boolean, message?: string }`, optionally passes through original output
|
||||
- NodeType: `judging-llm`
|
||||
- Purpose: judge a model response against the challenge goal using an LLM rubric.
|
||||
- Config
|
||||
- `judge_model`: provider/name/version
|
||||
- `temperature`, `max_tokens`, other model params
|
||||
- `rubric_prompt_template`: template with placeholders for {goal}, {response}, optional {hints}
|
||||
- `rating_scale`: default 0–10; configurable upper bound optional
|
||||
- `pass_threshold`: integer (default 5)
|
||||
- Inputs
|
||||
- `goal`: the attacking goal or acceptance criteria
|
||||
- `response`: the model output to evaluate
|
||||
- Execution
|
||||
- Calls `ChallengeJudgeService.judge_with_llm()` to obtain structured judgement
|
||||
- Returns outputs `{ judge_passed: boolean, judge_rating: number (0–10), judge_feedback: string, judge_raw?: object }`
|
||||
- Integration
|
||||
- Downstream `challenge-evaluator` can consume `judge_passed` and `judge_rating` to record an attempt instead of regex/contains
|
||||
- Alternatively, `challenge-evaluator` may support an `evaluation_mode: 'rules' | 'llm-judge'` to invoke judging internally
|
||||
- NodeType: `team-challenge` (Red/Blue orchestrator)
|
||||
- Purpose: orchestrate two-sided challenges where players choose Red (attack) or Blue (defense) and submit prompts accordingly. The node pairs attacks and defenses, configures the LLM, invokes judging, and emits scores.
|
||||
- Config
|
||||
- `red_blue_challenge_id`: reference to a Red/Blue challenge definition
|
||||
- `defense_selection_policy`: 'latest_best' | 'random_active' | 'round_robin' | 'request_new_if_none'
|
||||
  - `attack_selection_policy`: the same policy options, applied when selecting an attack for the defense-side evaluation path
|
||||
- `judge_suite`: list of category tests to run (e.g., CBRNE, SA, SH, RWH, V, M)
|
||||
- `scoring_strategy`: 'red_blue_ratio' | 'custom'
|
||||
- Inputs
|
||||
- `team_choice`: 'red' | 'blue'
|
||||
- `attack_prompt?`: string (when `team_choice = 'red'`)
|
||||
- `defense_prompt?`: string (when `team_choice = 'blue'`)
|
||||
- Execution (high level)
|
||||
- If `team_choice = 'red'`:
|
||||
- Persist `attack_prompt` submission
|
||||
- Load a defense by policy; if none, optionally signal Blue to provide one (async) and fall back to last known
|
||||
- Configure LLM with defense as system prompt, submit attack as user message
|
||||
- Run `judge_suite` via `judging-llm`; compute Red score
|
||||
- If `team_choice = 'blue'`:
|
||||
- Persist `defense_prompt` submission
|
||||
- Load an attack by policy; if none, signal Red to provide one (async) and fall back to last known
|
||||
- Configure LLM with defense as system prompt and submit the loaded attack
|
||||
- Run `judge_suite`; compute Blue score (prevention)
|
||||
- Persist pairing and metrics
|
||||
- Outputs
|
||||
- `{ team: 'red'|'blue', judge_passed: boolean, judge_rating: number, judge_feedback: string, categories: Record<string, boolean|number>, team_points: number, total_points: number }`
|
||||
|
||||
### Frontend components
|
||||
|
||||
- Workflow builder
|
||||
- Add `Prompt Challenge` to the node palette
|
||||
- Add `Judging LLM` to the node palette
|
||||
- Node editor panel: select existing Challenge or define inline success criteria
|
||||
- Judging panel: choose model, edit rubric prompt, set pass threshold, preview structured outputs
|
||||
- Custom evaluator/scorer panels: choose plugin and configure JSON settings with live schema validation
|
||||
- I18n strings in `web/i18n/en-US/`
|
||||
- Challenge display & theming
|
||||
- Author-provided instructions (Markdown) render before/alongside the task input area
|
||||
- Theme tokens (colors, logo, background) applied to challenge pages
|
||||
- Optional hero image/video via existing `UploadFile` and signed URLs
|
||||
|
||||
- Optional player UX (phase 2)
|
||||
- `/challenges` list and `/challenges/[id]` details with leaderboard
|
||||
- `/challenge-collections` list and `/challenge-collections/[id]` details with collection leaderboard
|
||||
- Use existing web login endpoints
|
||||
|
||||
## Data Model
|
||||
|
||||
Minimal table shapes (final columns managed in migration):
|
||||
|
||||
```sql
|
||||
-- challenges
|
||||
id (uuid pk)
|
||||
tenant_id (uuid fk)
|
||||
app_id (uuid fk)
|
||||
workflow_id (uuid fk)
|
||||
name (text)
|
||||
description (text)
|
||||
goal (text)
|
||||
success_type (text)
|
||||
success_pattern (text)
|
||||
secret_ref (text)
|
||||
scoring_strategy (text)
|
||||
is_active (bool)
|
||||
created_by (uuid)
|
||||
created_at (timestamp)
|
||||
updated_by (uuid)
|
||||
updated_at (timestamp)
|
||||
|
||||
-- challenge_attempts
|
||||
id (uuid pk)
|
||||
tenant_id (uuid fk)
|
||||
challenge_id (uuid fk)
|
||||
end_user_id (uuid fk)
|
||||
workflow_run_id (uuid fk, nullable)
|
||||
succeeded (bool)
|
||||
score (numeric)
|
||||
tokens_total (int)
|
||||
elapsed_ms (int)
|
||||
created_at (timestamp)
|
||||
```
|
||||
|
||||
Additional columns for judging:
|
||||
|
||||
```sql
|
||||
ALTER TABLE challenge_attempts
|
||||
ADD COLUMN judge_rating integer,
|
||||
ADD COLUMN judge_feedback text,
|
||||
ADD COLUMN judge_output_raw jsonb;
|
||||
```
|
||||
|
||||
Optional columns for custom evaluators/scorers:
|
||||
|
||||
```sql
|
||||
ALTER TABLE challenges
|
||||
ADD COLUMN evaluator_type text DEFAULT 'rules', -- one of: rules, llm-judge, custom
|
||||
ADD COLUMN evaluator_plugin_id text,
|
||||
ADD COLUMN evaluator_entrypoint text, -- e.g., "pkg.module:Evaluator"
|
||||
ADD COLUMN evaluator_config jsonb,
|
||||
ADD COLUMN scoring_plugin_id text,
|
||||
ADD COLUMN scoring_entrypoint text, -- e.g., "pkg.module:Scorer"
|
||||
ADD COLUMN scoring_config jsonb;
|
||||
```
|
||||
|
||||
Additional tables for Red/Blue team challenges:
|
||||
|
||||
```sql
|
||||
-- red_blue_challenges (definition)
|
||||
CREATE TABLE red_blue_challenges (
|
||||
id uuid PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
tenant_id uuid NOT NULL REFERENCES tenants(id),
|
||||
app_id uuid NOT NULL REFERENCES apps(id),
|
||||
workflow_id uuid REFERENCES workflows(id),
|
||||
name text NOT NULL,
|
||||
description text,
|
||||
judge_suite jsonb NOT NULL, -- list of categories/tests
|
||||
defense_selection_policy text NOT NULL DEFAULT 'latest_best',
|
||||
attack_selection_policy text NOT NULL DEFAULT 'latest_best',
|
||||
scoring_strategy text NOT NULL DEFAULT 'red_blue_ratio',
|
||||
theme jsonb,
|
||||
instructions_md text,
|
||||
is_active boolean NOT NULL DEFAULT true,
|
||||
created_by uuid,
|
||||
created_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_by uuid,
|
||||
updated_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- team_submissions (attack/defense prompts)
|
||||
CREATE TABLE team_submissions (
|
||||
id uuid PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
red_blue_challenge_id uuid NOT NULL REFERENCES red_blue_challenges(id) ON DELETE CASCADE,
|
||||
tenant_id uuid NOT NULL,
|
||||
account_id uuid NULL,
|
||||
end_user_id uuid NULL,
|
||||
team text NOT NULL CHECK (team in ('red','blue')),
|
||||
prompt text NOT NULL,
|
||||
active boolean NOT NULL DEFAULT true,
|
||||
created_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- pairings (which attack tested against which defense)
|
||||
CREATE TABLE team_pairings (
|
||||
id uuid PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
red_blue_challenge_id uuid NOT NULL REFERENCES red_blue_challenges(id) ON DELETE CASCADE,
|
||||
tenant_id uuid NOT NULL,
|
||||
attack_submission_id uuid REFERENCES team_submissions(id),
|
||||
defense_submission_id uuid REFERENCES team_submissions(id),
|
||||
judge_output_raw jsonb,
|
||||
categories jsonb, -- e.g., per-suite pass/fail or rating
|
||||
judge_rating integer,
|
||||
judge_feedback text,
|
||||
red_points numeric NOT NULL DEFAULT 0,
|
||||
blue_points numeric NOT NULL DEFAULT 0,
|
||||
tokens_total int,
|
||||
elapsed_ms int,
|
||||
created_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
```
|
||||
|
||||
## API Design
|
||||
|
||||
### Console (creator)
|
||||
|
||||
- `GET /console/api/challenges?app_id=...` — list
|
||||
- `POST /console/api/challenges` — create
|
||||
- `GET /console/api/challenges/{id}` — retrieve
|
||||
- `PATCH /console/api/challenges/{id}` — update
|
||||
- `DELETE /console/api/challenges/{id}` — delete
|
||||
|
||||
All require console `login_required` and membership in tenant.
|
||||
|
||||
### Web (player)
|
||||
|
||||
- `GET /web/api/challenges` — list active challenges (public)
|
||||
- `GET /web/api/challenges/{id}` — details (public)
|
||||
- `GET /web/api/challenges/{id}/leaderboard?limit=...` — leaderboard (public)
|
||||
|
||||
Player login uses existing web login endpoints to obtain access token when needed for personalization.
|
||||
|
||||
### Collections
|
||||
|
||||
Console (creator)
|
||||
|
||||
- `GET /console/api/challenge-collections?app_id=...`
|
||||
- `POST /console/api/challenge-collections`
|
||||
- `GET /console/api/challenge-collections/{id}`
|
||||
- `PATCH /console/api/challenge-collections/{id}`
|
||||
- `DELETE /console/api/challenge-collections/{id}`
|
||||
- `PUT /console/api/challenge-collections/{id}/challenges` (set membership and order)
|
||||
|
||||
Web (player)
|
||||
|
||||
- `GET /web/api/challenge-collections` — list public collections
|
||||
- `GET /web/api/challenge-collections/{id}` — collection details (instructions/theme), included challenges
|
||||
- `GET /web/api/challenge-collections/{id}/leaderboard?limit=...` — collection leaderboard
|
||||
|
||||
### Red/Blue team challenge APIs
|
||||
|
||||
Console (creator)
|
||||
|
||||
- `POST /console/api/red-blue-challenges` — create
|
||||
- `GET /console/api/red-blue-challenges?app_id=...` — list
|
||||
- `GET /console/api/red-blue-challenges/{id}` — detail
|
||||
- `PATCH /console/api/red-blue-challenges/{id}` — update
|
||||
- `DELETE /console/api/red-blue-challenges/{id}` — delete
|
||||
- `GET /console/api/red-blue-challenges/{id}/pairings` — view pairings/metrics
|
||||
|
||||
Web (player)
|
||||
|
||||
- `POST /web/api/red-blue-challenges/{id}/join` — join red or blue (payload: { team })
|
||||
- `POST /web/api/red-blue-challenges/{id}/submit` — submit attack/defense (payload: { team, prompt })
|
||||
- `GET /web/api/red-blue-challenges/{id}` — public info (instructions, theme, leaderboard snapshot)
|
||||
- `GET /web/api/red-blue-challenges/{id}/leaderboard?limit=...` — red vs blue standings
|
||||
|
||||
## Player Registration & Identity
|
||||
|
||||
### Registration and login
|
||||
|
||||
- Reuse existing web auth service for player accounts:
|
||||
- Email/password login: `POST /web/api/login`
|
||||
- Email code login: `POST /web/api/login/email-code/send` + `POST /web/api/login/email-code/verify` (existing patterns)
|
||||
- Add an explicit web registration endpoint (thin wrapper around `RegisterService.register`):
|
||||
- `POST /web/api/register` (payload: email, name, password | email-code)
|
||||
- Behavior:
|
||||
- `create_workspace_required = False` to avoid auto-creating workspaces for players
|
||||
- `status = active`
|
||||
- Set `interface_language` from `Accept-Language` as done in OAuth flow
|
||||
- On success, also create or associate a per-tenant `EndUser` record so gameplay runs can be attributed consistently.
|
||||
|
||||
### Player identity during runs
|
||||
|
||||
- Each gameplay run already has an `EndUser` context. For registered players:
|
||||
- When a player is authenticated, resolve (or lazily create) an `EndUser` tied to their `account_id` for the current tenant/app
|
||||
- Persist `end_user_id` to `ChallengeAttempt` as today; optionally also store `account_id` for simplified leaderboard personalization
|
||||
|
||||
### Optional schema addition
|
||||
|
||||
```sql
|
||||
ALTER TABLE challenge_attempts
|
||||
ADD COLUMN account_id uuid NULL;
|
||||
```
|
||||
|
||||
This enables direct joins to accounts for notification and profile display without traversing end-user mappings.
|
||||
|
||||
### Player profile (optional)
|
||||
|
||||
Introduce a lightweight `player_profiles` table for nickname/avatar/notification preferences without touching `account` directly:
|
||||
|
||||
```sql
|
||||
CREATE TABLE player_profiles (
|
||||
account_id uuid PRIMARY KEY REFERENCES accounts(id) ON DELETE CASCADE,
|
||||
display_name text,
|
||||
avatar_url text,
|
||||
notify_on_first_blood boolean DEFAULT true,
|
||||
notify_on_record_beaten boolean DEFAULT true,
|
||||
created_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
```
|
||||
|
||||
## Workflow Node Execution
|
||||
|
||||
1. Node receives upstream output (string or structured content). A typical placement is after an LLM node.
|
||||
2. Node loads Challenge config (stored by `challenge_id` or inline).
|
||||
3. Node evaluates success by either rules or judging:
|
||||
- `regex`: test pattern against text output
|
||||
- `contains`: case-insensitive substring match
|
||||
- `llm-judge`: call the `judging-llm` node (or internal judge) to obtain `{ judge_passed, judge_rating, judge_feedback }`
|
||||
- `custom`: call `evaluate_with_plugin` using configured `evaluator_plugin_id`/`evaluator_entrypoint`
|
||||
4. If `EndUser` and `challenge_id` present, record a `ChallengeAttempt` with run metrics (tokens, elapsed), and when available, `judge_rating`/`judge_feedback`.
|
||||
5. Node outputs
|
||||
- Rules mode: `{ challenge_succeeded: boolean, message?: string }`
|
||||
- Judging mode: `{ challenge_succeeded: boolean, judge_rating: number, judge_feedback: string }`
|
||||
- Pass through original output for chaining when needed.
|
||||
|
||||
For collections, attempts are recorded per challenge as usual. Collection leaderboard aggregation is computed over a player’s best attempt per challenge, combined using the collection’s `scoring_strategy` (e.g., sum of scores, total elapsed_ms, etc.).
|
||||
|
||||
## Scoring Strategies
|
||||
|
||||
- `first`: first successful attempt time wins (leaderboard sorted by earliest `created_at`).
|
||||
- `fastest`: success with lowest `elapsed_ms` wins.
|
||||
- `fewest_tokens`: success with lowest `tokens_total` wins.
|
||||
- `highest_rating`: success with the highest `judge_rating` wins; ties broken by earliest `created_at`.
|
||||
- `custom`: compute via `score_with_plugin` using `scoring_plugin_id`/`scoring_entrypoint`.
|
||||
|
||||
Collection strategies:
|
||||
|
||||
- `sum`: sum of per-challenge scores in the collection (uses built-in or custom scoring per challenge)
|
||||
- `fastest_total`: sum of `elapsed_ms` of successful best attempts (lower is better)
|
||||
- `fewest_tokens_total`: sum of `tokens_total` of successful best attempts (lower is better)
|
||||
- `highest_avg_rating`: average of `judge_rating` across completed challenges (higher is better)
|
||||
- `custom`: plugin-defined; service calls `score_with_plugin` at collection level with a list of per-challenge metrics
|
||||
|
||||
Red/Blue team scoring:
|
||||
|
||||
- Base idea: award Red points for breakthroughs and Blue points for prevented attacks.
|
||||
- Suggested defaults per pairing:
|
||||
- For each category in the judge suite (e.g., CBRNE, SA, SH, RWH, V, M):
|
||||
- If the attack bypasses defense (category breach), Red +1
|
||||
- If defense prevents (no breach), Blue +1
|
||||
- Bonus based on `judge_rating` magnitude for breakthrough severity (e.g., Red +round(rating/3))
|
||||
- Time/token penalties can reduce points to encourage efficient strategies
|
||||
- Ratio-based standings:
|
||||
- Red ratio = Red points / (Red points + Blue points)
|
||||
- Blue ratio = Blue points / (Red points + Blue points)
|
||||
- Custom plugin scoring:
|
||||
- Provide all pairing metrics to a scorer plugin to compute per-pairing or cumulative standings
|
||||
|
||||
## Custom Evaluators & Scorers
|
||||
|
||||
This section specifies how custom evaluation and scoring plugins integrate with challenges.
|
||||
|
||||
### Concepts
|
||||
|
||||
- Evaluator: decides whether a response meets the goal. May optionally emit a rating (0–10) and textual feedback.
|
||||
- Scorer: converts an attempt’s metrics (e.g., elapsed time, tokens, rating) into a numeric score for leaderboards.
|
||||
|
||||
### Data model
|
||||
|
||||
- `challenges.evaluator_type`: one of `rules`, `llm-judge`, or `custom`.
|
||||
- `challenges.evaluator_plugin_id`, `evaluator_entrypoint`, `evaluator_config`: identify and configure the evaluator plugin when `custom` is selected.
|
||||
- `challenges.scoring_plugin_id`, `scoring_entrypoint`, `scoring_config`: identify and configure the scorer plugin when `scoring_strategy = 'custom'`.
|
||||
|
||||
### Service interfaces
|
||||
|
||||
Evaluator interface (Python):
|
||||
|
||||
```python
|
||||
class EvaluatorContext(TypedDict, total=False):
|
||||
tenant_id: str
|
||||
app_id: str
|
||||
workflow_id: str
|
||||
challenge_id: str
|
||||
end_user_id: str | None
|
||||
variables: dict[str, Any] # sanitized runtime variables
|
||||
timeout_ms: int
|
||||
|
||||
class EvaluatorResult(TypedDict, total=False):
|
||||
passed: bool
|
||||
rating: int # 0–10 (optional)
|
||||
feedback: str # textual feedback for player (optional)
|
||||
raw: dict[str, Any] # internal diagnostics (optional)
|
||||
|
||||
class EvaluatorProtocol(Protocol):
|
||||
def evaluate(self, goal: str, response: str, config: dict[str, Any], ctx: EvaluatorContext) -> EvaluatorResult: ...
|
||||
```
|
||||
|
||||
Scorer interface (Python):
|
||||
|
||||
```python
|
||||
class ScoringContext(TypedDict, total=False):
|
||||
tenant_id: str
|
||||
app_id: str
|
||||
workflow_id: str
|
||||
challenge_id: str
|
||||
end_user_id: str | None
|
||||
timeout_ms: int
|
||||
|
||||
class AttemptMetrics(TypedDict, total=False):
|
||||
succeeded: bool
|
||||
tokens_total: int | None
|
||||
elapsed_ms: int | None
|
||||
rating: int | None
|
||||
created_at: int | None # epoch ms
|
||||
|
||||
class ScoringResult(TypedDict, total=False):
|
||||
score: float
|
||||
details: dict[str, Any] | None
|
||||
|
||||
class ScorerProtocol(Protocol):
|
||||
def score(self, metrics: AttemptMetrics, config: dict[str, Any], ctx: ScoringContext) -> ScoringResult: ...
|
||||
```
|
||||
|
||||
### Discovery and loading
|
||||
|
||||
- Plugins are discovered via the existing plugin manager. Each plugin exposes one or more entrypoints (e.g., `pkg.module:Evaluator`).
|
||||
- `evaluator_plugin_id`/`evaluator_entrypoint` and `scoring_plugin_id`/`scoring_entrypoint` identify the target callables.
|
||||
- Services load plugins lazily and cache handles with safe import guards.
|
||||
|
||||
### Execution flow
|
||||
|
||||
1) For `evaluator_type = 'custom'`, the `challenge-evaluator` node calls `evaluate_with_plugin` with `(goal, response, evaluator_config, ctx)`.
|
||||
2) If `EvaluatorResult.passed` is true, set `challenge_succeeded = True` and persist `judge_rating`/`judge_feedback` if provided.
|
||||
3) For `scoring_strategy = 'custom'`, call `score_with_plugin` with attempt metrics to compute `score`.
|
||||
4) Persist `ChallengeAttempt` with plugin-derived fields.
|
||||
|
||||
### Frontend configuration
|
||||
|
||||
- Prompt Challenge panel
|
||||
- Evaluation mode: Rules | Judging LLM | Custom Evaluator
|
||||
- When Custom Evaluator is chosen:
|
||||
- Plugin selector: lists available evaluator plugins by `plugin_id` and exposed entrypoints
|
||||
- JSON config editor with schema-based validation (optional `$schema` per plugin)
|
||||
- Scoring section
|
||||
- Strategy: First | Fastest | Fewest Tokens | Highest Rating | Custom
|
||||
- When Custom is chosen: plugin selector + JSON config editor
|
||||
|
||||
### Security & sandboxing
|
||||
|
||||
- Plugins run under server control with:
|
||||
- Timeouts (default 5s) and memory ceilings; cancellation on overrun
|
||||
- No network access by default (opt-in allowlist if ever needed)
|
||||
- Sanitized inputs: secrets removed; only whitelisted variables passed
|
||||
- Structured error mapping; no stack traces leaked to players
|
||||
|
||||
### Error handling & observability
|
||||
|
||||
- If plugin load or execution fails, treat as non-pass and record a generic failure reason.
|
||||
- Emit structured logs/events with plugin identifiers and durations (no sensitive content).
|
||||
- Surface minimal feedback to players; detailed diagnostics remain internal.
|
||||
|
||||
### Examples
|
||||
|
||||
Evaluator (substring with banned terms):
|
||||
|
||||
```python
|
||||
class SimpleEvaluator:
|
||||
def evaluate(self, goal, response, config, ctx):
|
||||
required = config.get('must_contain', [])
|
||||
banned = set(map(str.lower, config.get('banned', [])))
|
||||
if any(w.lower() in response.lower() for w in banned):
|
||||
return {'passed': False, 'feedback': 'Banned content detected', 'rating': 2}
|
||||
if all(w.lower() in response.lower() for w in required):
|
||||
return {'passed': True, 'feedback': 'Meets criteria', 'rating': 8}
|
||||
return {'passed': False, 'feedback': 'Missing required signal', 'rating': 5}
|
||||
```
|
||||
|
||||
Scorer (weighted combo):
|
||||
|
||||
```python
|
||||
class WeightedScorer:
|
||||
def score(self, metrics, config, ctx):
|
||||
base = 0.0
|
||||
if metrics.get('succeeded'):
|
||||
base += config.get('success_bonus', 100)
|
||||
rating = metrics.get('rating') or 0
|
||||
elapsed = metrics.get('elapsed_ms') or 0
|
||||
tokens = metrics.get('tokens_total') or 0
|
||||
score = base + rating * config.get('rating_weight', 10) \
|
||||
- (elapsed / 1000.0) * config.get('time_penalty', 1.0) \
|
||||
- tokens * config.get('token_penalty', 0.01)
|
||||
return {'score': max(score, 0.0)}
|
||||
```
|
||||
|
||||
## Security & Privacy
|
||||
|
||||
- Never expose `secret_ref` or derived secrets to clients or node outputs.
|
||||
- Redact configured `mask_variables` in logs and stored attempt details.
|
||||
- Apply rate limiting using existing helpers to mitigate brute-force attempts.
|
||||
- Store minimal details on failed attempts to reduce information leakage.
|
||||
- Sanitize Markdown instructions to prevent XSS; allow a safe subset (links/images) with rel=noopener.
|
||||
- Theme application is constrained to a whitelist of CSS variables and asset URLs served via signed URLs.
|
||||
|
||||
## Testing Plan
|
||||
|
||||
- Service unit tests
|
||||
- `evaluate_outcome` for regex/contains (edge cases, unicode, multiline)
|
||||
- `judge_with_llm` deterministic tests with mocked LLM returning structured payloads
|
||||
- `record_attempt` scoring aggregation and sorting
|
||||
- Node tests
|
||||
- Given inputs, assert success/failure and resulting outputs
|
||||
- Judging node: asserts `{ judge_passed, judge_rating, judge_feedback }` shape and thresholds
|
||||
- When `challenge_id` present, attempts are written; when not, none are written
|
||||
- API tests
|
||||
- Console CRUD happy paths and permissions
|
||||
- Web endpoints list/details/leaderboard
|
||||
- Frontend
|
||||
- Panel validation, serialization/deserialization of node config
|
||||
- Judging panel: model selection, rubric template binding, threshold validation
|
||||
- Node palette presence
|
||||
- Challenge instructions: Markdown renderer sanitization, link and image handling
|
||||
- Theming: verify CSS variable injection, dark/light modes, and fallback to defaults
|
||||
- Collections UI: ordering, visibility filtering, collection leaderboard rendering
|
||||
|
||||
## Rollout
|
||||
|
||||
1. DB migrations: create `challenges`, `challenge_attempts` tables; add judging columns.
|
||||
2. Backend: models, service, console/web controllers, workflow node, `NodeType` and node mapping registration.
|
||||
3. Frontend: add block enum, node + panel components (Prompt Challenge, Judging LLM), node palette default, i18n entries.
|
||||
4. QA: run `make lint`, `make type-check`, and unit tests; `pnpm lint` and tests for web.
|
||||
5. Documentation: link this design from contributor docs as needed.
|
||||
|
||||
## Open Questions / Future Work
|
||||
|
||||
- Anti-cheat signals and anomaly detection.
|
||||
- Custom evaluator/scoring plugin hooks with sandboxing.
|
||||
- Team competitions and seasons.
|
||||
- Per-challenge rate limits and cooldowns.
|
||||
|
||||
## Notifications
|
||||
|
||||
### Events
|
||||
|
||||
- `challenge_first_blood`: emitted when the first successful attempt occurs for a challenge
|
||||
- `challenge_record_beaten`: emitted when a leaderboard record is surpassed under the active scoring strategy
|
||||
- `team_pairing_completed`: emitted after each Red/Blue pairing is judged with per-team points
|
||||
|
||||
### Delivery channels
|
||||
|
||||
- In-app (console): add a section in the console UI for challenge events; poll or use server-sent events
|
||||
- Email (optional): send via existing email task infra (e.g., Celery tasks)
|
||||
- Webhook (optional): per-tenant webhook endpoint configured in workspace settings to receive challenge events
|
||||
|
||||
### Payloads
|
||||
|
||||
```json
|
||||
{
|
||||
"event": "challenge_record_beaten",
|
||||
"challenge_id": "...",
|
||||
"scoring_strategy": "highest_rating",
|
||||
"previous_record": { "account_id": "...", "score": 95.2 },
|
||||
"new_record": { "account_id": "...", "score": 96.8 },
|
||||
"occurred_at": 1730000000000
|
||||
}
|
||||
```
|
||||
|
||||
Red/Blue pairing example:
|
||||
|
||||
```json
|
||||
{
|
||||
"event": "team_pairing_completed",
|
||||
"red_blue_challenge_id": "...",
|
||||
"pairing_id": "...",
|
||||
"attack_submission_id": "...",
|
||||
"defense_submission_id": "...",
|
||||
"categories": { "CBRNE": true, "SA": false, "SH": true },
|
||||
"judge_rating": 8,
|
||||
"red_points": 4,
|
||||
"blue_points": 2,
|
||||
"occurred_at": 1730000000001
|
||||
}
|
||||
```
|
||||
|
||||
### Triggers in services
|
||||
|
||||
- After `record_attempt`, re-evaluate leaderboard head for the challenge against the prior head
|
||||
- If the head changed and meets trigger criteria, enqueue notification tasks
|
||||
- Respect player profile preferences (`notify_on_first_blood`, `notify_on_record_beaten`)
|
||||
|
||||
### Player-facing feedback
|
||||
|
||||
- Immediate feedback comes from node outputs (e.g., `judge_feedback`, `judge_rating`)
|
||||
- Aggregated notifications (record beaten, first blood) are async and opt-in per player preferences
|
||||
|
||||
|
||||
102
web/__tests__/challenge-submission.test.ts
Normal file
102
web/__tests__/challenge-submission.test.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import { submitChallengeAttempt } from '@/service/challenges'
|
||||
import { postPublic } from '@/service/base'
|
||||
import { PUBLIC_API_PREFIX } from '@/config'
|
||||
|
||||
jest.mock('@/service/base', () => ({
|
||||
getPublic: jest.fn(),
|
||||
postPublic: jest.fn(),
|
||||
}))
|
||||
|
||||
const mockedPostPublic = postPublic as jest.MockedFunction<typeof postPublic>
|
||||
const originalFetch = globalThis.fetch
|
||||
let fetchMock: jest.Mock
|
||||
|
||||
describe('submitChallengeAttempt', () => {
|
||||
beforeEach(() => {
|
||||
fetchMock = jest.fn()
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch
|
||||
|
||||
mockedPostPublic.mockReset()
|
||||
mockedPostPublic.mockResolvedValue({ result: 'success' } as any)
|
||||
|
||||
localStorage.clear()
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
jest.clearAllMocks()
|
||||
})
|
||||
|
||||
afterAll(() => {
|
||||
globalThis.fetch = originalFetch
|
||||
})
|
||||
|
||||
it('throws when challenge web app is not published', async () => {
|
||||
await expect(
|
||||
submitChallengeAttempt('challenge-id', 'app-id', undefined, 'chat', 'hello'),
|
||||
).rejects.toThrow('Challenge app is not published')
|
||||
|
||||
expect(fetchMock).not.toHaveBeenCalled()
|
||||
expect(mockedPostPublic).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('requests a passport token and submits chat attempts through /chat-messages', async () => {
|
||||
const passportToken = 'chat-passport-token'
|
||||
fetchMock.mockResolvedValue({
|
||||
ok: true,
|
||||
json: jest.fn().mockResolvedValue({ access_token: passportToken }),
|
||||
})
|
||||
|
||||
await submitChallengeAttempt('challenge-123', 'app-abc', 'site-code-xyz', 'chat', 'solve this')
|
||||
|
||||
expect(fetchMock).toHaveBeenCalledWith(`${PUBLIC_API_PREFIX}/passport`, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'X-App-Code': 'site-code-xyz',
|
||||
},
|
||||
credentials: 'include',
|
||||
})
|
||||
|
||||
expect(mockedPostPublic).toHaveBeenCalledWith('/chat-messages', expect.objectContaining({
|
||||
body: {
|
||||
query: 'solve this',
|
||||
inputs: {},
|
||||
response_mode: 'blocking',
|
||||
conversation_id: '',
|
||||
},
|
||||
}))
|
||||
|
||||
const storedToken = JSON.parse(localStorage.getItem('token') || '{}')
|
||||
expect(storedToken.version).toBe(2)
|
||||
expect(storedToken['challenge-123'].DEFAULT).toBe(passportToken)
|
||||
})
|
||||
|
||||
it('requests a passport token and submits workflow attempts through /workflows/run', async () => {
|
||||
const passportToken = 'workflow-passport-token'
|
||||
fetchMock.mockResolvedValue({
|
||||
ok: true,
|
||||
json: jest.fn().mockResolvedValue({ access_token: passportToken }),
|
||||
})
|
||||
|
||||
await submitChallengeAttempt('challenge-456', 'app-def', 'site-code-xyz', 'workflow', 'my answer')
|
||||
|
||||
expect(fetchMock).toHaveBeenCalledWith(`${PUBLIC_API_PREFIX}/passport`, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'X-App-Code': 'site-code-xyz',
|
||||
},
|
||||
credentials: 'include',
|
||||
})
|
||||
|
||||
expect(mockedPostPublic).toHaveBeenCalledWith('/workflows/run', expect.objectContaining({
|
||||
body: {
|
||||
inputs: {
|
||||
user_prompt: 'my answer',
|
||||
},
|
||||
response_mode: 'blocking',
|
||||
},
|
||||
}))
|
||||
|
||||
const storedToken = JSON.parse(localStorage.getItem('token') || '{}')
|
||||
expect(storedToken['challenge-456'].DEFAULT).toBe(passportToken)
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
'use client'
|
||||
import { useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Modal from '@/app/components/base/modal'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Input from '@/app/components/base/input'
|
||||
import Textarea from '@/app/components/base/textarea'
|
||||
import Switch from '@/app/components/base/switch'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import Select from '@/app/components/base/select'
|
||||
import { createConsoleChallenge } from '@/service/console/challenges'
|
||||
import { useAppFullList } from '@/service/use-apps'
|
||||
import { useAppWorkflow } from '@/service/use-workflow'
|
||||
|
||||
type Props = {
|
||||
show: boolean
|
||||
onHide: () => void
|
||||
onSuccess: () => void
|
||||
}
|
||||
|
||||
export default function CreateChallengeModal({ show, onHide, onSuccess }: Props) {
|
||||
const { t } = useTranslation()
|
||||
const [form, setForm] = useState({
|
||||
app_id: '',
|
||||
workflow_id: '',
|
||||
name: '',
|
||||
description: '',
|
||||
goal: '',
|
||||
is_active: true,
|
||||
})
|
||||
const [loading, setLoading] = useState(false)
|
||||
|
||||
const { data: appsData } = useAppFullList()
|
||||
const apps = appsData?.data || []
|
||||
const { data: workflowData } = useAppWorkflow(form.app_id)
|
||||
const hasWorkflow = !!workflowData?.graph
|
||||
|
||||
const handleSubmit = async () => {
|
||||
if (!form.app_id || !form.name) {
|
||||
Toast.notify({ type: 'error', message: 'App ID and Name are required' })
|
||||
return
|
||||
}
|
||||
|
||||
setLoading(true)
|
||||
try {
|
||||
await createConsoleChallenge(form)
|
||||
Toast.notify({ type: 'success', message: 'Challenge created successfully' })
|
||||
onSuccess()
|
||||
}
|
||||
catch (e: any) {
|
||||
Toast.notify({ type: 'error', message: e.message || 'Failed to create challenge' })
|
||||
}
|
||||
finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<Modal
|
||||
isShow={show}
|
||||
onClose={onHide}
|
||||
title={t('challenges.console.create')}
|
||||
className='!max-w-[640px]'
|
||||
>
|
||||
<div className='space-y-4 p-8'>
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.appId')} <span className='text-text-destructive'>*</span>
|
||||
</label>
|
||||
<Select
|
||||
className='w-full'
|
||||
defaultValue={form.app_id}
|
||||
onSelect={item => setForm({ ...form, app_id: item.value as string, workflow_id: '' })}
|
||||
placeholder={t('common.placeholder.select')}
|
||||
items={apps.map(app => ({
|
||||
value: app.id,
|
||||
name: app.name,
|
||||
}))}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{form.app_id && hasWorkflow && (
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.workflowId')}
|
||||
</label>
|
||||
<div className='flex items-center gap-2'>
|
||||
<Input
|
||||
className='flex-1'
|
||||
value={workflowData?.id || ''}
|
||||
disabled
|
||||
/>
|
||||
<Button
|
||||
size='small'
|
||||
onClick={() => setForm({ ...form, workflow_id: workflowData?.id || '' })}
|
||||
>
|
||||
Use Workflow
|
||||
</Button>
|
||||
</div>
|
||||
<div className='mt-1 text-xs text-text-tertiary'>
|
||||
{workflowData?.id ? `Workflow ID: ${workflowData.id}` : 'No workflow published'}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.name')} <span className='text-text-destructive'>*</span>
|
||||
</label>
|
||||
<Input
|
||||
value={form.name}
|
||||
onChange={e => setForm({ ...form, name: e.target.value })}
|
||||
placeholder={t('challenges.console.form.namePlaceholder')}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.description')}
|
||||
</label>
|
||||
<Textarea
|
||||
value={form.description}
|
||||
onChange={e => setForm({ ...form, description: e.target.value })}
|
||||
placeholder={t('challenges.console.form.descriptionPlaceholder')}
|
||||
rows={3}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.goal')}
|
||||
</label>
|
||||
<Textarea
|
||||
value={form.goal}
|
||||
onChange={e => setForm({ ...form, goal: e.target.value })}
|
||||
placeholder={t('challenges.console.form.goalPlaceholder')}
|
||||
rows={2}
|
||||
/>
|
||||
<div className='mt-1 text-xs text-text-tertiary'>
|
||||
This will be shown to players. Challenge logic is defined in the workflow using Challenge Evaluator nodes.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='flex items-center justify-between'>
|
||||
<label className='text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.isActive')}
|
||||
</label>
|
||||
<Switch
|
||||
defaultValue={form.is_active}
|
||||
onChange={v => setForm({ ...form, is_active: v })}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className='flex justify-end gap-2 pt-4'>
|
||||
<Button onClick={onHide}>{t('common.operation.cancel')}</Button>
|
||||
<Button variant='primary' onClick={handleSubmit} loading={loading}>
|
||||
{t('common.operation.create')}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</Modal>
|
||||
)
|
||||
}
|
||||
133
web/app/(commonLayout)/console/challenges/page.tsx
Normal file
133
web/app/(commonLayout)/console/challenges/page.tsx
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { RiAddLine, RiDeleteBinLine } from '@remixicon/react'
|
||||
import { deleteConsoleChallenge, listConsoleChallenges, updateConsoleChallenge } from '@/service/console/challenges'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import Confirm from '@/app/components/base/confirm'
|
||||
import CreateChallengeModal from './create-challenge-modal'
|
||||
|
||||
export default function ConsoleChallengesPage() {
|
||||
const { t } = useTranslation()
|
||||
const [items, setItems] = useState<any[]>([])
|
||||
const [showModal, setShowModal] = useState(false)
|
||||
const [loading, setLoading] = useState(false)
|
||||
const [pendingDeleteId, setPendingDeleteId] = useState<string | null>(null)
|
||||
|
||||
const load = async () => {
|
||||
setLoading(true)
|
||||
try {
|
||||
const data = await listConsoleChallenges()
|
||||
setItems(data)
|
||||
}
|
||||
finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
void load()
|
||||
}, [])
|
||||
|
||||
const confirmDelete = async () => {
|
||||
if (!pendingDeleteId)
|
||||
return
|
||||
try {
|
||||
await deleteConsoleChallenge(pendingDeleteId)
|
||||
Toast.notify({ type: 'success', message: 'Challenge deleted' })
|
||||
await load()
|
||||
}
|
||||
catch (e: any) {
|
||||
Toast.notify({ type: 'error', message: e.message || 'Delete failed' })
|
||||
}
|
||||
finally {
|
||||
setPendingDeleteId(null)
|
||||
}
|
||||
}
|
||||
|
||||
const handleToggleActive = async (item: any) => {
|
||||
try {
|
||||
await updateConsoleChallenge(item.id, { is_active: !item.is_active })
|
||||
Toast.notify({ type: 'success', message: item.is_active ? 'Deactivated' : 'Activated' })
|
||||
await load()
|
||||
}
|
||||
catch (e: any) {
|
||||
Toast.notify({ type: 'error', message: e.message || 'Update failed' })
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className='flex h-full flex-col bg-components-panel-bg'>
|
||||
<div className='flex items-center justify-between border-b border-divider-subtle px-12 py-4'>
|
||||
<h1 className='text-xl font-semibold text-text-primary'>{t('challenges.console.title')}</h1>
|
||||
<Button onClick={() => setShowModal(true)}>
|
||||
<RiAddLine className='h-4 w-4' />
|
||||
{t('challenges.console.create')}
|
||||
</Button>
|
||||
</div>
|
||||
<div className='flex-1 overflow-y-auto px-12 py-6'>
|
||||
{loading ? (
|
||||
<div className='text-text-tertiary'>{t('common.loading')}</div>
|
||||
) : items.length === 0 ? (
|
||||
<div className='flex flex-col items-center justify-center py-16'>
|
||||
<div className='mb-2 text-text-secondary'>{t('challenges.console.empty')}</div>
|
||||
<div className='text-sm text-text-tertiary'>{t('challenges.console.emptyDesc')}</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className='grid gap-4 sm:grid-cols-2 lg:grid-cols-3'>
|
||||
{items.map(item => (
|
||||
<div key={item.id} className='group relative rounded-xl border border-divider-subtle bg-components-panel-bg p-4 shadow-xs transition-shadow hover:shadow-md'>
|
||||
<div className='mb-2 text-base font-semibold text-text-primary'>{item.name}</div>
|
||||
{item.description && (
|
||||
<div className='mb-2 line-clamp-2 text-sm text-text-secondary'>{item.description}</div>
|
||||
)}
|
||||
{item.goal && (
|
||||
<div className='mb-2 line-clamp-1 text-xs text-text-tertiary'>Goal: {item.goal}</div>
|
||||
)}
|
||||
<div className='mt-3 flex items-center justify-between'>
|
||||
<div className={`rounded px-2 py-0.5 text-xs font-medium ${item.is_active ? 'bg-util-colors-green-green-100 text-util-colors-green-green-700' : 'bg-components-badge-gray text-text-tertiary'}`}>
|
||||
{item.is_active ? t('challenges.console.status.active') : t('challenges.console.status.inactive')}
|
||||
</div>
|
||||
<div className='flex gap-1 opacity-0 transition-opacity group-hover:opacity-100'>
|
||||
<Button
|
||||
size='small'
|
||||
onClick={() => handleToggleActive(item)}
|
||||
>
|
||||
{item.is_active ? t('challenges.console.actions.deactivate') : t('challenges.console.actions.activate')}
|
||||
</Button>
|
||||
<Button
|
||||
size='small'
|
||||
variant='ghost'
|
||||
onClick={() => setPendingDeleteId(item.id)}
|
||||
>
|
||||
<RiDeleteBinLine className='h-4 w-4' />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{showModal && (
|
||||
<CreateChallengeModal
|
||||
show={showModal}
|
||||
onHide={() => setShowModal(false)}
|
||||
onSuccess={() => {
|
||||
setShowModal(false)
|
||||
void load()
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
<Confirm
|
||||
isShow={Boolean(pendingDeleteId)}
|
||||
title={t('challenges.console.actions.deleteConfirm')}
|
||||
content={t('challenges.console.actions.deleteConfirm')}
|
||||
onCancel={() => setPendingDeleteId(null)}
|
||||
onConfirm={confirmDelete}
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
|
@ -0,0 +1,194 @@
|
|||
'use client'
|
||||
import { useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Modal from '@/app/components/base/modal'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Input from '@/app/components/base/input'
|
||||
import Textarea from '@/app/components/base/textarea'
|
||||
import Switch from '@/app/components/base/switch'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import Select from '@/app/components/base/select'
|
||||
import { createRedBlueChallenge } from '@/service/console/challenges'
|
||||
import { useAppFullList } from '@/service/use-apps'
|
||||
import { useAppWorkflow } from '@/service/use-workflow'
|
||||
|
||||
type Props = {
|
||||
show: boolean
|
||||
onHide: () => void
|
||||
onSuccess: () => void
|
||||
}
|
||||
|
||||
export default function CreateRedBlueModal({ show, onHide, onSuccess }: Props) {
|
||||
const { t } = useTranslation()
|
||||
const [form, setForm] = useState({
|
||||
app_id: '',
|
||||
workflow_id: '',
|
||||
name: '',
|
||||
description: '',
|
||||
judge_suite: ['CBRNE', 'SA', 'SH', 'RWH', 'V', 'M'],
|
||||
defense_selection_policy: 'latest_best',
|
||||
attack_selection_policy: 'latest_best',
|
||||
scoring_strategy: 'red_blue_ratio',
|
||||
is_active: true,
|
||||
})
|
||||
const [loading, setLoading] = useState(false)
|
||||
|
||||
const { data: appsData } = useAppFullList()
|
||||
const apps = appsData?.data || []
|
||||
const { data: workflowData } = useAppWorkflow(form.app_id)
|
||||
const hasWorkflow = !!workflowData?.graph
|
||||
|
||||
const handleSubmit = async () => {
|
||||
if (!form.app_id || !form.name) {
|
||||
Toast.notify({ type: 'error', message: 'App ID and Name are required' })
|
||||
return
|
||||
}
|
||||
|
||||
setLoading(true)
|
||||
try {
|
||||
await createRedBlueChallenge(form)
|
||||
Toast.notify({ type: 'success', message: 'Red/Blue challenge created successfully' })
|
||||
onSuccess()
|
||||
}
|
||||
catch (e: any) {
|
||||
Toast.notify({ type: 'error', message: e.message || 'Failed to create challenge' })
|
||||
}
|
||||
finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<Modal
|
||||
isShow={show}
|
||||
onClose={onHide}
|
||||
title={t('challenges.console.createRedBlue')}
|
||||
className='!max-w-[640px]'
|
||||
>
|
||||
<div className='space-y-4 p-8'>
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.appId')} <span className='text-text-destructive'>*</span>
|
||||
</label>
|
||||
<Select
|
||||
className='w-full'
|
||||
defaultValue={form.app_id}
|
||||
onSelect={item => setForm({ ...form, app_id: item.value as string, workflow_id: '' })}
|
||||
placeholder={t('common.placeholder.select')}
|
||||
items={apps.map(app => ({
|
||||
value: app.id,
|
||||
name: app.name,
|
||||
}))}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{form.app_id && hasWorkflow && (
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.workflowId')}
|
||||
</label>
|
||||
<div className='flex items-center gap-2'>
|
||||
<Input
|
||||
className='flex-1'
|
||||
value={workflowData?.id || ''}
|
||||
disabled
|
||||
/>
|
||||
<Button
|
||||
size='small'
|
||||
onClick={() => setForm({ ...form, workflow_id: workflowData?.id || '' })}
|
||||
>
|
||||
Use Workflow
|
||||
</Button>
|
||||
</div>
|
||||
<div className='mt-1 text-xs text-text-tertiary'>
|
||||
{workflowData?.id ? `Workflow ID: ${workflowData.id}` : 'No workflow published'}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.name')} <span className='text-text-destructive'>*</span>
|
||||
</label>
|
||||
<Input
|
||||
value={form.name}
|
||||
onChange={e => setForm({ ...form, name: e.target.value })}
|
||||
placeholder={t('challenges.console.form.namePlaceholder')}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.description')}
|
||||
</label>
|
||||
<Textarea
|
||||
value={form.description}
|
||||
onChange={e => setForm({ ...form, description: e.target.value })}
|
||||
placeholder={t('challenges.console.form.descriptionPlaceholder')}
|
||||
rows={3}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className='grid grid-cols-2 gap-4'>
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.defensePolicy')}
|
||||
</label>
|
||||
<select
|
||||
className='input-select w-full'
|
||||
value={form.defense_selection_policy}
|
||||
onChange={e => setForm({ ...form, defense_selection_policy: e.target.value })}
|
||||
>
|
||||
<option value='latest_best'>Latest Best</option>
|
||||
<option value='random_active'>Random Active</option>
|
||||
<option value='round_robin'>Round Robin</option>
|
||||
<option value='request_new_if_none'>Request New If None</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.attackPolicy')}
|
||||
</label>
|
||||
<select
|
||||
className='input-select w-full'
|
||||
value={form.attack_selection_policy}
|
||||
onChange={e => setForm({ ...form, attack_selection_policy: e.target.value })}
|
||||
>
|
||||
<option value='latest_best'>Latest Best</option>
|
||||
<option value='random_active'>Random Active</option>
|
||||
<option value='round_robin'>Round Robin</option>
|
||||
<option value='request_new_if_none'>Request New If None</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className='mb-2 block text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.judgeSuite')}
|
||||
</label>
|
||||
<div className='text-xs text-text-tertiary'>
|
||||
Categories: {form.judge_suite.join(', ')}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='flex items-center justify-between'>
|
||||
<label className='text-sm font-medium text-text-secondary'>
|
||||
{t('challenges.console.form.isActive')}
|
||||
</label>
|
||||
<Switch
|
||||
defaultValue={form.is_active}
|
||||
onChange={v => setForm({ ...form, is_active: v })}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className='flex justify-end gap-2 pt-4'>
|
||||
<Button onClick={onHide}>{t('common.operation.cancel')}</Button>
|
||||
<Button variant='primary' onClick={handleSubmit} loading={loading}>
|
||||
{t('common.operation.create')}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</Modal>
|
||||
)
|
||||
}
|
||||
136
web/app/(commonLayout)/console/red-blue-challenges/page.tsx
Normal file
136
web/app/(commonLayout)/console/red-blue-challenges/page.tsx
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { RiAddLine, RiDeleteBinLine } from '@remixicon/react'
|
||||
import { deleteRedBlueChallenge, listRedBlueChallenges, updateRedBlueChallenge } from '@/service/console/challenges'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import Confirm from '@/app/components/base/confirm'
|
||||
import CreateRedBlueModal from './create-red-blue-modal'
|
||||
|
||||
export default function ConsoleRedBlueChallengesPage() {
|
||||
const { t } = useTranslation()
|
||||
const [items, setItems] = useState<any[]>([])
|
||||
const [showModal, setShowModal] = useState(false)
|
||||
const [loading, setLoading] = useState(false)
|
||||
const [pendingDeleteId, setPendingDeleteId] = useState<string | null>(null)
|
||||
|
||||
const load = async () => {
|
||||
setLoading(true)
|
||||
try {
|
||||
const data = await listRedBlueChallenges()
|
||||
setItems(data)
|
||||
}
|
||||
finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
void load()
|
||||
}, [])
|
||||
|
||||
const confirmDelete = async () => {
|
||||
if (!pendingDeleteId)
|
||||
return
|
||||
try {
|
||||
await deleteRedBlueChallenge(pendingDeleteId)
|
||||
Toast.notify({ type: 'success', message: 'Challenge deleted' })
|
||||
await load()
|
||||
}
|
||||
catch (e: any) {
|
||||
Toast.notify({ type: 'error', message: e.message || 'Delete failed' })
|
||||
}
|
||||
finally {
|
||||
setPendingDeleteId(null)
|
||||
}
|
||||
}
|
||||
|
||||
const handleToggleActive = async (item: any) => {
|
||||
try {
|
||||
await updateRedBlueChallenge(item.id, { is_active: !item.is_active })
|
||||
Toast.notify({ type: 'success', message: item.is_active ? 'Deactivated' : 'Activated' })
|
||||
await load()
|
||||
}
|
||||
catch (e: any) {
|
||||
Toast.notify({ type: 'error', message: e.message || 'Update failed' })
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className='flex h-full flex-col bg-components-panel-bg'>
|
||||
<div className='flex items-center justify-between border-b border-divider-subtle px-12 py-4'>
|
||||
<h1 className='text-xl font-semibold text-text-primary'>{t('challenges.redBlue.title')}</h1>
|
||||
<Button onClick={() => setShowModal(true)}>
|
||||
<RiAddLine className='h-4 w-4' />
|
||||
{t('challenges.console.createRedBlue')}
|
||||
</Button>
|
||||
</div>
|
||||
<div className='flex-1 overflow-y-auto px-12 py-6'>
|
||||
{loading ? (
|
||||
<div className='text-text-tertiary'>{t('common.loading')}</div>
|
||||
) : items.length === 0 ? (
|
||||
<div className='flex flex-col items-center justify-center py-16'>
|
||||
<div className='mb-2 text-text-secondary'>{t('challenges.console.empty')}</div>
|
||||
<div className='text-sm text-text-tertiary'>{t('challenges.console.emptyDesc')}</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className='grid gap-4 sm:grid-cols-2 lg:grid-cols-3'>
|
||||
{items.map(item => (
|
||||
<div key={item.id} className='group relative rounded-xl border border-divider-subtle bg-components-panel-bg p-4 shadow-xs transition-shadow hover:shadow-md'>
|
||||
<div className='mb-2 flex items-start justify-between'>
|
||||
<div className='text-base font-semibold text-text-primary'>{item.name}</div>
|
||||
<div className='flex gap-1'>
|
||||
<div className='rounded bg-util-colors-red-red-100 px-1.5 py-0.5 text-[10px] font-semibold uppercase text-util-colors-red-red-700'>RED</div>
|
||||
<div className='rounded bg-util-colors-blue-blue-100 px-1.5 py-0.5 text-[10px] font-semibold uppercase text-util-colors-blue-blue-700'>BLUE</div>
|
||||
</div>
|
||||
</div>
|
||||
{item.description && (
|
||||
<div className='mb-2 line-clamp-2 text-sm text-text-secondary'>{item.description}</div>
|
||||
)}
|
||||
<div className='mt-3 flex items-center justify-between'>
|
||||
<div className={`rounded px-2 py-0.5 text-xs font-medium ${item.is_active ? 'bg-util-colors-green-green-100 text-util-colors-green-green-700' : 'bg-components-badge-gray text-text-tertiary'}`}>
|
||||
{item.is_active ? t('challenges.console.status.active') : t('challenges.console.status.inactive')}
|
||||
</div>
|
||||
<div className='flex gap-1 opacity-0 transition-opacity group-hover:opacity-100'>
|
||||
<Button
|
||||
size='small'
|
||||
onClick={() => handleToggleActive(item)}
|
||||
>
|
||||
{item.is_active ? t('challenges.console.actions.deactivate') : t('challenges.console.actions.activate')}
|
||||
</Button>
|
||||
<Button
|
||||
size='small'
|
||||
variant='ghost'
|
||||
onClick={() => setPendingDeleteId(item.id)}
|
||||
>
|
||||
<RiDeleteBinLine className='h-4 w-4' />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{showModal && (
|
||||
<CreateRedBlueModal
|
||||
show={showModal}
|
||||
onHide={() => setShowModal(false)}
|
||||
onSuccess={() => {
|
||||
setShowModal(false)
|
||||
void load()
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
<Confirm
|
||||
isShow={Boolean(pendingDeleteId)}
|
||||
title={t('challenges.console.actions.deleteConfirm')}
|
||||
content={t('challenges.console.actions.deleteConfirm')}
|
||||
onCancel={() => setPendingDeleteId(null)}
|
||||
onConfirm={confirmDelete}
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
206
web/app/challenges/[id]/page.tsx
Normal file
206
web/app/challenges/[id]/page.tsx
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useParams } from 'next/navigation'
|
||||
import { RiCheckLine, RiCloseLine, RiLoader4Line } from '@remixicon/react'
|
||||
import { fetchChallengeDetail, fetchChallengeLeaderboard, submitChallengeAttempt } from '@/service/challenges'
|
||||
import Leaderboard from '@/app/components/challenge/leaderboard'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Textarea from '@/app/components/base/textarea'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
|
||||
export default function ChallengeDetailPage() {
|
||||
const { t } = useTranslation()
|
||||
const params = useParams()
|
||||
const id = params?.id as string
|
||||
|
||||
const [challenge, setChallenge] = useState<any>(null)
|
||||
const [leaderboard, setLeaderboard] = useState<any[]>([])
|
||||
const [loading, setLoading] = useState(true)
|
||||
const [submitting, setSubmitting] = useState(false)
|
||||
const [userInput, setUserInput] = useState('')
|
||||
const [lastResult, setLastResult] = useState<{ success: boolean; message?: string; rating?: number } | null>(null)
|
||||
|
||||
useEffect(() => {
|
||||
const load = async () => {
|
||||
try {
|
||||
const [detail, leaders] = await Promise.all([
|
||||
fetchChallengeDetail(id),
|
||||
fetchChallengeLeaderboard(id),
|
||||
])
|
||||
setChallenge(detail)
|
||||
setLeaderboard(leaders)
|
||||
}
|
||||
catch (e: any) {
|
||||
Toast.notify({ type: 'error', message: e.message || 'Failed to load challenge' })
|
||||
}
|
||||
finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
if (id)
|
||||
load()
|
||||
}, [id])
|
||||
|
||||
const handleSubmit = async () => {
|
||||
if (!userInput.trim()) {
|
||||
Toast.notify({ type: 'error', message: 'Please enter a response' })
|
||||
return
|
||||
}
|
||||
|
||||
if (!challenge?.app_id) {
|
||||
Toast.notify({ type: 'error', message: 'Challenge is not configured with an app' })
|
||||
return
|
||||
}
|
||||
|
||||
setSubmitting(true)
|
||||
setLastResult(null)
|
||||
try {
|
||||
// Execute the workflow with the user's input
|
||||
// Endpoint varies by app type (chat vs workflow)
|
||||
const result = await submitChallengeAttempt(
|
||||
id,
|
||||
challenge.app_id,
|
||||
challenge.app_site_code,
|
||||
challenge.app_mode || 'workflow',
|
||||
userInput,
|
||||
)
|
||||
|
||||
// Extract challenge results from workflow output
|
||||
// Response structure differs by app mode:
|
||||
// - Chat apps: result.data.answer + result.data.metadata.outputs
|
||||
// - Workflow apps: result.data (direct outputs)
|
||||
const isChatApp = challenge.app_mode === 'chat' || challenge.app_mode === 'advanced-chat'
|
||||
const workflowOutputs = isChatApp
|
||||
? (result.data?.metadata?.outputs || {})
|
||||
: (result.data || {})
|
||||
|
||||
const success = workflowOutputs.challenge_succeeded || false
|
||||
const rating = workflowOutputs.judge_rating
|
||||
const feedback = workflowOutputs.judge_feedback || workflowOutputs.message || result.data?.answer
|
||||
|
||||
setLastResult({
|
||||
success,
|
||||
message: feedback || (success ? 'Challenge passed!' : 'Challenge not passed.'),
|
||||
rating,
|
||||
})
|
||||
|
||||
if (success) {
|
||||
Toast.notify({ type: 'success', message: 'Challenge completed!' })
|
||||
// Refresh leaderboard
|
||||
const leaders = await fetchChallengeLeaderboard(id)
|
||||
setLeaderboard(leaders)
|
||||
}
|
||||
}
|
||||
catch (e: any) {
|
||||
console.error('Submission error:', e)
|
||||
Toast.notify({ type: 'error', message: e.message || 'Submission failed' })
|
||||
}
|
||||
finally {
|
||||
setSubmitting(false)
|
||||
}
|
||||
}
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className='flex min-h-screen items-center justify-center bg-components-panel-bg'>
|
||||
<div className='text-text-tertiary'>{t('common.loading')}</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (!challenge) {
|
||||
return (
|
||||
<div className='flex min-h-screen items-center justify-center bg-components-panel-bg'>
|
||||
<div className='text-text-secondary'>Challenge not found</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div className='min-h-screen bg-components-panel-bg'>
|
||||
<div className='mx-auto max-w-5xl px-4 py-12 sm:px-6 lg:px-8'>
|
||||
<div className='mb-8'>
|
||||
<h1 className='mb-2 text-3xl font-bold text-text-primary'>{challenge.name}</h1>
|
||||
{challenge.description && (
|
||||
<p className='text-lg text-text-secondary'>{challenge.description}</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className='grid gap-6 lg:grid-cols-3'>
|
||||
<div className='lg:col-span-2'>
|
||||
{challenge.goal && (
|
||||
<div className='mb-6 rounded-xl border border-divider-subtle bg-components-panel-bg p-6 shadow-xs'>
|
||||
<h2 className='mb-2 text-sm font-medium uppercase tracking-wide text-text-tertiary'>
|
||||
{t('challenges.player.goal')}
|
||||
</h2>
|
||||
<p className='text-text-primary'>{challenge.goal}</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className='rounded-xl border border-divider-subtle bg-components-panel-bg p-6 shadow-xs'>
|
||||
<h2 className='mb-4 text-lg font-semibold text-text-primary'>
|
||||
{t('challenges.player.yourAttempt')}
|
||||
</h2>
|
||||
|
||||
<Textarea
|
||||
value={userInput}
|
||||
onChange={e => setUserInput(e.target.value)}
|
||||
placeholder='Enter your response here...'
|
||||
rows={8}
|
||||
className='mb-4 w-full'
|
||||
/>
|
||||
|
||||
<Button
|
||||
type='primary'
|
||||
onClick={handleSubmit}
|
||||
loading={submitting}
|
||||
disabled={!userInput.trim()}
|
||||
className='w-full'
|
||||
>
|
||||
{submitting ? (
|
||||
<>
|
||||
<RiLoader4Line className='mr-2 h-4 w-4 animate-spin' />
|
||||
{t('common.operation.processing')}
|
||||
</>
|
||||
) : (
|
||||
t('challenges.player.submit')
|
||||
)}
|
||||
</Button>
|
||||
|
||||
{lastResult && (
|
||||
<div className={`mt-4 rounded-lg border p-4 ${lastResult.success ? 'border-util-colors-green-green-500 bg-util-colors-green-green-50' : 'border-util-colors-orange-orange-500 bg-util-colors-orange-orange-50'}`}>
|
||||
<div className='flex items-start gap-3'>
|
||||
{lastResult.success ? (
|
||||
<RiCheckLine className='h-5 w-5 shrink-0 text-util-colors-green-green-600' />
|
||||
) : (
|
||||
<RiCloseLine className='h-5 w-5 shrink-0 text-util-colors-orange-orange-600' />
|
||||
)}
|
||||
<div className='flex-1'>
|
||||
<div className={`mb-1 font-medium ${lastResult.success ? 'text-util-colors-green-green-700' : 'text-util-colors-orange-orange-700'}`}>
|
||||
{lastResult.success ? t('challenges.player.status.success') : t('challenges.player.status.failed')}
|
||||
</div>
|
||||
{lastResult.message && (
|
||||
<div className='text-sm text-text-secondary'>{lastResult.message}</div>
|
||||
)}
|
||||
{lastResult.rating !== undefined && (
|
||||
<div className='mt-2 text-sm text-text-tertiary'>
|
||||
{t('challenges.leaderboard.rating')}: {lastResult.rating}/10
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='lg:col-span-1'>
|
||||
<Leaderboard entries={leaderboard} strategy={challenge.scoring_strategy} />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
78
web/app/challenges/page.tsx
Normal file
78
web/app/challenges/page.tsx
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Link from 'next/link'
|
||||
import { RiArrowRightLine, RiAwardLine } from '@remixicon/react'
|
||||
import { fetchChallenges } from '@/service/challenges'
|
||||
import type { ChallengeListItem } from '@/service/challenges'
|
||||
|
||||
export default function ChallengesListPage() {
|
||||
const { t } = useTranslation()
|
||||
const [challenges, setChallenges] = useState<ChallengeListItem[]>([])
|
||||
const [loading, setLoading] = useState(true)
|
||||
|
||||
useEffect(() => {
|
||||
const load = async () => {
|
||||
try {
|
||||
const data = await fetchChallenges()
|
||||
console.log('Loaded challenges:', data)
|
||||
setChallenges(data)
|
||||
}
|
||||
catch (error) {
|
||||
console.error('Failed to load challenges:', error)
|
||||
}
|
||||
finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
load()
|
||||
}, [])
|
||||
|
||||
return (
|
||||
<div className='min-h-screen bg-components-panel-bg'>
|
||||
<div className='mx-auto max-w-7xl px-4 py-12 sm:px-6 lg:px-8'>
|
||||
<div className='mb-8 text-center'>
|
||||
<h1 className='mb-2 text-4xl font-bold text-text-primary'>{t('challenges.player.browse')}</h1>
|
||||
<p className='text-lg text-text-secondary'>Test your skills and compete on the leaderboard</p>
|
||||
</div>
|
||||
|
||||
{loading ? (
|
||||
<div className='text-center text-text-tertiary'>{t('common.loading')}</div>
|
||||
) : challenges.length === 0 ? (
|
||||
<div className='rounded-xl border border-divider-subtle bg-components-panel-bg p-12 text-center'>
|
||||
<RiAwardLine className='mx-auto mb-4 h-12 w-12 text-text-quaternary' />
|
||||
<div className='text-text-secondary'>No challenges available yet</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className='grid gap-6 sm:grid-cols-2 lg:grid-cols-3'>
|
||||
{challenges.map(challenge => (
|
||||
<Link
|
||||
key={challenge.id}
|
||||
href={`/challenges/${challenge.id}`}
|
||||
className='group block'
|
||||
>
|
||||
<div className='h-full rounded-xl border border-divider-subtle bg-components-panel-bg p-6 shadow-xs transition-all hover:border-components-button-primary-bg hover:shadow-md'>
|
||||
<div className='mb-3 flex items-start justify-between'>
|
||||
<RiAwardLine className='h-8 w-8 text-util-colors-cyan-cyan-500' />
|
||||
<RiArrowRightLine className='h-5 w-5 text-text-quaternary transition-transform group-hover:translate-x-1' />
|
||||
</div>
|
||||
<h3 className='mb-2 text-lg font-semibold text-text-primary'>{challenge.name}</h3>
|
||||
{challenge.description && (
|
||||
<p className='mb-3 line-clamp-2 text-sm text-text-secondary'>{challenge.description}</p>
|
||||
)}
|
||||
{challenge.goal && (
|
||||
<div className='mt-4 rounded-lg bg-components-panel-on-panel-item-bg p-3'>
|
||||
<div className='mb-1 text-xs font-medium uppercase text-text-tertiary'>Goal</div>
|
||||
<div className='line-clamp-2 text-sm text-text-secondary'>{challenge.goal}</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</Link>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
124
web/app/components/challenge/leaderboard.tsx
Normal file
124
web/app/components/challenge/leaderboard.tsx
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
'use client'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
type LeaderboardEntry = {
|
||||
rank: number
|
||||
player_name: string
|
||||
score: number
|
||||
elapsed_ms?: number
|
||||
tokens_total?: number
|
||||
judge_rating?: number
|
||||
created_at: string
|
||||
is_current_user?: boolean
|
||||
}
|
||||
|
||||
type Props = {
|
||||
entries: LeaderboardEntry[]
|
||||
strategy?: string
|
||||
}
|
||||
|
||||
export default function Leaderboard({ entries, strategy = 'highest_rating' }: Props) {
|
||||
const { t } = useTranslation()
|
||||
|
||||
if (entries.length === 0) {
|
||||
return (
|
||||
<div className='rounded-xl border border-divider-subtle bg-components-panel-bg p-8 text-center'>
|
||||
<div className='text-text-tertiary'>{t('challenges.leaderboard.empty')}</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div className='rounded-xl border border-divider-subtle bg-components-panel-bg shadow-xs'>
|
||||
<div className='border-b border-divider-subtle px-6 py-4'>
|
||||
<h2 className='text-lg font-semibold text-text-primary'>{t('challenges.leaderboard.title')}</h2>
|
||||
</div>
|
||||
<div className='overflow-x-auto'>
|
||||
<table className='w-full'>
|
||||
<thead className='border-b border-divider-subtle bg-components-panel-on-panel-item-bg'>
|
||||
<tr>
|
||||
<th className='px-6 py-3 text-left text-xs font-medium uppercase tracking-wider text-text-tertiary'>
|
||||
{t('challenges.leaderboard.rank')}
|
||||
</th>
|
||||
<th className='px-6 py-3 text-left text-xs font-medium uppercase tracking-wider text-text-tertiary'>
|
||||
{t('challenges.leaderboard.player')}
|
||||
</th>
|
||||
<th className='px-6 py-3 text-left text-xs font-medium uppercase tracking-wider text-text-tertiary'>
|
||||
{t('challenges.leaderboard.score')}
|
||||
</th>
|
||||
{strategy === 'fastest' && (
|
||||
<th className='px-6 py-3 text-left text-xs font-medium uppercase tracking-wider text-text-tertiary'>
|
||||
{t('challenges.leaderboard.time')}
|
||||
</th>
|
||||
)}
|
||||
{strategy === 'fewest_tokens' && (
|
||||
<th className='px-6 py-3 text-left text-xs font-medium uppercase tracking-wider text-text-tertiary'>
|
||||
{t('challenges.leaderboard.tokens')}
|
||||
</th>
|
||||
)}
|
||||
{strategy === 'highest_rating' && (
|
||||
<th className='px-6 py-3 text-left text-xs font-medium uppercase tracking-wider text-text-tertiary'>
|
||||
{t('challenges.leaderboard.rating')}
|
||||
</th>
|
||||
)}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className='divide-y divide-divider-subtle'>
|
||||
{entries.map((entry, idx) => (
|
||||
<tr
|
||||
key={idx}
|
||||
className={`transition-colors hover:bg-components-panel-on-panel-item-bg ${entry.is_current_user ? 'bg-util-colors-blue-blue-50' : ''}`}
|
||||
>
|
||||
<td className='whitespace-nowrap px-6 py-4'>
|
||||
<div className='flex items-center'>
|
||||
{entry.rank <= 3 ? (
|
||||
<span className='text-lg'>
|
||||
{entry.rank === 1 && '🥇'}
|
||||
{entry.rank === 2 && '🥈'}
|
||||
{entry.rank === 3 && '🥉'}
|
||||
</span>
|
||||
) : (
|
||||
<span className='text-sm text-text-tertiary'>#{entry.rank}</span>
|
||||
)}
|
||||
</div>
|
||||
</td>
|
||||
<td className='whitespace-nowrap px-6 py-4'>
|
||||
<div className='flex items-center'>
|
||||
<div className='text-sm font-medium text-text-primary'>
|
||||
{entry.player_name}
|
||||
{entry.is_current_user && (
|
||||
<span className='ml-2 rounded bg-util-colors-blue-blue-100 px-1.5 py-0.5 text-xs text-util-colors-blue-blue-700'>
|
||||
{t('challenges.leaderboard.yourBest')}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
<td className='whitespace-nowrap px-6 py-4 text-sm text-text-secondary'>
|
||||
{entry.score.toFixed(1)}
|
||||
</td>
|
||||
{strategy === 'fastest' && entry.elapsed_ms !== undefined && (
|
||||
<td className='whitespace-nowrap px-6 py-4 text-sm text-text-secondary'>
|
||||
{(entry.elapsed_ms / 1000).toFixed(2)}s
|
||||
</td>
|
||||
)}
|
||||
{strategy === 'fewest_tokens' && entry.tokens_total !== undefined && (
|
||||
<td className='whitespace-nowrap px-6 py-4 text-sm text-text-secondary'>
|
||||
{entry.tokens_total}
|
||||
</td>
|
||||
)}
|
||||
{strategy === 'highest_rating' && entry.judge_rating !== undefined && (
|
||||
<td className='whitespace-nowrap px-6 py-4'>
|
||||
<div className='flex items-center'>
|
||||
<span className='text-sm font-medium text-text-primary'>{entry.judge_rating}/10</span>
|
||||
</div>
|
||||
</td>
|
||||
)}
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
|
@ -65,6 +65,8 @@ export const useAvailableNodesMetaData = () => {
|
|||
nodesMap: {
|
||||
...availableNodesMetaDataMap,
|
||||
[BlockEnum.VariableAssigner]: availableNodesMetaDataMap?.[BlockEnum.VariableAggregator],
|
||||
// Legacy alias for renamed node
|
||||
'prompt-challenge': availableNodesMetaDataMap?.[BlockEnum.ChallengeEvaluator],
|
||||
},
|
||||
}
|
||||
}, [availableNodesMetaData, availableNodesMetaDataMap])
|
||||
|
|
|
|||
|
|
@ -62,6 +62,8 @@ export const useAvailableNodesMetaData = () => {
|
|||
nodesMap: {
|
||||
...availableNodesMetaDataMap,
|
||||
[BlockEnum.VariableAssigner]: availableNodesMetaDataMap?.[BlockEnum.VariableAggregator],
|
||||
// Legacy alias for renamed node
|
||||
'prompt-challenge': availableNodesMetaDataMap?.[BlockEnum.ChallengeEvaluator],
|
||||
},
|
||||
}
|
||||
}, [availableNodesMetaData, availableNodesMetaDataMap])
|
||||
|
|
|
|||
|
|
@ -66,6 +66,9 @@ const getIcon = (type: BlockEnum, className: string) => {
|
|||
[BlockEnum.KnowledgeBase]: <KnowledgeBase className={className} />,
|
||||
[BlockEnum.DataSource]: <Datasource className={className} />,
|
||||
[BlockEnum.DataSourceEmpty]: <></>,
|
||||
[BlockEnum.ChallengeEvaluator]: <IfElse className={className} />,
|
||||
[BlockEnum.JudgingLLM]: <Llm className={className} />,
|
||||
[BlockEnum.TeamChallenge]: <Agent className={className} />,
|
||||
}[type]
|
||||
}
|
||||
const ICON_CONTAINER_BG_COLOR_MAP: Record<string, string> = {
|
||||
|
|
@ -92,6 +95,9 @@ const ICON_CONTAINER_BG_COLOR_MAP: Record<string, string> = {
|
|||
[BlockEnum.Agent]: 'bg-util-colors-indigo-indigo-500',
|
||||
[BlockEnum.KnowledgeBase]: 'bg-util-colors-warning-warning-500',
|
||||
[BlockEnum.DataSource]: 'bg-components-icon-bg-midnight-solid',
|
||||
[BlockEnum.ChallengeEvaluator]: 'bg-util-colors-blue-blue-500',
|
||||
[BlockEnum.JudgingLLM]: 'bg-util-colors-indigo-indigo-500',
|
||||
[BlockEnum.TeamChallenge]: 'bg-util-colors-green-green-500',
|
||||
}
|
||||
const BlockIcon: FC<BlockIconProps> = ({
|
||||
type,
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ export const SUPPORT_OUTPUT_VARS_NODE = [
|
|||
BlockEnum.ParameterExtractor, BlockEnum.Iteration, BlockEnum.Loop,
|
||||
BlockEnum.DocExtractor, BlockEnum.ListFilter,
|
||||
BlockEnum.Agent, BlockEnum.DataSource,
|
||||
BlockEnum.ChallengeEvaluator, BlockEnum.JudgingLLM, BlockEnum.TeamChallenge,
|
||||
]
|
||||
|
||||
export const AGENT_OUTPUT_STRUCT: Var[] = [
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ import httpRequestDefault from '@/app/components/workflow/nodes/http/default'
|
|||
import parameterExtractorDefault from '@/app/components/workflow/nodes/parameter-extractor/default'
|
||||
import listOperatorDefault from '@/app/components/workflow/nodes/list-operator/default'
|
||||
import toolDefault from '@/app/components/workflow/nodes/tool/default'
|
||||
import challengeEvaluatorDefault from '@/app/components/workflow/nodes/challenge-evaluator/default'
|
||||
import judgingLLMDefault from '@/app/components/workflow/nodes/judging-llm/default'
|
||||
import teamChallengeDefault from '@/app/components/workflow/nodes/team-challenge/default'
|
||||
|
||||
export const WORKFLOW_COMMON_NODES = [
|
||||
llmDefault,
|
||||
|
|
@ -41,4 +44,7 @@ export const WORKFLOW_COMMON_NODES = [
|
|||
httpRequestDefault,
|
||||
listOperatorDefault,
|
||||
toolDefault,
|
||||
challengeEvaluatorDefault,
|
||||
judgingLLMDefault,
|
||||
teamChallengeDefault,
|
||||
]
|
||||
|
|
|
|||
|
|
@ -645,6 +645,41 @@ const formatItem = (
|
|||
}) as Var[]
|
||||
break
|
||||
}
|
||||
|
||||
case BlockEnum.JudgingLLM:
|
||||
case BlockEnum.ChallengeEvaluator:
|
||||
case BlockEnum.TeamChallenge: {
|
||||
// Synchronously get outputs if getOutputVars is defined
|
||||
const nodeType = data.type
|
||||
if (nodeType === BlockEnum.JudgingLLM) {
|
||||
res.vars = [
|
||||
{ variable: 'judge_passed', type: VarType.boolean },
|
||||
{ variable: 'judge_rating', type: VarType.number },
|
||||
{ variable: 'judge_feedback', type: VarType.string },
|
||||
{ variable: 'judge_raw', type: VarType.object },
|
||||
]
|
||||
}
|
||||
else if (nodeType === BlockEnum.ChallengeEvaluator) {
|
||||
res.vars = [
|
||||
{ variable: 'challenge_succeeded', type: VarType.boolean },
|
||||
{ variable: 'judge_rating', type: VarType.number },
|
||||
{ variable: 'judge_feedback', type: VarType.string },
|
||||
{ variable: 'message', type: VarType.string },
|
||||
]
|
||||
}
|
||||
else if (nodeType === BlockEnum.TeamChallenge) {
|
||||
res.vars = [
|
||||
{ variable: 'team', type: VarType.string },
|
||||
{ variable: 'judge_passed', type: VarType.boolean },
|
||||
{ variable: 'judge_rating', type: VarType.number },
|
||||
{ variable: 'judge_feedback', type: VarType.string },
|
||||
{ variable: 'categories', type: VarType.object },
|
||||
{ variable: 'team_points', type: VarType.number },
|
||||
{ variable: 'total_points', type: VarType.number },
|
||||
]
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
const { error_strategy } = data
|
||||
|
|
|
|||
|
|
@ -0,0 +1,42 @@
|
|||
import type { NodeDefault } from '../../types'
|
||||
import { genNodeMetaData } from '@/app/components/workflow/utils'
|
||||
import { BlockEnum, VarType } from '@/app/components/workflow/types'
|
||||
import { BlockClassificationEnum } from '@/app/components/workflow/block-selector/types'
|
||||
import type { ChallengeEvaluatorNodeType } from './types'
|
||||
|
||||
const metaData = genNodeMetaData({
|
||||
classification: BlockClassificationEnum.Utilities,
|
||||
sort: 3,
|
||||
type: BlockEnum.ChallengeEvaluator,
|
||||
helpLinkUri: 'challenge-evaluator',
|
||||
})
|
||||
|
||||
const nodeDefault: NodeDefault<ChallengeEvaluatorNodeType> = {
|
||||
metaData,
|
||||
defaultValue: {
|
||||
evaluation_mode: 'rules',
|
||||
success_type: 'contains',
|
||||
success_pattern: '',
|
||||
scoring_strategy: 'highest_rating',
|
||||
mask_variables: [],
|
||||
inputs: {
|
||||
response: [],
|
||||
},
|
||||
},
|
||||
getOutputVars() {
|
||||
return [
|
||||
{ variable: 'challenge_succeeded', type: VarType.boolean },
|
||||
{ variable: 'judge_rating', type: VarType.number },
|
||||
{ variable: 'judge_feedback', type: VarType.string },
|
||||
{ variable: 'message', type: VarType.string },
|
||||
]
|
||||
},
|
||||
checkValid(payload: ChallengeEvaluatorNodeType, t: any) {
|
||||
let errorMessages = ''
|
||||
if (payload.evaluation_mode === 'rules' && !payload.success_pattern)
|
||||
errorMessages = t('workflow.errorMsg.fieldRequired', { field: 'success_pattern' })
|
||||
return { isValid: !errorMessages, errorMessage: errorMessages }
|
||||
},
|
||||
}
|
||||
|
||||
export default nodeDefault
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
import type { FC } from 'react'
|
||||
import React from 'react'
|
||||
import type { NodeProps } from '@/app/components/workflow/types'
|
||||
import type { ChallengeEvaluatorNodeType } from './types'
|
||||
|
||||
const Node: FC<NodeProps<ChallengeEvaluatorNodeType>> = ({ data }) => {
|
||||
const { evaluation_mode, success_type, success_pattern, challenge_id } = data
|
||||
return (
|
||||
<div className='mb-1 px-3 py-1'>
|
||||
{challenge_id ? (
|
||||
<div className='flex items-center gap-2'>
|
||||
<div className='rounded bg-components-badge-white-to-dark px-1 py-0.5 text-[10px] font-semibold uppercase text-text-tertiary'>Challenge</div>
|
||||
<div className='truncate text-xs text-text-secondary' title={challenge_id}>{challenge_id}</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className='flex items-center gap-2'>
|
||||
<div className='rounded bg-components-badge-white-to-dark px-1 py-0.5 text-[10px] font-semibold uppercase text-text-tertiary'>{evaluation_mode}</div>
|
||||
<div className='rounded bg-components-badge-white-to-dark px-1 py-0.5 text-[10px] font-semibold uppercase text-text-tertiary'>{success_type}</div>
|
||||
{success_pattern && (
|
||||
<div className='min-w-0 truncate text-xs text-text-secondary' title={success_pattern}>"{success_pattern}"</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default React.memo(Node)
|
||||
121
web/app/components/workflow/nodes/challenge-evaluator/panel.tsx
Normal file
121
web/app/components/workflow/nodes/challenge-evaluator/panel.tsx
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
import type { FC } from 'react'
|
||||
import { memo, useMemo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import type { NodePanelProps } from '@/app/components/workflow/types'
|
||||
import Field from '@/app/components/workflow/nodes/_base/components/field'
|
||||
import Split from '@/app/components/workflow/nodes/_base/components/split'
|
||||
import OutputVars, { VarItem } from '@/app/components/workflow/nodes/_base/components/output-vars'
|
||||
import VarReferencePicker from '@/app/components/workflow/nodes/_base/components/variable/var-reference-picker'
|
||||
import type { ChallengeEvaluatorNodeType } from './types'
|
||||
import useNodeCrud from '@/app/components/workflow/nodes/_base/hooks/use-node-crud'
|
||||
import produce from 'immer'
|
||||
import useSWR from 'swr'
|
||||
import { fetchChallenges } from '@/service/challenges'
|
||||
import Editor from '@/app/components/workflow/nodes/_base/components/prompt/editor'
|
||||
import useAvailableVarList from '@/app/components/workflow/nodes/_base/hooks/use-available-var-list'
|
||||
import Select from '@/app/components/base/select'
|
||||
|
||||
const i18nPrefix = 'workflow.nodes.challengeEvaluator'
|
||||
|
||||
const Panel: FC<NodePanelProps<ChallengeEvaluatorNodeType>> = ({ id, data }) => {
|
||||
const { t } = useTranslation()
|
||||
const { inputs, setInputs } = useNodeCrud<ChallengeEvaluatorNodeType>(id, data)
|
||||
const { data: challenges } = useSWR('challenges:list', fetchChallenges)
|
||||
|
||||
const filterVar = useMemo(() => (_: any) => true, [])
|
||||
const { availableVars, availableNodesWithParent } = useAvailableVarList(id, { onlyLeafNodeVar: false, filterVar })
|
||||
|
||||
return (
|
||||
<div className='pt-2'>
|
||||
<div className='space-y-4 px-4 pb-4'>
|
||||
<Field title={t(`${i18nPrefix}.selectedChallenge`)} tooltip={t(`${i18nPrefix}.selectedChallengeTip`)}>
|
||||
<Select
|
||||
items={(challenges || []).map((c: any) => ({ value: c.id, name: c.name }))}
|
||||
defaultValue={data.challenge_id || ''}
|
||||
onSelect={item => setInputs(produce(inputs, (draft) => { (draft as any).challenge_id = (item?.value as string) || undefined }))}
|
||||
allowSearch={false}
|
||||
/>
|
||||
</Field>
|
||||
<Field title={t(`${i18nPrefix}.evaluationMode`)} tooltip={t(`${i18nPrefix}.evaluationModeTip`)}>
|
||||
<Select
|
||||
items={[
|
||||
{ value: 'rules', name: 'Rules' },
|
||||
{ value: 'llm-judge', name: 'Judging LLM' },
|
||||
{ value: 'custom', name: 'Custom' },
|
||||
]}
|
||||
defaultValue={data.evaluation_mode || 'rules'}
|
||||
onSelect={item => setInputs(produce(inputs, (draft) => { (draft as any).evaluation_mode = item.value as string }))}
|
||||
allowSearch={false}
|
||||
/>
|
||||
</Field>
|
||||
{!data.challenge_id && data.evaluation_mode === 'rules' && (
|
||||
<>
|
||||
<Field title={t(`${i18nPrefix}.successType`)} tooltip={t(`${i18nPrefix}.successTypeTip`)}>
|
||||
<Select
|
||||
items={[
|
||||
{ value: 'contains', name: 'Contains' },
|
||||
{ value: 'regex', name: 'Regex' },
|
||||
]}
|
||||
defaultValue={data.success_type || 'contains'}
|
||||
onSelect={item => setInputs(produce(inputs, (draft) => { (draft as any).success_type = item.value as string }))}
|
||||
allowSearch={false}
|
||||
/>
|
||||
</Field>
|
||||
<Field title={t(`${i18nPrefix}.successPattern`)} tooltip={t(`${i18nPrefix}.successPatternTip`)} required>
|
||||
<Editor
|
||||
title={<div className='text-xs font-semibold uppercase text-text-secondary'>pattern</div>}
|
||||
value={data.success_pattern || ''}
|
||||
onChange={v => setInputs(produce(inputs, (draft) => { (draft as any).success_pattern = v }))}
|
||||
readOnly={false}
|
||||
isShowContext={false}
|
||||
isChatApp
|
||||
isChatModel
|
||||
hasSetBlockStatus={{ history: false, query: false, context: false }}
|
||||
nodesOutputVars={availableVars}
|
||||
availableNodes={availableNodesWithParent}
|
||||
isSupportFileVar
|
||||
/>
|
||||
</Field>
|
||||
</>
|
||||
)}
|
||||
<Field title={t(`${i18nPrefix}.responseVar`)} tooltip={t(`${i18nPrefix}.responseVarTip`)}>
|
||||
<VarReferencePicker
|
||||
nodeId={id}
|
||||
readonly={false}
|
||||
isShowNodeName
|
||||
value={data.inputs?.response || []}
|
||||
onChange={v => setInputs(produce(inputs, (draft) => { (draft as any).inputs = { ...(draft as any).inputs, response: v } }))}
|
||||
filterVar={filterVar}
|
||||
/>
|
||||
</Field>
|
||||
<Field title={t(`${i18nPrefix}.scoringStrategy`)} tooltip={t(`${i18nPrefix}.scoringStrategyTip`)}>
|
||||
<Select
|
||||
items={[
|
||||
{ value: 'first', name: t(`${i18nPrefix}.scoringFirst`) },
|
||||
{ value: 'fastest', name: t(`${i18nPrefix}.scoringFastest`) },
|
||||
{ value: 'fewest_tokens', name: t(`${i18nPrefix}.scoringFewestTokens`) },
|
||||
{ value: 'highest_rating', name: t(`${i18nPrefix}.scoringHighestRating`) },
|
||||
{ value: 'custom', name: t(`${i18nPrefix}.scoringCustom`) },
|
||||
]}
|
||||
defaultValue={data.scoring_strategy || 'highest_rating'}
|
||||
onSelect={item => setInputs(produce(inputs, (draft) => { (draft as any).scoring_strategy = item.value as string }))}
|
||||
allowSearch={false}
|
||||
/>
|
||||
</Field>
|
||||
</div>
|
||||
<Split />
|
||||
<div>
|
||||
<OutputVars>
|
||||
<>
|
||||
<VarItem name='challenge_succeeded' type='boolean' description='Challenge succeeded' />
|
||||
<VarItem name='judge_rating' type='number' description='Judge rating' />
|
||||
<VarItem name='judge_feedback' type='string' description='Judge feedback' />
|
||||
<VarItem name='message' type='string' description='Message' />
|
||||
</>
|
||||
</OutputVars>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default memo(Panel)
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
import type { CommonNodeType, ValueSelector } from '@/app/components/workflow/types'
|
||||
import { BlockEnum } from '@/app/components/workflow/types'
|
||||
|
||||
export type ChallengeEvaluatorNodeType = CommonNodeType<{
|
||||
challenge_id?: string
|
||||
evaluation_mode?: 'rules' | 'llm-judge' | 'custom'
|
||||
success_type?: 'regex' | 'contains' | 'custom'
|
||||
success_pattern?: string
|
||||
scoring_strategy?: 'first' | 'fastest' | 'fewest_tokens' | 'highest_rating' | 'custom'
|
||||
mask_variables?: string[]
|
||||
inputs?: {
|
||||
response?: ValueSelector
|
||||
}
|
||||
}>
|
||||
|
||||
export const DEFAULT_CHALLENGE_EVALUATOR_INPUTS: ChallengeEvaluatorNodeType['inputs'] = {
|
||||
response: [],
|
||||
}
|
||||
|
||||
export const CHALLENGE_EVALUATOR_BLOCK_TYPE = BlockEnum.ChallengeEvaluator
|
||||
|
|
@ -43,6 +43,12 @@ import DataSourcePanel from './data-source/panel'
|
|||
import KnowledgeBaseNode from './knowledge-base/node'
|
||||
import KnowledgeBasePanel from './knowledge-base/panel'
|
||||
import { TransferMethod } from '@/types/app'
|
||||
import ChallengeEvaluatorNode from './challenge-evaluator/node'
|
||||
import ChallengeEvaluatorPanel from './challenge-evaluator/panel'
|
||||
import JudgingLLMNode from './judging-llm/node'
|
||||
import JudgingLLMPanel from './judging-llm/panel'
|
||||
import TeamChallengeNode from './team-challenge/node'
|
||||
import TeamChallengePanel from './team-challenge/panel'
|
||||
|
||||
export const NodeComponentMap: Record<string, ComponentType<any>> = {
|
||||
[BlockEnum.Start]: StartNode,
|
||||
|
|
@ -67,6 +73,11 @@ export const NodeComponentMap: Record<string, ComponentType<any>> = {
|
|||
[BlockEnum.Agent]: AgentNode,
|
||||
[BlockEnum.DataSource]: DataSourceNode,
|
||||
[BlockEnum.KnowledgeBase]: KnowledgeBaseNode,
|
||||
[BlockEnum.ChallengeEvaluator]: ChallengeEvaluatorNode,
|
||||
[BlockEnum.JudgingLLM]: JudgingLLMNode,
|
||||
[BlockEnum.TeamChallenge]: TeamChallengeNode,
|
||||
// Legacy alias for renamed node
|
||||
'prompt-challenge': ChallengeEvaluatorNode,
|
||||
}
|
||||
|
||||
export const PanelComponentMap: Record<string, ComponentType<any>> = {
|
||||
|
|
@ -92,6 +103,11 @@ export const PanelComponentMap: Record<string, ComponentType<any>> = {
|
|||
[BlockEnum.Agent]: AgentPanel,
|
||||
[BlockEnum.DataSource]: DataSourcePanel,
|
||||
[BlockEnum.KnowledgeBase]: KnowledgeBasePanel,
|
||||
[BlockEnum.ChallengeEvaluator]: ChallengeEvaluatorPanel,
|
||||
[BlockEnum.JudgingLLM]: JudgingLLMPanel,
|
||||
[BlockEnum.TeamChallenge]: TeamChallengePanel,
|
||||
// Legacy alias for renamed node
|
||||
'prompt-challenge': ChallengeEvaluatorPanel,
|
||||
}
|
||||
|
||||
export const CUSTOM_NODE_TYPE = 'custom'
|
||||
|
|
|
|||
45
web/app/components/workflow/nodes/judging-llm/default.ts
Normal file
45
web/app/components/workflow/nodes/judging-llm/default.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import type { NodeDefault } from '../../types'
|
||||
import { genNodeMetaData } from '@/app/components/workflow/utils'
|
||||
import { BlockEnum, VarType } from '@/app/components/workflow/types'
|
||||
import { BlockClassificationEnum } from '@/app/components/workflow/block-selector/types'
|
||||
import type { JudgingLLMNodeType } from './types'
|
||||
import { DEFAULT_JUDGE_MODEL } from './types'
|
||||
|
||||
const metaData = genNodeMetaData({
|
||||
classification: BlockClassificationEnum.Utilities,
|
||||
sort: 4,
|
||||
type: BlockEnum.JudgingLLM,
|
||||
helpLinkUri: 'judging-llm',
|
||||
})
|
||||
|
||||
const nodeDefault: NodeDefault<JudgingLLMNodeType> = {
|
||||
metaData,
|
||||
defaultValue: {
|
||||
judge_model: DEFAULT_JUDGE_MODEL,
|
||||
rubric_prompt_template: '',
|
||||
rating_scale: 10,
|
||||
pass_threshold: 7,
|
||||
inputs: {
|
||||
goal: [],
|
||||
response: [],
|
||||
},
|
||||
},
|
||||
getOutputVars() {
|
||||
return [
|
||||
{ variable: 'judge_passed', type: VarType.boolean },
|
||||
{ variable: 'judge_rating', type: VarType.number },
|
||||
{ variable: 'judge_feedback', type: VarType.string },
|
||||
{ variable: 'judge_raw', type: VarType.object },
|
||||
]
|
||||
},
|
||||
checkValid(payload: JudgingLLMNodeType, t: any) {
|
||||
let errorMessages = ''
|
||||
if (!payload.judge_model?.provider)
|
||||
errorMessages = t('workflow.errorMsg.fieldRequired', { field: t('workflow.common.model') })
|
||||
if (!errorMessages && !payload.rubric_prompt_template)
|
||||
errorMessages = t('workflow.errorMsg.fieldRequired', { field: 'rubric_prompt_template' })
|
||||
return { isValid: !errorMessages, errorMessage: errorMessages }
|
||||
},
|
||||
}
|
||||
|
||||
export default nodeDefault
|
||||
33
web/app/components/workflow/nodes/judging-llm/node.tsx
Normal file
33
web/app/components/workflow/nodes/judging-llm/node.tsx
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import type { FC } from 'react'
|
||||
import React from 'react'
|
||||
import type { NodeProps } from '@/app/components/workflow/types'
|
||||
import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
|
||||
import { useTextGenerationCurrentProviderAndModelAndModelList } from '@/app/components/header/account-setting/model-provider-page/hooks'
|
||||
import type { JudgingLLMNodeType } from './types'
|
||||
|
||||
const Node: FC<NodeProps<JudgingLLMNodeType>> = ({ data }) => {
|
||||
const { judge_model, pass_threshold } = data
|
||||
const hasSetModel = !!(judge_model?.provider && judge_model?.name)
|
||||
const { textGenerationModelList } = useTextGenerationCurrentProviderAndModelAndModelList()
|
||||
|
||||
if (!hasSetModel)
|
||||
return null
|
||||
|
||||
return (
|
||||
<div className='mb-1 px-3 py-1'>
|
||||
<div className='flex items-center gap-2'>
|
||||
<ModelSelector
|
||||
defaultModel={{ provider: judge_model!.provider, model: judge_model!.name }}
|
||||
modelList={textGenerationModelList}
|
||||
triggerClassName='!h-6 !rounded-md'
|
||||
readonly
|
||||
/>
|
||||
<div className='rounded bg-components-badge-white-to-dark px-1 py-0.5 text-[10px] font-semibold uppercase text-text-tertiary'>
|
||||
Pass ≥ {pass_threshold ?? 7}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default React.memo(Node)
|
||||
143
web/app/components/workflow/nodes/judging-llm/panel.tsx
Normal file
143
web/app/components/workflow/nodes/judging-llm/panel.tsx
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
import type { FC } from 'react'
|
||||
import { memo, useMemo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import type { NodePanelProps } from '@/app/components/workflow/types'
|
||||
import Field from '@/app/components/workflow/nodes/_base/components/field'
|
||||
import Split from '@/app/components/workflow/nodes/_base/components/split'
|
||||
import VarReferencePicker from '@/app/components/workflow/nodes/_base/components/variable/var-reference-picker'
|
||||
import OutputVars, { VarItem } from '@/app/components/workflow/nodes/_base/components/output-vars'
|
||||
import ModelParameterModal from '@/app/components/header/account-setting/model-provider-page/model-parameter-modal'
|
||||
import { fetchAndMergeValidCompletionParams } from '@/utils/completion-params'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import AddButton2 from '@/app/components/base/button/add-button'
|
||||
import Editor from '@/app/components/workflow/nodes/_base/components/prompt/editor'
|
||||
import useAvailableVarList from '@/app/components/workflow/nodes/_base/hooks/use-available-var-list'
|
||||
import Input from '@/app/components/base/input'
|
||||
import type { JudgingLLMNodeType } from './types'
|
||||
import useNodeCrud from '@/app/components/workflow/nodes/_base/hooks/use-node-crud'
|
||||
import produce from 'immer'
|
||||
|
||||
const Panel: FC<NodePanelProps<JudgingLLMNodeType>> = ({ id, data }) => {
|
||||
const { t } = useTranslation()
|
||||
const { inputs, setInputs } = useNodeCrud<JudgingLLMNodeType>(id, data)
|
||||
const filterVar = useMemo(() => (_: any) => true, [])
|
||||
const { availableVars, availableNodesWithParent } = useAvailableVarList(id, { onlyLeafNodeVar: false, filterVar })
|
||||
|
||||
return (
|
||||
<div className='pt-2'>
|
||||
<div className='space-y-4 px-4 pb-4'>
|
||||
<Field title={t('workflow.common.model')} required>
|
||||
<ModelParameterModal
|
||||
popupClassName='!w-[387px]'
|
||||
isInWorkflow
|
||||
isAdvancedMode={true}
|
||||
mode={data.judge_model?.mode}
|
||||
provider={data.judge_model?.provider}
|
||||
completionParams={data.judge_model?.completion_params}
|
||||
modelId={data.judge_model?.name}
|
||||
setModel={async (model: { provider: string; modelId: string; mode?: string }) => {
|
||||
try {
|
||||
const { params } = await fetchAndMergeValidCompletionParams(
|
||||
model.provider,
|
||||
model.modelId,
|
||||
data.judge_model?.completion_params || {},
|
||||
true,
|
||||
)
|
||||
setInputs(produce(inputs, (draft) => {
|
||||
(draft as any).judge_model = {
|
||||
provider: model.provider,
|
||||
name: model.modelId,
|
||||
mode: model.mode || 'chat',
|
||||
completion_params: params,
|
||||
}
|
||||
}))
|
||||
}
|
||||
catch {
|
||||
Toast.notify({ type: 'error', message: t('common.error') })
|
||||
setInputs(produce(inputs, (draft) => {
|
||||
(draft as any).judge_model = {
|
||||
provider: model.provider,
|
||||
name: model.modelId,
|
||||
mode: model.mode || 'chat',
|
||||
completion_params: {},
|
||||
}
|
||||
}))
|
||||
}
|
||||
}}
|
||||
onCompletionParamsChange={newParams => setInputs(produce(inputs, (draft) => { (draft as any).judge_model.completion_params = newParams }))}
|
||||
hideDebugWithMultipleModel
|
||||
debugWithMultipleModel={false}
|
||||
readonly={false}
|
||||
/>
|
||||
</Field>
|
||||
<Field title='Rubric Template' required>
|
||||
<div className='space-y-2'>
|
||||
<Editor
|
||||
title={<div className='text-xs font-semibold uppercase text-text-secondary'>system</div>}
|
||||
value={data.rubric_prompt_template || ''}
|
||||
onChange={v => setInputs(produce(inputs, (draft) => { (draft as any).rubric_prompt_template = v }))}
|
||||
readOnly={false}
|
||||
isShowContext={false}
|
||||
isChatApp
|
||||
isChatModel
|
||||
hasSetBlockStatus={{ history: false, query: false, context: false }}
|
||||
nodesOutputVars={availableVars}
|
||||
availableNodes={availableNodesWithParent}
|
||||
isSupportFileVar
|
||||
/>
|
||||
<div className='flex items-center gap-2'>
|
||||
<AddButton2 onClick={() => setInputs(produce(inputs, (draft) => { (draft as any).rubric_prompt_template = 'You are a strict evaluator. Given a goal and a model response, decide pass/fail, give a rating 0-10, and provide concise feedback.\\n\\nGoal:\\n{goal}\\n\\nResponse:\\n{response}\\n\\nReturn JSON: {"passed": boolean, "rating": number, "feedback": string}.' }))} />
|
||||
<div className='system-xs-medium-uppercase text-text-tertiary'>Insert default rubric</div>
|
||||
</div>
|
||||
</div>
|
||||
</Field>
|
||||
<Field title='Pass Threshold'>
|
||||
<Input
|
||||
type='number'
|
||||
wrapperClassName='w-full'
|
||||
min={0}
|
||||
max={data.rating_scale || 10}
|
||||
value={data.pass_threshold ?? 7}
|
||||
onChange={e => setInputs(produce(inputs, (draft) => { (draft as any).pass_threshold = Number(e.target.value) }))}
|
||||
/>
|
||||
</Field>
|
||||
<Field title='Inputs'>
|
||||
<div className='space-y-2'>
|
||||
<div>
|
||||
<div className='system-xs-medium-uppercase mb-1 text-text-tertiary'>Goal</div>
|
||||
<VarReferencePicker
|
||||
nodeId={id}
|
||||
isShowNodeName
|
||||
readonly={false}
|
||||
value={data.inputs?.goal || []}
|
||||
onChange={v => setInputs(produce(inputs, (draft) => { (draft as any).inputs = { ...(draft as any).inputs, goal: v } }))}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<div className='system-xs-medium-uppercase mb-1 text-text-tertiary'>Response</div>
|
||||
<VarReferencePicker
|
||||
nodeId={id}
|
||||
isShowNodeName
|
||||
readonly={false}
|
||||
value={data.inputs?.response || []}
|
||||
onChange={v => setInputs(produce(inputs, (draft) => { (draft as any).inputs = { ...(draft as any).inputs, response: v } }))}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</Field>
|
||||
</div>
|
||||
<Split />
|
||||
<div>
|
||||
<OutputVars>
|
||||
<>
|
||||
<VarItem name='judge_passed' type='boolean' description={t('workflow.nodes.judgingLLM.outputVars.judgePassed')} />
|
||||
<VarItem name='judge_rating' type='number' description={t('workflow.nodes.judgingLLM.outputVars.judgeRating')} />
|
||||
<VarItem name='judge_feedback' type='string' description={t('workflow.nodes.judgingLLM.outputVars.judgeFeedback')} />
|
||||
</>
|
||||
</OutputVars>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default memo(Panel)
|
||||
24
web/app/components/workflow/nodes/judging-llm/types.ts
Normal file
24
web/app/components/workflow/nodes/judging-llm/types.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import type { CommonNodeType, ModelConfig, ValueSelector } from '@/app/components/workflow/types'
|
||||
import { BlockEnum } from '@/app/components/workflow/types'
|
||||
|
||||
export type JudgingLLMNodeType = CommonNodeType<{
|
||||
judge_model: ModelConfig
|
||||
rubric_prompt_template: string
|
||||
rating_scale?: number
|
||||
pass_threshold?: number
|
||||
inputs?: {
|
||||
goal?: ValueSelector
|
||||
response?: ValueSelector
|
||||
}
|
||||
}>
|
||||
|
||||
export const DEFAULT_JUDGE_MODEL: ModelConfig = {
|
||||
provider: '',
|
||||
name: '',
|
||||
mode: 'chat',
|
||||
completion_params: {
|
||||
temperature: 0.3,
|
||||
},
|
||||
}
|
||||
|
||||
export const JUDGING_LLM_BLOCK_TYPE = BlockEnum.JudgingLLM
|
||||
42
web/app/components/workflow/nodes/team-challenge/default.ts
Normal file
42
web/app/components/workflow/nodes/team-challenge/default.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import type { NodeDefault } from '../../types'
|
||||
import { genNodeMetaData } from '@/app/components/workflow/utils'
|
||||
import { BlockEnum, VarType } from '@/app/components/workflow/types'
|
||||
import { BlockClassificationEnum } from '@/app/components/workflow/block-selector/types'
|
||||
import type { TeamChallengeNodeType } from './types'
|
||||
|
||||
const metaData = genNodeMetaData({
|
||||
classification: BlockClassificationEnum.Utilities,
|
||||
sort: 5,
|
||||
type: BlockEnum.TeamChallenge,
|
||||
helpLinkUri: 'team-challenge',
|
||||
})
|
||||
|
||||
const nodeDefault: NodeDefault<TeamChallengeNodeType> = {
|
||||
metaData,
|
||||
defaultValue: {
|
||||
defense_selection_policy: 'latest_best',
|
||||
attack_selection_policy: 'latest_best',
|
||||
scoring_strategy: 'red_blue_ratio',
|
||||
inputs: {
|
||||
team_choice: [],
|
||||
attack_prompt: [],
|
||||
defense_prompt: [],
|
||||
},
|
||||
},
|
||||
getOutputVars() {
|
||||
return [
|
||||
{ variable: 'team', type: VarType.string },
|
||||
{ variable: 'judge_passed', type: VarType.boolean },
|
||||
{ variable: 'judge_rating', type: VarType.number },
|
||||
{ variable: 'judge_feedback', type: VarType.string },
|
||||
{ variable: 'categories', type: VarType.object },
|
||||
{ variable: 'team_points', type: VarType.number },
|
||||
{ variable: 'total_points', type: VarType.number },
|
||||
]
|
||||
},
|
||||
checkValid(_payload: TeamChallengeNodeType) {
|
||||
return { isValid: true }
|
||||
},
|
||||
}
|
||||
|
||||
export default nodeDefault
|
||||
25
web/app/components/workflow/nodes/team-challenge/node.tsx
Normal file
25
web/app/components/workflow/nodes/team-challenge/node.tsx
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import type { FC } from 'react'
|
||||
import React from 'react'
|
||||
import type { NodeProps } from '@/app/components/workflow/types'
|
||||
import type { TeamChallengeNodeType } from './types'
|
||||
|
||||
const Node: FC<NodeProps<TeamChallengeNodeType>> = ({ data }) => {
|
||||
const { red_blue_challenge_id, defense_selection_policy, attack_selection_policy } = data
|
||||
return (
|
||||
<div className='mb-1 px-3 py-1'>
|
||||
{red_blue_challenge_id ? (
|
||||
<div className='flex items-center gap-2'>
|
||||
<div className='rounded bg-components-badge-white-to-dark px-1 py-0.5 text-[10px] font-semibold uppercase text-text-tertiary'>Red/Blue</div>
|
||||
<div className='truncate text-xs text-text-secondary' title={red_blue_challenge_id}>{red_blue_challenge_id}</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className='flex items-center gap-2'>
|
||||
<div className='rounded bg-components-badge-white-to-dark px-1 py-0.5 text-[10px] font-semibold uppercase text-text-tertiary'>Defense: {defense_selection_policy}</div>
|
||||
<div className='rounded bg-components-badge-white-to-dark px-1 py-0.5 text-[10px] font-semibold uppercase text-text-tertiary'>Attack: {attack_selection_policy}</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default React.memo(Node)
|
||||
106
web/app/components/workflow/nodes/team-challenge/panel.tsx
Normal file
106
web/app/components/workflow/nodes/team-challenge/panel.tsx
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
import type { FC } from 'react'
|
||||
import { memo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import type { NodePanelProps } from '@/app/components/workflow/types'
|
||||
import Field from '@/app/components/workflow/nodes/_base/components/field'
|
||||
import Split from '@/app/components/workflow/nodes/_base/components/split'
|
||||
import VarReferencePicker from '@/app/components/workflow/nodes/_base/components/variable/var-reference-picker'
|
||||
import OutputVars, { VarItem } from '@/app/components/workflow/nodes/_base/components/output-vars'
|
||||
import type { TeamChallengeNodeType } from './types'
|
||||
import useNodeCrud from '@/app/components/workflow/nodes/_base/hooks/use-node-crud'
|
||||
import produce from 'immer'
|
||||
import useSWR from 'swr'
|
||||
import { fetchRedBlueChallenges } from '@/service/redBlueChallenges'
|
||||
import Select from '@/app/components/base/select'
|
||||
|
||||
const i18nPrefix = 'workflow.nodes.teamChallenge'
|
||||
|
||||
const Panel: FC<NodePanelProps<TeamChallengeNodeType>> = ({ id, data }) => {
|
||||
const { t } = useTranslation()
|
||||
const { inputs, setInputs } = useNodeCrud<TeamChallengeNodeType>(id, data)
|
||||
const { data: redBlue } = useSWR('redBlue:list', fetchRedBlueChallenges)
|
||||
|
||||
return (
|
||||
<div className='pt-2'>
|
||||
<div className='space-y-4 px-4 pb-4'>
|
||||
<Field title={t(`${i18nPrefix}.selectedChallenge`)} tooltip={t(`${i18nPrefix}.selectedChallengeTip`)}>
|
||||
<Select
|
||||
items={(redBlue || []).map((c: any) => ({ value: c.id, name: c.name }))}
|
||||
defaultValue={data.red_blue_challenge_id || ''}
|
||||
onSelect={item => setInputs(produce(inputs, (draft) => { (draft as any).red_blue_challenge_id = (item?.value as string) || undefined }))}
|
||||
allowSearch={false}
|
||||
/>
|
||||
</Field>
|
||||
<Field title={t(`${i18nPrefix}.defenseSelectionPolicy`)} tooltip={t(`${i18nPrefix}.defenseSelectionPolicyTip`)}>
|
||||
<Select
|
||||
items={[
|
||||
{ value: 'latest_best', name: 'latest_best' },
|
||||
{ value: 'random_active', name: 'random_active' },
|
||||
{ value: 'round_robin', name: 'round_robin' },
|
||||
{ value: 'request_new_if_none', name: 'request_new_if_none' },
|
||||
]}
|
||||
defaultValue={data.defense_selection_policy || 'latest_best'}
|
||||
onSelect={item => setInputs(produce(inputs, (draft) => { (draft as any).defense_selection_policy = item.value as string }))}
|
||||
allowSearch={false}
|
||||
/>
|
||||
</Field>
|
||||
<Field title={t(`${i18nPrefix}.attackSelectionPolicy`)} tooltip={t(`${i18nPrefix}.attackSelectionPolicyTip`)}>
|
||||
<Select
|
||||
items={[
|
||||
{ value: 'latest_best', name: 'latest_best' },
|
||||
{ value: 'random_active', name: 'random_active' },
|
||||
{ value: 'round_robin', name: 'round_robin' },
|
||||
{ value: 'request_new_if_none', name: 'request_new_if_none' },
|
||||
]}
|
||||
defaultValue={data.attack_selection_policy || 'latest_best'}
|
||||
onSelect={item => setInputs(produce(inputs, (draft) => { (draft as any).attack_selection_policy = item.value as string }))}
|
||||
allowSearch={false}
|
||||
/>
|
||||
</Field>
|
||||
<Field title={t(`${i18nPrefix}.teamChoiceVar`)} tooltip={t(`${i18nPrefix}.teamChoiceVarTip`)}>
|
||||
<VarReferencePicker
|
||||
nodeId={id}
|
||||
isShowNodeName
|
||||
readonly={false}
|
||||
value={data.inputs?.team_choice || []}
|
||||
onChange={v => setInputs(produce(inputs, (draft) => { (draft as any).inputs = { ...(draft as any).inputs, team_choice: v } }))}
|
||||
/>
|
||||
</Field>
|
||||
<Field title={t(`${i18nPrefix}.attackPromptVar`)} tooltip={t(`${i18nPrefix}.attackPromptVarTip`)}>
|
||||
<VarReferencePicker
|
||||
nodeId={id}
|
||||
isShowNodeName
|
||||
readonly={false}
|
||||
value={data.inputs?.attack_prompt || []}
|
||||
onChange={v => setInputs(produce(inputs, (draft) => { (draft as any).inputs = { ...(draft as any).inputs, attack_prompt: v } }))}
|
||||
/>
|
||||
</Field>
|
||||
<Field title={t(`${i18nPrefix}.defensePromptVar`)} tooltip={t(`${i18nPrefix}.defensePromptVarTip`)}>
|
||||
<VarReferencePicker
|
||||
nodeId={id}
|
||||
isShowNodeName
|
||||
readonly={false}
|
||||
value={data.inputs?.defense_prompt || []}
|
||||
onChange={v => setInputs(produce(inputs, (draft) => { (draft as any).inputs = { ...(draft as any).inputs, defense_prompt: v } }))}
|
||||
/>
|
||||
</Field>
|
||||
</div>
|
||||
<Split />
|
||||
<div>
|
||||
<OutputVars>
|
||||
<>
|
||||
<VarItem name='team' type='string' description='Team' />
|
||||
<VarItem name='judge_passed' type='boolean' description='Judge passed' />
|
||||
<VarItem name='judge_rating' type='number' description='Judge rating' />
|
||||
<VarItem name='judge_feedback' type='string' description='Judge feedback' />
|
||||
<VarItem name='categories' type='object' description='Category outcomes' />
|
||||
<VarItem name='team_points' type='number' description='Team points' />
|
||||
<VarItem name='total_points' type='number' description='Total points' />
|
||||
</>
|
||||
</OutputVars>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default memo(Panel)
|
||||
16
web/app/components/workflow/nodes/team-challenge/types.ts
Normal file
16
web/app/components/workflow/nodes/team-challenge/types.ts
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
import type { CommonNodeType, ValueSelector } from '@/app/components/workflow/types'
|
||||
import { BlockEnum } from '@/app/components/workflow/types'
|
||||
|
||||
export type TeamChallengeNodeType = CommonNodeType<{
|
||||
red_blue_challenge_id?: string
|
||||
defense_selection_policy?: 'latest_best' | 'random_active' | 'round_robin' | 'request_new_if_none'
|
||||
attack_selection_policy?: 'latest_best' | 'random_active' | 'round_robin' | 'request_new_if_none'
|
||||
scoring_strategy?: 'red_blue_ratio' | 'custom'
|
||||
inputs?: {
|
||||
team_choice?: ValueSelector
|
||||
attack_prompt?: ValueSelector
|
||||
defense_prompt?: ValueSelector
|
||||
}
|
||||
}>
|
||||
|
||||
export const TEAM_CHALLENGE_BLOCK_TYPE = BlockEnum.TeamChallenge
|
||||
|
|
@ -50,6 +50,9 @@ export enum BlockEnum {
|
|||
DataSource = 'datasource',
|
||||
DataSourceEmpty = 'datasource-empty',
|
||||
KnowledgeBase = 'knowledge-index',
|
||||
ChallengeEvaluator = 'challenge-evaluator',
|
||||
JudgingLLM = 'judging-llm',
|
||||
TeamChallenge = 'team-challenge',
|
||||
}
|
||||
|
||||
export enum ControlMode {
|
||||
|
|
|
|||
191
web/app/red-blue-challenges/[id]/page.tsx
Normal file
191
web/app/red-blue-challenges/[id]/page.tsx
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useParams } from 'next/navigation'
|
||||
import { RiLoader4Line, RiShieldLine, RiSwordLine } from '@remixicon/react'
|
||||
import { fetchRedBlueLeaderboard, submitRedBluePrompt } from '@/service/redBlueChallenges'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Textarea from '@/app/components/base/textarea'
|
||||
import Toast from '@/app/components/base/toast'
|
||||
|
||||
export default function RedBlueChallengeDetailPage() {
|
||||
const { t } = useTranslation()
|
||||
const params = useParams()
|
||||
const id = params?.id as string
|
||||
|
||||
const [team, setTeam] = useState<'red' | 'blue' | null>(null)
|
||||
const [prompt, setPrompt] = useState('')
|
||||
const [submitting, setSubmitting] = useState(false)
|
||||
const [lastResult, setLastResult] = useState<any>(null)
|
||||
const [leaderboard, setLeaderboard] = useState<any>(null)
|
||||
|
||||
useEffect(() => {
|
||||
const load = async () => {
|
||||
try {
|
||||
const leaders = await fetchRedBlueLeaderboard(id)
|
||||
setLeaderboard(leaders)
|
||||
}
|
||||
catch (e: any) {
|
||||
console.error('Failed to load leaderboard:', e)
|
||||
}
|
||||
}
|
||||
if (id)
|
||||
load()
|
||||
}, [id])
|
||||
|
||||
const handleSubmit = async () => {
|
||||
if (!team || !prompt.trim()) {
|
||||
Toast.notify({ type: 'error', message: 'Please choose a team and enter a prompt' })
|
||||
return
|
||||
}
|
||||
|
||||
setSubmitting(true)
|
||||
setLastResult(null)
|
||||
try {
|
||||
const result = await submitRedBluePrompt(id, team, prompt)
|
||||
setLastResult(result)
|
||||
Toast.notify({ type: 'success', message: 'Prompt submitted!' })
|
||||
setPrompt('')
|
||||
|
||||
// Refresh leaderboard
|
||||
const leaders = await fetchRedBlueLeaderboard(id)
|
||||
setLeaderboard(leaders)
|
||||
}
|
||||
catch (e: any) {
|
||||
Toast.notify({ type: 'error', message: e.message || 'Submission failed' })
|
||||
}
|
||||
finally {
|
||||
setSubmitting(false)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className='min-h-screen bg-components-panel-bg'>
|
||||
<div className='mx-auto max-w-5xl px-4 py-12 sm:px-6 lg:px-8'>
|
||||
<div className='mb-8 text-center'>
|
||||
<h1 className='mb-2 text-3xl font-bold text-text-primary'>{t('challenges.redBlue.title')}</h1>
|
||||
<p className='text-lg text-text-secondary'>Join the Red or Blue team and compete</p>
|
||||
</div>
|
||||
|
||||
{!team ? (
|
||||
<div className='grid gap-6 sm:grid-cols-2'>
|
||||
<button
|
||||
onClick={() => setTeam('red')}
|
||||
className='group rounded-xl border-2 border-util-colors-red-red-300 bg-util-colors-red-red-50 p-8 shadow-xs transition-all hover:border-util-colors-red-red-500 hover:shadow-md'
|
||||
>
|
||||
<RiSwordLine className='mx-auto mb-4 h-16 w-16 text-util-colors-red-red-600' />
|
||||
<h2 className='mb-2 text-2xl font-bold text-util-colors-red-red-700'>
|
||||
{t('challenges.redBlue.red')}
|
||||
</h2>
|
||||
<p className='text-util-colors-red-red-600'>{t('challenges.redBlue.redDesc')}</p>
|
||||
</button>
|
||||
|
||||
<button
|
||||
onClick={() => setTeam('blue')}
|
||||
className='group rounded-xl border-2 border-util-colors-blue-blue-300 bg-util-colors-blue-blue-50 p-8 shadow-xs transition-all hover:border-util-colors-blue-blue-500 hover:shadow-md'
|
||||
>
|
||||
<RiShieldLine className='mx-auto mb-4 h-16 w-16 text-util-colors-blue-blue-600' />
|
||||
<h2 className='mb-2 text-2xl font-bold text-util-colors-blue-blue-700'>
|
||||
{t('challenges.redBlue.blue')}
|
||||
</h2>
|
||||
<p className='text-util-colors-blue-blue-600'>{t('challenges.redBlue.blueDesc')}</p>
|
||||
</button>
|
||||
</div>
|
||||
) : (
|
||||
<div className='grid gap-6 lg:grid-cols-3'>
|
||||
<div className='lg:col-span-2'>
|
||||
<div className={`rounded-xl border-2 p-6 shadow-xs ${team === 'red' ? 'border-util-colors-red-red-300 bg-util-colors-red-red-50' : 'border-util-colors-blue-blue-300 bg-util-colors-blue-blue-50'}`}>
|
||||
<div className='mb-4 flex items-center justify-between'>
|
||||
<h2 className={`text-xl font-bold ${team === 'red' ? 'text-util-colors-red-red-700' : 'text-util-colors-blue-blue-700'}`}>
|
||||
{team === 'red' ? t('challenges.redBlue.submitAttack') : t('challenges.redBlue.submitDefense')}
|
||||
</h2>
|
||||
<Button
|
||||
size='small'
|
||||
onClick={() => {
|
||||
setTeam(null)
|
||||
setPrompt('')
|
||||
setLastResult(null)
|
||||
}}
|
||||
>
|
||||
Switch Team
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
<Textarea
|
||||
value={prompt}
|
||||
onChange={e => setPrompt(e.target.value)}
|
||||
placeholder={team === 'red' ? t('challenges.redBlue.attackPrompt') : t('challenges.redBlue.defensePrompt')}
|
||||
rows={8}
|
||||
className='mb-4 w-full'
|
||||
/>
|
||||
|
||||
<Button
|
||||
type='primary'
|
||||
onClick={handleSubmit}
|
||||
loading={submitting}
|
||||
disabled={!prompt.trim()}
|
||||
className='w-full'
|
||||
>
|
||||
{submitting ? (
|
||||
<>
|
||||
<RiLoader4Line className='mr-2 h-4 w-4 animate-spin' />
|
||||
{t('common.operation.processing')}
|
||||
</>
|
||||
) : (
|
||||
t('challenges.player.submit')
|
||||
)}
|
||||
</Button>
|
||||
|
||||
{lastResult && (
|
||||
<div className='mt-4 rounded-lg border border-divider-subtle bg-components-panel-bg p-4'>
|
||||
<h3 className='mb-2 font-medium text-text-primary'>{t('challenges.redBlue.results')}</h3>
|
||||
{lastResult.judge_rating !== undefined && (
|
||||
<div className='text-sm text-text-secondary'>
|
||||
{t('challenges.leaderboard.rating')}: {lastResult.judge_rating}/10
|
||||
</div>
|
||||
)}
|
||||
{lastResult.team_points !== undefined && (
|
||||
<div className='mt-1 text-sm text-text-secondary'>
|
||||
Points earned: {lastResult.team_points}
|
||||
</div>
|
||||
)}
|
||||
{lastResult.judge_feedback && (
|
||||
<div className='mt-2 text-sm text-text-tertiary'>{lastResult.judge_feedback}</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='lg:col-span-1'>
|
||||
{leaderboard && (
|
||||
<div className='rounded-xl border border-divider-subtle bg-components-panel-bg p-6 shadow-xs'>
|
||||
<h3 className='mb-4 text-lg font-semibold text-text-primary'>Standings</h3>
|
||||
<div className='space-y-3'>
|
||||
<div className='flex items-center justify-between rounded-lg bg-util-colors-red-red-50 p-3'>
|
||||
<span className='font-medium text-util-colors-red-red-700'>
|
||||
{t('challenges.redBlue.redPoints')}
|
||||
</span>
|
||||
<span className='text-xl font-bold text-util-colors-red-red-700'>
|
||||
{leaderboard.red_points || 0}
|
||||
</span>
|
||||
</div>
|
||||
<div className='flex items-center justify-between rounded-lg bg-util-colors-blue-blue-50 p-3'>
|
||||
<span className='font-medium text-util-colors-blue-blue-700'>
|
||||
{t('challenges.redBlue.bluePoints')}
|
||||
</span>
|
||||
<span className='text-xl font-bold text-util-colors-blue-blue-700'>
|
||||
{leaderboard.blue_points || 0}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
23
web/app/red-blue-challenges/page.tsx
Normal file
23
web/app/red-blue-challenges/page.tsx
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import Link from 'next/link'
|
||||
import { fetchRedBlueChallenges } from '@/service/redBlueChallenges'
|
||||
|
||||
export default async function RedBlueChallengesPage() {
|
||||
const items = await fetchRedBlueChallenges()
|
||||
return (
|
||||
<div className="px-6 py-8">
|
||||
<h1 className="mb-4 text-xl font-semibold">Red / Blue Challenges</h1>
|
||||
<ul className="space-y-2">
|
||||
{items.map(i => (
|
||||
<li key={i.id} className="rounded border p-3">
|
||||
<Link href={`/red-blue-challenges/${i.id}`} className="text-primary hover:underline">
|
||||
{i.name}
|
||||
</Link>
|
||||
{i.description && (
|
||||
<p className="mt-1 text-sm text-gray-500">{i.description}</p>
|
||||
)}
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
|
@ -23,6 +23,7 @@ const NAMESPACES = [
|
|||
'app-overview',
|
||||
'app',
|
||||
'billing',
|
||||
'challenges',
|
||||
'common',
|
||||
'custom',
|
||||
'dataset-creation',
|
||||
|
|
|
|||
80
web/i18n/en-US/challenges.ts
Normal file
80
web/i18n/en-US/challenges.ts
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
export default {
|
||||
console: {
|
||||
title: 'Challenges',
|
||||
create: 'Create Challenge',
|
||||
createRedBlue: 'Create Red/Blue Challenge',
|
||||
edit: 'Edit Challenge',
|
||||
empty: 'No challenges yet',
|
||||
emptyDesc: 'Create your first challenge to get started',
|
||||
form: {
|
||||
name: 'Name',
|
||||
namePlaceholder: 'Enter challenge name',
|
||||
description: 'Description',
|
||||
descriptionPlaceholder: 'Describe the challenge',
|
||||
goal: 'Goal',
|
||||
goalPlaceholder: 'What should players achieve?',
|
||||
appId: 'App ID',
|
||||
workflowId: 'Workflow ID',
|
||||
evaluatorType: 'Evaluator Type',
|
||||
successType: 'Success Type',
|
||||
successPattern: 'Success Pattern',
|
||||
scoringStrategy: 'Scoring Strategy',
|
||||
isActive: 'Active',
|
||||
judgeSuite: 'Judge Suite',
|
||||
defensePolicy: 'Defense Selection Policy',
|
||||
attackPolicy: 'Attack Selection Policy',
|
||||
},
|
||||
actions: {
|
||||
activate: 'Activate',
|
||||
deactivate: 'Deactivate',
|
||||
edit: 'Edit',
|
||||
delete: 'Delete',
|
||||
deleteConfirm: 'Are you sure you want to delete this challenge?',
|
||||
},
|
||||
status: {
|
||||
active: 'Active',
|
||||
inactive: 'Inactive',
|
||||
},
|
||||
},
|
||||
player: {
|
||||
title: 'Challenges',
|
||||
browse: 'Browse Challenges',
|
||||
play: 'Play Challenge',
|
||||
submit: 'Submit',
|
||||
tryAgain: 'Try Again',
|
||||
viewLeaderboard: 'View Leaderboard',
|
||||
yourAttempt: 'Your Attempt',
|
||||
goal: 'Goal',
|
||||
status: {
|
||||
success: 'Success!',
|
||||
failed: 'Failed',
|
||||
pending: 'Pending',
|
||||
},
|
||||
},
|
||||
leaderboard: {
|
||||
title: 'Leaderboard',
|
||||
rank: 'Rank',
|
||||
player: 'Player',
|
||||
score: 'Score',
|
||||
time: 'Time',
|
||||
tokens: 'Tokens',
|
||||
rating: 'Rating',
|
||||
empty: 'No attempts yet',
|
||||
yourBest: 'Your Best',
|
||||
},
|
||||
redBlue: {
|
||||
title: 'Red vs Blue',
|
||||
chooseTeam: 'Choose Your Team',
|
||||
red: 'Red Team',
|
||||
blue: 'Blue Team',
|
||||
redDesc: 'Attack: Try to bypass defenses',
|
||||
blueDesc: 'Defense: Prevent attacks',
|
||||
submitAttack: 'Submit Attack',
|
||||
submitDefense: 'Submit Defense',
|
||||
attackPrompt: 'Attack Prompt',
|
||||
defensePrompt: 'Defense Prompt',
|
||||
results: 'Results',
|
||||
redPoints: 'Red Points',
|
||||
bluePoints: 'Blue Points',
|
||||
},
|
||||
}
|
||||
|
|
@ -270,6 +270,9 @@ const translation = {
|
|||
'loop-end': 'Exit Loop',
|
||||
'knowledge-index': 'Knowledge Base',
|
||||
'datasource': 'Data Source',
|
||||
'challenge-evaluator': 'Challenge Evaluator',
|
||||
'judging-llm': 'Judging LLM',
|
||||
'team-challenge': 'Team Challenge',
|
||||
},
|
||||
blocksAbout: {
|
||||
'start': 'Define the initial parameters for launching a workflow',
|
||||
|
|
@ -294,6 +297,9 @@ const translation = {
|
|||
'agent': 'Invoking large language models to answer questions or process natural language',
|
||||
'knowledge-index': 'Knowledge Base About',
|
||||
'datasource': 'Data Source About',
|
||||
'challenge-evaluator': 'Evaluate a model response against success rules or an LLM judge and record attempts.',
|
||||
'judging-llm': 'Judge a response with a rubric using an LLM and output pass/rating/feedback.',
|
||||
'team-challenge': 'Coordinate Red/Blue prompts, run judging, and emit team scores.',
|
||||
},
|
||||
operator: {
|
||||
zoomIn: 'Zoom In',
|
||||
|
|
@ -864,6 +870,69 @@ const translation = {
|
|||
last_record: 'Last record',
|
||||
},
|
||||
},
|
||||
challengeEvaluator: {
|
||||
evaluationMode: 'Evaluation Mode',
|
||||
evaluationModeTip: 'Choose how the response is evaluated: rules, LLM judge, or custom.',
|
||||
successType: 'Success Type',
|
||||
successTypeTip: 'Contains: substring match (case-insensitive). Regex: JavaScript regular expression.',
|
||||
successPattern: 'Success Pattern',
|
||||
successPatternPlaceholder: 'Enter regex or substring',
|
||||
successPatternTip: 'Supports variables. For regex, do not add surrounding slashes.',
|
||||
responseVar: 'Response Variable',
|
||||
responseVarTip: 'Pick the upstream response to evaluate.',
|
||||
selectedChallenge: 'Selected Challenge',
|
||||
selectedChallengeTip: 'Select an existing challenge to use its stored rules and settings.',
|
||||
scoringStrategy: 'Scoring Strategy',
|
||||
scoringStrategyTip: 'How to rank successful attempts on the leaderboard.',
|
||||
scoringFirst: 'First (earliest success)',
|
||||
scoringFastest: 'Fastest (lowest time)',
|
||||
scoringFewestTokens: 'Fewest Tokens',
|
||||
scoringHighestRating: 'Highest Rating',
|
||||
scoringCustom: 'Custom',
|
||||
outputVars: {
|
||||
challengeSucceeded: 'Challenge Succeeded',
|
||||
judgeRating: 'Judge Rating',
|
||||
judgeFeedback: 'Judge Feedback',
|
||||
message: 'Result Message',
|
||||
},
|
||||
},
|
||||
judgingLLM: {
|
||||
rubricTemplate: 'Rubric Template',
|
||||
rubricTemplatePlaceholder: 'Define your evaluation criteria',
|
||||
rubricTemplateTip: 'Template for the judge. Use {goal} and {response} placeholders.',
|
||||
passThreshold: 'Pass Threshold',
|
||||
passThresholdTip: 'Minimum rating (0-10) required to pass.',
|
||||
insertDefaultRubric: 'Insert default rubric',
|
||||
outputVars: {
|
||||
judgePassed: 'Judge Passed',
|
||||
judgeRating: 'Judge Rating',
|
||||
judgeFeedback: 'Judge Feedback',
|
||||
judgeRaw: 'Judge Raw Output',
|
||||
},
|
||||
},
|
||||
teamChallenge: {
|
||||
defenseSelectionPolicy: 'Defense Selection Policy',
|
||||
defenseSelectionPolicyTip: 'Choose how a defense prompt is selected to pair against incoming attacks.',
|
||||
attackSelectionPolicy: 'Attack Selection Policy',
|
||||
attackSelectionPolicyTip: 'Choose how an attack prompt is selected to test your defense.',
|
||||
teamChoiceVar: 'Team Choice Variable',
|
||||
teamChoiceVarTip: 'Variable that yields "red" or "blue" to select the role.',
|
||||
attackPromptVar: 'Attack Prompt Variable',
|
||||
attackPromptVarTip: 'Variable that provides the attacker\'s prompt.',
|
||||
defensePromptVar: 'Defense Prompt Variable',
|
||||
defensePromptVarTip: 'Variable that provides the defender\'s prompt (system prompt).',
|
||||
selectedChallenge: 'Selected Challenge',
|
||||
selectedChallengeTip: 'Pick a Red/Blue challenge definition to orchestrate evaluations.',
|
||||
outputVars: {
|
||||
team: 'Team',
|
||||
judgePassed: 'Judge Passed',
|
||||
judgeRating: 'Judge Rating',
|
||||
judgeFeedback: 'Judge Feedback',
|
||||
categories: 'Categories',
|
||||
teamPoints: 'Team Points',
|
||||
totalPoints: 'Total Points',
|
||||
},
|
||||
},
|
||||
agent: {
|
||||
strategy: {
|
||||
label: 'Agentic Strategy',
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@
|
|||
"and_qq >= 14.9"
|
||||
],
|
||||
"scripts": {
|
||||
"dev": "cross-env NODE_OPTIONS='--inspect' next dev --turbopack",
|
||||
"dev": "cross-env NODE_OPTIONS='' next dev --turbopack",
|
||||
"build": "next build",
|
||||
"build:docker": "next build && node scripts/optimize-standalone.js",
|
||||
"start": "cp -r .next/static .next/standalone/.next/static && cp -r public .next/standalone/public && cross-env PORT=$npm_config_port HOSTNAME=$npm_config_host node .next/standalone/server.js",
|
||||
|
|
|
|||
1
web/run.sh
Normal file
1
web/run.sh
Normal file
|
|
@ -0,0 +1 @@
|
|||
# Start the web frontend dev server (Next.js) via pnpm.
pnpm run dev
|
||||
104
web/service/challenges.ts
Normal file
104
web/service/challenges.ts
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
import { getPublic, postPublic } from './base'
|
||||
import { PUBLIC_API_PREFIX } from '@/config'
|
||||
import { getInitialTokenV2, isTokenV1 } from '@/app/components/share/utils'
|
||||
import { CONVERSATION_ID_INFO } from '@/app/components/base/chat/constants'
|
||||
|
||||
// One challenge entry as returned by the public `/challenges` listing API.
export type ChallengeListItem = {
  id: string
  name: string
  description?: string
  // Natural-language goal of the challenge — presumably what the attacker must achieve; confirm against API schema.
  goal?: string
  // Backing Dify app / workflow the challenge runs against.
  app_id?: string
  workflow_id?: string
  // App mode (e.g. 'chat', 'workflow') — drives which endpoint submitChallengeAttempt uses.
  app_mode?: string
  // Public site code of the app; required to obtain a passport token before submitting.
  app_site_code?: string
}
|
||||
|
||||
export async function fetchChallenges(): Promise<ChallengeListItem[]> {
|
||||
const res = await getPublic<{ result: string; data: ChallengeListItem[] }>('/challenges')
|
||||
return res.data ?? []
|
||||
}
|
||||
|
||||
export async function fetchChallengeDetail(id: string) {
|
||||
const res = await getPublic<{ result: string; data: any }>(`/challenges/${id}`)
|
||||
return res.data
|
||||
}
|
||||
|
||||
export async function fetchChallengeLeaderboard(id: string) {
|
||||
const res = await getPublic<{ result: string; data: any[] }>(`/challenges/${id}/leaderboard`)
|
||||
return res.data ?? []
|
||||
}
|
||||
|
||||
// Submit one attempt at a challenge on behalf of the current (anonymous) user.
//
// Flow: (1) exchange the app's site code for a passport access token,
// (2) persist that token in localStorage in the shape the share-app auth
// layer expects, (3) POST the user's input to the app's chat or workflow
// endpoint in blocking mode and return the raw response.
//
// Throws when the app is unpublished, the passport request fails, or no
// access token is returned.
//
// NOTE(review): `appId` is accepted but never read in this body — confirm
// whether callers rely on it or it can be dropped in a later cleanup.
export async function submitChallengeAttempt(
  challengeId: string,
  appId: string,
  appSiteCode: string | undefined,
  appMode: string,
  userInput: string,
) {
  // Without a published site there is no app code to authenticate against.
  if (!appSiteCode)
    throw new Error('Challenge app is not published. Please enable the app site for this challenge.')

  // Obtain a passport (anonymous session) token for the challenge's app.
  const passportRes = await fetch(`${PUBLIC_API_PREFIX}/passport`, {
    method: 'GET',
    headers: {
      'X-App-Code': appSiteCode,
    },
    credentials: 'include',
  })

  if (!passportRes.ok) {
    // Prefer the server-provided message, but never fail on a non-JSON body.
    let message = 'Unable to start challenge. Please try again.'
    try {
      const data = await passportRes.json()
      message = data?.message || message
    }
    catch { /* ignore json parse errors */ }
    throw new Error(message)
  }

  const passportData = await passportRes.json() as { access_token?: string }
  const accessToken = passportData?.access_token
  if (!accessToken)
    throw new Error('Challenge authorization failed. Please refresh and try again.')

  // Persist token using the same structure expected by getAccessToken(true).
  // NOTE(review): the store is keyed by `challengeId` here, while the passport
  // was requested with the app's site code — confirm the share-app token lookup
  // also keys by this id, otherwise the token will not be found.
  const storageKey = 'token'
  const userKey = 'DEFAULT'
  const rawTokenStore = localStorage.getItem(storageKey) || JSON.stringify(getInitialTokenV2())
  let tokenStore: Record<string, any>
  try {
    const parsed = JSON.parse(rawTokenStore)
    // Migrate away from the legacy v1 token layout by starting fresh.
    tokenStore = isTokenV1(parsed) ? getInitialTokenV2() : parsed
  }
  catch {
    // Corrupt store: reset to an empty v2 structure.
    tokenStore = getInitialTokenV2()
  }

  tokenStore[challengeId] = {
    ...(tokenStore[challengeId] || {}),
    [userKey]: accessToken,
  }
  localStorage.setItem(storageKey, JSON.stringify(tokenStore))
  // Drop any cached conversation mapping so each attempt starts a new conversation.
  localStorage.removeItem(CONVERSATION_ID_INFO)

  // Chat-style apps take the input as a query on /chat-messages…
  if (appMode === 'chat' || appMode === 'advanced-chat' || appMode === 'agent-chat') {
    return await postPublic<any>('/chat-messages', {
      body: {
        query: userInput,
        inputs: {},
        response_mode: 'blocking',
        conversation_id: '',
      },
    })
  }

  // …all other modes run as a workflow with the input bound to `user_prompt`.
  return await postPublic<any>('/workflows/run', {
    body: {
      inputs: {
        user_prompt: userInput,
      },
      response_mode: 'blocking',
    },
  })
}
|
||||
99
web/service/console/challenges.ts
Normal file
99
web/service/console/challenges.ts
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
import { request } from '@/service/base'
|
||||
|
||||
// A challenge definition as managed through the console `/challenges` API.
export type ConsoleChallenge = {
  id: string
  name: string
  description?: string
  goal?: string
  // Whether the challenge is visible/playable on the public side.
  is_active?: boolean
  // How success is detected — presumably a discriminator for `success_pattern`; confirm against API schema.
  success_type?: string
  success_pattern?: string
  scoring_strategy?: string
  // Backing Dify app / workflow.
  app_id?: string
  workflow_id?: string
}
|
||||
|
||||
export async function listConsoleChallenges() {
|
||||
const resp = await request<{ data: ConsoleChallenge[] }>('/challenges', {}, {})
|
||||
return resp.data
|
||||
}
|
||||
|
||||
export async function createConsoleChallenge(payload: {
|
||||
app_id: string
|
||||
workflow_id?: string
|
||||
name: string
|
||||
description?: string
|
||||
goal?: string
|
||||
success_type?: string
|
||||
success_pattern?: string
|
||||
scoring_strategy?: string
|
||||
is_active?: boolean
|
||||
}) {
|
||||
const resp = await request<{ data: { id: string } }>('/challenges', {
|
||||
method: 'POST',
|
||||
body: payload,
|
||||
}, {})
|
||||
return resp.data
|
||||
}
|
||||
|
||||
export async function updateConsoleChallenge(id: string, payload: Partial<ConsoleChallenge>) {
|
||||
const resp = await request<{ data: ConsoleChallenge }>(`/challenges/${id}`, {
|
||||
method: 'PATCH',
|
||||
body: payload,
|
||||
}, {})
|
||||
return resp.data
|
||||
}
|
||||
|
||||
export async function deleteConsoleChallenge(id: string) {
|
||||
await request(`/challenges/${id}`, {
|
||||
method: 'DELETE',
|
||||
}, {})
|
||||
}
|
||||
|
||||
// A red-vs-blue challenge definition as managed through the console API.
export type RedBlueChallenge = {
  id: string
  name: string
  description?: string
  // Identifiers of the judge configurations used for evaluation — confirm element semantics against API schema.
  judge_suite?: string[]
  // Policies for pairing defense/attack prompts against submissions.
  defense_selection_policy?: string
  attack_selection_policy?: string
  scoring_strategy?: string
  is_active?: boolean
}
|
||||
|
||||
export async function listRedBlueChallenges() {
|
||||
const resp = await request<{ data: RedBlueChallenge[] }>('/red-blue-challenges', {}, {})
|
||||
return resp.data
|
||||
}
|
||||
|
||||
export async function createRedBlueChallenge(payload: {
|
||||
app_id: string
|
||||
workflow_id?: string
|
||||
name: string
|
||||
description?: string
|
||||
judge_suite?: string[]
|
||||
defense_selection_policy?: string
|
||||
attack_selection_policy?: string
|
||||
scoring_strategy?: string
|
||||
is_active?: boolean
|
||||
}) {
|
||||
const resp = await request<{ data: { id: string } }>('/red-blue-challenges', {
|
||||
method: 'POST',
|
||||
body: payload,
|
||||
}, {})
|
||||
return resp.data
|
||||
}
|
||||
|
||||
export async function updateRedBlueChallenge(id: string, payload: Partial<RedBlueChallenge>) {
|
||||
const resp = await request<{ data: RedBlueChallenge }>(`/red-blue-challenges/${id}`, {
|
||||
method: 'PATCH',
|
||||
body: payload,
|
||||
}, {})
|
||||
return resp.data
|
||||
}
|
||||
|
||||
export async function deleteRedBlueChallenge(id: string) {
|
||||
await request(`/red-blue-challenges/${id}`, {
|
||||
method: 'DELETE',
|
||||
}, {})
|
||||
}
|
||||
27
web/service/console/redBlueChallenges.ts
Normal file
27
web/service/console/redBlueChallenges.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import { request } from '@/service/base'
|
||||
|
||||
// Minimal console-side view of a red/blue challenge record.
export type ConsoleRedBlueChallenge = {
  id: string
  name: string
  description?: string
  is_active?: boolean
}
|
||||
|
||||
export async function listConsoleRedBlueChallenges() {
|
||||
const resp = await request<{ data: ConsoleRedBlueChallenge[] }>('/red-blue-challenges', {}, {})
|
||||
return resp.data
|
||||
}
|
||||
|
||||
export async function createConsoleRedBlueChallenge(payload: {
|
||||
tenant_id: string
|
||||
app_id: string
|
||||
name: string
|
||||
description?: string
|
||||
judge_suite: Record<string, any>
|
||||
}) {
|
||||
const resp = await request<{ data: { id: string } }>('/red-blue-challenges', {
|
||||
method: 'POST',
|
||||
body: JSON.stringify(payload),
|
||||
}, {})
|
||||
return resp.data
|
||||
}
|
||||
24
web/service/redBlueChallenges.ts
Normal file
24
web/service/redBlueChallenges.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import { getPublic, postPublic } from './base'
|
||||
|
||||
// One red/blue challenge entry as returned by the public listing API.
export type RedBlueListItem = {
  id: string
  name: string
  description?: string
}
|
||||
|
||||
export async function fetchRedBlueChallenges(): Promise<RedBlueListItem[]> {
|
||||
const res = await getPublic<{ result: string; data: RedBlueListItem[] }>('/red-blue-challenges')
|
||||
return res.data ?? []
|
||||
}
|
||||
|
||||
export async function fetchRedBlueLeaderboard(id: string) {
|
||||
const res = await getPublic<{ result: string; data: any }>(`/red-blue-challenges/${id}/leaderboard`)
|
||||
return res.data
|
||||
}
|
||||
|
||||
export async function submitRedBluePrompt(id: string, team: 'red' | 'blue', prompt: string) {
|
||||
const res = await postPublic<{ result: string; data: any }>(`/red-blue-challenges/${id}/submit`, {
|
||||
body: { team, prompt },
|
||||
})
|
||||
return res.data
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue