Add multi-site Caddy helpers and document usage
- add startup/shutdown scripts that render a Caddyfile from JSON config and run health checks
- add Python utilities and a sample sites.json for declarative multi-site configuration
- document the workflow and ignore generated Caddy state artifacts
- normalize double-quote style across challenge workflow controllers, nodes, and tests
This commit is contained in:
parent
a99e716ad2
commit
6038fc25f5
26 changed files with 1018 additions and 168 deletions
|
|
@@ -50,8 +50,8 @@ class AnswerNode(Node):
|
|||
existing_outputs = self.graph_runtime_state.outputs
|
||||
merged_outputs: dict[str, Any] = {
|
||||
**existing_outputs,
|
||||
'answer': segments.markdown,
|
||||
'files': ArrayFileSegment(value=files),
|
||||
"answer": segments.markdown,
|
||||
"files": ArrayFileSegment(value=files),
|
||||
}
|
||||
self.graph_runtime_state.outputs = merged_outputs
|
||||
return NodeRunResult(
|
||||
|
|
|
|||
|
|
@@ -1,3 +1,3 @@
|
|||
from .node import ChallengeEvaluatorNode
|
||||
|
||||
__all__ = ['ChallengeEvaluatorNode']
|
||||
__all__ = ["ChallengeEvaluatorNode"]
|
||||
|
|
|
|||
|
|
@@ -32,19 +32,19 @@ class ChallengeEvaluatorNode(Node):
|
|||
self._config: dict[str, Any] = data
|
||||
|
||||
def _get_error_strategy(self) -> ErrorStrategy | None:
|
||||
return getattr(self._node_data, 'error_strategy', None)
|
||||
return getattr(self._node_data, "error_strategy", None)
|
||||
|
||||
def _get_retry_config(self) -> RetryConfig:
|
||||
return getattr(self._node_data, 'retry_config', RetryConfig())
|
||||
return getattr(self._node_data, "retry_config", RetryConfig())
|
||||
|
||||
def _get_title(self) -> str:
|
||||
return getattr(self._node_data, 'title', 'Challenge Evaluator')
|
||||
return getattr(self._node_data, "title", "Challenge Evaluator")
|
||||
|
||||
def _get_description(self) -> str | None:
|
||||
return getattr(self._node_data, 'desc', None)
|
||||
return getattr(self._node_data, "desc", None)
|
||||
|
||||
def _get_default_value_dict(self) -> dict[str, Any]:
|
||||
return getattr(self._node_data, 'default_value_dict', {})
|
||||
return getattr(self._node_data, "default_value_dict", {})
|
||||
|
||||
def get_base_node_data(self) -> BaseNodeData:
|
||||
return self._node_data
|
||||
|
|
@@ -55,16 +55,16 @@ class ChallengeEvaluatorNode(Node):
|
|||
|
||||
def _run(self) -> NodeRunResult:
|
||||
# Resolve response text from selector in config.inputs.response (frontend schema)
|
||||
output_text = ''
|
||||
output_text = ""
|
||||
source_selector = None
|
||||
inputs_cfg = self._config.get('inputs') or {}
|
||||
inputs_cfg = self._config.get("inputs") or {}
|
||||
if isinstance(inputs_cfg, dict):
|
||||
source_selector = inputs_cfg.get('response')
|
||||
source_selector = inputs_cfg.get("response")
|
||||
# fallback to older key if any
|
||||
source_selector = source_selector or self._config.get('value_selector')
|
||||
source_selector = source_selector or self._config.get("value_selector")
|
||||
|
||||
# Check evaluation mode from config
|
||||
evaluation_mode = self._config.get('evaluation_mode', 'rules')
|
||||
evaluation_mode = self._config.get("evaluation_mode", "rules")
|
||||
|
||||
logger.info("ChallengeEvaluator - evaluation_mode: %s, source_selector: %s", evaluation_mode, source_selector)
|
||||
|
||||
|
|
@@ -72,8 +72,8 @@ class ChallengeEvaluatorNode(Node):
|
|||
is_judge_input = False
|
||||
judge_passed = False
|
||||
judge_rating = 0
|
||||
judge_feedback_from_input = ''
|
||||
output_text = ''
|
||||
judge_feedback_from_input = ""
|
||||
output_text = ""
|
||||
|
||||
def _segment_to_value(segment: Segment | None) -> Any:
|
||||
if segment is None:
|
||||
|
|
@@ -86,13 +86,13 @@ class ChallengeEvaluatorNode(Node):
|
|||
return getattr(segment, "value", segment)
|
||||
|
||||
# If evaluation_mode is 'llm-judge', try to read from upstream Judging LLM node
|
||||
if evaluation_mode == 'llm-judge' and source_selector and len(source_selector) >= 1:
|
||||
if evaluation_mode == "llm-judge" and source_selector and len(source_selector) >= 1:
|
||||
try:
|
||||
node_id = source_selector[0]
|
||||
# Retrieve judge outputs as Segments and convert to primitive values
|
||||
passed_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_passed'])
|
||||
rating_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_rating'])
|
||||
feedback_segment = self.graph_runtime_state.variable_pool.get([node_id, 'judge_feedback'])
|
||||
passed_segment = self.graph_runtime_state.variable_pool.get([node_id, "judge_passed"])
|
||||
rating_segment = self.graph_runtime_state.variable_pool.get([node_id, "judge_rating"])
|
||||
feedback_segment = self.graph_runtime_state.variable_pool.get([node_id, "judge_feedback"])
|
||||
|
||||
potential_judge_passed = _segment_to_value(passed_segment)
|
||||
potential_judge_rating = _segment_to_value(rating_segment)
|
||||
|
|
@@ -110,7 +110,7 @@ class ChallengeEvaluatorNode(Node):
|
|||
is_judge_input = True
|
||||
judge_passed = bool(potential_judge_passed)
|
||||
judge_rating = int(potential_judge_rating or 0)
|
||||
judge_feedback_from_input = str(potential_judge_feedback or '')
|
||||
judge_feedback_from_input = str(potential_judge_feedback or "")
|
||||
logger.info(
|
||||
"ChallengeEvaluator - Judge input successfully read! passed=%s, rating=%s, feedback=%s",
|
||||
judge_passed,
|
||||
|
|
@@ -126,28 +126,28 @@ class ChallengeEvaluatorNode(Node):
|
|||
try:
|
||||
segment = self.graph_runtime_state.variable_pool.get(source_selector)
|
||||
if segment is None:
|
||||
output_text = ''
|
||||
elif hasattr(segment, 'text'):
|
||||
output_text = ""
|
||||
elif hasattr(segment, "text"):
|
||||
output_text = segment.text
|
||||
else:
|
||||
output_text = str(_segment_to_value(segment) or '')
|
||||
output_text = str(_segment_to_value(segment) or "")
|
||||
except Exception:
|
||||
output_text = ''
|
||||
output_text = ""
|
||||
|
||||
# Evaluate based on mode
|
||||
if is_judge_input:
|
||||
ok = judge_passed
|
||||
details = {
|
||||
'mode': 'llm-judge',
|
||||
'rating': judge_rating,
|
||||
'feedback': judge_feedback_from_input,
|
||||
"mode": "llm-judge",
|
||||
"rating": judge_rating,
|
||||
"feedback": judge_feedback_from_input,
|
||||
}
|
||||
else:
|
||||
# Rules-based evaluation (only if not using judge input)
|
||||
ok, details = ChallengeService.evaluate_outcome(output_text, self._config)
|
||||
|
||||
# optional persistence if config carries challenge_id
|
||||
challenge_id = self._config.get('challenge_id')
|
||||
challenge_id = self._config.get("challenge_id")
|
||||
judge_feedback = judge_feedback_from_input if is_judge_input else None
|
||||
judge_rating_value = judge_rating if is_judge_input else None
|
||||
if challenge_id:
|
||||
|
|
@@ -160,8 +160,8 @@ class ChallengeEvaluatorNode(Node):
|
|||
|
||||
# Extract judge_rating from details if available (for highest_rating strategy)
|
||||
if isinstance(details, dict):
|
||||
judge_rating_raw = details.get('rating')
|
||||
judge_feedback_raw = details.get('feedback')
|
||||
judge_rating_raw = details.get("rating")
|
||||
judge_feedback_raw = details.get("feedback")
|
||||
if judge_rating_raw is not None:
|
||||
judge_rating_value = int(judge_rating_raw)
|
||||
if (
|
||||
|
|
@@ -180,23 +180,23 @@ class ChallengeEvaluatorNode(Node):
|
|||
score = None
|
||||
|
||||
# If custom scoring is configured, compute score using plugin
|
||||
if challenge and challenge.scoring_strategy == 'custom':
|
||||
if challenge and challenge.scoring_strategy == "custom":
|
||||
try:
|
||||
metrics = {
|
||||
'succeeded': ok,
|
||||
'tokens_total': tokens_total,
|
||||
'elapsed_ms': elapsed_ms,
|
||||
'rating': judge_rating,
|
||||
'created_at': int(time.time() * 1000),
|
||||
"succeeded": ok,
|
||||
"tokens_total": tokens_total,
|
||||
"elapsed_ms": elapsed_ms,
|
||||
"rating": judge_rating,
|
||||
"created_at": int(time.time() * 1000),
|
||||
}
|
||||
|
||||
ctx = {
|
||||
'tenant_id': self.tenant_id,
|
||||
'app_id': self.app_id,
|
||||
'workflow_id': self.workflow_id,
|
||||
'challenge_id': str(challenge_id),
|
||||
'end_user_id': None,
|
||||
'timeout_ms': 5000,
|
||||
"tenant_id": self.tenant_id,
|
||||
"app_id": self.app_id,
|
||||
"workflow_id": self.workflow_id,
|
||||
"challenge_id": str(challenge_id),
|
||||
"end_user_id": None,
|
||||
"timeout_ms": 5000,
|
||||
}
|
||||
|
||||
result = ChallengeScorerService.score_with_plugin(
|
||||
|
|
@@ -207,11 +207,11 @@ class ChallengeEvaluatorNode(Node):
|
|||
ctx=ctx,
|
||||
)
|
||||
|
||||
score = result.get('score')
|
||||
score = result.get("score")
|
||||
logger.info(
|
||||
"Custom scorer computed score: %s (details: %s)",
|
||||
score,
|
||||
result.get('details'),
|
||||
result.get("details"),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Custom scorer failed: %s", e, exc_info=True)
|
||||
|
|
@@ -237,34 +237,34 @@ class ChallengeEvaluatorNode(Node):
|
|||
|
||||
# Always provide all output variables to match frontend getOutputVars
|
||||
outputs: dict[str, Any] = {
|
||||
'challenge_succeeded': ok,
|
||||
'judge_rating': judge_rating_value,
|
||||
'judge_feedback': judge_feedback_from_input or judge_feedback or '',
|
||||
'message': '',
|
||||
"challenge_succeeded": ok,
|
||||
"judge_rating": judge_rating_value,
|
||||
"judge_feedback": judge_feedback_from_input or judge_feedback or "",
|
||||
"message": "",
|
||||
}
|
||||
|
||||
# Override with actual values if evaluator provides them
|
||||
if isinstance(details, dict):
|
||||
logger.debug("ChallengeEvaluator - details: %s", details)
|
||||
if 'rating' in details:
|
||||
outputs['judge_rating'] = details.get('rating')
|
||||
if 'feedback' in details:
|
||||
outputs['judge_feedback'] = details.get('feedback')
|
||||
if 'message' in details:
|
||||
outputs['message'] = details.get('message')
|
||||
if "rating" in details:
|
||||
outputs["judge_rating"] = details.get("rating")
|
||||
if "feedback" in details:
|
||||
outputs["judge_feedback"] = details.get("feedback")
|
||||
if "message" in details:
|
||||
outputs["message"] = details.get("message")
|
||||
# If no explicit message, create one from evaluation details
|
||||
if not outputs['message']:
|
||||
if not outputs["message"]:
|
||||
if ok:
|
||||
outputs['message'] = f"Success: {details.get('mode', 'evaluation')} matched"
|
||||
outputs["message"] = f"Success: {details.get('mode', 'evaluation')} matched"
|
||||
else:
|
||||
outputs['message'] = f"Failed: {details.get('mode', 'evaluation')} did not match"
|
||||
outputs["message"] = f"Failed: {details.get('mode', 'evaluation')} did not match"
|
||||
|
||||
# also persist judge outputs onto runtime state so downstream consumers can access them
|
||||
if outputs['judge_feedback']:
|
||||
self.graph_runtime_state.set_output('judge_feedback', outputs['judge_feedback'])
|
||||
if outputs['judge_rating'] is not None:
|
||||
self.graph_runtime_state.set_output('judge_rating', outputs['judge_rating'])
|
||||
self.graph_runtime_state.set_output('challenge_succeeded', outputs['challenge_succeeded'])
|
||||
if outputs["judge_feedback"]:
|
||||
self.graph_runtime_state.set_output("judge_feedback", outputs["judge_feedback"])
|
||||
if outputs["judge_rating"] is not None:
|
||||
self.graph_runtime_state.set_output("judge_rating", outputs["judge_rating"])
|
||||
self.graph_runtime_state.set_output("challenge_succeeded", outputs["challenge_succeeded"])
|
||||
|
||||
return NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.SUCCEEDED,
|
||||
|
|
|
|||
|
|
@@ -32,19 +32,19 @@ class JudgingLLMNode(Node):
|
|||
self._config: dict[str, Any] = data
|
||||
|
||||
def _get_error_strategy(self) -> ErrorStrategy | None:
|
||||
return getattr(self._node_data, 'error_strategy', None)
|
||||
return getattr(self._node_data, "error_strategy", None)
|
||||
|
||||
def _get_retry_config(self) -> RetryConfig:
|
||||
return getattr(self._node_data, 'retry_config', RetryConfig())
|
||||
return getattr(self._node_data, "retry_config", RetryConfig())
|
||||
|
||||
def _get_title(self) -> str:
|
||||
return getattr(self._node_data, 'title', 'Judging LLM')
|
||||
return getattr(self._node_data, "title", "Judging LLM")
|
||||
|
||||
def _get_description(self) -> str | None:
|
||||
return getattr(self._node_data, 'desc', None)
|
||||
return getattr(self._node_data, "desc", None)
|
||||
|
||||
def _get_default_value_dict(self) -> dict[str, Any]:
|
||||
return getattr(self._node_data, 'default_value_dict', {})
|
||||
return getattr(self._node_data, "default_value_dict", {})
|
||||
|
||||
def get_base_node_data(self) -> BaseNodeData:
|
||||
return self._node_data
|
||||
|
|
@@ -55,12 +55,12 @@ class JudgingLLMNode(Node):
|
|||
|
||||
def _run(self) -> NodeRunResult:
|
||||
# Placeholder with FE-compatible keys. Extract inputs for future wiring.
|
||||
inputs_cfg = self._config.get('inputs') or {}
|
||||
inputs_cfg = self._config.get("inputs") or {}
|
||||
goal_selector = None
|
||||
response_selector = None
|
||||
if isinstance(inputs_cfg, dict):
|
||||
goal_selector = inputs_cfg.get('goal')
|
||||
response_selector = inputs_cfg.get('response')
|
||||
goal_selector = inputs_cfg.get("goal")
|
||||
response_selector = inputs_cfg.get("response")
|
||||
|
||||
# Attempt to read variables (not used in placeholder decision)
|
||||
_ = None
|
||||
|
|
@@ -73,31 +73,32 @@ class JudgingLLMNode(Node):
|
|||
pass
|
||||
|
||||
outputs = {
|
||||
'judge_passed': False,
|
||||
'judge_rating': 0,
|
||||
'judge_feedback': '',
|
||||
"judge_passed": False,
|
||||
"judge_rating": 0,
|
||||
"judge_feedback": "",
|
||||
}
|
||||
|
||||
# If model config and rubric provided, invoke LLM synchronously to judge
|
||||
judge_model = self._config.get('judge_model') or {}
|
||||
rubric = self._config.get('rubric_prompt_template') or ''
|
||||
provider = (judge_model or {}).get('provider')
|
||||
model_name = (judge_model or {}).get('name')
|
||||
completion_params = (judge_model or {}).get('completion_params') or {}
|
||||
judge_model = self._config.get("judge_model") or {}
|
||||
rubric = self._config.get("rubric_prompt_template") or ""
|
||||
provider = (judge_model or {}).get("provider")
|
||||
model_name = (judge_model or {}).get("name")
|
||||
completion_params = (judge_model or {}).get("completion_params") or {}
|
||||
|
||||
def _segment_to_text(seg: Any) -> str:
|
||||
try:
|
||||
# Many variable types expose .text
|
||||
if hasattr(seg, 'text'):
|
||||
if hasattr(seg, "text"):
|
||||
return str(seg.text)
|
||||
if isinstance(seg, (dict, list)):
|
||||
return json.dumps(seg, ensure_ascii=False)
|
||||
return str(seg)
|
||||
except Exception:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
# Debug: log what we're checking
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info(
|
||||
"JudgingLLM check - provider: %s, model: %s, rubric_len: %s, response_selector: %s",
|
||||
|
|
@@ -117,9 +118,7 @@ class JudgingLLMNode(Node):
|
|||
json_template = '{"passed": boolean, "rating": number (0-10), "feedback": string}'
|
||||
|
||||
prompt_body = (
|
||||
f"Goal:\n{goal_text}\n\n"
|
||||
f"Response:\n{response_text}\n\n"
|
||||
f"Return JSON with rating 0-10: {json_template}"
|
||||
f"Goal:\n{goal_text}\n\nResponse:\n{response_text}\n\nReturn JSON with rating 0-10: {json_template}"
|
||||
)
|
||||
|
||||
prompt_messages = [
|
||||
|
|
@@ -141,14 +140,14 @@ class JudgingLLMNode(Node):
|
|||
user=self.user_id,
|
||||
) # type: ignore
|
||||
# Extract text from result
|
||||
text_out = ''
|
||||
content = getattr(result.message, 'content', '')
|
||||
text_out = ""
|
||||
content = getattr(result.message, "content", "")
|
||||
if isinstance(content, str):
|
||||
text_out = content
|
||||
elif isinstance(content, list):
|
||||
for item in content:
|
||||
if getattr(item, 'type', None) == PromptMessageContentType.TEXT:
|
||||
text_out += str(getattr(item, 'data', ''))
|
||||
if getattr(item, "type", None) == PromptMessageContentType.TEXT:
|
||||
text_out += str(getattr(item, "data", ""))
|
||||
else:
|
||||
text_out = str(content)
|
||||
|
||||
|
|
@@ -162,22 +161,25 @@ class JudgingLLMNode(Node):
|
|||
verdict = None
|
||||
|
||||
if isinstance(verdict, dict):
|
||||
outputs['judge_passed'] = bool(verdict.get('passed'))
|
||||
outputs['judge_rating'] = int(verdict.get('rating') or 0)
|
||||
outputs['judge_feedback'] = str(verdict.get('feedback') or '')
|
||||
outputs['judge_raw'] = json.dumps(verdict)
|
||||
outputs["judge_passed"] = bool(verdict.get("passed"))
|
||||
outputs["judge_rating"] = int(verdict.get("rating") or 0)
|
||||
outputs["judge_feedback"] = str(verdict.get("feedback") or "")
|
||||
outputs["judge_raw"] = json.dumps(verdict)
|
||||
else:
|
||||
# Fallback to simple rules if configured
|
||||
success_type = self._config.get('success_type')
|
||||
success_pattern = self._config.get('success_pattern')
|
||||
success_type = self._config.get("success_type")
|
||||
success_pattern = self._config.get("success_pattern")
|
||||
if success_type and success_pattern:
|
||||
ok, _ = ChallengeService.evaluate_outcome(response_text, {
|
||||
'success_type': success_type,
|
||||
'success_pattern': success_pattern,
|
||||
})
|
||||
outputs['judge_passed'] = ok
|
||||
outputs['judge_rating'] = 10 if ok else 0
|
||||
outputs['judge_feedback'] = 'passed by rules' if ok else 'failed by rules'
|
||||
ok, _ = ChallengeService.evaluate_outcome(
|
||||
response_text,
|
||||
{
|
||||
"success_type": success_type,
|
||||
"success_pattern": success_pattern,
|
||||
},
|
||||
)
|
||||
outputs["judge_passed"] = ok
|
||||
outputs["judge_rating"] = 10 if ok else 0
|
||||
outputs["judge_feedback"] = "passed by rules" if ok else "failed by rules"
|
||||
except Exception as e:
|
||||
# keep default outputs on error
|
||||
logger.error("JudgingLLM error: %s", e, exc_info=True)
|
||||
|
|
@@ -185,4 +187,3 @@ class JudgingLLMNode(Node):
|
|||
else:
|
||||
logger.warning("JudgingLLM skipped - missing required fields")
|
||||
return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, outputs=outputs)
|
||||
|
||||
|
|
|
|||
|
|
@@ -20,19 +20,19 @@ class TeamChallengeNode(Node):
|
|||
self._config: dict[str, Any] = data
|
||||
|
||||
def _get_error_strategy(self) -> ErrorStrategy | None:
|
||||
return getattr(self._node_data, 'error_strategy', None)
|
||||
return getattr(self._node_data, "error_strategy", None)
|
||||
|
||||
def _get_retry_config(self) -> RetryConfig:
|
||||
return getattr(self._node_data, 'retry_config', RetryConfig())
|
||||
return getattr(self._node_data, "retry_config", RetryConfig())
|
||||
|
||||
def _get_title(self) -> str:
|
||||
return getattr(self._node_data, 'title', 'Team Challenge')
|
||||
return getattr(self._node_data, "title", "Team Challenge")
|
||||
|
||||
def _get_description(self) -> str | None:
|
||||
return getattr(self._node_data, 'desc', None)
|
||||
return getattr(self._node_data, "desc", None)
|
||||
|
||||
def _get_default_value_dict(self) -> dict[str, Any]:
|
||||
return getattr(self._node_data, 'default_value_dict', {})
|
||||
return getattr(self._node_data, "default_value_dict", {})
|
||||
|
||||
def get_base_node_data(self) -> BaseNodeData:
|
||||
return self._node_data
|
||||
|
|
@@ -43,26 +43,24 @@ class TeamChallengeNode(Node):
|
|||
|
||||
def _run(self) -> NodeRunResult:
|
||||
# Read inputs.team_choice for consistency with FE
|
||||
inputs_cfg = self._config.get('inputs') or {}
|
||||
team_choice = ''
|
||||
inputs_cfg = self._config.get("inputs") or {}
|
||||
team_choice = ""
|
||||
if isinstance(inputs_cfg, dict):
|
||||
team_choice_selector = inputs_cfg.get('team_choice')
|
||||
team_choice_selector = inputs_cfg.get("team_choice")
|
||||
if team_choice_selector:
|
||||
try:
|
||||
v = self.graph_runtime_state.variable_pool.get_value_by_selector(team_choice_selector)
|
||||
team_choice = str(v or '')
|
||||
team_choice = str(v or "")
|
||||
except Exception:
|
||||
team_choice = ''
|
||||
team_choice = ""
|
||||
|
||||
outputs = {
|
||||
'team': team_choice,
|
||||
'judge_passed': False,
|
||||
'judge_rating': 0,
|
||||
'judge_feedback': '',
|
||||
'categories': {},
|
||||
'team_points': 0.0,
|
||||
'total_points': 0.0,
|
||||
"team": team_choice,
|
||||
"judge_passed": False,
|
||||
"judge_rating": 0,
|
||||
"judge_feedback": "",
|
||||
"categories": {},
|
||||
"team_points": 0.0,
|
||||
"total_points": 0.0,
|
||||
}
|
||||
return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, outputs=outputs)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue