diff --git a/api/core/workflow/nodes/answer/answer_node.py b/api/core/workflow/nodes/answer/answer_node.py
index 86174c7ea..e9df26006 100644
--- a/api/core/workflow/nodes/answer/answer_node.py
+++ b/api/core/workflow/nodes/answer/answer_node.py
@@ -45,9 +45,18 @@ class AnswerNode(Node):
     def _run(self) -> NodeRunResult:
         segments = self.graph_runtime_state.variable_pool.convert_template(self._node_data.answer)
         files = self._extract_files_from_segments(segments.value)
+        # Merge answer outputs into existing workflow outputs so upstream metadata (e.g. judge feedback)
+        # is preserved and included in the final workflow_finished payload.
+        existing_outputs = self.graph_runtime_state.outputs
+        merged_outputs: dict[str, Any] = {
+            **existing_outputs,
+            'answer': segments.markdown,
+            'files': ArrayFileSegment(value=files),
+        }
+        self.graph_runtime_state.outputs = merged_outputs
         return NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
-            outputs={"answer": segments.markdown, "files": ArrayFileSegment(value=files)},
+            outputs=merged_outputs,
         )
 
     def _extract_files_from_segments(self, segments: Sequence[Segment]):
diff --git a/api/core/workflow/nodes/challenge_evaluator/node.py b/api/core/workflow/nodes/challenge_evaluator/node.py
index e06d856ee..c0062b902 100644
--- a/api/core/workflow/nodes/challenge_evaluator/node.py
+++ b/api/core/workflow/nodes/challenge_evaluator/node.py
@@ -148,6 +148,8 @@ class ChallengeEvaluatorNode(Node):
 
         # optional persistence if config carries challenge_id
         challenge_id = self._config.get('challenge_id')
+        judge_feedback = judge_feedback_from_input if is_judge_input else None
+        judge_rating_value = judge_rating if is_judge_input else None
         if challenge_id:
             try:
                 # Calculate elapsed time in milliseconds
@@ -157,11 +159,17 @@
                 tokens_total = self.graph_runtime_state.total_tokens
 
                 # Extract judge_rating from details if available (for highest_rating strategy)
-                judge_rating = None
-                judge_feedback = None
                 if isinstance(details, dict):
-                    judge_rating = details.get('rating')
-                    judge_feedback = details.get('feedback')
+                    judge_rating_raw = details.get('rating')
+                    judge_feedback_raw = details.get('feedback')
+                    if judge_rating_raw is not None:
+                        judge_rating_value = int(judge_rating_raw)
+                    if (
+                        judge_feedback_raw is not None
+                        and isinstance(judge_feedback_raw, str)
+                        and judge_feedback_raw.strip()
+                    ):
+                        judge_feedback = str(judge_feedback_raw)
 
                 # Load challenge to check scoring strategy
                 challenge = db.session.get(Challenge, str(challenge_id))
@@ -209,7 +217,7 @@
                         logger.error("Custom scorer failed: %s", e, exc_info=True)
                         # Continue with score=None on error
 
-                ChallengeService.record_attempt(
+                _ = ChallengeService.record_attempt(
                     tenant_id=self.tenant_id,
                     challenge_id=challenge_id,
                     end_user_id=None,
@@ -217,8 +225,8 @@
                     workflow_run_id=None,
                     succeeded=ok,
                     score=score,
-                    judge_rating=judge_rating,
-                    judge_feedback=judge_feedback,
+                    judge_rating=judge_rating_value,
+                    judge_feedback=judge_feedback_from_input or judge_feedback,
                     tokens_total=tokens_total,
                     elapsed_ms=elapsed_ms,
                     session=db.session,
@@ -230,8 +238,8 @@
 
         # Always provide all output variables to match frontend getOutputVars
         outputs: dict[str, Any] = {
             'challenge_succeeded': ok,
-            'judge_rating': 0,
-            'judge_feedback': '',
+            'judge_rating': judge_rating_value,
+            'judge_feedback': judge_feedback_from_input or judge_feedback or '',
             'message': '',
         }
@@ -251,8 +259,14 @@
         else:
             outputs['message'] = f"Failed: {details.get('mode', 'evaluation')} did not match"
 
+        # also persist judge outputs onto runtime state so downstream consumers can access them
+        if outputs['judge_feedback']:
+            self.graph_runtime_state.set_output('judge_feedback', outputs['judge_feedback'])
+        if outputs['judge_rating'] is not None:
+            self.graph_runtime_state.set_output('judge_rating', outputs['judge_rating'])
+        self.graph_runtime_state.set_output('challenge_succeeded', outputs['challenge_succeeded'])
+
         return NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
             outputs=outputs,
         )
-
diff --git a/web/app/challenges/[id]/page.tsx b/web/app/challenges/[id]/page.tsx
index 538bf5ed5..70e5d9974 100644
--- a/web/app/challenges/[id]/page.tsx
+++ b/web/app/challenges/[id]/page.tsx
@@ -54,6 +54,14 @@ export default function ChallengeDetailPage() {
     setHasStreamingResult(false)
   }, [])
 
+  useEffect(() => {
+    const previousOverflow = document.body.style.overflowY
+    document.body.style.overflowY = 'auto'
+    return () => {
+      document.body.style.overflowY = previousOverflow
+    }
+  }, [])
+
   useEffect(() => () => {
     stopStreaming()
   }, [stopStreaming])
@@ -80,6 +88,7 @@
       challenge.app_site_code,
       challenge.app_mode || 'workflow',
       userInput,
+      challenge.goal,
       {
         onStreamUpdate: (text) => {
           setStreamingText(text)
@@ -99,9 +108,24 @@
       setHasStreamingResult(false)
       setStreamingText(result.rawText)
 
+      const judgeFeedback = typeof result.outputs?.judge_feedback === 'string' && result.outputs.judge_feedback.trim().length > 0
+        ? result.outputs.judge_feedback
+        : (typeof result.message === 'string' && result.message.trim().length > 0 ? result.message : undefined)
+      const fallbackExplanation = typeof result.outputs?.message === 'string' && result.outputs.message.trim().length > 0
+        ? result.outputs.message
+        : ''
+      const successFallback = t('challenges.player.defaultSuccessMessage', 'Challenge passed!')
+      const failureFallback = t('challenges.player.defaultFailureMessage', 'Challenge not passed.')
+      const judgeFeedbackLine = judgeFeedback
+        ? t('challenges.player.judgeFeedbackLine', { feedback: judgeFeedback, defaultValue: `${judgeFeedback}` })
+        : ''
+      const combinedMessage = result.success
+        ? [judgeFeedback || fallbackExplanation || successFallback].filter(Boolean).join('\n')
+        : [judgeFeedbackLine || fallbackExplanation || failureFallback].filter(Boolean).join('\n')
+
       setLastResult({
         success: result.success,
-        message: result.message,
+        message: combinedMessage,
         rating: result.rating,
       })
 
@@ -143,7 +167,7 @@
     }
 
     return (
-