diff --git a/web/app/challenges/[id]/page.tsx b/web/app/challenges/[id]/page.tsx index 70e5d9974..abf9ce2cf 100644 --- a/web/app/challenges/[id]/page.tsx +++ b/web/app/challenges/[id]/page.tsx @@ -20,8 +20,9 @@ export default function ChallengeDetailPage() { const [loading, setLoading] = useState(true) const [submitting, setSubmitting] = useState(false) const [userInput, setUserInput] = useState('') - const [lastResult, setLastResult] = useState<{ success: boolean; message?: string; rating?: number } | null>(null) + const [lastResult, setLastResult] = useState<{ success: boolean; message?: string; rating?: number; thinking?: string } | null>(null) const [streamingText, setStreamingText] = useState('') + const [streamingThinking, setStreamingThinking] = useState('') const [hasStreamingResult, setHasStreamingResult] = useState(false) const abortControllerRef = useRef(null) @@ -66,6 +67,30 @@ export default function ChallengeDetailPage() { stopStreaming() }, [stopStreaming]) + const parseModelOutput = useCallback((text: string) => { + if (!text) + return { thinking: '', response: '' } + + const thinkStart = text.toLowerCase().indexOf('') + if (thinkStart === -1) + return { thinking: '', response: text.trim() } + + const beforeThink = text.slice(0, thinkStart) + const afterThinkStart = text.slice(thinkStart + 7) // length of '' + const thinkEndRelative = afterThinkStart.toLowerCase().indexOf('') + + if (thinkEndRelative === -1) { + const thinking = afterThinkStart.trim() + const response = beforeThink.trim() + return { thinking, response } + } + + const thinking = afterThinkStart.slice(0, thinkEndRelative).trim() + const afterThink = afterThinkStart.slice(thinkEndRelative + 8) // length of '' + const response = `${beforeThink}${afterThink}`.trim() + return { thinking, response } + }, []) + const handleSubmit = async () => { if (!userInput.trim()) { Toast.notify({ type: 'error', message: 'Please enter a response' }) @@ -80,6 +105,7 @@ export default function ChallengeDetailPage() { setSubmitting(true) setLastResult(null) setStreamingText('') + setStreamingThinking('') setHasStreamingResult(false) try { const result = await submitChallengeAttempt( @@ -91,7 +117,9 @@ export default function ChallengeDetailPage() { challenge.goal, { onStreamUpdate: (text) => { - setStreamingText(text) + const { thinking, response } = parseModelOutput(text) + setStreamingThinking(thinking) + setStreamingText(response) setHasStreamingResult(true) }, onAbortController: (controller) => { @@ -106,7 +134,9 @@ export default function ChallengeDetailPage() { ) setHasStreamingResult(false) - setStreamingText(result.rawText) + const { thinking: finalThinking, response: finalResponse } = parseModelOutput(result.rawText) + setStreamingThinking(finalThinking) + setStreamingText(finalResponse) const judgeFeedback = typeof result.outputs?.judge_feedback === 'string' && result.outputs.judge_feedback.trim().length > 0 ? result.outputs.judge_feedback @@ -119,14 +149,16 @@ export default function ChallengeDetailPage() { const judgeFeedbackLine = judgeFeedback ? t('challenges.player.judgeFeedbackLine', { feedback: judgeFeedback, defaultValue: `${judgeFeedback}` }) : '' + const { thinking, response } = parseModelOutput(judgeFeedback || fallbackExplanation) const combinedMessage = result.success - ? [judgeFeedback || fallbackExplanation || successFallback].filter(Boolean).join('\n') - : [judgeFeedbackLine || fallbackExplanation || failureFallback].filter(Boolean).join('\n') + ? [response || successFallback].filter(Boolean).join('\n') + : [judgeFeedbackLine || response || failureFallback].filter(Boolean).join('\n') setLastResult({ success: result.success, message: combinedMessage, rating: result.rating, + thinking, }) if (result.success) { @@ -140,6 +172,7 @@ export default function ChallengeDetailPage() { console.error('Submission error:', e) setHasStreamingResult(false) setStreamingText('') + setStreamingThinking('') if (e?.name === 'AbortError') return if (!e?.__handled) @@ -225,8 +258,16 @@ export default function ChallengeDetailPage() { )} -
- {streamingText || t('challenges.player.awaitingResponse')} + {streamingThinking && ( +
+
+ {t('challenges.player.modelThinking', 'Thinking')} +
+
{streamingThinking}
+
+ )} +
+ {streamingText || (!streamingThinking && t('challenges.player.awaitingResponse'))}
)} @@ -243,8 +284,16 @@ export default function ChallengeDetailPage() {
{lastResult.success ? t('challenges.player.status.success') : t('challenges.player.status.failed')}
+ {lastResult.thinking && ( +
+
+ {t('challenges.player.modelThinking', 'Thinking')} +
+
{lastResult.thinking}
+
+ )} {lastResult.message && ( -
{lastResult.message}
+
{lastResult.message}
)} {lastResult.rating !== undefined && (
diff --git a/web/i18n/en-US/challenges.ts b/web/i18n/en-US/challenges.ts index 2f64ff6de..f4bd95c49 100644 --- a/web/i18n/en-US/challenges.ts +++ b/web/i18n/en-US/challenges.ts @@ -57,7 +57,8 @@ export default { defaultSuccessMessage: 'Challenge passed!', defaultFailureMessage: 'Challenge not passed.', ratingLine: 'Judge rating: {{rating}}/10', - judgeFeedbackLine: 'Judge feedback: {{feedback}}', + judgeFeedbackLine: '{{feedback}}', + modelThinking: 'Thinking', }, leaderboard: { title: 'Leaderboard',