feat(web): surface thinking traces during challenge submissions

This commit is contained in:
Joey Yakimowich-Payne 2025-10-01 22:16:46 -06:00
commit 6af3ff618c
No known key found for this signature in database
GPG key ID: 6BFE655FA5ABD1E1
2 changed files with 59 additions and 9 deletions

View file

@ -20,8 +20,9 @@ export default function ChallengeDetailPage() {
const [loading, setLoading] = useState(true) const [loading, setLoading] = useState(true)
const [submitting, setSubmitting] = useState(false) const [submitting, setSubmitting] = useState(false)
const [userInput, setUserInput] = useState('') const [userInput, setUserInput] = useState('')
const [lastResult, setLastResult] = useState<{ success: boolean; message?: string; rating?: number } | null>(null) const [lastResult, setLastResult] = useState<{ success: boolean; message?: string; rating?: number; thinking?: string } | null>(null)
const [streamingText, setStreamingText] = useState('') const [streamingText, setStreamingText] = useState('')
const [streamingThinking, setStreamingThinking] = useState('')
const [hasStreamingResult, setHasStreamingResult] = useState(false) const [hasStreamingResult, setHasStreamingResult] = useState(false)
const abortControllerRef = useRef<AbortController | null>(null) const abortControllerRef = useRef<AbortController | null>(null)
@ -66,6 +67,30 @@ export default function ChallengeDetailPage() {
stopStreaming() stopStreaming()
}, [stopStreaming]) }, [stopStreaming])
/**
 * Splits raw model output into its `<think>…</think>` trace and the
 * user-facing response.
 *
 * - Tags are matched case-insensitively; only the first think block is extracted.
 * - If the opening tag has no closing tag yet (e.g. mid-stream), everything
 *   after the opening tag is treated as thinking and only the text before it
 *   is returned as the response.
 * - Both returned strings are trimmed.
 *
 * @param text Raw (possibly partial) model output.
 * @returns The extracted `thinking` text and the remaining `response` text.
 */
const parseModelOutput = useCallback((text: string) => {
  if (!text)
    return { thinking: '', response: '' }

  const openTag = '<think>'
  const closeTag = '</think>'

  // Search the ORIGINAL string with a case-insensitive pattern. Searching a
  // lower-cased copy is unsafe: `toLowerCase()` can change the string length
  // (e.g. U+0130 expands to two code units), so offsets found in the copy
  // would not line up with the text being sliced.
  const thinkStart = text.search(/<think>/i)
  if (thinkStart === -1)
    return { thinking: '', response: text.trim() }

  const beforeThink = text.slice(0, thinkStart)
  const afterThinkStart = text.slice(thinkStart + openTag.length)

  const thinkEndRelative = afterThinkStart.search(/<\/think>/i)
  if (thinkEndRelative === -1) {
    // Unterminated block: the trace is still being produced.
    return { thinking: afterThinkStart.trim(), response: beforeThink.trim() }
  }

  const thinking = afterThinkStart.slice(0, thinkEndRelative).trim()
  const afterThink = afterThinkStart.slice(thinkEndRelative + closeTag.length)
  // The response is whatever surrounds the think block, stitched together.
  const response = `${beforeThink}${afterThink}`.trim()
  return { thinking, response }
}, [])
const handleSubmit = async () => { const handleSubmit = async () => {
if (!userInput.trim()) { if (!userInput.trim()) {
Toast.notify({ type: 'error', message: 'Please enter a response' }) Toast.notify({ type: 'error', message: 'Please enter a response' })
@ -80,6 +105,7 @@ export default function ChallengeDetailPage() {
setSubmitting(true) setSubmitting(true)
setLastResult(null) setLastResult(null)
setStreamingText('') setStreamingText('')
setStreamingThinking('')
setHasStreamingResult(false) setHasStreamingResult(false)
try { try {
const result = await submitChallengeAttempt( const result = await submitChallengeAttempt(
@ -91,7 +117,9 @@ export default function ChallengeDetailPage() {
challenge.goal, challenge.goal,
{ {
onStreamUpdate: (text) => { onStreamUpdate: (text) => {
setStreamingText(text) const { thinking, response } = parseModelOutput(text)
setStreamingThinking(thinking)
setStreamingText(response)
setHasStreamingResult(true) setHasStreamingResult(true)
}, },
onAbortController: (controller) => { onAbortController: (controller) => {
@ -106,7 +134,9 @@ export default function ChallengeDetailPage() {
) )
setHasStreamingResult(false) setHasStreamingResult(false)
setStreamingText(result.rawText) const { thinking: finalThinking, response: finalResponse } = parseModelOutput(result.rawText)
setStreamingThinking(finalThinking)
setStreamingText(finalResponse)
const judgeFeedback = typeof result.outputs?.judge_feedback === 'string' && result.outputs.judge_feedback.trim().length > 0 const judgeFeedback = typeof result.outputs?.judge_feedback === 'string' && result.outputs.judge_feedback.trim().length > 0
? result.outputs.judge_feedback ? result.outputs.judge_feedback
@ -119,14 +149,16 @@ export default function ChallengeDetailPage() {
const judgeFeedbackLine = judgeFeedback const judgeFeedbackLine = judgeFeedback
? t('challenges.player.judgeFeedbackLine', { feedback: judgeFeedback, defaultValue: `${judgeFeedback}` }) ? t('challenges.player.judgeFeedbackLine', { feedback: judgeFeedback, defaultValue: `${judgeFeedback}` })
: '' : ''
const { thinking, response } = parseModelOutput(judgeFeedback || fallbackExplanation)
const combinedMessage = result.success const combinedMessage = result.success
? [judgeFeedback || fallbackExplanation || successFallback].filter(Boolean).join('\n') ? [response || successFallback].filter(Boolean).join('\n')
: [judgeFeedbackLine || fallbackExplanation || failureFallback].filter(Boolean).join('\n') : [judgeFeedbackLine || response || failureFallback].filter(Boolean).join('\n')
setLastResult({ setLastResult({
success: result.success, success: result.success,
message: combinedMessage, message: combinedMessage,
rating: result.rating, rating: result.rating,
thinking,
}) })
if (result.success) { if (result.success) {
@ -140,6 +172,7 @@ export default function ChallengeDetailPage() {
console.error('Submission error:', e) console.error('Submission error:', e)
setHasStreamingResult(false) setHasStreamingResult(false)
setStreamingText('') setStreamingText('')
setStreamingThinking('')
if (e?.name === 'AbortError') if (e?.name === 'AbortError')
return return
if (!e?.__handled) if (!e?.__handled)
@ -225,8 +258,16 @@ export default function ChallengeDetailPage() {
<RiLoader4Line className='h-4 w-4 animate-spin text-text-tertiary' /> <RiLoader4Line className='h-4 w-4 animate-spin text-text-tertiary' />
)} )}
</div> </div>
<div className='mt-2 whitespace-pre-wrap text-sm text-text-primary'> {streamingThinking && (
{streamingText || t('challenges.player.awaitingResponse')} <div className='mt-3 space-y-2 rounded-md border border-divider-subtle bg-components-panel-bg p-3'>
<div className='text-xs font-medium uppercase tracking-wide text-text-tertiary'>
{t('challenges.player.modelThinking', 'Thinking')}
</div>
<div className='whitespace-pre-wrap text-sm text-text-secondary'>{streamingThinking}</div>
</div>
)}
<div className='mt-3 whitespace-pre-wrap text-sm text-text-primary'>
{streamingText || (!streamingThinking && t('challenges.player.awaitingResponse'))}
</div> </div>
</div> </div>
)} )}
@ -243,8 +284,16 @@ export default function ChallengeDetailPage() {
<div className={`mb-1 font-medium ${lastResult.success ? 'text-util-colors-green-green-700' : 'text-util-colors-orange-orange-700'}`}> <div className={`mb-1 font-medium ${lastResult.success ? 'text-util-colors-green-green-700' : 'text-util-colors-orange-orange-700'}`}>
{lastResult.success ? t('challenges.player.status.success') : t('challenges.player.status.failed')} {lastResult.success ? t('challenges.player.status.success') : t('challenges.player.status.failed')}
</div> </div>
{lastResult.thinking && (
<div className='bg-components-panel-bg/60 mt-3 space-y-2 rounded-md border border-divider-subtle p-3'>
<div className='text-xs font-medium uppercase tracking-wide text-text-tertiary'>
{t('challenges.player.modelThinking', 'Thinking')}
</div>
<div className='whitespace-pre-wrap text-sm text-text-secondary'>{lastResult.thinking}</div>
</div>
)}
{lastResult.message && ( {lastResult.message && (
<div className='whitespace-pre-wrap text-sm text-text-secondary'>{lastResult.message}</div> <div className='mt-3 whitespace-pre-wrap text-sm text-text-secondary'>{lastResult.message}</div>
)} )}
{lastResult.rating !== undefined && ( {lastResult.rating !== undefined && (
<div className='mt-2 text-sm text-text-tertiary'> <div className='mt-2 text-sm text-text-tertiary'>

View file

@ -57,7 +57,8 @@ export default {
defaultSuccessMessage: 'Challenge passed!', defaultSuccessMessage: 'Challenge passed!',
defaultFailureMessage: 'Challenge not passed.', defaultFailureMessage: 'Challenge not passed.',
ratingLine: 'Judge rating: {{rating}}/10', ratingLine: 'Judge rating: {{rating}}/10',
judgeFeedbackLine: 'Judge feedback: {{feedback}}', judgeFeedbackLine: '{{feedback}}',
modelThinking: 'Thinking',
}, },
leaderboard: { leaderboard: {
title: 'Leaderboard', title: 'Leaderboard',