feat(web): surface thinking traces during challenge submissions
This commit is contained in:
parent
96634e7cec
commit
6af3ff618c
2 changed files with 59 additions and 9 deletions
|
|
@ -20,8 +20,9 @@ export default function ChallengeDetailPage() {
|
||||||
const [loading, setLoading] = useState(true)
|
const [loading, setLoading] = useState(true)
|
||||||
const [submitting, setSubmitting] = useState(false)
|
const [submitting, setSubmitting] = useState(false)
|
||||||
const [userInput, setUserInput] = useState('')
|
const [userInput, setUserInput] = useState('')
|
||||||
const [lastResult, setLastResult] = useState<{ success: boolean; message?: string; rating?: number } | null>(null)
|
const [lastResult, setLastResult] = useState<{ success: boolean; message?: string; rating?: number; thinking?: string } | null>(null)
|
||||||
const [streamingText, setStreamingText] = useState('')
|
const [streamingText, setStreamingText] = useState('')
|
||||||
|
const [streamingThinking, setStreamingThinking] = useState('')
|
||||||
const [hasStreamingResult, setHasStreamingResult] = useState(false)
|
const [hasStreamingResult, setHasStreamingResult] = useState(false)
|
||||||
const abortControllerRef = useRef<AbortController | null>(null)
|
const abortControllerRef = useRef<AbortController | null>(null)
|
||||||
|
|
||||||
|
|
@ -66,6 +67,30 @@ export default function ChallengeDetailPage() {
|
||||||
stopStreaming()
|
stopStreaming()
|
||||||
}, [stopStreaming])
|
}, [stopStreaming])
|
||||||
|
|
||||||
|
/**
 * Splits raw model output into its reasoning trace and its visible response.
 *
 * The reasoning trace is whatever sits inside the first `<think>…</think>`
 * pair (tags matched case-insensitively). Everything before the opening tag
 * and after the closing tag is concatenated to form the response.
 *
 * @param text Raw model output; may be empty or contain an unclosed `<think>`.
 * @returns `thinking` and `response`, both trimmed. When the opening tag has
 *   no closing tag, everything after it is returned as `thinking` and only
 *   the text before it as `response`.
 */
const parseModelOutput = useCallback((text: string) => {
  if (!text)
    return { thinking: '', response: '' }

  // Search the ORIGINAL string case-insensitively. Lowercasing first and then
  // reusing the offsets is unsafe: `toLowerCase()` can change the string's
  // length (e.g. 'İ' becomes two code units), which shifts every slice below.
  const openTag = /<think>/i.exec(text)
  if (!openTag)
    return { thinking: '', response: text.trim() }

  const beforeThink = text.slice(0, openTag.index)
  const afterOpenTag = text.slice(openTag.index + openTag[0].length)

  const closeTag = /<\/think>/i.exec(afterOpenTag)
  if (!closeTag) {
    // No closing tag: treat the remainder as reasoning that is still open.
    return { thinking: afterOpenTag.trim(), response: beforeThink.trim() }
  }

  const thinking = afterOpenTag.slice(0, closeTag.index).trim()
  const afterThink = afterOpenTag.slice(closeTag.index + closeTag[0].length)
  const response = `${beforeThink}${afterThink}`.trim()
  return { thinking, response }
}, [])
|
||||||
|
|
||||||
const handleSubmit = async () => {
|
const handleSubmit = async () => {
|
||||||
if (!userInput.trim()) {
|
if (!userInput.trim()) {
|
||||||
Toast.notify({ type: 'error', message: 'Please enter a response' })
|
Toast.notify({ type: 'error', message: 'Please enter a response' })
|
||||||
|
|
@ -80,6 +105,7 @@ export default function ChallengeDetailPage() {
|
||||||
setSubmitting(true)
|
setSubmitting(true)
|
||||||
setLastResult(null)
|
setLastResult(null)
|
||||||
setStreamingText('')
|
setStreamingText('')
|
||||||
|
setStreamingThinking('')
|
||||||
setHasStreamingResult(false)
|
setHasStreamingResult(false)
|
||||||
try {
|
try {
|
||||||
const result = await submitChallengeAttempt(
|
const result = await submitChallengeAttempt(
|
||||||
|
|
@ -91,7 +117,9 @@ export default function ChallengeDetailPage() {
|
||||||
challenge.goal,
|
challenge.goal,
|
||||||
{
|
{
|
||||||
onStreamUpdate: (text) => {
|
onStreamUpdate: (text) => {
|
||||||
setStreamingText(text)
|
const { thinking, response } = parseModelOutput(text)
|
||||||
|
setStreamingThinking(thinking)
|
||||||
|
setStreamingText(response)
|
||||||
setHasStreamingResult(true)
|
setHasStreamingResult(true)
|
||||||
},
|
},
|
||||||
onAbortController: (controller) => {
|
onAbortController: (controller) => {
|
||||||
|
|
@ -106,7 +134,9 @@ export default function ChallengeDetailPage() {
|
||||||
)
|
)
|
||||||
|
|
||||||
setHasStreamingResult(false)
|
setHasStreamingResult(false)
|
||||||
setStreamingText(result.rawText)
|
const { thinking: finalThinking, response: finalResponse } = parseModelOutput(result.rawText)
|
||||||
|
setStreamingThinking(finalThinking)
|
||||||
|
setStreamingText(finalResponse)
|
||||||
|
|
||||||
const judgeFeedback = typeof result.outputs?.judge_feedback === 'string' && result.outputs.judge_feedback.trim().length > 0
|
const judgeFeedback = typeof result.outputs?.judge_feedback === 'string' && result.outputs.judge_feedback.trim().length > 0
|
||||||
? result.outputs.judge_feedback
|
? result.outputs.judge_feedback
|
||||||
|
|
@ -119,14 +149,16 @@ export default function ChallengeDetailPage() {
|
||||||
const judgeFeedbackLine = judgeFeedback
|
const judgeFeedbackLine = judgeFeedback
|
||||||
? t('challenges.player.judgeFeedbackLine', { feedback: judgeFeedback, defaultValue: `${judgeFeedback}` })
|
? t('challenges.player.judgeFeedbackLine', { feedback: judgeFeedback, defaultValue: `${judgeFeedback}` })
|
||||||
: ''
|
: ''
|
||||||
|
const { thinking, response } = parseModelOutput(judgeFeedback || fallbackExplanation)
|
||||||
const combinedMessage = result.success
|
const combinedMessage = result.success
|
||||||
? [judgeFeedback || fallbackExplanation || successFallback].filter(Boolean).join('\n')
|
? [response || successFallback].filter(Boolean).join('\n')
|
||||||
: [judgeFeedbackLine || fallbackExplanation || failureFallback].filter(Boolean).join('\n')
|
: [judgeFeedbackLine || response || failureFallback].filter(Boolean).join('\n')
|
||||||
|
|
||||||
setLastResult({
|
setLastResult({
|
||||||
success: result.success,
|
success: result.success,
|
||||||
message: combinedMessage,
|
message: combinedMessage,
|
||||||
rating: result.rating,
|
rating: result.rating,
|
||||||
|
thinking,
|
||||||
})
|
})
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
|
|
@ -140,6 +172,7 @@ export default function ChallengeDetailPage() {
|
||||||
console.error('Submission error:', e)
|
console.error('Submission error:', e)
|
||||||
setHasStreamingResult(false)
|
setHasStreamingResult(false)
|
||||||
setStreamingText('')
|
setStreamingText('')
|
||||||
|
setStreamingThinking('')
|
||||||
if (e?.name === 'AbortError')
|
if (e?.name === 'AbortError')
|
||||||
return
|
return
|
||||||
if (!e?.__handled)
|
if (!e?.__handled)
|
||||||
|
|
@ -225,8 +258,16 @@ export default function ChallengeDetailPage() {
|
||||||
<RiLoader4Line className='h-4 w-4 animate-spin text-text-tertiary' />
|
<RiLoader4Line className='h-4 w-4 animate-spin text-text-tertiary' />
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div className='mt-2 whitespace-pre-wrap text-sm text-text-primary'>
|
{streamingThinking && (
|
||||||
{streamingText || t('challenges.player.awaitingResponse')}
|
<div className='mt-3 space-y-2 rounded-md border border-divider-subtle bg-components-panel-bg p-3'>
|
||||||
|
<div className='text-xs font-medium uppercase tracking-wide text-text-tertiary'>
|
||||||
|
{t('challenges.player.modelThinking', 'Thinking')}
|
||||||
|
</div>
|
||||||
|
<div className='whitespace-pre-wrap text-sm text-text-secondary'>{streamingThinking}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<div className='mt-3 whitespace-pre-wrap text-sm text-text-primary'>
|
||||||
|
{streamingText || (!streamingThinking && t('challenges.player.awaitingResponse'))}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
@ -243,8 +284,16 @@ export default function ChallengeDetailPage() {
|
||||||
<div className={`mb-1 font-medium ${lastResult.success ? 'text-util-colors-green-green-700' : 'text-util-colors-orange-orange-700'}`}>
|
<div className={`mb-1 font-medium ${lastResult.success ? 'text-util-colors-green-green-700' : 'text-util-colors-orange-orange-700'}`}>
|
||||||
{lastResult.success ? t('challenges.player.status.success') : t('challenges.player.status.failed')}
|
{lastResult.success ? t('challenges.player.status.success') : t('challenges.player.status.failed')}
|
||||||
</div>
|
</div>
|
||||||
|
{lastResult.thinking && (
|
||||||
|
<div className='bg-components-panel-bg/60 mt-3 space-y-2 rounded-md border border-divider-subtle p-3'>
|
||||||
|
<div className='text-xs font-medium uppercase tracking-wide text-text-tertiary'>
|
||||||
|
{t('challenges.player.modelThinking', 'Thinking')}
|
||||||
|
</div>
|
||||||
|
<div className='whitespace-pre-wrap text-sm text-text-secondary'>{lastResult.thinking}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
{lastResult.message && (
|
{lastResult.message && (
|
||||||
<div className='whitespace-pre-wrap text-sm text-text-secondary'>{lastResult.message}</div>
|
<div className='mt-3 whitespace-pre-wrap text-sm text-text-secondary'>{lastResult.message}</div>
|
||||||
)}
|
)}
|
||||||
{lastResult.rating !== undefined && (
|
{lastResult.rating !== undefined && (
|
||||||
<div className='mt-2 text-sm text-text-tertiary'>
|
<div className='mt-2 text-sm text-text-tertiary'>
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,8 @@ export default {
|
||||||
defaultSuccessMessage: 'Challenge passed!',
|
defaultSuccessMessage: 'Challenge passed!',
|
||||||
defaultFailureMessage: 'Challenge not passed.',
|
defaultFailureMessage: 'Challenge not passed.',
|
||||||
ratingLine: 'Judge rating: {{rating}}/10',
|
ratingLine: 'Judge rating: {{rating}}/10',
|
||||||
judgeFeedbackLine: 'Judge feedback: {{feedback}}',
|
judgeFeedbackLine: '{{feedback}}',
|
||||||
|
modelThinking: 'Thinking',
|
||||||
},
|
},
|
||||||
leaderboard: {
|
leaderboard: {
|
||||||
title: 'Leaderboard',
|
title: 'Leaderboard',
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue