// Source: ocr-time-capsule / js / reasoning-parser.js
// Author: davanstrien (HF Staff)
// Commit 2e33030: Fix reasoning trace parsing for incomplete XML tags
/**
 * Reasoning Trace Parser
 * Handles parsing and formatting of model reasoning traces from OCR outputs.
 *
 * Every member is static; the class is used as a namespace.
 */
class ReasoningParser {
  /**
   * Tag names that may wrap a reasoning trace, in lookup priority order.
   * @returns {string[]} - Lower-case tag names without angle brackets
   */
  static get REASONING_TAGS() {
    return ['think', 'thinking', 'reasoning', 'thought'];
  }

  /**
   * Tag names that may wrap the final answer, in lookup priority order.
   * @returns {string[]} - Lower-case tag names without angle brackets
   */
  static get ANSWER_TAGS() {
    return ['answer', 'output'];
  }

  /**
   * Find the first complete `<tag>...</tag>` section for any of the given tags.
   * Tags are tried in array order and matched case-insensitively; the closing
   * tag must come after the opening tag.
   * @param {string} text - The text to search
   * @param {string[]} tagNames - Tag names without angle brackets
   * @returns {?RegExpMatchArray} - Match whose group 1 is the inner content, or null
   */
  static matchTaggedSection(text, tagNames) {
    for (const tag of tagNames) {
      const match = text.match(new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>`, 'i'));
      if (match) {
        return match;
      }
    }
    return null;
  }

  /**
   * Count whitespace-separated words.
   * @param {string} text - The text to count
   * @returns {number} - Number of non-empty tokens
   */
  static countWords(text) {
    return text.split(/\s+/).filter(word => word).length;
  }

  /**
   * Detect if text contains a complete reasoning trace.
   * A trace counts only when an opening tag is followed by its matching
   * closing tag; a lone opening tag, or a closing tag that appears before the
   * opening tag, is not a trace.
   * @param {string} text - The text to check
   * @returns {boolean} - True if a complete reasoning trace is detected
   */
  static detectReasoningTrace(text) {
    if (!text || typeof text !== 'string') return false;
    return ReasoningParser.matchTaggedSection(text, ReasoningParser.REASONING_TAGS) !== null;
  }

  /**
   * Parse reasoning content from text.
   *
   * The reasoning section and the answer section are looked up independently,
   * so any reasoning tag can be combined with any answer tag (for example
   * `<thinking>` with `<answer>`).
   *
   * When only a reasoning section is found, `answer` is null: text outside the
   * tags is not promoted to an answer. When neither section is found, the whole
   * text is returned as the answer.
   *
   * @param {string} text - The text containing reasoning trace
   * @returns {object} - `{ reasoning, answer, hasReasoning, hasAnswer, original }`;
   *   `reasoning` and `answer` are trimmed strings or null
   */
  static parseReasoningContent(text) {
    // Empty or non-string input has nothing to parse; return the full result
    // shape so callers can read the flags without extra checks.
    if (!text || typeof text !== 'string') {
      return {
        reasoning: null,
        answer: null,
        hasReasoning: false,
        hasAnswer: false,
        original: text
      };
    }

    const reasoningMatch = ReasoningParser.matchTaggedSection(text, ReasoningParser.REASONING_TAGS);
    const answerMatch = ReasoningParser.matchTaggedSection(text, ReasoningParser.ANSWER_TAGS);

    if (reasoningMatch || answerMatch) {
      return {
        reasoning: reasoningMatch ? reasoningMatch[1].trim() : null,
        answer: answerMatch ? answerMatch[1].trim() : null,
        hasReasoning: !!reasoningMatch,
        hasAnswer: !!answerMatch,
        original: text
      };
    }

    // Check if there are incomplete reasoning tags (opening but no closing)
    const hasOpeningTag = /<think>|<thinking>|<reasoning>|<thought>/i.test(text);
    if (hasOpeningTag) {
      console.warn('Incomplete reasoning trace detected - missing closing tags');
    }

    // If no patterns match, return original text as answer
    return {
      reasoning: null,
      answer: text,
      hasReasoning: false,
      hasAnswer: true,
      original: text
    };
  }

  /**
   * Format reasoning steps for display.
   *
   * First tries steps written as `N. **Title** content`, where content may span
   * several lines. If none are found, falls back to a line-based parse: each
   * line starting with `N.` opens a step and the following lines become its
   * content. In the fallback, lines before the first numbered line are dropped.
   *
   * @param {string} reasoningText - The raw reasoning text
   * @returns {?object} - `{ steps, rawText, stepCount, characterCount, wordCount }`,
   *   where each step is `{ number, title, content }` with `number` as a string;
   *   null for empty or non-string input
   */
  static formatReasoningSteps(reasoningText) {
    if (!reasoningText || typeof reasoningText !== 'string') return null;

    // A step ends right before the next numbered line or at the end of input.
    // JavaScript has no `\z` anchor and `$` matches at every line end under the
    // `m` flag, so end of input is written as `(?![\s\S])`.
    const stepPattern = /^(\d+)\.\s+\*\*(.+?)\*\*(.*?)(?=^\d+\.\s|(?![\s\S]))/gms;
    const steps = [];
    let match;
    while ((match = stepPattern.exec(reasoningText)) !== null) {
      steps.push({
        number: match[1],
        title: match[2].trim(),
        content: match[3].trim()
      });
    }

    // If no bold-titled steps were found, parse line by line
    if (steps.length === 0) {
      const lines = reasoningText.split('\n').filter(line => line.trim());
      for (const line of lines) {
        const numberedMatch = line.match(/^(\d+)\.\s*(.+)/);
        if (numberedMatch) {
          steps.push({
            number: numberedMatch[1],
            title: numberedMatch[2].replace(/\*\*/g, '').trim(),
            content: ''
          });
        } else if (steps.length > 0) {
          // Continuation line: append to the current step without a leading newline
          const current = steps[steps.length - 1];
          current.content = current.content ? current.content + '\n' + line : line;
        }
      }
    }

    return {
      steps: steps,
      rawText: reasoningText,
      stepCount: steps.length,
      characterCount: reasoningText.length,
      wordCount: ReasoningParser.countWords(reasoningText)
    };
  }

  /**
   * Extract key insights from reasoning.
   *
   * Collects the text following the labels `decision:`, `observation:`,
   * `note:`, `important:` and `key finding:` (case-insensitive). Labels must
   * start at a word boundary, so `footnote:` is not read as `note:`. Results
   * are grouped by label in the order listed above, not by position in the text.
   *
   * @param {string} reasoningText - The reasoning text
   * @returns {array} - Array of non-empty, trimmed insight strings
   */
  static extractInsights(reasoningText) {
    if (!reasoningText || typeof reasoningText !== 'string') return [];

    const insights = [];
    // Regex literals are created on every call, so the `lastIndex` state of
    // these global patterns never leaks between calls.
    const patterns = [
      /\bdecision:\s*(.+)/gi,
      /\bobservation:\s*(.+)/gi,
      /\bnote:\s*(.+)/gi,
      /\bimportant:\s*(.+)/gi,
      /\bkey finding:\s*(.+)/gi
    ];

    for (const pattern of patterns) {
      let match;
      while ((match = pattern.exec(reasoningText)) !== null) {
        const insight = match[1].trim();
        // A label followed only by whitespace captures nothing useful
        if (insight) {
          insights.push(insight);
        }
      }
    }
    return insights;
  }

  /**
   * Get summary statistics about the reasoning trace.
   *
   * Without reasoning content every counter is 0, including the answer
   * counters. `reasoningRatio` is a percentage formatted with one decimal as a
   * string (for example `'76.5'`) when reasoning exists, and the number 0
   * otherwise.
   *
   * @param {object} parsedContent - Parsed reasoning content
   * @returns {object} - `{ hasReasoning, reasoningLength, answerLength,
   *   totalLength, reasoningRatio, reasoningWords, answerWords }`
   */
  static getReasoningStats(parsedContent) {
    if (!parsedContent || !parsedContent.reasoning) {
      return {
        hasReasoning: false,
        reasoningLength: 0,
        answerLength: 0,
        totalLength: 0,
        reasoningRatio: 0,
        reasoningWords: 0,
        answerWords: 0
      };
    }

    const reasoning = parsedContent.reasoning;
    const answer = parsedContent.answer;
    const reasoningLength = reasoning.length;
    const answerLength = answer ? answer.length : 0;
    const totalLength = reasoningLength + answerLength;

    return {
      hasReasoning: true,
      reasoningLength: reasoningLength,
      answerLength: answerLength,
      totalLength: totalLength,
      reasoningRatio: totalLength > 0 ? (reasoningLength / totalLength * 100).toFixed(1) : 0,
      reasoningWords: ReasoningParser.countWords(reasoning),
      answerWords: answer ? ReasoningParser.countWords(answer) : 0
    };
  }

  /**
   * Format reasoning for export.
   *
   * With reasoning included, the output is a "MODEL REASONING" section followed
   * by a "FINAL OUTPUT" header and the answer. Otherwise only the answer is
   * returned.
   *
   * @param {object} parsedContent - Parsed reasoning content
   * @param {boolean} includeReasoning - Whether to include reasoning in export
   * @returns {string} - Formatted text for export ('' when there is no content)
   */
  static formatForExport(parsedContent, includeReasoning = true) {
    if (!parsedContent) return '';

    let exportText = '';
    if (includeReasoning && parsedContent.reasoning) {
      exportText += '=== MODEL REASONING ===\n\n';
      exportText += parsedContent.reasoning;
      exportText += '\n\n=== FINAL OUTPUT ===\n\n';
    }
    if (parsedContent.answer) {
      exportText += parsedContent.answer;
    }
    return exportText;
  }
}
// Export for use in other scripts.
// The class is attached to `window` when one exists (classic <script> usage).
// The typeof guard keeps the file loadable where `window` is not defined;
// there the class is exposed through CommonJS `module.exports` when available.
if (typeof window !== 'undefined') {
  window.ReasoningParser = ReasoningParser;
}
if (typeof module !== 'undefined' && module.exports) {
  module.exports = ReasoningParser;
}