/**
 * Text comparison utilities for OCR Text Explorer.
 * Provides character-, word-, and line-level diff visualization.
 *
 * NOTE(review): the generated markup is class-free semantic HTML
 * (<del>/<ins> for removed/added text). Add styling hooks here if the
 * host page's CSS expects specific class names.
 */

/** Markup returned by the diff builders when either input is missing or empty. */
const NO_TEXT_HTML = '<p>No text to compare</p>';

/** Replacement entities for the characters that are significant in HTML. */
const HTML_ESCAPES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Render diff parts whose values are arrays of text fragments as inline HTML.
 * Equal text is emitted as-is, deletions inside <del>, insertions inside <ins>.
 * Parts with any other type are ignored.
 *
 * @param {Array<{type: string, value: string[]}>} diff - Parts from buildDiff.
 * @returns {string} HTML fragment wrapped in a single <div>.
 */
function renderInlineDiff(diff) {
  let html = '<div>';
  for (const part of diff) {
    const text = escapeHtml(part.value.join(''));
    if (part.type === 'equal') {
      html += text;
    } else if (part.type === 'delete') {
      html += `<del>${text}</del>`;
    } else if (part.type === 'insert') {
      html += `<ins>${text}</ins>`;
    }
  }
  html += '</div>';
  return html;
}

/**
 * Create character-level diff with inline highlighting.
 *
 * @param {string} original - Text before the change.
 * @param {string} improved - Text after the change.
 * @returns {string} HTML fragment, or a placeholder when either text is empty.
 */
function createCharacterDiff(original, improved) {
  if (!original || !improved) {
    return NO_TEXT_HTML;
  }

  // Compare code points rather than UTF-16 code units so a surrogate pair
  // (e.g. an emoji) is never split across two diff parts.
  const originalChars = Array.from(original);
  const improvedChars = Array.from(improved);

  const dp = computeLCS(originalChars, improvedChars);
  return renderInlineDiff(buildDiff(originalChars, improvedChars, dp));
}

/**
 * Create word-level diff.
 *
 * @param {string} original - Text before the change.
 * @param {string} improved - Text after the change.
 * @returns {string} HTML fragment, or a placeholder when either text is empty.
 */
function createWordDiff(original, improved) {
  if (!original || !improved) {
    return NO_TEXT_HTML;
  }

  // Whitespace is kept as separate tokens so the output re-joins losslessly.
  const originalWords = splitIntoWords(original);
  const improvedWords = splitIntoWords(improved);

  const dp = computeLCS(originalWords, improvedWords);
  return renderInlineDiff(buildDiff(originalWords, improvedWords, dp));
}

/**
 * Create line-level diff. Every line becomes its own <div>; removed lines are
 * wrapped in <del> and added lines in <ins>.
 *
 * @param {string} original - Text before the change.
 * @param {string} improved - Text after the change.
 * @returns {string} HTML fragment, or a placeholder when either text is empty.
 */
function createLineDiff(original, improved) {
  if (!original || !improved) {
    return NO_TEXT_HTML;
  }

  const originalLines = original.split('\n');
  const improvedLines = improved.split('\n');

  const dp = computeLCS(originalLines, improvedLines);
  const diff = buildDiff(originalLines, improvedLines, dp);

  let html = '<div>';
  for (const part of diff) {
    for (const line of part.value) {
      const text = escapeHtml(line);
      if (part.type === 'equal') {
        html += `<div>${text}</div>`;
      } else if (part.type === 'delete') {
        html += `<div><del>${text}</del></div>`;
      } else if (part.type === 'insert') {
        html += `<div><ins>${text}</ins></div>`;
      }
    }
  }
  html += '</div>';
  return html;
}

/**
 * Compute the Longest Common Subsequence table using dynamic programming.
 *
 * @param {string|Array} a - First sequence (indexable, with a length).
 * @param {string|Array} b - Second sequence.
 * @returns {number[][]} Table of size (a.length + 1) x (b.length + 1) where
 *   dp[i][j] is the LCS length of the first i items of a and first j of b.
 */
function computeLCS(a, b) {
  const m = a.length;
  const n = b.length;
  const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));

  for (let i = 1; i <= m; i++) {
    for (let j = 1; j <= n; j++) {
      dp[i][j] = a[i - 1] === b[j - 1]
        ? dp[i - 1][j - 1] + 1
        : Math.max(dp[i - 1][j], dp[i][j - 1]);
    }
  }

  return dp;
}

/**
 * Compute only the LCS length, keeping two rows instead of the full table.
 *
 * @param {string|Array} a - First sequence.
 * @param {string|Array} b - Second sequence.
 * @returns {number} Length of the longest common subsequence.
 */
function computeLCSLength(a, b) {
  const n = b.length;
  let previous = new Array(n + 1).fill(0);
  let current = new Array(n + 1).fill(0);

  for (let i = 1; i <= a.length; i++) {
    for (let j = 1; j <= n; j++) {
      current[j] = a[i - 1] === b[j - 1]
        ? previous[j - 1] + 1
        : Math.max(previous[j], current[j - 1]);
    }
    // Index 0 of both rows is never written, so it stays 0 after swapping.
    [previous, current] = [current, previous];
  }

  return previous[n];
}

/**
 * Build a diff from an LCS table by walking it from the bottom-right corner.
 * Consecutive items of the same kind are merged into one part.
 *
 * @param {string|Array} a - Original sequence.
 * @param {string|Array} b - Improved sequence.
 * @param {number[][]} dp - Table produced by computeLCS(a, b).
 * @returns {Array<{type: 'equal'|'insert'|'delete', value: (string|Array)}>}
 *   Parts in forward order. `value` is a string when `a` is a string,
 *   otherwise an array of items.
 */
function buildDiff(a, b, dp) {
  const parts = [];

  // The walk runs backwards, so parts and their items are collected in
  // reverse and flipped once at the end (avoids repeated unshift calls).
  const record = (type, item) => {
    const last = parts[parts.length - 1];
    if (last && last.type === type) {
      last.value.push(item);
    } else {
      parts.push({ type, value: [item] });
    }
  };

  let i = a.length;
  let j = b.length;

  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && a[i - 1] === b[j - 1]) {
      record('equal', a[i - 1]);
      i--;
      j--;
    } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
      // Item in b but not in a (insertion).
      record('insert', b[j - 1]);
      j--;
    } else {
      // Item in a but not in b (deletion).
      record('delete', a[i - 1]);
      i--;
    }
  }

  parts.reverse();

  const joinValues = typeof a === 'string';
  for (const part of parts) {
    part.value.reverse();
    if (joinValues) {
      part.value = part.value.join('');
    }
  }

  return parts;
}

/**
 * Split text into words while preserving whitespace. Each whitespace
 * character is its own token; each run of non-whitespace is one token.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens that concatenate back to `text`.
 */
function splitIntoWords(text) {
  return text.match(/\s|\S+/g) || [];
}

/**
 * Escape HTML special characters (& < > " ') without touching the DOM.
 *
 * @param {*} text - Value to escape; null/undefined become an empty string,
 *   anything else is converted with String().
 * @returns {string} Text safe to insert into HTML content or attribute values.
 */
function escapeHtml(text) {
  if (text == null) {
    return '';
  }
  return String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}

/**
 * Calculate similarity percentage between two texts as
 * LCS length / longer text length, rounded to the nearest integer.
 *
 * @param {string} original - First text.
 * @param {string} improved - Second text.
 * @returns {number} Integer from 0 to 100; 0 when either text is empty.
 */
function calculateSimilarity(original, improved) {
  if (!original || !improved) {
    return 0;
  }

  const lcsLength = computeLCSLength(original, improved);
  const maxLength = Math.max(original.length, improved.length);

  return Math.round((lcsLength / maxLength) * 100);
}

/**
 * Create a markdown-aware comparison showing the original text as plain text
 * next to the improved text rendered as markdown.
 *
 * @param {string} original - Original plain text.
 * @param {string} improved - Improved markdown text.
 * @param {function(string): string} [renderFunction] - Converts markdown to
 *   HTML. When it is not a function, the improved text is shown escaped.
 * @returns {string} HTML fragment, or a placeholder when either text is empty.
 */
function createMarkdownDiff(original, improved, renderFunction) {
  if (!original || !improved) {
    return NO_TEXT_HTML;
  }

  // The renderer's output is inserted verbatim: it must return safe HTML.
  const improvedHtml = typeof renderFunction === 'function'
    ? renderFunction(improved)
    : `<pre>${escapeHtml(improved)}</pre>`;

  let html = '<div>';

  // Original text (plain)
  html += '<div>';
  html += '<h3>Original (Plain Text)</h3>';
  html += `<div><pre>${escapeHtml(original)}</pre></div>`;
  html += '</div>';

  // Improved text (rendered markdown)
  html += '<div>';
  html += '<h3>Improved (Rendered Markdown)</h3>';
  html += `<div>${improvedHtml}</div>`;
  html += '</div>';

  html += '</div>';
  return html;
}