Spaces:
Running
Running
/**
 * Text comparison utilities for OCR Text Explorer.
 * Provides character-, word-, and line-level diff visualization.
 */
/**
 * Create a character-level diff with inline highlighting.
 *
 * Both texts are compared code point by code point (not UTF-16 code unit by
 * code unit), so a character outside the Basic Multilingual Plane such as an
 * emoji is always kept whole and never split across two highlight spans.
 *
 * @param {string} original - Text before the change.
 * @param {string} improved - Text after the change.
 * @returns {string} HTML markup: unchanged text is emitted escaped, deletions
 *   are wrapped in a red struck-through span and insertions in a green span.
 *   When either argument is falsy a "No text to compare" paragraph is
 *   returned instead.
 */
function createCharacterDiff(original, improved) {
  if (!original || !improved) {
    return '<p class="text-gray-500">No text to compare</p>';
  }

  // Array.from iterates a string by code point; indexing the raw string would
  // let the LCS match one half of a surrogate pair and tear the character apart.
  const originalChars = Array.from(original);
  const improvedChars = Array.from(improved);

  const dp = computeLCS(originalChars, improvedChars);
  // Given arrays, buildDiff leaves each part's value as an array of items.
  const diff = buildDiff(originalChars, improvedChars, dp);

  let html = '<div class="font-mono text-sm whitespace-pre-wrap text-gray-900 dark:text-gray-100">';
  for (const part of diff) {
    if (part.type === 'equal') {
      html += escapeHtml(part.value.join(''));
    } else if (part.type === 'delete') {
      html += `<span class="bg-red-200 dark:bg-red-950 text-red-800 dark:text-red-300 line-through">${escapeHtml(part.value.join(''))}</span>`;
    } else if (part.type === 'insert') {
      html += `<span class="bg-green-200 dark:bg-green-950 text-green-800 dark:text-green-300">${escapeHtml(part.value.join(''))}</span>`;
    }
  }
  html += '</div>';
  return html;
}
/**
 * Create a word-level diff with inline highlighting.
 *
 * The texts are tokenized with splitIntoWords (words and individual whitespace
 * characters), diffed token by token, and rendered back to HTML.
 *
 * @param {string} original - Text before the change.
 * @param {string} improved - Text after the change.
 * @returns {string} HTML markup with deleted tokens in a red struck-through
 *   span and inserted tokens in a green span, or a "No text to compare"
 *   paragraph when either argument is falsy.
 */
function createWordDiff(original, improved) {
  if (!original || !improved) {
    return '<p class="text-gray-500">No text to compare</p>';
  }

  // Tokens keep their whitespace, so joining them reproduces the source text.
  const before = splitIntoWords(original);
  const after = splitIntoWords(improved);
  const table = computeLCS(before, after);
  const segments = buildDiff(before, after, table);

  const rendered = segments.map(({ type, value }) => {
    switch (type) {
      case 'equal':
        return escapeHtml(value.join(''));
      case 'delete':
        return `<span class="bg-red-200 dark:bg-red-950 text-red-800 dark:text-red-300 line-through">${escapeHtml(value.join(''))}</span>`;
      case 'insert':
        return `<span class="bg-green-200 dark:bg-green-950 text-green-800 dark:text-green-300">${escapeHtml(value.join(''))}</span>`;
      default:
        // Unknown segment types contribute nothing to the output.
        return '';
    }
  });

  return `<div class="font-mono text-sm whitespace-pre-wrap text-gray-900 dark:text-gray-100">${rendered.join('')}</div>`;
}
/**
 * Create a line-level diff.
 *
 * Lines are split on LF or CRLF, so two texts that differ only in their
 * line-ending convention are not reported as changed on every line.
 *
 * @param {string} original - Text before the change.
 * @param {string} improved - Text after the change.
 * @returns {string} HTML markup with one <div> per line: unchanged lines are
 *   plain, deleted lines are red and struck through, inserted lines are green.
 *   When either argument is falsy a "No text to compare" paragraph is
 *   returned instead.
 */
function createLineDiff(original, improved) {
  if (!original || !improved) {
    return '<p class="text-gray-500">No text to compare</p>';
  }

  // Accept both "\n" and "\r\n"; a trailing "\r" left on a line is invisible
  // once rendered but would still make otherwise identical lines compare unequal.
  const lineBreak = /\r?\n/;
  const originalLines = original.split(lineBreak);
  const improvedLines = improved.split(lineBreak);

  const dp = computeLCS(originalLines, improvedLines);
  const diff = buildDiff(originalLines, improvedLines, dp);

  // CSS classes for the per-line <div>, keyed by diff part type.
  const rowClasses = new Map([
    ['equal', 'py-1'],
    ['delete', 'py-1 bg-red-200 dark:bg-red-950 text-red-800 dark:text-red-300 line-through'],
    ['insert', 'py-1 bg-green-200 dark:bg-green-950 text-green-800 dark:text-green-300'],
  ]);

  let html = '<div class="font-mono text-sm text-gray-900 dark:text-gray-100">';
  for (const part of diff) {
    const rowClass = rowClasses.get(part.type);
    if (rowClass === undefined) {
      continue;
    }
    for (const line of part.value) {
      html += `<div class="${rowClass}">${escapeHtml(line)}</div>`;
    }
  }
  html += '</div>';
  return html;
}
/**
 * Compute the Longest Common Subsequence table using dynamic programming.
 *
 * Works on anything indexable with a `length` (a string, or an array of
 * tokens); items are compared with `===`.
 *
 * @param {string|Array} a - First sequence.
 * @param {string|Array} b - Second sequence.
 * @returns {number[][]} Table of (a.length + 1) rows by (b.length + 1)
 *   columns where dp[i][j] is the LCS length of the first i items of `a` and
 *   the first j items of `b`. Row 0 and column 0 are all zeros.
 */
function computeLCS(a, b) {
  const m = a.length;
  const n = b.length;
  const dp = Array.from({ length: m + 1 }, () => Array.from({ length: n + 1 }, () => 0));

  for (let i = 1; i <= m; i++) {
    // Resolve the row references and the current item once per row rather
    // than once per cell.
    const item = a[i - 1];
    const above = dp[i - 1];
    const row = dp[i];
    for (let j = 1; j <= n; j++) {
      row[j] = item === b[j - 1]
        ? above[j - 1] + 1
        : Math.max(above[j], row[j - 1]);
    }
  }
  return dp;
}
/**
 * Build a diff from an LCS table by walking it back from the bottom-right cell.
 *
 * Consecutive items of the same kind are merged into one part. When both an
 * insertion and a deletion are possible, the walk takes the insertion first,
 * which places deletions before insertions in the final (forward) order.
 *
 * @param {string|Array} a - Original sequence.
 * @param {string|Array} b - Improved sequence.
 * @param {number[][]} dp - LCS table for `a` and `b`, as returned by computeLCS.
 * @returns {Array<{type: 'equal'|'insert'|'delete', value: (string|Array)}>}
 *   Parts in forward order. `value` is a string when `a` is a string,
 *   otherwise an array of the items in that run.
 */
function buildDiff(a, b, dp) {
  const diff = [];

  // Extend the current run when the type matches, otherwise open a new run.
  // Items are appended (O(1)) even though the walk goes backwards; every run
  // is reversed once at the end instead of paying for an unshift per item.
  const record = (type, item) => {
    const last = diff[diff.length - 1];
    if (last !== undefined && last.type === type) {
      last.value.push(item);
    } else {
      diff.push({ type, value: [item] });
    }
  };

  let i = a.length;
  let j = b.length;
  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && a[i - 1] === b[j - 1]) {
      // Item present in both sequences.
      record('equal', a[i - 1]);
      i--;
      j--;
    } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
      // Item in b but not in a (insertion).
      record('insert', b[j - 1]);
      j--;
    } else {
      // Item in a but not in b (deletion).
      record('delete', a[i - 1]);
      i--;
    }
  }

  // Runs and the items inside them were collected back-to-front.
  diff.reverse();
  const joinValues = typeof a === 'string';
  for (const part of diff) {
    part.value.reverse();
    if (joinValues) {
      // Character diff: hand callers a string rather than an array of characters.
      part.value = part.value.join('');
    }
  }
  return diff;
}
/**
 * Split text into words while preserving whitespace.
 *
 * Every whitespace character becomes its own token and every maximal run of
 * non-whitespace characters becomes one token, so joining the result with ''
 * reproduces the input exactly.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens in source order; an empty array for ''.
 */
function splitIntoWords(text) {
  // One pass with a global regex: a single whitespace character, or a run of
  // non-whitespace. match() returns null when nothing matches (empty input).
  return text.match(/\s|\S+/g) ?? [];
}
/**
 * Escape HTML special characters so text can be placed inside element content.
 *
 * Produces the same output as assigning the text to an element's textContent
 * and reading back innerHTML (&, <, > and the no-break space are replaced by
 * entities), but without touching the DOM, so it also works where `document`
 * is unavailable and does not allocate an element per call.
 *
 * Quotes are left as-is: the result is meant for element content, not for
 * attribute values.
 *
 * @param {*} text - Value to escape; null and undefined yield ''.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '\u00A0': '&nbsp;',
  };
  const source = text == null ? '' : String(text);
  return source.replace(/[&<>\u00A0]/g, (char) => entities[char]);
}
/**
 * Calculate the similarity percentage between two texts.
 *
 * Similarity is the length of the longest common subsequence divided by the
 * length of the longer text, rounded to the nearest integer percentage.
 *
 * Only the LCS length is needed, so the computation keeps two rows of the
 * dynamic-programming table instead of the whole table: memory grows with the
 * shorter input rather than with the product of both lengths.
 *
 * @param {string} original - Text before the change.
 * @param {string} improved - Text after the change.
 * @returns {number} Integer from 0 to 100; 0 when either argument is falsy.
 */
function calculateSimilarity(original, improved) {
  if (!original || !improved) return 0;

  // LCS length is symmetric, so put the shorter sequence on the column axis
  // to keep the two rows as small as possible.
  const [longer, shorter] = original.length >= improved.length
    ? [original, improved]
    : [improved, original];
  const width = shorter.length;

  let previous = new Uint32Array(width + 1);
  let current = new Uint32Array(width + 1);
  for (let i = 1; i <= longer.length; i++) {
    const item = longer[i - 1];
    for (let j = 1; j <= width; j++) {
      current[j] = item === shorter[j - 1]
        ? previous[j - 1] + 1
        : Math.max(previous[j], current[j - 1]);
    }
    // The finished row becomes "previous"; the old one is overwritten next
    // pass. Index 0 is never written, so it stays 0 in both rows.
    [previous, current] = [current, previous];
  }

  const lcsLength = previous[width];
  const maxLength = longer.length;
  return Math.round((lcsLength / maxLength) * 100);
}
/**
 * Create a side-by-side view of the original plain text and the improved
 * text rendered as markdown.
 *
 * @param {string} original - Original text; shown escaped inside a <pre>.
 * @param {string} improved - Improved text; passed to `renderFunction`.
 * @param {Function} [renderFunction] - Called with `improved`; its return
 *   value is inserted into the markup as-is (it is not escaped here). When it
 *   is not a function, `improved` is shown escaped inside a <pre> instead.
 * @returns {string} HTML for a two-column grid, or a "No text to compare"
 *   paragraph when either text is falsy.
 */
function createMarkdownDiff(original, improved, renderFunction) {
  if (!original || !improved) {
    return '<p class="text-gray-500">No text to compare</p>';
  }

  // Left column: the untouched source text.
  const originalColumn =
    '<div>' +
    '<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300 mb-3">Original (Plain Text)</h4>' +
    '<div class="font-mono text-xs bg-gray-50 dark:bg-gray-800 text-gray-900 dark:text-gray-100 p-4 rounded-lg overflow-x-auto">' +
    '<pre class="whitespace-pre-wrap">' + escapeHtml(original) + '</pre>' +
    '</div>' +
    '</div>';

  // Right column body: rendered markdown, or escaped plain text as a fallback.
  const improvedBody = typeof renderFunction === 'function'
    ? renderFunction(improved)
    : '<pre class="whitespace-pre-wrap font-mono text-xs">' + escapeHtml(improved) + '</pre>';

  const improvedColumn =
    '<div>' +
    '<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300 mb-3">Improved (Rendered Markdown)</h4>' +
    '<div class="bg-gray-50 dark:bg-gray-800 p-4 rounded-lg overflow-x-auto">' +
    improvedBody +
    '</div>' +
    '</div>';

  return '<div class="grid grid-cols-2 gap-6">' + originalColumn + improvedColumn + '</div>';
}