/**
* Text comparison utilities for OCR Text Explorer
* Provides character, word, and line-level diff visualization
*/
/**
* Create character-level diff with inline highlighting
*/
function createCharacterDiff(original, improved) {
// Renders a diff as an HTML string by walking a list of diff parts and
// appending the escaped value of each part to an accumulator.
//
// NOTE(review): this copy of the function looks truncated or stripped.
// The guard block below is never closed, the quoted literals are split
// across two lines, and neither diff nor html is declared in the visible
// lines. The statements that compute the diff and initialise the
// accumulator are presumably missing - confirm against the real file.
//
// Guard: return early when either input is falsy.
if (!original || !improved) {
return '
';
// Each part carries a type (equal, delete or insert) and a value that is
// passed to escapeHtml before being appended.
for (const part of diff) {
if (part.type === 'equal') {
html += escapeHtml(part.value);
} else if (part.type === 'delete') {
// NOTE(review): as visible here, deleted and inserted parts are emitted
// exactly like equal parts; any wrapping markup that told them apart
// appears to have been lost - TODO confirm.
html += `${escapeHtml(part.value)}`;
} else if (part.type === 'insert') {
html += `${escapeHtml(part.value)}`;
}
}
html += '
';
return html;
}
/**
* Create word-level diff
*/
function createWordDiff(original, improved) {
// Renders a diff as an HTML string where the value of every diff part is
// an array of tokens that is joined with an empty separator and escaped.
//
// NOTE(review): this copy of the function looks truncated or stripped.
// The guard block below is never closed and neither diff nor html is
// declared in the visible lines, so the statements that tokenise the
// inputs and compute the diff are presumably missing - confirm against
// the real file.
//
// Guard: return an empty string when either input is falsy.
if (!original || !improved) {
return '';
for (const part of diff) {
if (part.type === 'equal') {
html += escapeHtml(part.value.join(''));
} else if (part.type === 'delete') {
// NOTE(review): as visible here, deleted and inserted parts are emitted
// exactly like equal parts; wrapping markup appears to have been lost
// - TODO confirm.
html += `${escapeHtml(part.value.join(''))}`;
} else if (part.type === 'insert') {
html += `${escapeHtml(part.value.join(''))}`;
}
}
html += '
';
return html;
}
/**
* Create line-level diff
*/
function createLineDiff(original, improved) {
// Renders a diff as an HTML string where the value of every diff part is
// a list of lines; each line is escaped and appended on its own.
//
// NOTE(review): this copy of the function looks truncated or stripped.
// The guard block below is never closed and neither diff nor html is
// declared in the visible lines, so the statements that split the inputs
// into lines and compute the diff are presumably missing - confirm
// against the real file.
//
// Guard: return an empty string when either input is falsy.
if (!original || !improved) {
return '';
// NOTE(review): the three branches below append identical text (the
// escaped line surrounded by line breaks); per-type wrapping markup
// appears to have been lost - TODO confirm.
for (const part of diff) {
if (part.type === 'equal') {
for (const line of part.value) {
html += `
${escapeHtml(line)}
`;
}
} else if (part.type === 'delete') {
for (const line of part.value) {
html += `
${escapeHtml(line)}
`;
}
} else if (part.type === 'insert') {
for (const line of part.value) {
html += `
${escapeHtml(line)}
`;
}
}
}
html += '
';
return html;
}
/**
* Compute Longest Common Subsequence using dynamic programming
*/
function computeLCS(a, b) {
  // table[r][c] holds the LCS length of the first r items of `a` and the
  // first c items of `b`; row 0 and column 0 stay at zero (empty prefix).
  const rowCount = a.length + 1;
  const colCount = b.length + 1;
  const table = new Array(rowCount);
  table[0] = new Array(colCount).fill(0);

  for (let r = 1; r < rowCount; r += 1) {
    const above = table[r - 1];
    const row = new Array(colCount).fill(0);
    const itemA = a[r - 1];

    for (let c = 1; c < colCount; c += 1) {
      // A match extends the diagonal; otherwise carry the better neighbour.
      row[c] = itemA === b[c - 1]
        ? above[c - 1] + 1
        : Math.max(above[c], row[c - 1]);
    }

    table[r] = row;
  }

  // The full table is returned because buildDiff backtracks through it.
  return table;
}
/**
* Build diff from LCS table
*/
function buildDiff(a, b, dp) {
  // Segments are collected while walking the table from its bottom-right
  // corner, so both the segment list and each segment's items are in
  // reverse order until the final pass below.
  const segments = [];

  // Extend the most recent segment when the type matches, else open a new one.
  const record = (type, item) => {
    const latest = segments[segments.length - 1];
    if (latest !== undefined && latest.type === type) {
      latest.value.push(item);
    } else {
      segments.push({ type, value: [item] });
    }
  };

  let ai = a.length;
  let bi = b.length;

  while (ai > 0 || bi > 0) {
    const bothLeft = ai > 0 && bi > 0;

    if (bothLeft && a[ai - 1] === b[bi - 1]) {
      // Same item on both sides: step diagonally.
      record('equal', a[ai - 1]);
      ai -= 1;
      bi -= 1;
      continue;
    }

    // Ties between moving left and moving up go to the insertion.
    const takeFromB = bi > 0 && (ai === 0 || dp[ai][bi - 1] >= dp[ai - 1][bi]);
    if (takeFromB) {
      // Item exists only in b (insertion).
      record('insert', b[bi - 1]);
      bi -= 1;
    } else {
      // Item exists only in a (deletion).
      record('delete', a[ai - 1]);
      ai -= 1;
    }
  }

  // Restore reading order; string input yields string values, otherwise arrays.
  segments.reverse();
  const joinValues = typeof a === 'string';
  for (const segment of segments) {
    segment.value.reverse();
    if (joinValues) {
      segment.value = segment.value.join('');
    }
  }

  return segments;
}
/**
* Split text into words while preserving whitespace
*/
function splitIntoWords(text) {
  // Output tokens: every whitespace character is its own token, and each
  // maximal run of non-whitespace characters is one token, so joining the
  // tokens reproduces the input.
  const tokens = [];
  let pending = '';

  // Emit the word collected so far, if there is one.
  const flushPending = () => {
    if (pending) {
      tokens.push(pending);
      pending = '';
    }
  };

  // for...of walks code points, so astral characters are not split in half.
  for (const ch of text) {
    if (!/\s/.test(ch)) {
      pending += ch;
      continue;
    }
    flushPending();
    tokens.push(ch);
  }

  flushPending();
  return tokens;
}
/**
* Escape HTML special characters
*/
function escapeHtml(text) {
  // Pure string implementation: no DOM element is allocated per call and no
  // global `document` is needed, so this also works in workers, Node and tests.
  // The replacements mirror how HTML serializes a text node (which is what the
  // previous textContent -> innerHTML round trip produced): only &, <, > and
  // the non-breaking space are rewritten; quotes are left untouched, so the
  // result is meant for element content, not for attribute values.
  //
  // @param {*} text - Value to escape; null and undefined yield an empty string,
  //   anything else is converted with String().
  // @returns {string} The escaped text.
  if (text === null || text === undefined) {
    return '';
  }

  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '\u00A0': '&nbsp;',
  };

  return String(text).replace(/[&<>\u00A0]/g, (ch) => entities[ch]);
}
/**
* Calculate similarity percentage between two texts
*/
function calculateSimilarity(original, improved) {
  // Similarity is the length of the longest common subsequence divided by the
  // length of the longer input, rounded to a whole percentage.
  //
  // Only the LCS length is needed here, so two rolling rows are kept instead
  // of the full (m+1) x (n+1) table; memory stays proportional to the shorter
  // input, which matters for page-sized OCR text.
  //
  // @param {string} original - Source text.
  // @param {string} improved - Text to compare against the source.
  // @returns {number} Integer in 0..100; 0 when either input is falsy.
  if (!original || !improved) return 0;

  // LCS is symmetric, so put the shorter input on the inner axis.
  const [outer, inner] = original.length >= improved.length
    ? [original, improved]
    : [improved, original];

  let previous = new Array(inner.length + 1).fill(0);
  let current = new Array(inner.length + 1).fill(0);

  for (let i = 1; i <= outer.length; i++) {
    for (let j = 1; j <= inner.length; j++) {
      current[j] = outer[i - 1] === inner[j - 1]
        ? previous[j - 1] + 1
        : Math.max(previous[j], current[j - 1]);
    }
    // Index 0 is never written, so the recycled row keeps its zero border.
    [previous, current] = [current, previous];
  }

  const lcsLength = previous[inner.length];
  const maxLength = Math.max(original.length, improved.length);
  return Math.round((lcsLength / maxLength) * 100);
}
/**
* Create markdown-aware diff showing original text vs rendered markdown
*/
function createMarkdownDiff(original, improved, renderFunction) {
if (!original || !improved) {
return '