ocr-time-capsule / js /diff-utils.js
davanstrien's picture
davanstrien HF Staff
Add markdown rendering support for VLM output
dbda7b0
/**
* Text comparison utilities for OCR Text Explorer
* Provides character, word, and line-level diff visualization
*/
/**
 * Create character-level diff with inline highlighting.
 *
 * Both texts are split into Unicode code points (not UTF-16 code units)
 * before diffing, so a character stored as a surrogate pair is always kept,
 * deleted, or inserted as a whole and never cut in half.
 *
 * @param {string} original - Text to compare from.
 * @param {string} improved - Text to compare to.
 * @returns {string} HTML string: unchanged text is emitted as-is (escaped),
 *   deletions are wrapped in a red struck-through span and insertions in a
 *   green span. If either argument is falsy, a "No text to compare"
 *   paragraph is returned instead.
 */
function createCharacterDiff(original, improved) {
  if (!original || !improved) {
    return '<p class="text-gray-500">No text to compare</p>';
  }

  // Array.from iterates strings by code point; indexing the raw string would
  // compare individual surrogate halves and could split a pair across parts.
  const originalChars = Array.from(original);
  const improvedChars = Array.from(improved);

  const dp = computeLCS(originalChars, improvedChars);
  // For array input buildDiff leaves each part's value as an array, so the
  // characters are joined back together here before rendering.
  const diff = buildDiff(originalChars, improvedChars, dp);

  let html = '<div class="font-mono text-sm whitespace-pre-wrap text-gray-900 dark:text-gray-100">';
  for (const part of diff) {
    if (part.type === 'equal') {
      html += escapeHtml(part.value.join(''));
    } else if (part.type === 'delete') {
      html += `<span class="bg-red-200 dark:bg-red-950 text-red-800 dark:text-red-300 line-through">${escapeHtml(part.value.join(''))}</span>`;
    } else if (part.type === 'insert') {
      html += `<span class="bg-green-200 dark:bg-green-950 text-green-800 dark:text-green-300">${escapeHtml(part.value.join(''))}</span>`;
    }
  }
  html += '</div>';
  return html;
}
/**
 * Create word-level diff.
 *
 * Tokenizes both texts with splitIntoWords, diffs the token lists and renders
 * each run of tokens as escaped HTML: unchanged runs as plain text, removed
 * runs in a red struck-through span, added runs in a green span.
 *
 * @param {string} original - Text to compare from.
 * @param {string} improved - Text to compare to.
 * @returns {string} HTML string, or a "No text to compare" paragraph when
 *   either argument is falsy.
 */
function createWordDiff(original, improved) {
  if (!(original && improved)) {
    return '<p class="text-gray-500">No text to compare</p>';
  }

  // Tokens keep their whitespace, so joining a run restores the source text
  const beforeTokens = splitIntoWords(original);
  const afterTokens = splitIntoWords(improved);
  const table = computeLCS(beforeTokens, afterTokens);
  const segments = buildDiff(beforeTokens, afterTokens, table);

  const renderSegment = ({ type, value }) => {
    switch (type) {
      case 'equal':
        return escapeHtml(value.join(''));
      case 'delete':
        return `<span class="bg-red-200 dark:bg-red-950 text-red-800 dark:text-red-300 line-through">${escapeHtml(value.join(''))}</span>`;
      case 'insert':
        return `<span class="bg-green-200 dark:bg-green-950 text-green-800 dark:text-green-300">${escapeHtml(value.join(''))}</span>`;
      default:
        // Unrecognized segment types contribute nothing to the output
        return '';
    }
  };

  const body = segments.map(renderSegment).join('');
  return `<div class="font-mono text-sm whitespace-pre-wrap text-gray-900 dark:text-gray-100">${body}</div>`;
}
/**
 * Create line-level diff.
 *
 * Splits both texts on '\n', diffs the resulting line lists and renders every
 * line in its own block element: unchanged lines plain, removed lines red and
 * struck through, added lines green.
 *
 * @param {string} original - Text to compare from.
 * @param {string} improved - Text to compare to.
 * @returns {string} HTML string, or a "No text to compare" paragraph when
 *   either argument is falsy.
 */
function createLineDiff(original, improved) {
  if (!(original && improved)) {
    return '<p class="text-gray-500">No text to compare</p>';
  }

  const beforeLines = original.split('\n');
  const afterLines = improved.split('\n');
  const table = computeLCS(beforeLines, afterLines);
  const segments = buildDiff(beforeLines, afterLines, table);

  // One <div> per line, all sharing the CSS classes of their segment
  const renderLines = (classes, lines) =>
    lines.map((line) => `<div class="${classes}">${escapeHtml(line)}</div>`).join('');

  const chunks = ['<div class="font-mono text-sm text-gray-900 dark:text-gray-100">'];
  for (const { type, value } of segments) {
    switch (type) {
      case 'equal':
        chunks.push(renderLines('py-1', value));
        break;
      case 'delete':
        chunks.push(renderLines('py-1 bg-red-200 dark:bg-red-950 text-red-800 dark:text-red-300 line-through', value));
        break;
      case 'insert':
        chunks.push(renderLines('py-1 bg-green-200 dark:bg-green-950 text-green-800 dark:text-green-300', value));
        break;
      default:
        // Unrecognized segment types are skipped
        break;
    }
  }
  chunks.push('</div>');
  return chunks.join('');
}
/**
 * Compute the Longest Common Subsequence table using dynamic programming.
 *
 * Works on any two indexable sequences with a `length` (strings or arrays);
 * elements are compared with `===`.
 *
 * @param {string|Array} a - First sequence.
 * @param {string|Array} b - Second sequence.
 * @returns {number[][]} Table of size (a.length + 1) x (b.length + 1) where
 *   entry [i][j] is the LCS length of the first i elements of `a` and the
 *   first j elements of `b`. Row 0 and column 0 are all zeros.
 */
function computeLCS(a, b) {
  const rowCount = a.length + 1;
  const colCount = b.length + 1;
  const table = Array.from({ length: rowCount }, () => Array.from({ length: colCount }, () => 0));

  for (let row = 1; row < rowCount; row++) {
    const item = a[row - 1];
    const above = table[row - 1];
    const current = table[row];
    for (let col = 1; col < colCount; col++) {
      current[col] = item === b[col - 1]
        ? above[col - 1] + 1 // extend the match found diagonally
        : Math.max(above[col], current[col - 1]); // carry over the best so far
    }
  }

  return table;
}
/**
 * Build a diff from an LCS table by backtracking from its bottom-right corner.
 *
 * Consecutive elements of the same kind are grouped into one part. When an
 * element could be treated as either inserted or deleted (tie in the table),
 * insertion is chosen during backtracking, which places deletions before
 * insertions in the final, forward-ordered result.
 *
 * @param {string|Array} a - Original sequence.
 * @param {string|Array} b - Improved sequence.
 * @param {number[][]} dp - LCS table for `a` and `b` as produced by
 *   computeLCS: dp[i][j] is the LCS length of a[0..i) and b[0..j).
 * @returns {Array<{type: 'equal'|'delete'|'insert', value: string|Array}>}
 *   Parts in forward order. `value` is a string when `a` is a string,
 *   otherwise an array of the grouped elements.
 */
function buildDiff(a, b, dp) {
  const diff = [];

  // Extend the most recent part when it has the same type, else start a new
  // one. Elements are appended with push (O(1)) rather than unshift, which is
  // linear per call and made long runs quadratic; since backtracking visits
  // elements last-to-first, each run is reversed once at the end instead.
  const record = (type, item) => {
    const last = diff[diff.length - 1];
    if (last !== undefined && last.type === type) {
      last.value.push(item);
    } else {
      diff.push({ type, value: [item] });
    }
  };

  let i = a.length;
  let j = b.length;
  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && a[i - 1] === b[j - 1]) {
      // Element present in both sequences
      record('equal', a[i - 1]);
      i--;
      j--;
    } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
      // Element in b but not in a (insertion)
      record('insert', b[j - 1]);
      j--;
    } else {
      // Element in a but not in b (deletion)
      record('delete', a[i - 1]);
      i--;
    }
  }

  // Parts and their contents were collected back-to-front; restore order
  diff.reverse();
  const joinValues = typeof a === 'string';
  for (const part of diff) {
    part.value.reverse();
    if (joinValues) {
      // Character diffs expose each part as a single string
      part.value = part.value.join('');
    }
  }
  return diff;
}
/**
 * Split text into words while preserving whitespace.
 *
 * Every whitespace character becomes its own token and every maximal run of
 * non-whitespace characters becomes one token, so joining the tokens yields
 * the input text again.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens in source order; empty array for an empty string.
 */
function splitIntoWords(text) {
  // First alternative: a single whitespace character; second: a whole word
  const tokens = text.match(/\s|\S+/g);
  return tokens || [];
}
/**
 * Escape HTML special characters.
 *
 * Assigns the text to a detached <div> via `textContent` and reads it back
 * through `innerHTML`, letting the DOM serializer perform the escaping.
 * Requires a global `document`.
 *
 * @param {string} text - Raw text to escape.
 * @returns {string} The `innerHTML` serialization of that text.
 */
function escapeHtml(text) {
  const scratch = document.createElement('div');
  scratch.textContent = text;
  const { innerHTML: escaped } = scratch;
  return escaped;
}
/**
 * Calculate similarity percentage between two texts.
 *
 * The score is the length of the longest common subsequence divided by the
 * length of the longer text, expressed as a rounded percentage.
 *
 * @param {string} original - First text.
 * @param {string} improved - Second text.
 * @returns {number} Integer from 0 to 100; 0 when either argument is falsy.
 */
function calculateSimilarity(original, improved) {
  if (!(original && improved)) {
    return 0;
  }

  const originalLength = original.length;
  const improvedLength = improved.length;

  // The bottom-right cell of the table holds the LCS length of the full texts
  const table = computeLCS(original, improved);
  const sharedLength = table[originalLength][improvedLength];
  const longerLength = Math.max(originalLength, improvedLength);

  const ratio = sharedLength / longerLength;
  return Math.round(ratio * 100);
}
/**
 * Create markdown-aware diff showing original text vs rendered markdown.
 *
 * Produces a two-column grid: the left column shows the original as escaped
 * plain text, the right column shows the improved text passed through
 * `renderFunction`. The renderer's return value is inserted as-is (not
 * escaped), so it is expected to return HTML that is safe to embed.
 *
 * @param {string} original - Plain text for the left column.
 * @param {string} improved - Markdown source for the right column.
 * @param {Function} [renderFunction] - Called with `improved`; its result is
 *   used as the right column's content. When it is not a function, the
 *   improved text is shown as escaped plain text instead.
 * @returns {string} HTML string, or a "No text to compare" paragraph when
 *   either text is falsy.
 */
function createMarkdownDiff(original, improved, renderFunction) {
  if (!(original && improved)) {
    return '<p class="text-gray-500">No text to compare</p>';
  }

  // Left column: original text (plain)
  const originalColumn =
    '<div>' +
    '<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300 mb-3">Original (Plain Text)</h4>' +
    '<div class="font-mono text-xs bg-gray-50 dark:bg-gray-800 text-gray-900 dark:text-gray-100 p-4 rounded-lg overflow-x-auto">' +
    '<pre class="whitespace-pre-wrap">' + escapeHtml(original) + '</pre>' +
    '</div>' +
    '</div>';

  // Right column: improved text, rendered when a renderer is available
  const improvedHeader =
    '<div>' +
    '<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300 mb-3">Improved (Rendered Markdown)</h4>' +
    '<div class="bg-gray-50 dark:bg-gray-800 p-4 rounded-lg overflow-x-auto">';

  const canRender = typeof renderFunction === 'function';
  const improvedColumn = canRender
    ? improvedHeader + renderFunction(improved)
    : improvedHeader + '<pre class="whitespace-pre-wrap font-mono text-xs">' + escapeHtml(improved) + '</pre>';

  return '<div class="grid grid-cols-2 gap-6">' +
    originalColumn +
    improvedColumn +
    '</div>' +
    '</div>' +
    '</div>';
}