<!doctype html>
<html lang="en">
<!--
This is a JSON Dataset Explorer (Viewer and Editor) made by Concedo/LostRuins
Please go to https://github.com/LostRuins/DatasetExplorer for updates
This software is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
-Concedo
-->
<head>
<style>
  /* ---- Base reset ---- */
  html {
    font-family: sans-serif;
    -ms-text-size-adjust: 100%;
    -webkit-text-size-adjust: 100%;
    font-size: 10px;
    -webkit-tap-highlight-color: rgba(0, 0, 0, 0);
  }
  * {
    -webkit-box-sizing: border-box;
    -moz-box-sizing: border-box;
    box-sizing: border-box;
  }
  :after,
  :before {
    -webkit-box-sizing: border-box;
    -moz-box-sizing: border-box;
    box-sizing: border-box;
  }
  /* Checkbox shown at the start of every row in the item list */
  .cb {
    width: 18px;
    height: 18px;
  }
  body {
    margin: 0;
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
    font-size: 15px;
    line-height: 1.42857143;
    color: #333;
    background-color: #fff;
  }
  /* Form controls inherit the page typography */
  button,
  input,
  select,
  textarea {
    font-family: inherit;
    font-size: inherit;
    line-height: inherit;
  }
  a {
    color: #337ab7;
    text-decoration: none;
  }
  a:focus,
  a:hover {
    color: #23527c;
    text-decoration: underline;
  }
  a:focus {
    outline: 5px auto -webkit-focus-ring-color;
    outline-offset: -2px;
  }
  img {
    vertical-align: middle;
  }
  /* Key labels emitted by renderPage(): nest1 = top-level key, nest2 = nested key */
  .nest1 {
    color: rgb(89, 58, 202);
  }
  .nest2 {
    color: rgb(197, 69, 69);
  }
  /* Prevents text selection on the Edit / Save buttons */
  .unselectable {
    -webkit-touch-callout: none;
    -webkit-user-select: none;
    -khtml-user-select: none;
    -moz-user-select: none;
    -ms-user-select: none;
    user-select: none;
  }
  .navbar {
    background-color: #333;
    overflow: hidden;
    position: relative;
    top: 0;
    width: 100%;
  }
  /* Links inside the navbar */
  .navbar a {
    float: left;
    display: block;
    color: white;
    text-align: center;
    padding: 14px 20px;
    text-decoration: none;
  }
  /* Change color on hover */
  .navbar a:hover {
    background-color: #ddd;
    color: black;
  }
</style>
<title>Concedo JSON Dataset Explorer</title>
<script>
// ---- Shared viewer state ----
// Loaded dataset: one entry per item parsed from the imported file(s).
var db = [];
// Row element of the list entry that was opened last (null when none).
var lastleftitem = null;
// Index into db of the item currently shown in the contents pane.
var lastselectedidx = 0;
// db indexes of the rows currently listed, in displayed order.
var currdbidxs = [];
// Selection map: db index -> checkbox state recorded by selitm().
var selecteddb = {};
/**
 * Escapes a value so it can be inserted into innerHTML as plain text.
 *
 * HTML-significant characters become entities, newlines become <br> and tabs
 * become a non-breaking space entity.
 *
 * @param {*} unsafe Value to escape; converted with toString().
 * @returns {string} Escaped markup. null, undefined and "" yield "".
 */
function formatHtml(unsafe)
{
  // Only "no value" maps to an empty string; 0 and false are real values and
  // must stay visible instead of being swallowed by a truthiness test.
  if (unsafe === null || unsafe === undefined || unsafe === "") { return ""; }
  return unsafe.toString()
    .replace(/&/g, "&amp;") // must run first so later entities are not double-escaped
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#039;")
    .replace(/\n/g, '<br>')
    .replace(/\t/g, '&nbsp;');
}
/**
 * Backslash-escapes every regular-expression metacharacter in a string so the
 * result matches the input literally when passed to new RegExp().
 *
 * @param {string} string Literal text.
 * @returns {string} The text with . * + ? ^ $ { } ( ) | [ ] \ escaped.
 */
function escapeRegExp(string) {
  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
 * Tells whether a value is an array.
 *
 * @param {*} obj Value to test; null and undefined are allowed.
 * @returns {boolean} true only for arrays.
 */
function isArr(obj) {
  // Array.isArray already returns false for null/undefined and does not depend
  // on the value's (overridable) constructor property.
  return Array.isArray(obj);
}
/**
 * Opens the list entry that follows the most recently opened one.
 * Does nothing when no entry has been opened yet or when it has no next sibling.
 */
function nextitm()
{
  if (lastleftitem == null) { return; }
  const sib = lastleftitem.nextSibling;
  if (sib) {
    clickItem(sib);
  }
}
/**
 * Shows the dataset item belonging to a list row and highlights that row.
 *
 * @param {Element} titleDiv Row element carrying the db index in its
 *   data-index attribute.
 */
function clickItem(titleDiv)
{
  const index = titleDiv.getAttribute('data-index');
  const contentsDiv = document.getElementById('contents');

  // Remembered so editElem()/endEditElem() know which item is on screen.
  lastselectedidx = index;
  contentsDiv.innerHTML = renderPage(db[index], true).text;
  contentsDiv.scrollTop = 0;

  // Move the highlight from the previously opened row to this one.
  if (lastleftitem) {
    lastleftitem.style.backgroundColor = null;
  }
  lastleftitem = titleDiv;
  titleDiv.style.backgroundColor = '#d3d3d3';
}
/**
 * Recounts the entries of selecteddb that are marked true and writes
 * "<n> Selected" into the #selcounter element.
 */
function countsel()
{
  let count = 0;
  // The loop variable is declared locally; it used to leak as a global.
  for (const k in selecteddb) {
    if (selecteddb[k]) { ++count; }
  }
  document.getElementById("selcounter").innerText = `${count} Selected`;
}
/**
 * Copies the checked state of one row's checkbox into selecteddb.
 *
 * @param {number|string} idx db index; the checkbox is looked up as "itm<idx>".
 *   Nothing is recorded when no such checkbox exists.
 */
function selitm(idx)
{
  const box = document.getElementById(`itm${idx}`);
  if (!box) { return; }
  selecteddb[idx] = box.checked;
}
/**
 * Checks or unchecks a positional range of the rows currently listed.
 *
 * The range comes from the #selectamt text box:
 *   ""     -> every listed row
 *   "N"    -> rows 0 .. N-1
 *   "A-B"  -> rows A .. B-1
 * Both bounds are capped at the number of listed rows. Positions refer to the
 * displayed order (currdbidxs), not to db indexes.
 *
 * @param {boolean} isSel true to select the range, false to deselect it.
 */
function selrange(isSel)
{
  let selstart = 0;
  let selend = currdbidxs.length;
  const sv = document.getElementById("selectamt").value;
  if (sv != "") {
    if (sv.includes("-")) {
      const svs = sv.split("-");
      // Only a well-formed "A-B" narrows the range; anything else keeps "all".
      if (svs.length == 2 && svs[0] != "" && svs[1] != "") {
        selstart = Math.min(svs[0], selend);
        selend = Math.min(svs[1], selend);
      }
    } else {
      selend = Math.min(selend, sv);
    }
  }
  for (let i = selstart; i < selend; ++i) {
    const dbidx = currdbidxs[i];
    const box = document.getElementById(`itm${dbidx}`);
    if (box) {
      box.checked = isSel;
      selitm(dbidx);
    }
  }
  countsel();
}
/**
 * Flips the checkbox of every row currently listed, records the new states in
 * selecteddb and refreshes the selection counter.
 */
function invertsel()
{
  for (const dbidx of currdbidxs) {
    const box = document.getElementById(`itm${dbidx}`);
    if (box) {
      box.checked = !box.checked;
      selitm(dbidx);
    }
  }
  countsel();
}
/**
 * Counts every run of n consecutive whitespace-separated words in a text.
 *
 * @param {string} text Text to analyse.
 * @param {number} n Number of words per sequence.
 * @returns {{ng: string, cnt: number}[]} One entry per distinct sequence,
 *   ordered by descending count; sequences with equal counts keep the order
 *   in which they first appeared.
 */
function ngramParser(text, n) {
  const words = text.split(/\s+/).filter(word => word.length > 0);
  // A Map is used instead of a plain object so that words which are also
  // Object.prototype member names ("constructor", "toString", ...) are counted
  // like any other word instead of colliding with the inherited member.
  const counts = new Map();
  for (let i = 0; i <= words.length - n; i++) {
    const ngram = words.slice(i, i + n).join(' ');
    counts.set(ngram, (counts.get(ngram) || 0) + 1);
  }
  return Array.from(counts, ([ng, cnt]) => ({ ng: ng, cnt: cnt }))
    .sort((a, b) => b.cnt - a.cnt);
}
/**
 * Shows, for the selected items, the 100 most frequent word sequences of
 * length 1 to 5 in the contents pane.
 *
 * The plain-text rendering of every item marked in selecteddb is joined and
 * analysed as one text. An error message is shown when that text is empty.
 */
function displayngram()
{
  const contentsDiv = document.getElementById('contents');
  const parts = [];
  // The loop variable is declared locally; it used to leak as a global.
  for (const k in selecteddb) {
    if (selecteddb[k]) {
      parts.push(renderPage(db[k], false).text);
    }
  }
  const fulltxt = parts.join('\n\n');
  if (fulltxt == "") {
    contentsDiv.innerText = "Error: You must select at least 1 item to calculate N-Grams!";
    contentsDiv.scrollTop = 0;
    return;
  }

  let pageText = "";
  for (let i = 1; i <= 5; ++i) {
    const res = ngramParser(fulltxt, i);
    const lim = Math.min(res.length, 100);
    pageText += `[ ${i} Word Sequences ]\n`;
    for (let j = 0; j < lim; ++j) {
      pageText += `${res[j].cnt} - ${res[j].ng}\n`;
    }
    pageText += `\n\n`;
  }
  contentsDiv.innerText = pageText;
  contentsDiv.scrollTop = 0;
}
/**
 * Saves the JSON typed into #editbox as the new value of the item currently
 * shown (db[lastselectedidx]) and re-renders that item.
 *
 * Nothing happens when the index is out of range or the box is missing/empty.
 * When the text is not valid JSON the item is left untouched, the editor stays
 * open and the user is told why the save failed.
 */
function endEditElem()
{
  const contentsDiv = document.getElementById('contents');
  if (lastselectedidx >= db.length) { return; }

  const ebox = document.getElementById('editbox');
  if (!ebox || !ebox.value) { return; }

  try {
    db[lastselectedidx] = JSON.parse(ebox.value);
    contentsDiv.innerHTML = renderPage(db[lastselectedidx], true).text;
    contentsDiv.scrollTop = 0;
  } catch (e) {
    console.log("Error decoding text: " + e);
    // Previously the failure was only logged, so "Save Edits" appeared to do nothing.
    alert("Edits were not saved because the text is not valid JSON.\n\n" + e);
  }
}
/**
 * Replaces the contents pane with a "Save Edits" button and a textarea holding
 * the pretty-printed JSON of the item currently shown (db[lastselectedidx]).
 * Does nothing when that index is out of range.
 */
function editElem()
{
  if (lastselectedidx >= db.length) { return; }

  const itmstr = JSON.stringify(db[lastselectedidx], null, 2);
  const contentsDiv = document.getElementById('contents');
  contentsDiv.innerHTML = `<button class="unselectable" onclick="return endEditElem()" style="float:right;">Save Edits</button><textarea style="line-height:1.1;width:100%;height:calc(100% - 54px);resize: none;" id="editbox" placeholder="" rows="15"></textarea>`;
  // Assigned through .value (not the markup) so the JSON needs no HTML escaping.
  document.getElementById('editbox').value = itmstr;
  contentsDiv.scrollTop = 0;
}
/**
 * Renders one dataset item either as HTML for the contents pane or as plain
 * text for length/word/search statistics.
 *
 * Handling per top-level property value:
 *   - array: plain-object elements have each of their properties rendered,
 *     string elements are rendered as lines, other element types are skipped;
 *   - plain object: each of its properties is rendered;
 *   - anything else: rendered as a single value.
 * Every rendered property (top-level or nested) counts as one node.
 *
 * @param {*} item Dataset entry.
 * @param {boolean} useHtml true for escaped HTML (with an "Edit Item" button),
 *   false for plain text.
 * @returns {{text: string, nodes: number, words: number}} words is only
 *   computed when a word filter is set or words are the displayed detail;
 *   otherwise it is 0.
 */
function renderPage(item, useHtml) {
  let tot = "";
  let nodecount = 0;
  let wordcount = 0;
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  // Renders every own property of a plain object and counts each as a node.
  function renderDict(dict) {
    let out = "";
    for (const key3 in dict) {
      if (!hasOwn(dict, key3)) { continue; }
      const val3 = dict[key3];
      nodecount += 1;
      if (useHtml) {
        out += `<span class='nest2'>[[${formatHtml(key3)}]]</span><br>${formatHtml(val3)}<br><br>`;
      } else {
        out += `\n\n${val3}`;
      }
    }
    return out;
  }

  if (useHtml) {
    tot += `<button class="unselectable" onclick="return editElem()" style="float:right;">Edit Item</button>`;
  }

  for (const key in item) {
    if (!hasOwn(item, key)) { continue; }
    let val = item[key];
    let valmod = false; // true once val already holds rendered (pre-escaped) output

    if (isArr(val)) {
      let v2 = "";
      for (const arritem of val) {
        // null elements are legal JSON; reading .constructor on them used to throw.
        if (arritem === null || arritem === undefined) { continue; }
        if (arritem.constructor == Object) {
          v2 += renderDict(arritem);
        } else if (arritem.constructor == String) {
          v2 += useHtml ? `<br>${formatHtml(arritem)}` : `\n${arritem}`;
        }
      }
      valmod = true;
      val = v2;
    } else if (val && val.constructor == Object) {
      val = renderDict(val);
      valmod = true;
    }

    nodecount += 1;
    if (useHtml) {
      tot += `<span class='nest1'>[${formatHtml(key)}]</span><br>${valmod ? val : formatHtml(val)}<br><br>`;
    } else {
      tot += `\n\n${val}`;
    }
  }

  // Counting words is skipped unless something on the page needs the number.
  if (document.getElementById('minwords').value != "" || document.getElementById('maxwords').value != "" || document.getElementById('showdetail').value == "words") {
    wordcount = countWords(tot);
  }
  return { "text": tot, "nodes": nodecount, "words": wordcount };
}
/**
 * Resets the filter controls: empties the min/max boxes, the search box and
 * the selection-range box, and unticks the regex and case-sensitivity options.
 * The item list itself is not refreshed.
 */
function clearInputs()
{
  const textIds = [
    'minlen', 'maxlen',
    'minnodes', 'maxnodes',
    'minwords', 'maxwords',
    'minmatches', 'maxmatches',
    'search', 'selectamt'
  ];
  for (const id of textIds) {
    document.getElementById(id).value = "";
  }
  for (const id of ['casesensitive', 'useregex']) {
    document.getElementById(id).checked = false;
  }
}
/**
 * Rebuilds the item list (#toc) from db, applying the filter controls.
 *
 * An item is listed when its plain-text rendering satisfies every filled-in
 * bound (characters, nodes/turns, words) and, when a search is entered,
 * matches it at least once and within the match-count bounds. Each row shows
 * "<db index> - <detail>", where detail is chosen by #showdetail. Rows can be
 * sorted by that detail. currdbidxs is updated to the displayed order and
 * #rescounter to the number of rows. The contents pane is cleared.
 *
 * When the search box holds an invalid regular expression, an error is shown
 * and the list stays empty.
 */
function displayTitles() {
  const tocDiv = document.getElementById('toc');
  const contentsDiv = document.getElementById('contents');
  const field = (id) => document.getElementById(id).value;

  const minlen = field('minlen');
  const maxlen = field('maxlen');
  const minnodes = field('minnodes');
  const maxnodes = field('maxnodes');
  const minwords = field('minwords');
  const maxwords = field('maxwords');
  const minmatches = field('minmatches');
  const maxmatches = field('maxmatches');
  const detail = field('showdetail');
  let search = field('search');

  contentsDiv.innerText = "";
  tocDiv.innerText = ''; // Clear previous titles
  lastleftitem = null;   // the previously highlighted row no longer exists
  let ascindex = 0;
  currdbidxs = [];

  // Compile the search once instead of once per item, and report a bad
  // pattern instead of letting the exception escape half-way through.
  let searchExp = null;
  if (search != "") {
    if (!document.getElementById('useregex').checked) {
      search = escapeRegExp(search);
    }
    try {
      searchExp = new RegExp(search, `g${document.getElementById('casesensitive').checked ? "" : "i"}`);
    } catch (e) {
      contentsDiv.innerText = `Error: Invalid search expression.\n${e}`;
      document.getElementById('rescounter').innerText = `${ascindex} Results`;
      return;
    }
  }

  db.forEach((item, index) => {
    const rendered = renderPage(item, false);
    const len = rendered.text.length;
    let matchescount = 0;

    // Bounds are compared as entered; JS coerces the text to a number.
    if (minlen != "" && len < minlen) { return; }
    if (maxlen != "" && len > maxlen) { return; }
    if (minnodes != "" && rendered.nodes < minnodes) { return; }
    if (maxnodes != "" && rendered.nodes > maxnodes) { return; }
    if (minwords != "" && rendered.words < minwords) { return; }
    if (maxwords != "" && rendered.words > maxwords) { return; }

    if (searchExp) {
      // String.prototype.match with a global regex restarts at index 0 each
      // time, so sharing one RegExp object across items is safe.
      const m = rendered.text.match(searchExp);
      if (!m) { return; }
      matchescount = m.length;
      if (minmatches != "" && matchescount < minmatches) { return; }
      if (maxmatches != "" && matchescount > maxmatches) { return; }
    }

    let shownvalue = "";
    switch (detail) {
      case "chars":
        shownvalue = len;
        break;
      case "words":
        shownvalue = rendered.words;
        break;
      case "turns":
        shownvalue = rendered.nodes;
        break;
      case "matches":
        shownvalue = matchescount;
        break;
    }

    const titleDiv = document.createElement('div');
    titleDiv.innerHTML = `<input class="cb" type="checkbox" id="itm${index}" onclick="selitm(${index});countsel();" ${selecteddb[index]?"checked":""}> ${index} - ${shownvalue}`;
    titleDiv.style.cursor = 'pointer';
    titleDiv.setAttribute('data-value', shownvalue);
    titleDiv.setAttribute('data-index', index);
    titleDiv.addEventListener('click', function () {
      clickItem(titleDiv);
    });
    tocDiv.appendChild(titleDiv);
    currdbidxs.push(index);
    ascindex += 1;
  });

  const sorter = document.getElementById('sortorder').value;
  if (sorter == "1" || sorter == "2") {
    const direction = (sorter == "2") ? 1 : -1; // "2" = ascending, "1" = descending
    const shown = (el) => parseInt(el.getAttribute('data-value'));
    const childDivs = Array.from(tocDiv.children);
    childDivs.sort((a, b) => direction * (shown(a) - shown(b)));

    currdbidxs = [];
    childDivs.forEach(child => {
      tocDiv.appendChild(child); // re-appending moves the row to its sorted position
      // Stored as numbers, the same type the unsorted path stores.
      currdbidxs.push(Number(child.getAttribute('data-index')));
    });
  }
  document.getElementById('rescounter').innerText = `${ascindex} Results`;
}
// Object URL created by the last export; kept so exportsel() can revoke it
// before creating the next one.
var tempfileurl = null;
// Items gathered from the ticked rows by the last erase / prune / export action.
var tempfileobj = [];
/**
 * Removes from db every item whose row checkbox is ticked in the current
 * list, then clears the selection and rebuilds the list.
 * Shows an error in the contents pane when no row is ticked.
 */
function erasesel()
{
  const contentsDiv = document.getElementById('contents');
  tempfileobj = [];
  for (const ci of currdbidxs) {
    const box = document.getElementById(`itm${ci}`);
    if (box && box.checked) {
      tempfileobj.push(db[ci]);
    }
  }
  if (tempfileobj.length == 0) {
    contentsDiv.innerText = "Error: You must select at least 1 item to erase!";
    contentsDiv.scrollTop = 0;
    return;
  }
  // Set lookup keeps this linear; Array.includes inside filter was
  // O(items x selected). Both compare entries with SameValueZero semantics.
  const doomed = new Set(tempfileobj);
  db = db.filter((el) => !doomed.has(el));
  selecteddb = {};
  countsel();
  displayTitles();
}
/**
 * Reduces db to the items whose row checkbox is ticked in the current list
 * (in displayed order), then clears the selection and rebuilds the list.
 * Shows an error in the contents pane when no row is ticked.
 */
function prunesel()
{
  const contentsDiv = document.getElementById('contents');
  tempfileobj = [];
  for (const ci of currdbidxs) {
    const box = document.getElementById(`itm${ci}`);
    if (box && box.checked) {
      tempfileobj.push(db[ci]);
    }
  }
  if (tempfileobj.length == 0) {
    contentsDiv.innerText = "Error: You must select at least 1 item to keep!";
    contentsDiv.scrollTop = 0;
    return;
  }
  db = tempfileobj;
  selecteddb = {};
  countsel();
  displayTitles();
}
/**
 * Downloads the items whose row checkbox is ticked in the current list as
 * "selected_dataset.json" (pretty-printed JSON array, displayed order).
 *
 * The download is triggered through the hidden #tempfile link. Shows an error
 * in the contents pane when no row is ticked or when the file cannot be built.
 */
function exportsel()
{
  const contentsDiv = document.getElementById('contents');
  tempfileobj = [];
  for (const ci of currdbidxs) {
    const box = document.getElementById(`itm${ci}`);
    if (box && box.checked) {
      tempfileobj.push(db[ci]);
    }
  }
  if (tempfileobj.length == 0) {
    contentsDiv.innerText = "Error: Please select at least 1 sample to export!";
    return;
  }

  const a = document.getElementById("tempfile");
  try {
    const file = new Blob([JSON.stringify(tempfileobj, null, 2)], { type: 'application/json' });
    // Release the previous export's URL before replacing it.
    if (tempfileurl) {
      window.URL.revokeObjectURL(tempfileurl);
    }
    tempfileurl = window.URL.createObjectURL(file);
    a.href = tempfileurl;
    a.target = '_blank';
    a.download = "selected_dataset.json";
    setTimeout(function () { a.click(); }, 20);
  } catch (e) {
    contentsDiv.innerText = "File could not be saved. It might be too large.\nTry splitting into multiple files instead.\n\n" + e;
    console.log("Error saving: " + e);
  }
}
// File most recently chosen in the picker; reset to null by clearfile().
let lastfile = null;

/**
 * Loads the file held in lastfile into db.
 *
 * Parsing strategy:
 *   1. files above 999 MB are stream-parsed directly;
 *   2. otherwise the file is read as text and parsed as JSON, then as JSONL
 *      (one JSON value per non-blank line);
 *   3. if both fail (or the read itself fails) and the file looks like
 *      JSON/JSONL, it is stream-parsed as a fallback.
 * The stream parser extracts every top-level {...} object it finds.
 *
 * #elemrange limits which parsed elements are kept ("N" = first N,
 * "A-B" = elements A .. B-1). #appendfile chooses whether the result replaces
 * db, is appended to it, or is subtracted from it; #dedup optionally removes
 * duplicate items afterwards (compared by their JSON text).
 *
 * Does nothing when no file is pending.
 */
function loadfile()
{
  if (!lastfile) { return; }

  // Private reference: clearfile() can null lastfile while an asynchronous
  // read is still running, which used to break the final status messages.
  const file = lastfile;
  const MAX_ELEMS = 999999999;
  const mode = document.getElementById('appendfile').value;
  const replace_existing = (mode == "0");
  const merge_existing = (mode == "1");
  const subtract_existing = (mode == "2");
  const contentsDiv = document.getElementById('contents');
  const dedup = (document.getElementById('dedup').value == "1");
  let newdb = [];
  const filesizemb = file.size / (1024 * 1024);

  // Reads #elemrange and returns the half-open element range to keep.
  function readLoadRange() {
    const text = document.getElementById('elemrange').value;
    let start = 0;
    let end = MAX_ELEMS;
    if (text != "") {
      if (text.includes("-")) {
        const parts = text.split("-");
        if (parts.length == 2 && parts[0] != "" && parts[1] != "") {
          start = Math.min(parts[0], MAX_ELEMS);
          end = Math.min(parts[1], MAX_ELEMS);
        }
      } else {
        end = Math.min(MAX_ELEMS, text);
      }
    }
    // A non-numeric entry would otherwise silently load nothing; load everything instead.
    if (Number.isNaN(start) || Number.isNaN(end)) {
      start = 0;
      end = MAX_ELEMS;
    }
    return { start: start, end: end };
  }

  // Applies newdb to db according to the chosen mode and refreshes the page.
  function postLoad()
  {
    if (replace_existing) {
      db = newdb;
    } else if (merge_existing) {
      for (const entry of newdb) {
        db.push(entry);
      }
    } else {
      // Subtract: drop every existing item whose JSON text also occurs in the file.
      const set = new Set(db.map(item => JSON.stringify(item)));
      for (const entry of newdb) {
        set.delete(JSON.stringify(entry));
      }
      db = Array.from(set).map(item => JSON.parse(item));
    }
    if (dedup && !subtract_existing) {
      const set = new Set(db.map(item => JSON.stringify(item)));
      db = Array.from(set).map(item => JSON.parse(item));
    }
    selecteddb = {};
    countsel();
    displayTitles();
    contentsDiv.innerText = "Dataset File Loaded\nSelect an item to browse contents.";
  }

  // Incrementally scans the file for top-level {...} objects and loads them.
  async function streamJsonFile(file) {
    const CHUNK_SIZE = 32 * 1024 * 1024; // 32 MB chunks
    const decoder = new TextDecoder('utf-8');
    const reader = file.stream().getReader();
    const range = readLoadRange();
    const elemstart = range.start;
    const elemend = range.end;
    let elemcounter = 0;
    let buffer = '';
    let accusize = 0;
    let accu = [];
    newdb = [];
    let lastloop = false;
    let errs = null;

    while (true) {
      const { done, value } = await reader.read();
      // The final call (no arguments) flushes bytes of a character that was
      // split across the last two chunks.
      const chunk = done ? decoder.decode() : decoder.decode(value, { stream: true });
      accu.push(chunk);
      if (done) {
        lastloop = true;
      }
      accusize += chunk.length;
      // Keep accumulating until a full batch is available (or the file ended).
      if (accusize < CHUNK_SIZE && !lastloop) {
        continue;
      }
      buffer += accu.join("");
      accu = [];
      accusize = 0;

      // Scan the buffer character by character, tracking string and brace state.
      let objstart = 0;
      let consumed = 0; // index just past the last complete object
      const bl = buffer.length;
      let inObject = false;
      let inString = false;
      let escapeNextChar = false;
      let curlyBraceCount = 0;
      for (let i = 0; i < bl; i++) {
        const char = buffer[i];
        if (char === '\\' && inString) {
          escapeNextChar = !escapeNextChar;
          continue;
        }
        if (char === '"' && !escapeNextChar) {
          inString = !inString;
        }
        escapeNextChar = false;
        if (inString) {
          continue;
        }
        if (char === '{') {
          if (!inObject) {
            inObject = true;
            objstart = i;
          }
          curlyBraceCount++;
        } else if (char === '}' && inObject) {
          // A stray '}' outside any object must not unbalance the counter.
          curlyBraceCount--;
        }
        // When curlyBraceCount returns to 0, we have a complete object
        if (inObject && curlyBraceCount === 0) {
          const jsonstr = buffer.slice(objstart, i + 1);
          consumed = i + 1;
          inObject = false;
          try {
            const jsonObject = JSON.parse(jsonstr);
            if (elemcounter >= elemstart && elemcounter < elemend) {
              newdb.push(jsonObject);
            }
            ++elemcounter;
          } catch (ex) {
            errs = ex;
            console.log(ex);
          }
        }
      }

      if (consumed > 0) {
        // Drop everything already parsed; an unfinished trailing object stays
        // in the buffer and is re-scanned with the next batch. Tracking the
        // consumed length (rather than the start of the last object) also
        // trims correctly when that object began at index 0.
        buffer = buffer.slice(consumed);
        if (newdb.length >= (elemend - elemstart)) {
          errs = `Loading stopped at ${newdb.length} items. Specified range exceeded.`;
          if (!lastloop) {
            // Stop reading the rest of the file; failure to cancel is harmless.
            reader.cancel().catch(() => {});
          }
          lastloop = true;
        }
      }
      if (lastloop) {
        break;
      }
    }

    if (newdb.length > 0) {
      postLoad();
      if (errs) {
        contentsDiv.innerText = `Dataset partially loaded.\nInterruption occurred while loading dataset file: ${file.name}\n${errs}`;
      }
    } else {
      contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}`;
    }
  }

  // Starts a stream load and reports a rejected load instead of dropping it.
  function startStreamLoad(message) {
    contentsDiv.innerText = message;
    streamJsonFile(file).catch(function (err) {
      console.log(err);
      contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}\n${err}`;
    });
  }

  // Used when the whole-file parse is impossible: stream if plausible, else fail.
  function fallbackOrFail() {
    if (filesizemb > 0.01 && (file.type === "application/json" || file.name.endsWith(".jsonl"))) {
      //try big json
      startStreamLoad(`Attempting Fallback Stream-Load of Massive File:\n${file.name} (${filesizemb.toFixed(0)} mb).\nPlease wait, this may take a long time...`);
    } else {
      contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}`;
    }
    contentsDiv.scrollTop = 0;
  }

  const reader = new FileReader();
  reader.onload = function (e) {
    let loadok = false;
    try {
      //try normal json
      newdb = JSON.parse(e.target.result);
      loadok = true;
    } catch (err) {
      console.log(err);
      //try jsonl
      try {
        const content = e.target.result;
        if (content) {
          newdb = content.split('\n').filter(line => line.trim()).map(line => JSON.parse(line));
          loadok = true;
        }
      } catch (err2) {
        console.log(err2);
      }
    }

    if (loadok) {
      // A file holding a single top-level value (e.g. one object) is treated
      // as a one-item dataset; slicing a non-array used to throw here.
      if (!Array.isArray(newdb)) {
        newdb = [newdb];
      }
      const range = readLoadRange();
      newdb = newdb.slice(range.start, range.end);
      postLoad();
    } else {
      fallbackOrFail();
    }
  };
  // A failed read (e.g. file too large for one string) previously did nothing.
  reader.onerror = function () {
    console.log(reader.error);
    fallbackOrFail();
  };

  if (filesizemb > 999) //files larger than 1gb just use streamload
  {
    //try big json
    startStreamLoad(`Attempting Stream-Load of Massive File:\n${file.name} (${filesizemb.toFixed(0)} mb)\nPlease wait, this may take a long time...`);
  } else {
    reader.readAsText(file);
  }
}
/**
 * Forgets the pending file and empties the file picker, so that choosing the
 * same file again still fires the picker's change event.
 */
function clearfile()
{
  lastfile = null;
  document.getElementById('pickfile').value = "";
}
/**
 * Counts the words in a string, where a word is a run of ASCII letters,
 * digits or underscores.
 *
 * @param {string} str Text to scan.
 * @returns {number} Number of words; 0 for an empty string or no match.
 */
function countWords(str) {
  if (str == "") { return 0; }
  const words = str.match(/[a-zA-Z0-9_]+/g);
  return words ? words.length : 0;
}
/**
 * Counts how often a regular-expression pattern matches within a string.
 * Matching is case-insensitive unless the #casesensitive box is ticked.
 *
 * @param {string} str Text to search.
 * @param {string} search Regular-expression source (escape it first for a
 *   literal search).
 * @returns {number} Number of matches; 0 when there is none.
 */
function countOccurances(str, search)
{
  const exp = new RegExp(search, `g${document.getElementById('casesensitive').checked?"":"i"}`);
  // The text to search is the str parameter; the previous body referenced an
  // undefined name and always threw a ReferenceError.
  return (str.match(exp) || []).length;
}
/**
 * Shows one of the four tool panels (#panel1 .. #panel4) and hides the others.
 *
 * @param {number|string} id Number of the panel to show.
 */
function togglepanel(id)
{
  for (let i = 1; i <= 4; ++i) {
    document.getElementById(`panel${i}`).style.display = "none";
  }
  document.getElementById(`panel${id}`).style.display = "block";
}
// Once the page is parsed, load a dataset whenever a file is chosen in #pickfile.
document.addEventListener("DOMContentLoaded", function () {
  const fileInput = document.getElementById('pickfile');
  fileInput.addEventListener('change', function (event) {
    // files[0] is undefined when the dialog is cancelled; loadfile() then returns early.
    lastfile = event.target.files[0];
    loadfile();
  });
});
</script>
</head>
<body>
  <!-- Tab bar: each tab reveals one tool panel; "return false" keeps the "#" link from changing the URL -->
  <div class="navbar">
    <a href="#" onclick="togglepanel(1); return false;">Importing</a>
    <a href="#" onclick="togglepanel(2); return false;">Filtering</a>
    <a href="#" onclick="togglepanel(3); return false;">Selecting</a>
    <a href="#" onclick="togglepanel(4); return false;">Exporting</a>
    <a style="float: right;" href="https://github.com/LostRuins/DatasetExplorer">Concedo JSON Dataset Explorer</a>
  </div>
  <div style="display:flex;">
    <div style="padding:4px">
      <!-- Panel 1: importing -->
      <div id="panel1" style="padding:2px; display: block;">
        <input onclick="clearfile()" style="width: 220px;" id="pickfile" name="file" type="file" accept=".json,.jsonl" aria-label="Dataset file">
        <!-- Hidden link used by exportsel() to trigger the download -->
        <a id="tempfile" href="#" style="display:none;"></a>
        <select id="appendfile" aria-label="Load mode">
          <option value="0">Replace Existing</option>
          <option value="1">Merge with Existing</option>
          <option value="2">Subtract from Existing</option>
        </select>
        <select id="dedup" aria-label="Duplicate handling">
          <option value="0">Allow Duplicates</option>
          <option value="1">Remove Duplicates</option>
        </select>
        <label for="elemrange">LoadRange:</label>
        <input type="text" style="width:84px" placeholder="(Range: All)" value="" id="elemrange">
      </div>
      <!-- Panel 2: filtering -->
      <div id="panel2" style="display: none;">
        <div style="padding:1px">
          Chars: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minlen" aria-label="Minimum characters">
          <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxlen" aria-label="Maximum characters">
          Turns: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minnodes" aria-label="Minimum turns">
          <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxnodes" aria-label="Maximum turns">
          Words: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minwords" aria-label="Minimum words">
          <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxwords" aria-label="Maximum words">
          Matches: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minmatches" aria-label="Minimum matches">
          <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxmatches" aria-label="Maximum matches">
        </div>
        <div style="padding:2px">
          <input type="text" style="width:200px" placeholder="(Search)" value="" id="search" aria-label="Search text">
          <input type="checkbox" id="useregex">
          <label for="useregex"> Uses Regex</label>
          <input type="checkbox" id="casesensitive">
          <label for="casesensitive"> Case Sensitive</label>
        </div>
        <div style="font-size: 15px;">
          <select id="showdetail" aria-label="Detail shown per item">
            <option value="chars">Count Characters</option>
            <option value="words">Count Words</option>
            <option value="turns">Count Turns</option>
            <option value="matches">Count Matches</option>
          </select>
          <select id="sortorder" aria-label="Sort order">
            <option value="0">No Sort</option>
            <option value="1">Sort Descending</option>
            <option value="2">Sort Ascending</option>
          </select>
          <span style="font-size: 15px;">
            <button id="filter" onclick="displayTitles()">Filter</button>
            <button onclick="clearInputs()">Clear Filters</button>
          </span>
        </div>
      </div>
      <!-- Panel 3: selecting -->
      <div id="panel3" style="padding:2px; display: none;">
        <div>
          <input type="text" style="width:84px" placeholder="(Range: All)" value="" id="selectamt" aria-label="Selection range">
          <button id="selectall" onclick="selrange(true)">Select Range</button>
          <button id="deselectall" onclick="selrange(false)">Deselect Range</button>
          <button id="invertsel" onclick="invertsel()">Invert Selection</button>
        </div>
        <div style="padding: 2px; margin-top: 6px;">
          <button onclick="erasesel()">Erase Selected</button>
          <button onclick="prunesel()">Prune To Selected</button>
          <button id="ngram" onclick="displayngram()">Calc. N-Gram</button>
        </div>
      </div>
      <!-- Panel 4: exporting -->
      <div id="panel4" style="padding:2px; display: none;">
        <button id="exportsel" onclick="exportsel()">JSON Export Selected</button>
      </div>
    </div>
  </div>
  <!-- Main area: item list on the left, item contents on the right -->
  <div style="display:flex;height:calc(100vh - 150px); font-size:20px; margin-top:2px">
    <div style="height:100%; width:220px; padding:2px; border-right:1px solid #ccc;">
      <div><span style="font-size: 13px;"> [ <span id="selcounter">0 Selected</span> / <span id="rescounter">0 Results</span> ] </span></div>
      <div id="toc" style="height:calc(100% - 60px); padding:4px; overflow:auto; width:100%;">No Items</div>
      <button id="nextitm" style="width: 100%; padding:2px" onclick="nextitm()">Next Item</button>
    </div>
    <div id="contents" style="height:100%; overflow:auto; width:calc(100% - 220px); padding:8px;">No File Opened</div>
  </div>
</body>
</html>