Spaces:
Running
Running
| <html lang="en"> | |
| <!-- | |
| This is a JSON Dataset Explorer (Viewer and Editor) made by Concedo/LostRuins | |
| Please go to https://github.com/LostRuins/DatasetExplorer for updates | |
| This software is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line. | |
| -Concedo | |
| --> | |
| <head> | |
| <style> | |
| html { | |
| font-family: sans-serif; | |
| -ms-text-size-adjust: 100%; | |
| -webkit-text-size-adjust: 100%; | |
| font-size: 10px; | |
| -webkit-tap-highlight-color: rgba(0, 0, 0, 0); | |
| } | |
| * { | |
| -webkit-box-sizing: border-box; | |
| -moz-box-sizing: border-box; | |
| box-sizing: border-box | |
| } | |
| :after, | |
| :before { | |
| -webkit-box-sizing: border-box; | |
| -moz-box-sizing: border-box; | |
| box-sizing: border-box | |
| } | |
| .cb | |
| { | |
| width:18px; | |
| height:18px; | |
| } | |
| body { | |
| margin: 0; | |
| font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; | |
| font-size: 15px; | |
| line-height: 1.42857143; | |
| color: #333; | |
| background-color: #fff; | |
| } | |
| button, | |
| input, | |
| select, | |
| textarea { | |
| font-family: inherit; | |
| font-size: inherit; | |
| line-height: inherit; | |
| } | |
| a { | |
| color: #337ab7; | |
| text-decoration: none; | |
| } | |
| a:focus, | |
| a:hover { | |
| color: #23527c; | |
| text-decoration: underline; | |
| } | |
| a:focus { | |
| outline: 5px auto -webkit-focus-ring-color; | |
| outline-offset: -2px; | |
| } | |
| img { | |
| vertical-align: middle; | |
| } | |
| .nest1 | |
| { | |
| color:rgb(89, 58, 202); | |
| } | |
| .nest2 | |
| { | |
| color:rgb(197, 69, 69); | |
| } | |
| .unselectable { | |
| -webkit-touch-callout: none ; | |
| -webkit-user-select: none ; | |
| -khtml-user-select: none ; | |
| -moz-user-select: none ; | |
| -ms-user-select: none ; | |
| user-select: none ; | |
| } | |
| .navbar { | |
| background-color: #333; | |
| overflow: hidden; | |
| position: relative; | |
| top: 0; | |
| width: 100%; | |
| } | |
| /* Links inside the navbar */ | |
| .navbar a { | |
| float: left; | |
| display: block; | |
| color: white; | |
| text-align: center; | |
| padding: 14px 20px; | |
| text-decoration: none; | |
| } | |
| /* Change color on hover */ | |
| .navbar a:hover { | |
| background-color: #ddd; | |
| color: black; | |
| } | |
| </style> | |
| <title>Concedo JSON Dataset Explorer</title> | |
| <script> | |
// ---- Shared viewer state (read and written by the functions below) ----

// Every loaded dataset record; a record's position here is the index shown in the list.
var db = [];
// The list row element that was clicked most recently (kept so its highlight can be cleared).
var lastleftitem = null;
// The data-index of the record currently shown in the contents pane.
var lastselectedidx = 0;
// Indices into db of the rows currently listed, in display order.
var currdbidxs = [];
// Maps a db index to the checked state of that row's checkbox.
var selecteddb = {};
/**
 * Escapes a value so it can be inserted into innerHTML as visible text.
 *
 * HTML-special characters become entities, newlines become <br>, and tabs
 * become a non-breaking space so they are not collapsed by the browser.
 *
 * @param {*} unsafe - Any value; it is converted with toString().
 * @returns {string} The escaped markup, or "" for null, undefined and "".
 */
function formatHtml(unsafe)
{
    // Only genuinely empty values render as nothing. 0 and false are real
    // dataset values and must stay visible, so a plain falsy test is wrong here.
    if (unsafe === null || unsafe === undefined || unsafe === "") { return ""; }
    return unsafe.toString()
        // "&" must be escaped first, otherwise the entities added below
        // would themselves be re-escaped.
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#039;")
        .replace(/\n/g, '<br>')
        .replace(/\t/g, '&nbsp;');
}
/**
 * Backslash-escapes every regular-expression metacharacter in a string so the
 * result can be passed to `new RegExp` and match the text literally.
 *
 * @param {string} string - Literal text to search for.
 * @returns {string} The same text with regex metacharacters escaped.
 */
function escapeRegExp(string) {
    const metaChars = /[.*+?^${}()|[\]\\]/g;
    // "$&" re-inserts the matched character after the added backslash.
    const backslashed = '\\$&';
    return string.replace(metaChars, backslashed);
}
/**
 * Tells whether a value was constructed by this realm's Array.
 *
 * @param {*} obj - Value to test.
 * @returns {boolean} true when obj.constructor is Array; false for null/undefined.
 */
function isArr(obj) {
    // `!= null` rejects both null and undefined before .constructor is read
    return obj != null && obj.constructor === Array;
}
/**
 * Opens the list row that follows the most recently clicked one.
 * Does nothing when no row was clicked yet or when it has no next sibling.
 */
function nextitm()
{
    if (lastleftitem == null) { return; }
    const following = lastleftitem.nextSibling;
    if (!following) { return; }
    clickItem(following);
}
/**
 * Shows the record behind a list row in the contents pane and highlights the row.
 *
 * @param {Element} titleDiv - The list row; its data-index attribute selects the db record.
 */
function clickItem(titleDiv)
{
    const idx = titleDiv.getAttribute('data-index');
    const pane = document.getElementById('contents');
    lastselectedidx = idx;
    pane.innerHTML = renderPage(db[idx], true).text;
    pane.scrollTop = 0;

    // Move the highlight from the previously clicked row to this one.
    const previous = lastleftitem;
    if (previous) {
        previous.style.backgroundColor = null;
    }
    titleDiv.style.backgroundColor = '#d3d3d3';
    lastleftitem = titleDiv;
}
/**
 * Recounts the checked records in selecteddb and writes "<n> Selected"
 * into the #selcounter element.
 */
function countsel()
{
    let count = 0;
    // The loop variable is declared locally; an undeclared one would leak a
    // global and throws a ReferenceError in strict-mode / module code.
    for (const key in selecteddb) {
        if (selecteddb[key]) { ++count; }
    }
    document.getElementById("selcounter").innerText = `${count} Selected`;
}
/**
 * Copies the checked state of the row checkbox "itm<idx>" into selecteddb[idx].
 * Leaves selecteddb untouched when no such checkbox exists.
 *
 * @param {number|string} idx - db index of the record.
 */
function selitm(idx)
{
    const checkbox = document.getElementById(`itm${idx}`);
    if (!checkbox) { return; }
    selecteddb[idx] = checkbox.checked;
}
/**
 * Checks or unchecks a positional range of the currently listed rows.
 *
 * The range comes from the #selectamt text box: empty means every listed row,
 * "N" means the first N rows, and "A-B" means positions A (inclusive) to B
 * (exclusive). Both bounds are capped at the number of listed rows.
 *
 * @param {boolean} isSel - true to check the rows, false to uncheck them.
 */
function selrange(isSel)
{
    const total = currdbidxs.length;
    let from = 0;
    let to = total;
    const spec = document.getElementById("selectamt").value;
    if (spec != "") {
        if (spec.includes("-")) {
            const pieces = spec.split("-");
            // A malformed "A-B" (missing side, extra dashes) keeps the full range.
            if (pieces.length == 2 && pieces[0] != "" && pieces[1] != "") {
                from = Math.min(pieces[0], total);
                to = Math.min(pieces[1], total);
            }
        } else {
            from = 0;
            to = Math.min(total, spec);
        }
    }
    for (let pos = from; pos < to; ++pos) {
        const dbIndex = currdbidxs[pos];
        const checkbox = document.getElementById(`itm${dbIndex}`);
        if (!checkbox) { continue; }
        checkbox.checked = isSel;
        selitm(dbIndex);
    }
    countsel();
}
/**
 * Flips the checkbox of every currently listed row, records the new states
 * through selitm(), and refreshes the selection counter.
 */
function invertsel()
{
    for (const dbIndex of currdbidxs) {
        const checkbox = document.getElementById(`itm${dbIndex}`);
        if (!checkbox) { continue; }
        checkbox.checked = !checkbox.checked;
        selitm(dbIndex);
    }
    countsel();
}
/**
 * Counts every run of `n` consecutive whitespace-separated words in `text`.
 *
 * @param {string} text - Text to analyse.
 * @param {number} n - Number of words per sequence.
 * @returns {{ng: string, cnt: number}[]} One entry per distinct sequence,
 *     most frequent first; ties keep first-occurrence order.
 */
function ngramParser(text, n) {
    const words = text.split(/\s+/).filter(word => word.length > 0);
    // A Map is used instead of a plain object: with `{}` a word such as
    // "constructor" or "__proto__" collides with Object.prototype members
    // and its count becomes NaN or is lost.
    const counts = new Map();
    for (let i = 0; i + n <= words.length; i++) {
        const ngram = words.slice(i, i + n).join(' ');
        counts.set(ngram, (counts.get(ngram) || 0) + 1);
    }
    return Array.from(counts)
        .sort((a, b) => b[1] - a[1])
        .map(([ng, cnt]) => ({ ng: ng, cnt: cnt }));
}
/**
 * Shows the most frequent 1- to 5-word sequences across all selected records.
 *
 * The plain-text rendering of every record flagged in selecteddb is joined
 * and analysed with ngramParser(); up to 100 sequences per length are written
 * to the contents pane. An error message is shown when nothing is selected.
 */
function displayngram()
{
    const contentsDiv = document.getElementById('contents');
    const parts = [];
    // Loop variable declared locally; an undeclared one leaks a global and
    // throws a ReferenceError in strict-mode / module code.
    for (const key in selecteddb) {
        if (selecteddb[key]) {
            parts.push(renderPage(db[key], false).text);
        }
    }
    const fulltxt = parts.join('\n\n');
    if (fulltxt == "")
    {
        contentsDiv.innerText = "Error: You must select at least 1 item to calculate N-Grams!";
        contentsDiv.scrollTop = 0;
        return;
    }

    const sections = [];
    for (let size = 1; size <= 5; ++size)
    {
        const topEntries = ngramParser(fulltxt, size).slice(0, 100);
        const lines = topEntries.map((entry) => `${entry.cnt} - ${entry.ng}\n`);
        sections.push(`[ ${size} Word Sequences ]\n${lines.join("")}\n\n`);
    }
    contentsDiv.innerText = sections.join("");
    contentsDiv.scrollTop = 0;
}
/**
 * Saves the JSON typed into #editbox over the currently shown record and
 * re-renders it. Text that is not valid JSON is only logged to the console;
 * the record and the editor are left as they are.
 */
function endEditElem()
{
    const pane = document.getElementById('contents');
    if (lastselectedidx >= db.length) { return; }

    const editor = document.getElementById('editbox');
    if (!editor || !editor.value) { return; }

    try {
        const parsed = JSON.parse(editor.value);
        db[lastselectedidx] = parsed;
        pane.innerHTML = renderPage(db[lastselectedidx], true).text;
        pane.scrollTop = 0;
    } catch (e) {
        console.log("Error decoding text: " + e);
    }
}
/**
 * Replaces the contents pane with a textarea holding the pretty-printed JSON
 * of the currently shown record, plus a "Save Edits" button that calls
 * endEditElem(). Does nothing when the current index is past the end of db.
 */
function editElem()
{
    if (lastselectedidx >= db.length) { return; }

    const prettyJson = JSON.stringify(db[lastselectedidx], null, 2);
    const pane = document.getElementById('contents');
    pane.innerHTML = `<button class="unselectable" onclick="return endEditElem()" style="float:right;">Save Edits</button><textarea style="line-height:1.1;width:100%;height:calc(100% - 54px);resize: none;" id="editbox" placeholder="" rows="15"></textarea>`;
    // The JSON is assigned through .value rather than spliced into the markup.
    document.getElementById('editbox').value = prettyJson;
    pane.scrollTop = 0;
}
/**
 * Renders the own key/value pairs of one nested dictionary.
 *
 * @param {Object} fields - Dictionary to render.
 * @param {boolean} useHtml - true for escaped HTML, false for plain text.
 * @returns {{text: string, count: number}} Rendered text and number of pairs.
 */
function renderNestedFields(fields, useHtml) {
    let text = "";
    let count = 0;
    for (const name in fields) {
        if (!Object.prototype.hasOwnProperty.call(fields, name)) { continue; }
        const value = fields[name];
        count += 1;
        if (useHtml) {
            text += `<span class='nest2'>[[${formatHtml(name)}]]</span><br>${formatHtml(value)}<br><br>`;
        } else {
            text += `\n\n${value}`;
        }
    }
    return { text: text, count: count };
}

/**
 * Renders one dataset record either as HTML for the contents pane or as plain
 * text for filtering, counting and n-gram analysis.
 *
 * Top-level values are rendered directly. A nested dictionary, or a list of
 * dictionaries and strings, is flattened one level: each dictionary pair and
 * each top-level key counts as one node. List elements that are neither a
 * dictionary nor a string are not rendered.
 *
 * @param {*} item - The record (normally an object parsed from JSON).
 * @param {boolean} useHtml - true to produce escaped HTML with an "Edit Item"
 *     button, false to produce plain text.
 * @returns {{text: string, nodes: number, words: number}} `words` is only
 *     computed when a word filter is set or word counts are being displayed;
 *     otherwise it is 0.
 */
function renderPage(item, useHtml) {
    let tot = "";
    let nodecount = 0;
    let wordcount = 0;
    if (useHtml)
    {
        tot += `<button class="unselectable" onclick="return editElem()" style="float:right;">Edit Item</button>`;
    }
    for (const key in item) {
        if (!Object.prototype.hasOwnProperty.call(item, key)) { continue; }
        let val = item[key];
        let valmod = false; // true once val holds already-rendered (pre-escaped) text
        if (isArr(val)) {
            let listText = "";
            for (const arritem of val) {
                // null/undefined elements have no .constructor; skip them
                // instead of throwing and aborting the whole render.
                if (arritem == null) { continue; }
                if (arritem.constructor === Object) { //dict test
                    const nested = renderNestedFields(arritem, useHtml);
                    listText += nested.text;
                    nodecount += nested.count;
                }
                else if (arritem.constructor === String)
                {
                    listText += useHtml ? `<br>${formatHtml(arritem)}` : `\n${arritem}`;
                }
            }
            valmod = true;
            val = listText;
        }
        else if (val && val.constructor === Object) {
            const nested = renderNestedFields(val, useHtml);
            nodecount += nested.count;
            valmod = true;
            val = nested.text;
        }
        nodecount += 1;
        if (useHtml) {
            tot += `<span class='nest1'>[${formatHtml(key)}]</span><br>${valmod ? val : formatHtml(val)}<br><br>`;
        } else {
            tot += `\n\n${val}`;
        }
    }
    // Counting words is skipped unless something will actually use the number.
    if (document.getElementById('minwords').value != "" || document.getElementById('maxwords').value != "" || document.getElementById('showdetail').value == "words")
    {
        wordcount = countWords(tot);
    }
    return {"text":tot,"nodes":nodecount,"words":wordcount};
}
/**
 * Resets every filter control and the selection-range box: text inputs are
 * emptied and the two search checkboxes are unchecked.
 */
function clearInputs()
{
    const textInputIds = [
        'minlen', 'maxlen',
        'minnodes', 'maxnodes',
        'minwords', 'maxwords',
        'minmatches', 'maxmatches',
        'search', 'selectamt',
    ];
    const checkboxIds = ['casesensitive', 'useregex'];

    for (const id of textInputIds) {
        document.getElementById(id).value = "";
    }
    for (const id of checkboxIds) {
        document.getElementById(id).checked = false;
    }
}
/**
 * Rebuilds the left-hand list from db using the current filter controls.
 *
 * A record is listed when its plain-text rendering satisfies every filled-in
 * bound (characters, nodes, words) and, when a search is given, matches it at
 * least once and within the match-count bounds. Each row shows the record's
 * index and the metric chosen in #showdetail. Rows are optionally sorted by
 * that metric. currdbidxs is rewritten to the listed indices in display
 * order and #rescounter shows the number of results.
 *
 * An invalid regular expression is reported in the contents pane and yields
 * an empty list instead of an uncaught exception.
 */
function displayTitles() {
    const tocDiv = document.getElementById('toc');
    const contentsDiv = document.getElementById('contents');
    const fieldValue = (id) => document.getElementById(id).value;
    const minlen = fieldValue('minlen');
    const maxlen = fieldValue('maxlen');
    const minnodes = fieldValue('minnodes');
    const maxnodes = fieldValue('maxnodes');
    const minwords = fieldValue('minwords');
    const maxwords = fieldValue('maxwords');
    const minmatches = fieldValue('minmatches');
    const maxmatches = fieldValue('maxmatches');
    const showdetail = fieldValue('showdetail');
    let search = fieldValue('search');

    contentsDiv.innerText = "";
    tocDiv.innerText = ''; // Clear previous titles
    let ascindex = 0;
    currdbidxs = [];

    // Compile the search once, up front: it is identical for every record,
    // and a bad user pattern can be reported before any row is built.
    let searchExp = null;
    if (search != "") {
        if (!document.getElementById('useregex').checked) {
            search = escapeRegExp(search);
        }
        const flags = `g${document.getElementById('casesensitive').checked ? "" : "i"}`;
        try {
            searchExp = new RegExp(search, flags);
        } catch (e) {
            contentsDiv.innerText = `Error: Invalid search pattern.\n${e}`;
            document.getElementById('rescounter').innerText = `0 Results`;
            return;
        }
    }

    db.forEach((item, index) => {
        const rendered = renderPage(item, false);
        const len = rendered.text.length;
        let matchescount = 0;

        // The bounds are strings from text inputs; the relational operators
        // coerce them to numbers.
        if (minlen != "" && len < minlen) { return; }
        if (maxlen != "" && len > maxlen) { return; }
        if (minnodes != "" && rendered.nodes < minnodes) { return; }
        if (maxnodes != "" && rendered.nodes > maxnodes) { return; }
        if (minwords != "" && rendered.words < minwords) { return; }
        if (maxwords != "" && rendered.words > maxwords) { return; }

        if (searchExp) {
            // String.prototype.match restarts a global regex from index 0,
            // so sharing one RegExp object across records is safe.
            const found = rendered.text.match(searchExp);
            if (!found) { return; }
            matchescount = found.length;
            if (minmatches != "" && matchescount < minmatches) { return; }
            if (maxmatches != "" && matchescount > maxmatches) { return; }
        }

        let shownvalue = "";
        switch (showdetail)
        {
            case "chars":
                shownvalue = len;
                break;
            case "words":
                shownvalue = rendered.words;
                break;
            case "turns":
                shownvalue = rendered.nodes;
                break;
            case "matches":
                shownvalue = matchescount;
                break;
        }

        // The row element is only created for records that passed every filter.
        const titleDiv = document.createElement('div');
        titleDiv.innerHTML = `<input class="cb" type="checkbox" id="itm${index}" onclick="selitm(${index});countsel();" ${selecteddb[index]?"checked":""}> ${index} - ${shownvalue}`;
        titleDiv.style.cursor = 'pointer';
        titleDiv.setAttribute('data-value', shownvalue);
        titleDiv.setAttribute('data-index', index);
        titleDiv.addEventListener('click', function () {
            clickItem(titleDiv);
        });
        tocDiv.appendChild(titleDiv);
        currdbidxs.push(index);
        ascindex += 1;
    });

    const sorter = document.getElementById('sortorder').value;
    if (sorter == "1" || sorter == "2")
    {
        const direction = (sorter == "2") ? 1 : -1; // "2" = ascending, "1" = descending
        const metricOf = (row) => Number.parseInt(row.getAttribute('data-value'), 10);
        const childDivs = Array.from(tocDiv.children);
        childDivs.sort((a, b) => direction * (metricOf(a) - metricOf(b)));
        currdbidxs = [];
        childDivs.forEach(child => {
            // Re-appending an existing child moves it to the end, so
            // appending in sorted order reorders the list.
            tocDiv.appendChild(child);
            // Keep indices numeric, as in the unsorted path above.
            currdbidxs.push(Number(child.getAttribute('data-index')));
        });
    }
    document.getElementById('rescounter').innerText = `${ascindex} Results`;
}
// ---- Scratch state shared by the erase / prune / export actions ----

// Object URL of the most recent export download; revoked before a new one is created.
var tempfileurl = null;
// The records gathered from the checked rows by the most recent action.
var tempfileobj = [];
/**
 * Removes every checked, currently listed record from db, clears the
 * selection and rebuilds the list. Shows an error in the contents pane when
 * no row is checked.
 */
function erasesel()
{
    const contentsDiv = document.getElementById('contents');
    tempfileobj = [];
    for (const ci of currdbidxs)
    {
        const box = document.getElementById(`itm${ci}`);
        if (box && box.checked)
        {
            tempfileobj.push(db[ci]);
        }
    }
    if (tempfileobj.length == 0)
    {
        contentsDiv.innerText = "Error: You must select at least 1 item to erase!";
        contentsDiv.scrollTop = 0;
        return;
    }
    // Set membership uses the same equality as Array.includes but is O(1)
    // per lookup, so erasing stays linear on large datasets.
    const doomed = new Set(tempfileobj);
    db = db.filter((el) => !doomed.has(el));
    selecteddb = {};
    countsel();
    displayTitles();
}
/**
 * Keeps only the checked, currently listed records: db is replaced by them
 * (in list order), the selection is cleared and the list is rebuilt.
 * Shows an error in the contents pane when no row is checked.
 */
function prunesel()
{
    const pane = document.getElementById('contents');
    tempfileobj = [];
    currdbidxs.forEach((ci) => {
        const checkbox = document.getElementById(`itm${ci}`);
        if (checkbox && checkbox.checked) {
            tempfileobj.push(db[ci]);
        }
    });

    if (tempfileobj.length != 0) {
        db = tempfileobj;
        selecteddb = {};
        countsel();
        displayTitles();
        return;
    }
    pane.innerText = "Error: You must select at least 1 item to keep!";
    pane.scrollTop = 0;
}
/**
 * Downloads the checked, currently listed records as "selected_dataset.json".
 *
 * The records are serialised as pretty-printed JSON into a Blob, exposed via
 * an object URL on the hidden #tempfile link, and the link is clicked shortly
 * afterwards. The previous export's object URL is revoked first. Errors are
 * reported in the contents pane and logged.
 */
function exportsel()
{
    const pane = document.getElementById('contents');
    tempfileobj = [];
    for (const ci of currdbidxs) {
        const checkbox = document.getElementById(`itm${ci}`);
        if (checkbox && checkbox.checked) {
            tempfileobj.push(db[ci]);
        }
    }
    if (tempfileobj.length == 0) {
        pane.innerText = "Error: Please select at least 1 sample to export!";
        return;
    }

    const a = document.getElementById("tempfile");
    try {
        const payload = JSON.stringify(tempfileobj, null, 2);
        const file = new Blob([payload], { type: 'application/json' });
        if (tempfileurl) {
            window.URL.revokeObjectURL(tempfileurl);
        }
        tempfileurl = window.URL.createObjectURL(file);
        a.href = tempfileurl;
        a.target = '_blank';
        a.download = "selected_dataset.json";
        setTimeout(() => { a.click(); }, 20);
    } catch (e) {
        pane.innerText = "File could not be saved. It might be too large.\nTry splitting into multiple files instead.\n\n"+e;
        console.log("Error saving: " + e);
    }
}
// The File most recently chosen in the #pickfile input (null when cleared).
let lastfile = null;

/**
 * Loads the file held in `lastfile` into db according to the import controls.
 *
 * - #appendfile: "0" replaces db, "1" appends to it, anything else subtracts
 *   the loaded records from it (comparison by JSON text).
 * - #dedup "1": removes duplicate records afterwards (not applied when
 *   subtracting, which already de-duplicates).
 * - #elemrange: empty loads everything, "N" the first N records, "A-B" the
 *   records at positions A (inclusive) to B (exclusive). An unparseable range
 *   is treated as "everything".
 *
 * Files up to 999 MB are read whole and parsed as JSON, then as JSON Lines.
 * If that fails for a .json/.jsonl file, or the file is larger, the file is
 * streamed and every complete top-level `{...}` object becomes one record.
 * A JSON file whose root is a single object is loaded as one record.
 */
function loadfile()
{
    if (!lastfile)
    {
        return;
    }
    // Snapshot the file: clearfile() can null `lastfile` while the
    // asynchronous read below is still in flight.
    const file = lastfile;
    const importMode = document.getElementById('appendfile').value;
    const replace_existing = (importMode == "0");
    const merge_existing = (importMode == "1");
    const subtract_existing = (importMode == "2");
    const contentsDiv = document.getElementById('contents');
    const dedup = (document.getElementById('dedup').value == "1");
    const MAX_ELEMS = 999999999;
    const filesizemb = file.size / (1024 * 1024);
    let newdb = [];

    // Parses #elemrange into [elemstart, elemend) record positions.
    function readElemRange()
    {
        const elemrange = document.getElementById('elemrange').value;
        let elemstart = 0;
        let elemend = MAX_ELEMS;
        if (elemrange != "")
        {
            if (elemrange.includes("-"))
            {
                const svs = elemrange.split("-");
                if (svs.length == 2 && svs[0] != "" && svs[1] != "")
                {
                    elemstart = Math.min(svs[0], MAX_ELEMS);
                    elemend = Math.min(svs[1], MAX_ELEMS);
                }
            }
            else
            {
                elemend = Math.min(MAX_ELEMS, elemrange);
            }
        }
        // Non-numeric text yields NaN, which would silently load nothing.
        if (Number.isNaN(elemstart) || Number.isNaN(elemend))
        {
            elemstart = 0;
            elemend = MAX_ELEMS;
        }
        return { elemstart: elemstart, elemend: elemend };
    }

    // Applies newdb to db per the import mode, then refreshes the UI.
    function postLoad()
    {
        if (replace_existing)
        {
            db = newdb;
        }
        else if (merge_existing)
        {
            for (const record of newdb)
            {
                db.push(record);
            }
        }
        else
        {
            // Subtract: compare records by their JSON text.
            const remaining = new Set(db.map(item => JSON.stringify(item)));
            for (const record of newdb)
            {
                remaining.delete(JSON.stringify(record));
            }
            db = Array.from(remaining).map(item => JSON.parse(item));
        }
        if (dedup && !subtract_existing)
        {
            const unique = new Set(db.map(item => JSON.stringify(item)));
            db = Array.from(unique).map(item => JSON.parse(item));
        }
        selecteddb = {};
        countsel();
        displayTitles();
        contentsDiv.innerText = "Dataset File Loaded\nSelect an item to browse contents.";
    }

    // Reports a failure of the streaming loader instead of leaving an
    // unhandled promise rejection.
    function reportStreamFailure(err)
    {
        console.log(err);
        contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}\n${err}`;
    }

    // Incrementally reads the file and extracts every complete top-level
    // {...} object, so files too large to parse in one piece can be loaded.
    async function streamJsonFile(streamedFile) {
        const CHUNK_SIZE = 32 * 1024 * 1024; // process once ~32M characters have accumulated
        const decoder = new TextDecoder('utf-8');
        const reader = streamedFile.stream().getReader();
        const { elemstart, elemend } = readElemRange();
        let elemcounter = 0;
        let buffer = '';
        let accusize = 0;
        let accu = [];
        newdb = [];
        let lastloop = false;
        let errs = null;
        while (true) {
            const { done, value } = await reader.read();
            // Decode the chunk to a string. On the final read there is no
            // value; a plain decode() flushes any buffered partial character.
            const chunk = done ? decoder.decode() : decoder.decode(value, { stream: true });
            accu.push(chunk);
            if (done) {
                lastloop = true;
            }
            accusize += chunk.length;
            if (accusize < CHUNK_SIZE && !lastloop)
            {
                continue;
            }
            buffer += accu.join("");
            accu = [];
            accusize = 0;
            // Process each character in the buffer
            let objstart = 0;
            let lastCompleteEnd = -1; // index of the last '}' that closed a whole object
            const bl = buffer.length;
            let inObject = false;
            let inString = false;
            let escapeNextChar = false;
            let curlyBraceCount = 0;
            for (let i = 0; i < bl; i++) {
                const char = buffer[i];
                if (char === '\\' && inString) {
                    // Toggle so that an escaped backslash ("\\\\") does not
                    // escape the character after it.
                    escapeNextChar = !escapeNextChar;
                    continue;
                }
                if (char === '"' && !escapeNextChar) {
                    inString = !inString;
                }
                escapeNextChar = false;
                if (inString) {
                    continue;
                }
                if (char === '{') {
                    if (!inObject) {
                        inObject = true;
                        objstart = i;
                    }
                    curlyBraceCount++;
                } else if (char === '}' && inObject) {
                    // Ignoring a stray '}' outside any object keeps the
                    // depth from going negative.
                    curlyBraceCount--;
                }
                // When curlyBraceCount returns to 0, we have a complete object
                if (inObject && curlyBraceCount === 0) {
                    const jsonstr = buffer.slice(objstart, i + 1);
                    lastCompleteEnd = i;
                    inObject = false;
                    try {
                        const jsonObject = JSON.parse(jsonstr);
                        if (elemcounter >= elemstart && elemcounter < elemend)
                        {
                            newdb.push(jsonObject);
                        }
                        ++elemcounter;
                    } catch (ex)
                    {
                        errs = ex;
                        console.log(ex);
                    }
                }
            }
            // Drop everything up to the last completed object; any trailing
            // partial object stays for the next pass. Keyed on "something
            // completed" so an object starting at offset 0 is trimmed too
            // and never parsed twice.
            if (lastCompleteEnd >= 0)
            {
                buffer = buffer.slice(lastCompleteEnd + 1);
                if (newdb.length >= (elemend - elemstart))
                {
                    errs = `Loading stopped at ${newdb.length} items. Specified range exceeded.`;
                    lastloop = true;
                }
            }
            if (lastloop)
            {
                break;
            }
        }
        if (newdb.length > 0)
        {
            postLoad();
            if (errs)
            {
                contentsDiv.innerText = `Dataset partially loaded.\nInterruption occurred while loading dataset file: ${streamedFile.name}\n${errs}`;
            }
        } else {
            contentsDiv.innerText = `Error: Could not load dataset file: ${streamedFile.name}`;
        }
    }

    const reader = new FileReader();
    reader.onerror = function () {
        console.log(reader.error);
        contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}`;
        contentsDiv.scrollTop = 0;
    };
    reader.onload = function (e) {
        let loadok = false;
        try
        {
            //try normal json
            newdb = JSON.parse(e.target.result);
            loadok = true;
        }
        catch (err)
        {
            console.log(err);
            //try jsonl
            try
            {
                const content = e.target.result;
                if (content)
                {
                    const lines = content.split('\n');
                    newdb = lines.filter(line => line.trim()).map(line => JSON.parse(line));
                    loadok = true;
                }
            }
            catch (err2)
            {
                console.log(err2);
            }
        }
        if (loadok && !Array.isArray(newdb))
        {
            if (newdb !== null && typeof newdb === 'object')
            {
                // Root is a single object: load it as one record, as the
                // streaming loader would.
                newdb = [newdb];
            }
            else
            {
                // A bare number/string/null is not a dataset.
                loadok = false;
            }
        }
        if (loadok)
        {
            const { elemstart, elemend } = readElemRange();
            newdb = newdb.slice(elemstart, elemend);
            postLoad();
        }
        else
        {
            if (filesizemb > 0.01 && (file.type === "application/json" || file.name.endsWith(".jsonl")))
            {
                //try big json
                streamJsonFile(file).catch(reportStreamFailure);
                contentsDiv.innerText = `Attempting Fallback Stream-Load of Massive File:\n${file.name} (${filesizemb.toFixed(0)} mb).\nPlease wait, this may take a long time...`;
            } else {
                contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}`;
            }
            contentsDiv.scrollTop = 0;
        }
    };
    if (filesizemb > 999) //files larger than 1gb just use streamload
    {
        //try big json
        streamJsonFile(file).catch(reportStreamFailure);
        contentsDiv.innerText = `Attempting Stream-Load of Massive File:\n${file.name} (${filesizemb.toFixed(0)} mb)\nPlease wait, this may take a long time...`;
    } else {
        reader.readAsText(file);
    }
}
/**
 * Forgets the previously chosen file and empties the #pickfile input.
 */
function clearfile()
{
    const picker = document.getElementById('pickfile');
    lastfile = null;
    picker.value = "";
}
/**
 * Counts the words in a string, where a word is a maximal run of ASCII
 * letters, digits or underscores.
 *
 * @param {string} str - Text to count.
 * @returns {number} Number of words; 0 for an empty string.
 */
function countWords(str) {
    if (str == "") { return 0; }
    const found = str.match(/[a-zA-Z0-9_]+/g);
    // match() returns null, not an empty array, when nothing matches
    return found ? found.length : 0;
}
/**
 * Counts how many times a regular-expression source matches inside a string.
 * Matching is case-insensitive unless the #casesensitive checkbox is checked.
 *
 * @param {string} str - Text to search in.
 * @param {string} search - Regular-expression source (escape literal text first).
 * @returns {number} Number of matches.
 */
function countOccurances(str,search)
{
    const flags = `g${document.getElementById('casesensitive').checked?"":"i"}`;
    const exp = new RegExp(search, flags);
    // Match against the `str` parameter; match() returns null when nothing is found.
    return (str.match(exp) || []).length;
}
/**
 * Shows one of the four toolbar panels and hides the other three.
 *
 * @param {number|string} id - Panel number; the element shown is "panel<id>".
 */
function togglepanel(id)
{
    const panels = [1, 2, 3, 4].map((n) => document.getElementById(`panel${n}`));
    // Resolve every style object before changing any of them, so a missing
    // panel fails before anything has been hidden.
    const styles = panels.map((panel) => panel.style);
    for (const style of styles) {
        style.display = "none";
    }
    document.getElementById(`panel${id}`).style.display = "block";
}
// Once the page is parsed, load a dataset whenever a file is chosen in #pickfile.
document.addEventListener("DOMContentLoaded", () => {
    const picker = document.getElementById('pickfile');
    picker.addEventListener('change', (event) => {
        // Only the first chosen file is used.
        lastfile = event.target.files[0];
        loadfile();
    });
});
| </script> | |
| </head> | |
| <body> | |
| <div class="navbar"> | |
| <a href="#" onclick="togglepanel(1)">Importing</a> | |
| <a href="#" onclick="togglepanel(2)">Filtering</a> | |
| <a href="#" onclick="togglepanel(3)">Selecting</a> | |
| <a href="#" onclick="togglepanel(4)">Exporting</a> | |
| <a style="float: right;" href="https://github.com/LostRuins/DatasetExplorer">Concedo JSON Dataset Explorer</a> | |
| </div> | |
| <div style="display:flex;"> | |
| <div style="padding:4px"> | |
| <div id="panel1" style="padding:2px; display: block;"> | |
| <input onclick="clearfile()" style="width: 220px;" id="pickfile" name="file" type="file" accept=".json,.jsonl"> | |
| <a id="tempfile" href="#" style="display:none;"></a> | |
| <select id="appendfile"> | |
| <option value="0">Replace Existing</option> | |
| <option value="1">Merge with Existing</option> | |
| <option value="2">Subtract from Existing</option> | |
| </select> | |
| <select id="dedup"> | |
| <option value="0">Allow Duplicates</option> | |
| <option value="1">Remove Duplicates</option> | |
| </select> | |
| LoadRange: | |
| <input type="text" style="width:84px" placeholder="(Range: All)" value="" id="elemrange"> | |
| </div> | |
| <div id="panel2" style="display: none;"> | |
| <div style="padding:1px"> | |
| Chars: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minlen"> | |
| <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxlen"> | |
| Turns: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minnodes"> | |
| <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxnodes"> | |
| Words: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minwords"> | |
| <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxwords"> | |
| Matches: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minmatches"> | |
| <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxmatches"> | |
| </div> | |
| <div style="padding:2px"> | |
| <input type="text" style="width:200px" placeholder="(Search)" value="" id="search"> | |
| <input type="checkbox" id="useregex"> | |
| <label for="useregex"> Uses Regex</label> | |
| <input type="checkbox" id="casesensitive"> | |
| <label for="casesensitive"> Case Sensitive</label> | |
| </div> | |
| <div style="font-size: 15px;"> | |
| <select id="showdetail"> | |
| <option value="chars">Count Characters</option> | |
| <option value="words">Count Words</option> | |
| <option value="turns">Count Turns</option> | |
| <option value="matches">Count Matches</option> | |
| </select> | |
| <select id="sortorder" > | |
| <option value="0">No Sort</option> | |
| <option value="1">Sort Descending</option> | |
| <option value="2">Sort Ascending</option> | |
| </select> | |
| <span style="font-size: 15px;"> | |
| <button id="filter" onclick="displayTitles()">Filter</button> | |
| <button onclick="clearInputs()">Clear Filters</button> | |
| </span> | |
| </div> | |
| </div> | |
| <div id="panel3" style="padding:2px; display: none;"> | |
| <div> | |
| <input type="text" style="width:84px" placeholder="(Range: All)" value="" id="selectamt"> | |
| <button id="selectall" onclick="selrange(true)">Select Range</button> | |
| <button id="deselectall" onclick="selrange(false)">Deselect Range</button> | |
| <button id="invertsel" onclick="invertsel()">Invert Selection</button> | |
| </div><div style="padding: 2px; margin-top: 6px;"> | |
| <button onclick="erasesel()">Erase Selected</button> | |
| <button onclick="prunesel()">Prune To Selected</button> | |
| <button id="ngram" onclick="displayngram()">Calc. N-Gram</button> | |
| </div> | |
| </div> | |
| <div id="panel4" style="padding:2px; display: none;"> | |
| <button id="exportsel" onclick="exportsel()">JSON Export Selected</button> | |
| </div> | |
| </div> | |
| </div> | |
| <div style="display:flex;height:calc(100vh - 150px); font-size:20px; margin-top:2px"> | |
| <div style="height:100%; width:220px; padding:2px; border-right:1px solid #ccc;"> | |
| <div><span style="font-size: 13px;"> [ <span id="selcounter">0 Selected</span> / <span id="rescounter">0 Results</span> ] </span></div> | |
| <div id="toc" style="height:calc(100% - 60px); padding:4px; overflow:auto; width:100%;">No Items</div> | |
| <button id="nextitm" style="width: 100%; padding:2px" onclick="nextitm()">Next Item</button> | |
| </div> | |
| <div id="contents" style="height:100%; overflow:auto; width:calc(100% - 220px); padding:8px;">No File Opened</div> | |
| </div> | |
| </body> | |
| </html> | |