|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="UTF-8" /> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
|
<script> |
|
/**
 * Parse a string of HTML markup with the browser's DOMParser.
 *
 * @param {string} str - Markup to parse as "text/html".
 * @returns {Document} Whatever DOMParser.parseFromString produces for `str`.
 */
function strToHtml(str) {
  const htmlParser = new DOMParser();
  const parsedDocument = htmlParser.parseFromString(str, "text/html");
  return parsedDocument;
}
|
|
|
|
|
|
|
/**
 * Convert a table into an array of plain objects, one per body row.
 *
 * The first row supplies the property names (trimmed cell text); every
 * following row becomes an object mapping those names to the trimmed text of
 * the cell in the same column.
 *
 * @param {{rows: ArrayLike<{cells: ArrayLike<object>}>}} table - A table-like
 *   object exposing `rows`, each row exposing `cells` (e.g. HTMLTableElement).
 * @returns {Object<string, string>[]} One object per row after the header.
 *   Returns an empty array when the table has no rows at all. Columns missing
 *   from a short row are reported as empty strings.
 */
function tableToObj(table) {
  // A missing cell, or one with neither textContent nor innerText, reads as ""
  // instead of throwing on `.trim()` of undefined.
  const cellText = (cell) =>
    cell ? (cell.textContent || cell.innerText || "").trim() : "";

  const [headerRow, ...bodyRows] = Array.from(table.rows);
  if (!headerRow) {
    return [];
  }

  const propNames = Array.from(headerRow.cells, (cell) => cellText(cell));

  return bodyRows.map((row) => {
    const obj = {};
    propNames.forEach((name, column) => {
      obj[name] = cellText(row.cells[column]);
    });
    return obj;
  });
}
|
|
|
/**
 * Build one display string per GPU record.
 *
 * Each string is the record's "Product Name", followed by " - " and the part
 * of its "Memory" field that precedes the first comma.
 *
 * @param {Array<Object<string, string>>} gpus - Records with "Product Name"
 *   and "Memory" keys.
 * @returns {string[]} Formatted labels, in the same order as `gpus`.
 */
function formatGpu(gpus) {
  return gpus.map((gpu) => {
    const productName = gpu["Product Name"];
    const [leadingMemoryField] = gpu["Memory"].split(",");
    return `${productName} - ${leadingMemoryField}`;
  });
}
|
|
|
// Lookup table keyed by GGUF quantization type name. calculateSizes reads the
// selected name from the quantization dropdown and passes the matching number
// to modelSize as its `bpw` argument.
const gguf_quants = {
  // 1-bit and 2-bit i-quants
  IQ1_S: 1.56,
  IQ1_M: 1.75,
  IQ2_XXS: 2.06,
  IQ2_XS: 2.31,
  IQ2_S: 2.5,
  // 3-bit i-quants
  IQ3_XXS: 3.06,
  IQ3_XS: 3.3,
  IQ3_S: 3.44,
  IQ3_M: 3.66,
  // 2-bit and 3-bit k-quants
  Q2_K: 3.35,
  Q3_K_S: 3.5,
  Q3_K_M: 3.91,
  Q3_K_L: 4.27,
  // 4-bit
  IQ4_XS: 4.25,
  Q4_0: 4.55,
  Q4_K_S: 4.58,
  Q4_K_M: 4.85,
  // 5-bit
  Q5_0: 5.54,
  Q5_K_S: 5.54,
  Q5_K_M: 5.69,
  // 6-bit and 8-bit
  Q6_K: 6.59,
  Q8_0: 8.5,
};
|
|
|
/**
 * Fetch a Hugging Face model's config.json and attach a `parameters` field.
 *
 * The value stored in `parameters` is taken from the first source that yields
 * a usable number:
 *   1. `metadata.total_size / 2` from model.safetensors.index.json
 *   2. `metadata.total_size / 2` from pytorch_model.bin.index.json
 *   3. the `data-props` JSON embedded in the model's web page (fetched through
 *      corsproxy.io): `safetensors.total` on the "ModelSafetensorsParams"
 *      element, otherwise `model.safetensors.total` on the "ModelHeader"
 *      element.
 *
 * NOTE(review): the division by 2 presumably converts a byte count into a
 * parameter count for weights stored as 16-bit values — confirm.
 *
 * @param {string} hf_model - Repository id, e.g. "org/model-name".
 * @returns {Promise<object>} The parsed config.json with `parameters` added.
 * @throws {Error} When config.json cannot be fetched (non-2xx response) or
 *   when none of the sources above provides a numeric parameter count.
 */
async function modelConfig(hf_model) {
  const repoUrl = `https://huggingface.co/${hf_model}`;

  // Fetch and parse JSON, rejecting non-2xx responses so that an error body is
  // never mistaken for real data.
  const fetchJson = async (url) => {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`request for ${url} failed with HTTP status ${response.status}`);
    }
    return response.json();
  };

  // Read metadata.total_size from a weight index file in the repo and halve it.
  const sizeFromIndex = async (indexFile) => {
    const index = await fetchJson(`${repoUrl}/resolve/main/${indexFile}`);
    const totalSize = index && index.metadata ? index.metadata.total_size : undefined;
    const size = totalSize / 2;
    if (Number.isNaN(size)) {
      throw new Error(`no size in ${indexFile} metadata`);
    }
    return size;
  };

  // Last resort: read the parameter count from the model's HTML page.
  const sizeFromModelPage = async () => {
    const response = await fetch(`https://corsproxy.io/?${encodeURIComponent(repoUrl)}`);
    const pageMarkup = await response.text();
    // Parse into a separate, inert document rather than assigning third-party
    // markup to innerHTML of an element owned by the live page.
    const page = new DOMParser().parseFromString(pageMarkup, "text/html");

    const readProps = (selector) => {
      const element = page.querySelector(selector);
      const raw = element === null ? null : element.getAttribute("data-props");
      return raw === null ? null : JSON.parse(raw);
    };

    const paramsProps = readProps('div[data-target="ModelSafetensorsParams"]');
    if (paramsProps && paramsProps.safetensors && typeof paramsProps.safetensors.total === "number") {
      return paramsProps.safetensors.total;
    }

    const headerProps = readProps('div[data-target="ModelHeader"]');
    const headerModel = headerProps ? headerProps.model : undefined;
    if (headerModel && headerModel.safetensors && typeof headerModel.safetensors.total === "number") {
      return headerModel.safetensors.total;
    }

    throw new Error(`could not determine the parameter count for ${hf_model}`);
  };

  const config = await fetchJson(`${repoUrl}/raw/main/config.json`);

  let model_size = 0;
  try {
    model_size = await sizeFromIndex("model.safetensors.index.json");
  } catch {
    // Deliberate fallback: not every repository ships a safetensors index.
    try {
      model_size = await sizeFromIndex("pytorch_model.bin.index.json");
    } catch {
      // Deliberate fallback: single-file checkpoints have no index at all.
      model_size = await sizeFromModelPage();
    }
  }

  config.parameters = model_size;
  return config;
}
|
|
|
/**
 * Sum the sizes of the input tensors for one batch.
 *
 * The total is the left-to-right sum of six terms: `bsz`,
 * `hidden_size * bsz`, `bsz`, `context * bsz`, `context` and `bsz`.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; only `hidden_size` is read.
 * @param {number} [bsz=512] - Batch size.
 * @returns {number} The combined size of the six terms.
 */
function inputBuffer(context=8192, model_config, bsz=512) {
  const { hidden_size: hiddenSize } = model_config;

  const terms = [
    bsz,              // inp_tokens
    hiddenSize * bsz, // inp_embd
    bsz,              // inp_pos
    context * bsz,    // inp_KQ_mask
    context,          // inp_K_shift
    bsz,              // inp_sum
  ];

  // No initial value: the fold starts from the first term, matching a plain
  // left-to-right chain of `+`.
  return terms.reduce((runningTotal, term) => runningTotal + term);
}
|
|
|
/**
 * Estimate the compute buffer size for a given context length.
 *
 * The formula is `(context / 1024 * 2 + 0.75) * num_attention_heads * 1 MiB`.
 * It does not take the batch size into account: when `bsz` is anything other
 * than 512 the user is warned through `alert` and the same formula is used.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; only `num_attention_heads` is read.
 * @param {number} [bsz=512] - Batch size; only used to decide whether to warn.
 * @returns {number} The estimated compute buffer size.
 */
function computeBuffer(context=8192, model_config, bsz=512) {
  // Number() keeps numeric strings such as "512" from triggering the warning
  // while still avoiding loose `!=` comparison.
  if (Number(bsz) !== 512) {
    alert("batch size other than 512 is currently not supported for the compute buffer, using batch size 512 for compute buffer calculation, end result will be an overestimation");
  }

  const MIB = 1024 * 1024;
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * MIB;
}
|
|
|
/**
 * Estimate the size of the key/value cache.
 *
 * Computes `2 * (hidden_size / n_gqa) * num_hidden_layers * context` elements,
 * where `n_gqa = num_attention_heads / num_key_value_heads`, and multiplies by
 * `cache_bit / 8` per element.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; reads `num_attention_heads`,
 *   `num_key_value_heads` (optional), `hidden_size` and `num_hidden_layers`.
 * @param {number} [cache_bit=16] - Bits used per cached element.
 * @returns {number} The estimated cache size.
 */
function kvCache(context=8192, model_config, cache_bit=16) {
  const attentionHeads = model_config["num_attention_heads"];
  // Configs of models without grouped-query attention commonly omit
  // num_key_value_heads; treat that as one KV head per attention head instead
  // of letting the division produce NaN.
  const configuredKvHeads = model_config["num_key_value_heads"];
  const kvHeads = configuredKvHeads == null ? attentionHeads : configuredKvHeads;

  const n_gqa = attentionHeads / kvHeads;
  const n_embd_gqa = model_config["hidden_size"] / n_gqa;
  const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context);
  // Factor 2: one cache for keys and one for values.
  const size = 2 * n_elements;
  return size * (cache_bit / 8);
}
|
|
|
/**
 * Combine the three context-dependent estimates into one figure.
 *
 * Calls inputBuffer, kvCache and computeBuffer (in that order), adds their
 * results and rounds the sum to two decimal places.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config forwarded to the three helpers.
 * @param {number} [bsz=512] - Batch size, forwarded to inputBuffer and computeBuffer.
 * @param {number} [cache_bit=16] - Bits per cache element, forwarded to kvCache.
 * @returns {number} The rounded sum of the three estimates.
 */
function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
  const inputPart = inputBuffer(context, model_config, bsz);
  const cachePart = kvCache(context, model_config, cache_bit);
  const computePart = computeBuffer(context, model_config, bsz);

  const combined = inputPart + cachePart + computePart;
  const roundedText = combined.toFixed(2);
  return Number.parseFloat(roundedText);
}
|
|
|
/**
 * Estimate the size of the quantized model weights.
 *
 * Evaluates `parameters * bpw / 8` and rounds the result to two decimals.
 *
 * @param {object} model_config - Model config; only `parameters` is read.
 * @param {number} [bpw=4.5] - Multiplier applied per parameter before dividing
 *   by 8 (callers pass a value from the quantization table).
 * @returns {number} The rounded size estimate.
 */
function modelSize(model_config, bpw=4.5) {
  const rawSize = model_config["parameters"] * bpw / 8;
  const roundedText = rawSize.toFixed(2);
  return Number.parseFloat(roundedText);
}
|
|
|
/**
 * Read the form, estimate memory usage and write the results into the page.
 *
 * Inputs are read from the elements with ids `modelsearch`, `contextsize`,
 * `batchsize`, `quantsize` and `maxvram`. Results are written to
 * `resultmodel`, `resultcontext`, `resulttotal` (text plus a background
 * colour: green when more than 0.5 of headroom remains, yellow when any
 * remains, red otherwise), `layersize` and `layersoffload`. Sizes are divided
 * by 2**30 before being displayed.
 *
 * Any error (invalid input, failed model lookup, ...) is reported through
 * `alert`; the returned promise never rejects.
 *
 * @param {string} format - Accepted for backward compatibility. This page only
 *   supports GGUF, so the value is ignored and GGUF is always assumed.
 * @param {string} context_loc - "vram" when the context is kept in VRAM, in
 *   which case its size is subtracted from the VRAM available for layers.
 * @returns {Promise<void>}
 */
async function calculateSizes(format, context_loc) {
  const GIB = 2 ** 30;
  const byId = (id) => document.getElementById(id);

  // Parse a numeric form field and reject empty/garbage input up front rather
  // than letting NaN flow into every displayed result.
  const readNumber = (id, parse, label) => {
    const parsed = parse(byId(id).value);
    if (!Number.isFinite(parsed)) {
      throw new Error(`${label} must be a number`);
    }
    return parsed;
  };
  const parseDecimalInt = (text) => Number.parseInt(text, 10);

  try {
    const context = readNumber("contextsize", parseDecimalInt, "Context size");
    const bsz = readNumber("batchsize", parseDecimalInt, "Batch size");
    // parseFloat: a budget such as 7.5 must not be truncated to 7.
    const allocated_vram = readNumber("maxvram", Number.parseFloat, "Max allocated VRAM");
    const cache_bit = 16;

    // The button's text can carry surrounding whitespace; trim before lookup
    // and refuse names that are not real entries of the table.
    const quant = byId("quantsize").innerText.trim();
    const isKnownQuant = Object.prototype.hasOwnProperty.call(gguf_quants, quant);
    if (!isKnownQuant) {
      throw new Error(`Unknown quantization size "${quant}"`);
    }
    const bpw = gguf_quants[quant];

    const model_config = await modelConfig(byId("modelsearch").value);
    const num_layers = model_config["num_hidden_layers"];

    const model_size = modelSize(model_config, bpw);
    const context_size = contextSize(context, model_config, bsz, cache_bit);
    const total_size = (model_size + context_size) / GIB;

    byId("resultmodel").innerText = (model_size / GIB).toFixed(2);
    byId("resultcontext").innerText = (context_size / GIB).toFixed(2);

    const result_total_el = byId("resulttotal");
    result_total_el.innerText = total_size.toFixed(2);

    const headroom = allocated_vram - total_size;
    if (headroom > 0.5) {
      result_total_el.style.backgroundColor = "#bef264";
    } else if (headroom > 0) {
      result_total_el.style.backgroundColor = "#facc15";
    } else {
      result_total_el.style.backgroundColor = "#ef4444";
    }

    const layer_size = (model_size / GIB) / num_layers;
    byId("layersize").innerText = layer_size.toFixed(2);

    // Context kept in VRAM competes with the layers for the same budget.
    const context_dealloc = context_loc === "vram" ? (context_size / GIB) : 0;
    const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size);
    const layers_shown = layers_offload > num_layers ? num_layers : Math.max(0, layers_offload);

    byId("layersoffload").innerText = `${layers_shown}/${num_layers}`;
  } catch (e) {
    alert(e);
  }
}
|
</script> |
|
<link href="./styles.css" rel="stylesheet"> |
|
<title>Can I split it? - GGUF VRAM Calculator</title> |
|
</head> |
|
<body class="p-8"> |
|
<div x-data="{ format: 'gguf', context_loc: 'vram' }" class="flex flex-col max-h-screen items-center mt-16 gap-10"> |
|
<div style="text-align: center;"> |
|
<h1 class="text-xl font-semibold leading-6 text-gray-900"> |
|
GGUF Model, Can I split it? |
|
</h1> |
|
<h3 class="font-semibold leading-6 text-gray-900"> |
|
Based on <a href="https://huggingface.co/NyxKrage" style="color: blue;">NyxKrage</a>'s <a href="https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator" style="color: blue;">LLM VRAM calculator</a> |
|
</h3> |
|
</div> |
|
<div class="flex flex-col gap-10"> |
|
<div class="w-auto flex flex-col gap-4"> |
|
<div class="relative"> |
|
<label |
|
for="maxvram" |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Max Allocated VRAM |
|
</label> |
|
<input |
|
value="24" |
|
type="number" |
|
name="maxvram" |
|
id="maxvram" |
|
step="1" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
/> |
|
</div> |
|
|
|
|
|
|
|
|
|
<div class="flex flex-row gap-4 relative"> |
|
<label |
|
for="modelsearch"
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Model (unquantized) |
|
</label> |
|
<div |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
x-data="{ |
|
open: false, |
|
value: 'Nexusflow/Starling-LM-7B-beta', |
|
results: null, |
|
toggle() { |
|
if (this.open) { |
|
return this.close() |
|
} |
|
|
|
this.$refs.input.focus() |
|
|
|
this.open = true |
|
}, |
|
close(focusAfter) { |
|
if (! this.open) return |
|
|
|
this.open = false |
|
|
|
focusAfter && focusAfter.focus() |
|
} |
|
}" |
|
x-on:keydown.escape.prevent.stop="close($refs.input)" |
|
x-id="['model-typeahead']" |
|
class="relative" |
|
> |
|
|
|
<input |
|
id="modelsearch" |
|
x-ref="input" |
|
x-on:click="toggle()" |
|
@keypress.debounce.150ms="results = (await |
|
fetch('https://huggingface.co/api/quicksearch?type=model&q=' + |
|
encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));" |
|
:aria-expanded="open" |
|
:aria-controls="$id('model-typeahead')" |
|
x-model="value" |
|
class="flex justify-between items-center gap-2 w-full" |
|
/> |
|
|
|
|
|
<div |
|
x-ref="panel" |
|
x-show="open" |
|
x-transition.origin.top.left |
|
x-on:click.outside="close($refs.input)" |
|
:id="$id('model-typeahead')" |
|
style="display: none" |
|
class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" |
|
> |
|
<template x-for="result in results"> |
|
<a |
|
@click="value = result.id; close($refs.input)" |
|
x-text="result.id" |
|
class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" |
|
></a> |
|
</template> |
|
</div> |
|
</div> |
|
</div> |
|
|
|
|
|
|
|
<div class="relative"> |
|
<label |
|
for="contextsize" |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Context Size |
|
</label> |
|
<input |
|
value="8192" |
|
type="number" |
|
name="contextsize" |
|
id="contextsize" |
|
step="1024" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
/> |
|
</div> |
|
|
|
<div class="relative"> |
|
<label |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
>Context offloaded to</label |
|
> |
|
<fieldset |
|
x-model="context_loc" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
> |
|
<legend class="sr-only">Context location</legend> |
|
<div |
|
class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0" |
|
> |
|
<div class="flex items-center"> |
|
<input |
|
id="context-vram" |
|
name="context-allocation" |
|
type="radio" |
|
value="vram" |
|
checked |
|
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
|
/> |
|
<label |
|
for="context-vram" |
|
class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
|
>VRAM</label |
|
> |
|
</div> |
|
<div class="flex items-center"> |
|
<input |
|
id="context-ram" |
|
name="context-allocation" |
|
type="radio" |
|
value="ram" |
|
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
|
/> |
|
<label |
|
for="context-ram" |
|
class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
|
>RAM</label |
|
> |
|
</div> |
|
</div> |
|
</fieldset> |
|
</div> |
|
|
|
|
|
<div x-show="format === 'gguf'" class="relative"> |
|
<div class="flex flex-row gap-4"> |
|
<label |
|
for="quantsize"
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Quantization Size |
|
</label> |
|
<div |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
x-data="{ |
|
open: false, |
|
value: '', |
|
toggle() { |
|
if (this.open) { |
|
return this.close() |
|
} |
|
|
|
this.$refs.button.focus() |
|
|
|
this.open = true |
|
}, |
|
close(focusAfter) { |
|
if (! this.open) return |
|
|
|
this.open = false |
|
|
|
focusAfter && focusAfter.focus() |
|
} |
|
}" |
|
x-on:keydown.escape.prevent.stop="close($refs.button)" |
|
x-id="['dropdown-button']" |
|
class="relative" |
|
> |
|
|
|
<button |
|
x-ref="button" |
|
x-on:click="toggle()" |
|
:aria-expanded="open" |
|
:aria-controls="$id('dropdown-button')" |
|
type="button" |
|
id="quantsize" |
|
x-text="value.length === 0 ? 'Q4_K_S' : value" |
|
class="flex justify-between items-center gap-2 w-full" |
|
> |
|
Q4_K_S |
|
|
|
|
|
<svg |
|
xmlns="http://www.w3.org/2000/svg" |
|
class="h-5 w-5 text-gray-400" |
|
viewBox="0 0 20 20" |
|
fill="currentColor" |
|
> |
|
<path |
|
fill-rule="evenodd" |
|
d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" |
|
clip-rule="evenodd" |
|
/> |
|
</svg> |
|
</button> |
|
|
|
|
|
<div |
|
x-data="{ quants: [ |
|
'IQ1_S', |
|
'IQ1_M', |
|
'IQ2_XXS', |
|
'IQ2_XS', |
|
'IQ2_S', |
|
'IQ3_XXS', |
|
'IQ3_XS', |
|
'IQ3_S', |
|
'IQ3_M', |
|
'Q2_K', |
|
'Q3_K_S', |
|
'Q3_K_M', |
|
'Q3_K_L', |
|
'IQ4_XS', |
|
'Q4_0', |
|
'Q4_K_S', |
|
'Q4_K_M', |
|
'Q5_0', |
|
'Q5_K_S', |
|
'Q5_K_M', |
|
'Q6_K', |
|
'Q8_0' |
|
]}" |
|
x-ref="panel" |
|
x-show="open" |
|
x-transition.origin.top.left |
|
x-on:click.outside="close($refs.button)" |
|
:id="$id('dropdown-button')" |
|
style="display: none" |
|
class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" |
|
> |
|
<template x-for="quant in quants"> |
|
<a |
|
@click="value = quant; close($refs.button)" |
|
x-text="quant" |
|
class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" |
|
></a> |
|
</template> |
|
</div> |
|
</div> |
|
<div class="relative"> |
|
<label |
|
for="batchsize" |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Batch Size |
|
</label> |
|
<input |
|
value="512" |
|
type="number" |
|
step="128" |
|
id="batchsize" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
/> |
|
</div> |
|
</div> |
|
</div> |
|
<button |
|
type="button" |
|
class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600" |
|
@click="calculateSizes(format, context_loc)" |
|
> |
|
Submit |
|
</button> |
|
</div> |
|
<div class="w-auto flex flex-col gap-4"> |
|
<div class="relative"> |
|
<label |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Model Size (GB) |
|
</label> |
|
<div |
|
id="resultmodel" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
>4.20</div> |
|
</div> |
|
<div class="relative"> |
|
<label |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Context Size (GB) |
|
</label> |
|
<div |
|
id="resultcontext" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
>6.90</div> |
|
</div> |
|
<div class="relative"> |
|
<label |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Total Size (GB) |
|
</label> |
|
<div |
|
id="resulttotal" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
>420.69</div> |
|
</div> |
|
<div class="relative"> |
|
<label |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Layer size (GB) |
|
</label> |
|
<div |
|
id="layersize" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
>42.69</div> |
|
</div> |
|
<div class="relative"> |
|
<label |
|
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
|
> |
|
Layers offloaded to GPU (out of total) |
|
</label> |
|
<div |
|
id="layersoffload" |
|
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
|
>42</div> |
|
</div> |
|
</div> |
|
</div> |
|
</div> |
|
<script |
|
src="https://cdn.jsdelivr.net/npm/[email protected]/dist/cdn.min.js" |
|
></script> |
|
<script defer> |
|
calculateSizes("gguf", "vram") |
|
</script> |
|
</body> |
|
</html> |
|
|