Spaces:
Runtime error
Runtime error
nyxtestacc
committed on
Commit
•
c028b5a
1
Parent(s):
7001010
Add app
Browse files- .gitignore +1 -0
- app.py +101 -0
- context_sizes/Llama2 13B.csv +12 -0
- context_sizes/Llama2 20B.csv +12 -0
- context_sizes/Llama2 70B.csv +12 -0
- context_sizes/Llama2 7B.csv +12 -0
- context_sizes/Mistral 7B.csv +12 -0
- context_sizes/Mixtral 8x7B.csv +12 -0
- context_sizes/Solar 10.7B_11B.csv +12 -0
- context_sizes/Yi 34B.csv +12 -0
- models.csv +9 -0
- quants.csv +13 -0
- requirements.txt +2 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.venv
|
app.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
# Lookup tables, loaded once at import time from CSV files in the working
# directory.
#
# The columns are cast explicitly with ``astype`` because the previous
# ``applymap(str)`` / ``groupby(...).apply(float)`` chain relied on two APIs
# deprecated since pandas 2.1 (``DataFrame.applymap`` and calling ``float()``
# on a one-element Series). ``sort_index()`` reproduces the alphabetical key
# order that ``groupby`` used to yield, so iteration order of the resulting
# dicts is unchanged. Each key is expected to appear only once per file.

# Maps the "quant" column of quants.csv to its "bpw" value as a float.
quants = (
    pd.read_csv("quants.csv")
    .astype({"quant": str, "bpw": float})
    .set_index("quant")["bpw"]
    .sort_index()
    .to_dict()
)

# Maps the "model" column of models.csv to its "params" value as a float.
models = (
    pd.read_csv("models.csv")
    .astype({"model": str, "params": float})
    .set_index("model")["params"]
    .sort_index()
    .to_dict()
)
|
18 |
+
|
19 |
+
|
20 |
+
def context_sizes(model):
    """Load the two-column table stored for *model* under ``context_sizes/``.

    Any ``/`` in the model name is replaced by ``_`` to form the file name.
    The CSV is read without a header row and its columns are labelled
    ``context`` and ``size``.

    Args:
        model: Model name used to build the CSV file name.

    Returns:
        A ``pandas.DataFrame`` with the columns ``context`` and ``size``.
    """
    file_stem = model.replace("/", "_")
    csv_path = f"context_sizes/{file_stem}.csv"
    column_names = ["context", "size"]
    return pd.read_csv(csv_path, header=None, names=column_names)
|
26 |
+
|
27 |
+
|
28 |
+
def linear_regression(xs, ys) -> tuple[float, float]:
    """Fit ``y = a + b * x`` to the points by ordinary least squares.

    Both inputs are handled the same way, so plain lists and tuples work as
    well as pandas Series.

    Args:
        xs: Iterable of x values.
        ys: Iterable of y values, one per x value.

    Returns:
        ``(a, b)`` where ``a`` is the intercept and ``b`` is the slope.

    Raises:
        ValueError: If ``xs`` and ``ys`` differ in length, or if ``xs`` does
            not contain at least two distinct values (the fit is undefined).
    """
    # Materialise once so generators and Series are iterated consistently.
    xs = list(xs)
    ys = list(ys)
    n = len(xs)
    if n != len(ys):
        raise ValueError(
            f"xs and ys must have the same length, got {n} and {len(ys)}"
        )

    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_xy = sum(x * y for x, y in zip(xs, ys))
    sum_x2 = sum(x * x for x in xs)

    # Shared denominator of both closed-form coefficients; it is zero when
    # there are fewer than two points or every x is identical.
    denominator = n * sum_x2 - sum_x**2
    if denominator == 0:
        raise ValueError("xs must contain at least two distinct values")

    a = (sum_y * sum_x2 - sum_x * sum_xy) / denominator
    b = (n * sum_xy - sum_x * sum_y) / denominator
    return a, b
|
38 |
+
|
39 |
+
|
40 |
+
def calc_model_size(parameters: float, quant: float) -> float:
    """Return ``parameters * quant / 8`` rounded to two decimal places.

    With ``parameters`` given in billions and ``quant`` in bits per weight,
    the division by 8 (bits per byte) yields a size in gigabytes.

    Args:
        parameters: Parameter count of the model.
        quant: Bits per weight of the quantisation.

    Returns:
        The product divided by eight, rounded to two decimals.
    """
    total_bits = parameters * quant
    size = total_bits / 8
    return round(size, 2)
|
42 |
+
|
43 |
+
|
44 |
+
def calc_context_size(context, model) -> float:
    """Estimate the context size for *model* at *context* tokens.

    Loads the model's ``context``/``size`` table, fits a straight line through
    it, evaluates the line at ``context``, and divides the result by 1024.

    Args:
        context: Context length at which to evaluate the fitted line.
        model: Model name passed to ``context_sizes`` to select the table.

    Returns:
        The fitted value divided by 1024, rounded to two decimals.
        NOTE(review): the table sizes look like MiB, making this GiB - confirm.
    """
    table = context_sizes(model)
    intercept, slope = linear_regression(table["context"], table["size"])
    fitted = intercept + slope * context
    return round(fitted / 1024, 2)
|
48 |
+
|
49 |
+
|
50 |
+
def calc(model_base, context, quant_size):
    """Compute model size, context size and their total for the UI.

    Args:
        model_base: Key into the ``models`` table.
        context: Context length passed on to ``calc_context_size``.
        quant_size: Key into the ``quants`` table.

    Returns:
        ``(model_size, context_size, total)``, each rounded to two decimals.

    Raises:
        KeyError: If ``model_base`` or ``quant_size`` is not a known key.
    """
    model_params = models[model_base]
    quant_bpw = quants[quant_size]

    model_size = calc_model_size(model_params, quant_bpw)
    context_size = calc_context_size(context, model_base)

    # Both parts are already rounded, but adding two floats can reintroduce
    # binary representation noise (e.g. 0.1 + 0.2 -> 0.30000000000000004),
    # so the total is rounded again to match the two-decimal parts.
    total_size = round(model_size + context_size, 2)

    return model_size, context_size, total_size
|
58 |
+
|
59 |
+
|
60 |
+
title = "GGUF VRAM Calculator"

# Build the UI. Components are rendered in the order they are created, so the
# inputs and the button come first and the three result fields last.
with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as app:
    # Initial selections, plus the results shown before the first submit.
    default_model = "Mistral 7B"
    default_quant = "Q4_K_S"
    default_context = 8192
    default_model_size = calc_model_size(models[default_model], quants[default_quant])
    default_context_size = calc_context_size(default_context, default_model)
    # Round the sum so floating-point noise from adding two rounded values
    # never shows up in the "Total Size" field.
    default_total_size = round(default_model_size + default_context_size, 2)

    gr.Markdown(f"# {app.title}")
    model = gr.Dropdown(
        list(models.keys()), value=default_model, label="Select Model Base"
    )
    context = gr.Number(minimum=1, value=default_context, label="Context Size (Tokens)")
    quant = gr.Dropdown(
        list(quants.keys()), value=default_quant, label="Select Quant Size"
    )
    btn = gr.Button(value="Submit", variant="primary")
    # The output fields are created inline so they appear below the button;
    # their order matches the tuple returned by ``calc``.
    btn.click(
        calc,
        inputs=[
            model,
            context,
            quant,
        ],
        outputs=[
            gr.Number(
                label="Model Size (GB)",
                value=default_model_size,
            ),
            gr.Number(
                label="Context Size (GB)",
                value=default_context_size,
            ),
            gr.Number(
                label="Total Size (GB)",
                value=default_total_size,
            ),
        ],
    )

app.launch()
|
context_sizes/Llama2 13B.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
512,475
|
2 |
+
1024,912
|
3 |
+
2048,1794
|
4 |
+
3072,2676
|
5 |
+
4096,3558
|
6 |
+
6144,5322
|
7 |
+
8192,7086
|
8 |
+
12288,10614
|
9 |
+
16384,14142
|
10 |
+
24576,21198
|
11 |
+
32768,28254
|
12 |
+
65536,56508
|
context_sizes/Llama2 20B.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
512,695
|
2 |
+
1024,1352
|
3 |
+
2048,2674
|
4 |
+
3072,3996
|
5 |
+
4096,5318
|
6 |
+
6144,7962
|
7 |
+
8192,10606
|
8 |
+
12288,15894
|
9 |
+
16384,21182
|
10 |
+
24576,31782.52
|
11 |
+
32768,42335.26
|
12 |
+
65536,84670.52
|
context_sizes/Llama2 70B.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
512,305
|
2 |
+
1024,498
|
3 |
+
2048,948
|
4 |
+
3072,1398
|
5 |
+
4096,1848
|
6 |
+
6144,2748
|
7 |
+
8192,3648
|
8 |
+
12288,5448
|
9 |
+
16384,7248
|
10 |
+
24576,10848
|
11 |
+
32768,14448
|
12 |
+
65536,28896
|
context_sizes/Llama2 7B.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
512,326.5
|
2 |
+
1024,602
|
3 |
+
2048,1180
|
4 |
+
3072,1758
|
5 |
+
4096,2336
|
6 |
+
6144,3492
|
7 |
+
8192,4648
|
8 |
+
12288,6960
|
9 |
+
16384,9272
|
10 |
+
24576,13896
|
11 |
+
32768,18520
|
12 |
+
65536,37016
|
context_sizes/Mistral 7B.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
512,137
|
2 |
+
1024,218
|
3 |
+
2048,412
|
4 |
+
3072,606
|
5 |
+
4096,800
|
6 |
+
6144,1188
|
7 |
+
8192,1576
|
8 |
+
12288,2352
|
9 |
+
16384,3128
|
10 |
+
24576,4680
|
11 |
+
32768,6232
|
12 |
+
65536,12440
|
context_sizes/Mixtral 8x7B.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
512,181.72
|
2 |
+
1024,249.22
|
3 |
+
2048,443.22
|
4 |
+
3072,637.22
|
5 |
+
4096,831.22
|
6 |
+
6144,1219.22
|
7 |
+
8192,1607.22
|
8 |
+
12288,2383.22
|
9 |
+
16384,3159.22
|
10 |
+
24576,4711.22
|
11 |
+
32768,6263.22
|
12 |
+
65536,12471.22
|
context_sizes/Solar 10.7B_11B.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
512,172.19
|
2 |
+
1024,285.19
|
3 |
+
2048,543.19
|
4 |
+
3072,801.19
|
5 |
+
4096,1059.19
|
6 |
+
6144,1575.19
|
7 |
+
8192,2091.19
|
8 |
+
12288,3123.19
|
9 |
+
16384,4155.19
|
10 |
+
24576,6219.19
|
11 |
+
32768,8283.19
|
12 |
+
65536,16539.19
|
context_sizes/Yi 34B.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
512,262.19
|
2 |
+
1024,399.19
|
3 |
+
2048,753.19
|
4 |
+
3072,1107.19
|
5 |
+
4096,1461.19
|
6 |
+
6144,2169.19
|
7 |
+
8192,2877.19
|
8 |
+
12288,4293.19
|
9 |
+
16384,5709.19
|
10 |
+
24576,8541.19
|
11 |
+
32768,11373.19
|
12 |
+
65536,22701.19
|
models.csv
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model,params
|
2 |
+
Llama2 7B,7
|
3 |
+
Llama2 13B,13
|
4 |
+
Llama2 70B,70
|
5 |
+
Mistral 7B,7
|
6 |
+
Llama2 20B,20
|
7 |
+
Mixtral 8x7B,46.7
|
8 |
+
Yi 34B,34
|
9 |
+
Solar 10.7B/11B,10.7
|
quants.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
quant,bpw
|
2 |
+
Q2_K,3.35
|
3 |
+
Q3_K_S,3.5
|
4 |
+
Q3_K_M,3.91
|
5 |
+
Q3_K_L,4.27
|
6 |
+
Q4_0,4.55
|
7 |
+
Q4_K_S,4.58
|
8 |
+
Q4_K_M,4.85
|
9 |
+
Q5_0,5.54
|
10 |
+
Q5_K_S,5.54
|
11 |
+
Q5_K_M,5.69
|
12 |
+
Q6_K,6.59
|
13 |
+
Q8_0,8.5
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
gradio==4.15.0
|
2 |
+
pandas==2.2.0
|