Commit · 87b3d4b
Parent(s): 30f4531

add files
Files changed:

- .DS_Store (+0 -0)
- ColPali_+_Qwen2_VL.ipynb (+0 -0)
- Faster_Zero_shot_Object_Detection_with_Optimum.ipynb (+0 -0)
- Faster_foundation_models_with_torch_compile.ipynb (+344 -0)
- Fine_tune_Florence_2.ipynb (+0 -0)
- Fine_tune_PaliGemma.ipynb (+1846 -0)
- Fine_tune_SmolVLM2_on_Video.ipynb (+0 -0)
- Finetune_ColPali.ipynb (+0 -0)
- Fit_in_vision_models_using_quanto.ipynb (+0 -0)
- Gemma_3_for_Video_Understanding.ipynb (+0 -0)
- Gemma_3n_Video_Vibe_Tests.ipynb (+1489 -0)
- Idefics_FT.ipynb (+1866 -0)
- LICENSE (+201 -0)
- PaliGemma_DPO.ipynb (+0 -0)
- README.md (+26 -3)
- Reduce_any_model_to_fp16_using_🤗_Optimum_DETR.ipynb (+0 -0)
- ShieldGemma_2_for_Vision_LM_Safety.ipynb (+0 -0)
- Smol_VLM_FT.ipynb (+1271 -0)
- inference_gists/Aria_Inference.ipynb (+0 -0)
- inference_gists/ColQwen2.ipynb (+0 -0)
- inference_gists/IBM_Granite_Vision.ipynb (+0 -0)
- inference_gists/InternVL3_Gist.ipynb (+0 -0)
- knowledge_distillation.md (+186 -0)
- paligemma.py (+91 -0)
- smolvlm.py (+137 -0)
- train_idefics2.py (+132 -0)
.DS_Store
ADDED: Binary file (6.15 kB)
ColPali_+_Qwen2_VL.ipynb
ADDED: The diff for this file is too large to render. See raw diff.
Faster_Zero_shot_Object_Detection_with_Optimum.ipynb
ADDED: The diff for this file is too large to render. See raw diff.
Faster_foundation_models_with_torch_compile.ipynb
ADDED: @@ -0,0 +1,344 @@
(Notebook metadata: Colab notebook, Python 3 kernel, GPU accelerator of type L4.)
# Faster Foundation Models with `torch.compile`

## Introduction to `torch.compile()`

This guide aims to benchmark the inference speed-ups introduced with `torch.compile()`, with no reduction in model performance, for foundation models in 🤗 Transformers.

The most commonly used `torch.compile` modes are the following:

- "default" is the default mode, offering a good balance between performance and overhead.
- "reduce-overhead" reduces the Python overhead using CUDA graphs. It is useful for small batches but consumes a lot of memory, and as of now it only works for CUDA-only graphs that do not mutate their inputs.

If you have a lot of memory to spare, the best speed-up comes from `reduce-overhead`. How much speed-up you get depends on the model, so in this tutorial we will check the most commonly used foundation models.
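As a quick illustration of the two modes before the real benchmarks below, here is a minimal sketch on a toy module; the `nn.Linear` model and the input shapes are placeholders and not part of the notebook.

```python
import torch

# Toy stand-in for a real foundation model (placeholder, for illustration only).
model = torch.nn.Linear(16, 16).cuda().eval()

# "default" mode: balanced compile-time overhead vs. runtime speed-up.
compiled_default = torch.compile(model)

# "reduce-overhead" mode: uses CUDA graphs, helps most for small batches, uses more memory.
compiled_reduce = torch.compile(model, mode="reduce-overhead")

x = torch.randn(4, 16, device="cuda")
with torch.no_grad():
    print(compiled_default(x).shape, compiled_reduce(x).shape)
```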
## OWLv2

OWLv2 is a zero-shot object detection model released by Google Brain. We will load the base version.

Let's load the model and processor for OWLv2.
```python
from PIL import Image
import requests

url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg'
image = Image.open(requests.get(url, stream=True).raw)
```
```python
from transformers import AutoProcessor, Owlv2ForObjectDetection
import torch
import numpy as np

processor = AutoProcessor.from_pretrained("google/owlv2-base-patch16-ensemble")
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble").to("cuda")

texts = [["a photo of a bee", "a photo of a bird"]]
inputs = processor(text=texts, images=image, return_tensors="pt").to("cuda")
```
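Before benchmarking, it can help to see what a single forward pass returns. The sketch below is not part of the notebook; it assumes the processor's `post_process_object_detection` helper behaves as in current Transformers releases, and the 0.2 threshold is arbitrary.

```python
with torch.no_grad():
    outputs = model(**inputs)

# Map raw logits and normalized boxes back to scored detections in pixel coordinates.
target_sizes = torch.tensor([image.size[::-1]], device="cuda")
results = processor.post_process_object_detection(outputs, threshold=0.2, target_sizes=target_sizes)

for score, label, box in zip(results[0]["scores"], results[0]["labels"], results[0]["boxes"]):
    print(f"{texts[0][int(label)]}: {score.item():.2f} at {[round(c) for c in box.tolist()]}")
```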
We can now get to benchmarking. We will benchmark the model itself and the compiled model.
```python
starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
repetitions = 30
timings = np.zeros((repetitions, 1))

# GPU warm-up
for _ in range(10):
    _ = model(**inputs)

with torch.no_grad():
    for rep in range(repetitions):
        torch.cuda.synchronize()
        starter.record()
        output = model(**inputs)
        ender.record()
        torch.cuda.synchronize()
        curr_time = starter.elapsed_time(ender)
        timings[rep] = curr_time

mean_syn = np.sum(timings) / repetitions
print(mean_syn)
```
```
255.7331792195638
```
```python
starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
timings = np.zeros((repetitions, 1))

compiled_model = torch.compile(model, mode="reduce-overhead").to("cuda")

# warm-up runs, which also trigger the actual compilation
for _ in range(30):
    with torch.no_grad():
        _ = compiled_model(**inputs)

with torch.no_grad():
    for rep in range(repetitions):
        torch.cuda.synchronize()
        starter.record()
        output = compiled_model(**inputs)
        ender.record()
        torch.cuda.synchronize()
        curr_time = starter.elapsed_time(ender)
        timings[rep] = curr_time

mean_syn = np.sum(timings) / repetitions
print(mean_syn)
```
The run logs two warnings: TensorFloat32 matmul is available but not enabled (consider `torch.set_float32_matmul_precision('high')`), and CUDA graphs are skipped because a CPU device is involved in the OWLv2 box predictor (`box_bias = self.box_bias.to(feature_map.device)` in `modeling_owlv2.py`). It then prints:
```
154.6884775797526
```
We got nearly a 40 percent speed-up! You can also increase the batch size and see how much further speed-up you can get; below, the same benchmark is repeated with a batch of eight.
```python
texts = [["a photo of a bee", "a photo of a bird"] for _ in range(8)]
images = [image for _ in range(8)]
inputs = processor(text=texts, images=images, return_tensors="pt").to("cuda")  # batch of 8
```
Running the exact same timing loop as above (30 timed repetitions after a warm-up) on the batched `inputs` with the plain model prints:
```
269.3023401896159
```
And the same loop with `compiled_model` on the batched `inputs` prints:
```
159.77137603759766
```
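Since the same timing loop is reused several times, it can be handy to wrap it in a small helper before trying other batch sizes or models. This is a sketch, not part of the notebook; it simply reuses the CUDA-event pattern shown above.

```python
def benchmark(fn, inputs, warmup=10, repetitions=30):
    """Return the mean forward latency in milliseconds using CUDA events."""
    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)
    times = np.zeros(repetitions)
    with torch.no_grad():
        for _ in range(warmup):  # warm-up (and compilation, if fn is compiled)
            _ = fn(**inputs)
        for rep in range(repetitions):
            torch.cuda.synchronize()
            starter.record()
            _ = fn(**inputs)
            ender.record()
            torch.cuda.synchronize()
            times[rep] = starter.elapsed_time(ender)
    return times.mean()

print(benchmark(model, inputs), benchmark(compiled_model, inputs))
```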
Fine_tune_Florence_2.ipynb
ADDED: The diff for this file is too large to render. See raw diff.
Fine_tune_PaliGemma.ipynb
ADDED: @@ -0,0 +1,1846 @@
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/merveenoyan/smol-vision/blob/main/Fine_tune_PaliGemma.ipynb)
## PaliGemma Fine-tuning

In this notebook, we will fine-tune [pretrained PaliGemma](https://huggingface.co/google/paligemma2-3b-pt-448) on a small split of the [VQAv2](https://huggingface.co/datasets/HuggingFaceM4/VQAv2) dataset. Let's get started by installing the necessary libraries.
```python
!pip install -q -U datasets bitsandbytes peft git+https://github.com/huggingface/transformers.git
```
The install finishes with a harmless pip resolver note (gcsfs wants fsspec==2024.10.0 while fsspec 2024.9.0 is installed).
We will authenticate to access the model using `notebook_login()`.
```python
from huggingface_hub import notebook_login
notebook_login()
```
Let's load the dataset.
```python
from datasets import load_dataset

ds = load_dataset('merve/vqav2-small', split="validation")
```
```python
split_ds = ds.train_test_split(test_size=0.9)  # we'll use a very small split for demo
train_ds = split_ds["test"]
```
```python
train_ds
```
```
Dataset({
    features: ['multiple_choice_answer', 'question', 'image'],
    num_rows: 19292
})
```
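To get a feel for the data before writing the collator, you can inspect a single example; the field names come from the dataset features above, while the printed values will of course depend on the example.

```python
example = train_ds[0]
print(example["question"])                # natural-language question about the image
print(example["multiple_choice_answer"])  # short ground-truth answer
print(example["image"].size)              # PIL image, (width, height)
```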
Our dataset is a very general one, similar to many of the datasets PaliGemma was trained with. In this case we do not need to fine-tune the image encoder or the multimodal projector; we will only fine-tune the text decoder.
```python
from transformers import PaliGemmaProcessor

model_id = "google/paligemma2-3b-pt-224"  # or your favorite PaliGemma
```
```python
from transformers import PaliGemmaForConditionalGeneration
import torch

device = "cuda"
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)

# Freeze the vision encoder and the multimodal projector; only the text decoder will be trained.
for param in model.vision_tower.parameters():
    param.requires_grad = False

for param in model.multi_modal_projector.parameters():
    param.requires_grad = False
```
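To verify that only the text decoder remains trainable, you can count parameters by their `requires_grad` flag. This quick check is not in the notebook:

```python
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable:,} / {total:,} ({100 * trainable / total:.1f}%)")
```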
Alternatively, if you want to do LoRA or QLoRA fine-tuning, you can run the cells below to load the model with a LoRA adapter, either in full precision or quantized.
```python
from transformers import BitsAndBytesConfig, PaliGemmaForConditionalGeneration
from peft import get_peft_model, LoraConfig

bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, device_map="auto")  # , quantization_config=bnb_config
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# trainable params: 11,298,816 || all params: 2,934,634,224 || trainable%: 0.38501616002417344
```
```
trainable params: 11,876,352 || all params: 3,044,118,768 || trainable%: 0.3901
```
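If you want the quantized QLoRA variant instead, a minimal sketch is to pass the `bnb_config` defined above when loading the base model (keep in mind the note later in the notebook about an open QLoRA issue):

```python
model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id,
    quantization_config=bnb_config,  # load the base weights in 4-bit, compute in bfloat16
    device_map="auto",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
```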
We need to cast the tokens to the same dtype as the model, so we store the model's dtype in a variable.
```python
DTYPE = model.dtype
```
Load the processor to preprocess the dataset.
```python
processor = PaliGemmaProcessor.from_pretrained(model_id)
```
We will now preprocess our examples. We prepare a prompt template, put the text input inside it, and pass it together with the batch of images to the processor. The pad tokens and image tokens are set to -100 so the model ignores them, and we pass the preprocessed input as labels to make the model learn how to generate responses.
```python
import torch

image_token = processor.tokenizer.convert_tokens_to_ids("<image>")

def collate_fn(examples):
    texts = ["<image>answer en " + example["question"] for example in examples]
    labels = [example['multiple_choice_answer'] for example in examples]
    images = [example["image"].convert("RGB") for example in examples]
    tokens = processor(text=texts, images=images, suffix=labels,
                       return_tensors="pt", padding="longest")

    tokens = tokens.to(DTYPE).to(device)
    return tokens
```
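As a quick sanity check (not part of the notebook), you can run the collator on a couple of examples and look at the shapes and dtypes it produces; the exact set of keys depends on the processor version.

```python
batch = collate_fn([train_ds[0], train_ds[1]])
for key, value in batch.items():
    print(key, tuple(value.shape), value.dtype)
# expect input_ids, attention_mask, labels and pixel_values (plus token_type_ids, depending on version)
```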
We will now initialize the `TrainingArguments`.
```python
from transformers import TrainingArguments

args = TrainingArguments(
    num_train_epochs=2,
    remove_unused_columns=False,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    learning_rate=2e-5,
    weight_decay=1e-6,
    adam_beta2=0.999,
    logging_steps=100,
    optim="adamw_hf",  # you can use paged optimizers like paged_adamw_8bit for QLoRA
    save_strategy="steps",
    save_steps=1000,
    save_total_limit=1,
    output_dir="paligemma_vqav2",
    bf16=True,
    report_to=["tensorboard"],
    dataloader_pin_memory=False,
)
```
We can now start training.
```python
from transformers import Trainer

trainer = Trainer(
    model=model,
    train_dataset=train_ds,
    data_collator=collate_fn,
    args=args,
)
```
LoRA with a batch size of 2 works on an A100 in Colab. You can apply gradient accumulation (enabled in this notebook) to simulate larger batch sizes. There is currently an issue with QLoRA; we are investigating it and will fix it soon.
```python
trainer.train()
```
```python
trainer.push_to_hub()
```
You can find the steps to run inference [here](https://colab.research.google.com/drive/100IQcvMvGm9y--oelbLfI__eHCoz5Ser?usp=sharing).
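For a quick check right after training, a minimal inference sketch looks roughly like the following; the prompt format mirrors the collator above, the generation settings are arbitrary, and the linked notebook remains the reference for inference.

```python
prompt = "<image>answer en " + train_ds[0]["question"]
raw_image = train_ds[0]["image"].convert("RGB")

model_inputs = processor(text=prompt, images=raw_image, return_tensors="pt").to(DTYPE).to(device)
with torch.no_grad():
    generation = model.generate(**model_inputs, max_new_tokens=20, do_sample=False)
print(processor.decode(generation[0], skip_special_tokens=True))
```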
(The remainder of the file is notebook metadata: Colab with an A100 GPU, Python 3 kernel, and the Jupyter widget state for the login and progress widgets.)
|
1064 |
+
"_view_module_version": "1.2.0",
|
1065 |
+
"_view_name": "StyleView",
|
1066 |
+
"description_width": ""
|
1067 |
+
}
|
1068 |
+
},
|
1069 |
+
"65f10d2456cb4ee1963fac050e4c34f7": {
|
1070 |
+
"model_module": "@jupyter-widgets/controls",
|
1071 |
+
"model_name": "LabelModel",
|
1072 |
+
"model_module_version": "1.5.0",
|
1073 |
+
"state": {
|
1074 |
+
"_dom_classes": [],
|
1075 |
+
"_model_module": "@jupyter-widgets/controls",
|
1076 |
+
"_model_module_version": "1.5.0",
|
1077 |
+
"_model_name": "LabelModel",
|
1078 |
+
"_view_count": null,
|
1079 |
+
"_view_module": "@jupyter-widgets/controls",
|
1080 |
+
"_view_module_version": "1.5.0",
|
1081 |
+
"_view_name": "LabelView",
|
1082 |
+
"description": "",
|
1083 |
+
"description_tooltip": null,
|
1084 |
+
"layout": "IPY_MODEL_9335e48fe8ba4fe9b535b5ece1be6ff5",
|
1085 |
+
"placeholder": "",
|
1086 |
+
"style": "IPY_MODEL_80df5f3cd6c646808b09d99daed5bfd2",
|
1087 |
+
"value": "Connecting..."
|
1088 |
+
}
|
1089 |
+
},
|
1090 |
+
"9335e48fe8ba4fe9b535b5ece1be6ff5": {
|
1091 |
+
"model_module": "@jupyter-widgets/base",
|
1092 |
+
"model_name": "LayoutModel",
|
1093 |
+
"model_module_version": "1.2.0",
|
1094 |
+
"state": {
|
1095 |
+
"_model_module": "@jupyter-widgets/base",
|
1096 |
+
"_model_module_version": "1.2.0",
|
1097 |
+
"_model_name": "LayoutModel",
|
1098 |
+
"_view_count": null,
|
1099 |
+
"_view_module": "@jupyter-widgets/base",
|
1100 |
+
"_view_module_version": "1.2.0",
|
1101 |
+
"_view_name": "LayoutView",
|
1102 |
+
"align_content": null,
|
1103 |
+
"align_items": null,
|
1104 |
+
"align_self": null,
|
1105 |
+
"border": null,
|
1106 |
+
"bottom": null,
|
1107 |
+
"display": null,
|
1108 |
+
"flex": null,
|
1109 |
+
"flex_flow": null,
|
1110 |
+
"grid_area": null,
|
1111 |
+
"grid_auto_columns": null,
|
1112 |
+
"grid_auto_flow": null,
|
1113 |
+
"grid_auto_rows": null,
|
1114 |
+
"grid_column": null,
|
1115 |
+
"grid_gap": null,
|
1116 |
+
"grid_row": null,
|
1117 |
+
"grid_template_areas": null,
|
1118 |
+
"grid_template_columns": null,
|
1119 |
+
"grid_template_rows": null,
|
1120 |
+
"height": null,
|
1121 |
+
"justify_content": null,
|
1122 |
+
"justify_items": null,
|
1123 |
+
"left": null,
|
1124 |
+
"margin": null,
|
1125 |
+
"max_height": null,
|
1126 |
+
"max_width": null,
|
1127 |
+
"min_height": null,
|
1128 |
+
"min_width": null,
|
1129 |
+
"object_fit": null,
|
1130 |
+
"object_position": null,
|
1131 |
+
"order": null,
|
1132 |
+
"overflow": null,
|
1133 |
+
"overflow_x": null,
|
1134 |
+
"overflow_y": null,
|
1135 |
+
"padding": null,
|
1136 |
+
"right": null,
|
1137 |
+
"top": null,
|
1138 |
+
"visibility": null,
|
1139 |
+
"width": null
|
1140 |
+
}
|
1141 |
+
},
|
1142 |
+
"80df5f3cd6c646808b09d99daed5bfd2": {
|
1143 |
+
"model_module": "@jupyter-widgets/controls",
|
1144 |
+
"model_name": "DescriptionStyleModel",
|
1145 |
+
"model_module_version": "1.5.0",
|
1146 |
+
"state": {
|
1147 |
+
"_model_module": "@jupyter-widgets/controls",
|
1148 |
+
"_model_module_version": "1.5.0",
|
1149 |
+
"_model_name": "DescriptionStyleModel",
|
1150 |
+
"_view_count": null,
|
1151 |
+
"_view_module": "@jupyter-widgets/base",
|
1152 |
+
"_view_module_version": "1.2.0",
|
1153 |
+
"_view_name": "StyleView",
|
1154 |
+
"description_width": ""
|
1155 |
+
}
|
1156 |
+
},
|
1157 |
+
"8458933373264dbeb58d0b5ace4fd9c6": {
|
1158 |
+
"model_module": "@jupyter-widgets/controls",
|
1159 |
+
"model_name": "HBoxModel",
|
1160 |
+
"model_module_version": "1.5.0",
|
1161 |
+
"state": {
|
1162 |
+
"_dom_classes": [],
|
1163 |
+
"_model_module": "@jupyter-widgets/controls",
|
1164 |
+
"_model_module_version": "1.5.0",
|
1165 |
+
"_model_name": "HBoxModel",
|
1166 |
+
"_view_count": null,
|
1167 |
+
"_view_module": "@jupyter-widgets/controls",
|
1168 |
+
"_view_module_version": "1.5.0",
|
1169 |
+
"_view_name": "HBoxView",
|
1170 |
+
"box_style": "",
|
1171 |
+
"children": [
|
1172 |
+
"IPY_MODEL_714009484da745dc8a87e5066b939de2",
|
1173 |
+
"IPY_MODEL_e43e970ce8ba477e83081a4c7fea05f5",
|
1174 |
+
"IPY_MODEL_7138aa9537fc4b4f809e57665be87139"
|
1175 |
+
],
|
1176 |
+
"layout": "IPY_MODEL_46810cc7c7c54e31a65e609c386d86d9"
|
1177 |
+
}
|
1178 |
+
},
|
1179 |
+
"714009484da745dc8a87e5066b939de2": {
|
1180 |
+
"model_module": "@jupyter-widgets/controls",
|
1181 |
+
"model_name": "HTMLModel",
|
1182 |
+
"model_module_version": "1.5.0",
|
1183 |
+
"state": {
|
1184 |
+
"_dom_classes": [],
|
1185 |
+
"_model_module": "@jupyter-widgets/controls",
|
1186 |
+
"_model_module_version": "1.5.0",
|
1187 |
+
"_model_name": "HTMLModel",
|
1188 |
+
"_view_count": null,
|
1189 |
+
"_view_module": "@jupyter-widgets/controls",
|
1190 |
+
"_view_module_version": "1.5.0",
|
1191 |
+
"_view_name": "HTMLView",
|
1192 |
+
"description": "",
|
1193 |
+
"description_tooltip": null,
|
1194 |
+
"layout": "IPY_MODEL_cfed7deef0b74f4b9d160e9fdc2b138e",
|
1195 |
+
"placeholder": "",
|
1196 |
+
"style": "IPY_MODEL_23ddab24ac304751b3babfaeec9360eb",
|
1197 |
+
"value": "Loading checkpoint shards: 100%"
|
1198 |
+
}
|
1199 |
+
},
|
1200 |
+
"e43e970ce8ba477e83081a4c7fea05f5": {
|
1201 |
+
"model_module": "@jupyter-widgets/controls",
|
1202 |
+
"model_name": "FloatProgressModel",
|
1203 |
+
"model_module_version": "1.5.0",
|
1204 |
+
"state": {
|
1205 |
+
"_dom_classes": [],
|
1206 |
+
"_model_module": "@jupyter-widgets/controls",
|
1207 |
+
"_model_module_version": "1.5.0",
|
1208 |
+
"_model_name": "FloatProgressModel",
|
1209 |
+
"_view_count": null,
|
1210 |
+
"_view_module": "@jupyter-widgets/controls",
|
1211 |
+
"_view_module_version": "1.5.0",
|
1212 |
+
"_view_name": "ProgressView",
|
1213 |
+
"bar_style": "success",
|
1214 |
+
"description": "",
|
1215 |
+
"description_tooltip": null,
|
1216 |
+
"layout": "IPY_MODEL_79e87175ffb949bd8cddf4577210a42d",
|
1217 |
+
"max": 2,
|
1218 |
+
"min": 0,
|
1219 |
+
"orientation": "horizontal",
|
1220 |
+
"style": "IPY_MODEL_5aed84a20ac34f2b943d26d66decc88f",
|
1221 |
+
"value": 2
|
1222 |
+
}
|
1223 |
+
},
|
1224 |
+
"7138aa9537fc4b4f809e57665be87139": {
|
1225 |
+
"model_module": "@jupyter-widgets/controls",
|
1226 |
+
"model_name": "HTMLModel",
|
1227 |
+
"model_module_version": "1.5.0",
|
1228 |
+
"state": {
|
1229 |
+
"_dom_classes": [],
|
1230 |
+
"_model_module": "@jupyter-widgets/controls",
|
1231 |
+
"_model_module_version": "1.5.0",
|
1232 |
+
"_model_name": "HTMLModel",
|
1233 |
+
"_view_count": null,
|
1234 |
+
"_view_module": "@jupyter-widgets/controls",
|
1235 |
+
"_view_module_version": "1.5.0",
|
1236 |
+
"_view_name": "HTMLView",
|
1237 |
+
"description": "",
|
1238 |
+
"description_tooltip": null,
|
1239 |
+
"layout": "IPY_MODEL_3ca0e1427ac6477c9921929af7ff00d1",
|
1240 |
+
"placeholder": "",
|
1241 |
+
"style": "IPY_MODEL_a9a5503caf384b93bf987e5271a577d2",
|
1242 |
+
"value": " 2/2 [00:00<00:00, 2.83it/s]"
|
1243 |
+
}
|
1244 |
+
},
|
1245 |
+
"46810cc7c7c54e31a65e609c386d86d9": {
|
1246 |
+
"model_module": "@jupyter-widgets/base",
|
1247 |
+
"model_name": "LayoutModel",
|
1248 |
+
"model_module_version": "1.2.0",
|
1249 |
+
"state": {
|
1250 |
+
"_model_module": "@jupyter-widgets/base",
|
1251 |
+
"_model_module_version": "1.2.0",
|
1252 |
+
"_model_name": "LayoutModel",
|
1253 |
+
"_view_count": null,
|
1254 |
+
"_view_module": "@jupyter-widgets/base",
|
1255 |
+
"_view_module_version": "1.2.0",
|
1256 |
+
"_view_name": "LayoutView",
|
1257 |
+
"align_content": null,
|
1258 |
+
"align_items": null,
|
1259 |
+
"align_self": null,
|
1260 |
+
"border": null,
|
1261 |
+
"bottom": null,
|
1262 |
+
"display": null,
|
1263 |
+
"flex": null,
|
1264 |
+
"flex_flow": null,
|
1265 |
+
"grid_area": null,
|
1266 |
+
"grid_auto_columns": null,
|
1267 |
+
"grid_auto_flow": null,
|
1268 |
+
"grid_auto_rows": null,
|
1269 |
+
"grid_column": null,
|
1270 |
+
"grid_gap": null,
|
1271 |
+
"grid_row": null,
|
1272 |
+
"grid_template_areas": null,
|
1273 |
+
"grid_template_columns": null,
|
1274 |
+
"grid_template_rows": null,
|
1275 |
+
"height": null,
|
1276 |
+
"justify_content": null,
|
1277 |
+
"justify_items": null,
|
1278 |
+
"left": null,
|
1279 |
+
"margin": null,
|
1280 |
+
"max_height": null,
|
1281 |
+
"max_width": null,
|
1282 |
+
"min_height": null,
|
1283 |
+
"min_width": null,
|
1284 |
+
"object_fit": null,
|
1285 |
+
"object_position": null,
|
1286 |
+
"order": null,
|
1287 |
+
"overflow": null,
|
1288 |
+
"overflow_x": null,
|
1289 |
+
"overflow_y": null,
|
1290 |
+
"padding": null,
|
1291 |
+
"right": null,
|
1292 |
+
"top": null,
|
1293 |
+
"visibility": null,
|
1294 |
+
"width": null
|
1295 |
+
}
|
1296 |
+
},
|
1297 |
+
"cfed7deef0b74f4b9d160e9fdc2b138e": {
|
1298 |
+
"model_module": "@jupyter-widgets/base",
|
1299 |
+
"model_name": "LayoutModel",
|
1300 |
+
"model_module_version": "1.2.0",
|
1301 |
+
"state": {
|
1302 |
+
"_model_module": "@jupyter-widgets/base",
|
1303 |
+
"_model_module_version": "1.2.0",
|
1304 |
+
"_model_name": "LayoutModel",
|
1305 |
+
"_view_count": null,
|
1306 |
+
"_view_module": "@jupyter-widgets/base",
|
1307 |
+
"_view_module_version": "1.2.0",
|
1308 |
+
"_view_name": "LayoutView",
|
1309 |
+
"align_content": null,
|
1310 |
+
"align_items": null,
|
1311 |
+
"align_self": null,
|
1312 |
+
"border": null,
|
1313 |
+
"bottom": null,
|
1314 |
+
"display": null,
|
1315 |
+
"flex": null,
|
1316 |
+
"flex_flow": null,
|
1317 |
+
"grid_area": null,
|
1318 |
+
"grid_auto_columns": null,
|
1319 |
+
"grid_auto_flow": null,
|
1320 |
+
"grid_auto_rows": null,
|
1321 |
+
"grid_column": null,
|
1322 |
+
"grid_gap": null,
|
1323 |
+
"grid_row": null,
|
1324 |
+
"grid_template_areas": null,
|
1325 |
+
"grid_template_columns": null,
|
1326 |
+
"grid_template_rows": null,
|
1327 |
+
"height": null,
|
1328 |
+
"justify_content": null,
|
1329 |
+
"justify_items": null,
|
1330 |
+
"left": null,
|
1331 |
+
"margin": null,
|
1332 |
+
"max_height": null,
|
1333 |
+
"max_width": null,
|
1334 |
+
"min_height": null,
|
1335 |
+
"min_width": null,
|
1336 |
+
"object_fit": null,
|
1337 |
+
"object_position": null,
|
1338 |
+
"order": null,
|
1339 |
+
"overflow": null,
|
1340 |
+
"overflow_x": null,
|
1341 |
+
"overflow_y": null,
|
1342 |
+
"padding": null,
|
1343 |
+
"right": null,
|
1344 |
+
"top": null,
|
1345 |
+
"visibility": null,
|
1346 |
+
"width": null
|
1347 |
+
}
|
1348 |
+
},
|
1349 |
+
"23ddab24ac304751b3babfaeec9360eb": {
|
1350 |
+
"model_module": "@jupyter-widgets/controls",
|
1351 |
+
"model_name": "DescriptionStyleModel",
|
1352 |
+
"model_module_version": "1.5.0",
|
1353 |
+
"state": {
|
1354 |
+
"_model_module": "@jupyter-widgets/controls",
|
1355 |
+
"_model_module_version": "1.5.0",
|
1356 |
+
"_model_name": "DescriptionStyleModel",
|
1357 |
+
"_view_count": null,
|
1358 |
+
"_view_module": "@jupyter-widgets/base",
|
1359 |
+
"_view_module_version": "1.2.0",
|
1360 |
+
"_view_name": "StyleView",
|
1361 |
+
"description_width": ""
|
1362 |
+
}
|
1363 |
+
},
|
1364 |
+
"79e87175ffb949bd8cddf4577210a42d": {
|
1365 |
+
"model_module": "@jupyter-widgets/base",
|
1366 |
+
"model_name": "LayoutModel",
|
1367 |
+
"model_module_version": "1.2.0",
|
1368 |
+
"state": {
|
1369 |
+
"_model_module": "@jupyter-widgets/base",
|
1370 |
+
"_model_module_version": "1.2.0",
|
1371 |
+
"_model_name": "LayoutModel",
|
1372 |
+
"_view_count": null,
|
1373 |
+
"_view_module": "@jupyter-widgets/base",
|
1374 |
+
"_view_module_version": "1.2.0",
|
1375 |
+
"_view_name": "LayoutView",
|
1376 |
+
"align_content": null,
|
1377 |
+
"align_items": null,
|
1378 |
+
"align_self": null,
|
1379 |
+
"border": null,
|
1380 |
+
"bottom": null,
|
1381 |
+
"display": null,
|
1382 |
+
"flex": null,
|
1383 |
+
"flex_flow": null,
|
1384 |
+
"grid_area": null,
|
1385 |
+
"grid_auto_columns": null,
|
1386 |
+
"grid_auto_flow": null,
|
1387 |
+
"grid_auto_rows": null,
|
1388 |
+
"grid_column": null,
|
1389 |
+
"grid_gap": null,
|
1390 |
+
"grid_row": null,
|
1391 |
+
"grid_template_areas": null,
|
1392 |
+
"grid_template_columns": null,
|
1393 |
+
"grid_template_rows": null,
|
1394 |
+
"height": null,
|
1395 |
+
"justify_content": null,
|
1396 |
+
"justify_items": null,
|
1397 |
+
"left": null,
|
1398 |
+
"margin": null,
|
1399 |
+
"max_height": null,
|
1400 |
+
"max_width": null,
|
1401 |
+
"min_height": null,
|
1402 |
+
"min_width": null,
|
1403 |
+
"object_fit": null,
|
1404 |
+
"object_position": null,
|
1405 |
+
"order": null,
|
1406 |
+
"overflow": null,
|
1407 |
+
"overflow_x": null,
|
1408 |
+
"overflow_y": null,
|
1409 |
+
"padding": null,
|
1410 |
+
"right": null,
|
1411 |
+
"top": null,
|
1412 |
+
"visibility": null,
|
1413 |
+
"width": null
|
1414 |
+
}
|
1415 |
+
},
|
1416 |
+
"5aed84a20ac34f2b943d26d66decc88f": {
|
1417 |
+
"model_module": "@jupyter-widgets/controls",
|
1418 |
+
"model_name": "ProgressStyleModel",
|
1419 |
+
"model_module_version": "1.5.0",
|
1420 |
+
"state": {
|
1421 |
+
"_model_module": "@jupyter-widgets/controls",
|
1422 |
+
"_model_module_version": "1.5.0",
|
1423 |
+
"_model_name": "ProgressStyleModel",
|
1424 |
+
"_view_count": null,
|
1425 |
+
"_view_module": "@jupyter-widgets/base",
|
1426 |
+
"_view_module_version": "1.2.0",
|
1427 |
+
"_view_name": "StyleView",
|
1428 |
+
"bar_color": null,
|
1429 |
+
"description_width": ""
|
1430 |
+
}
|
1431 |
+
},
|
1432 |
+
"3ca0e1427ac6477c9921929af7ff00d1": {
|
1433 |
+
"model_module": "@jupyter-widgets/base",
|
1434 |
+
"model_name": "LayoutModel",
|
1435 |
+
"model_module_version": "1.2.0",
|
1436 |
+
"state": {
|
1437 |
+
"_model_module": "@jupyter-widgets/base",
|
1438 |
+
"_model_module_version": "1.2.0",
|
1439 |
+
"_model_name": "LayoutModel",
|
1440 |
+
"_view_count": null,
|
1441 |
+
"_view_module": "@jupyter-widgets/base",
|
1442 |
+
"_view_module_version": "1.2.0",
|
1443 |
+
"_view_name": "LayoutView",
|
1444 |
+
"align_content": null,
|
1445 |
+
"align_items": null,
|
1446 |
+
"align_self": null,
|
1447 |
+
"border": null,
|
1448 |
+
"bottom": null,
|
1449 |
+
"display": null,
|
1450 |
+
"flex": null,
|
1451 |
+
"flex_flow": null,
|
1452 |
+
"grid_area": null,
|
1453 |
+
"grid_auto_columns": null,
|
1454 |
+
"grid_auto_flow": null,
|
1455 |
+
"grid_auto_rows": null,
|
1456 |
+
"grid_column": null,
|
1457 |
+
"grid_gap": null,
|
1458 |
+
"grid_row": null,
|
1459 |
+
"grid_template_areas": null,
|
1460 |
+
"grid_template_columns": null,
|
1461 |
+
"grid_template_rows": null,
|
1462 |
+
"height": null,
|
1463 |
+
"justify_content": null,
|
1464 |
+
"justify_items": null,
|
1465 |
+
"left": null,
|
1466 |
+
"margin": null,
|
1467 |
+
"max_height": null,
|
1468 |
+
"max_width": null,
|
1469 |
+
"min_height": null,
|
1470 |
+
"min_width": null,
|
1471 |
+
"object_fit": null,
|
1472 |
+
"object_position": null,
|
1473 |
+
"order": null,
|
1474 |
+
"overflow": null,
|
1475 |
+
"overflow_x": null,
|
1476 |
+
"overflow_y": null,
|
1477 |
+
"padding": null,
|
1478 |
+
"right": null,
|
1479 |
+
"top": null,
|
1480 |
+
"visibility": null,
|
1481 |
+
"width": null
|
1482 |
+
}
|
1483 |
+
},
|
1484 |
+
"a9a5503caf384b93bf987e5271a577d2": {
|
1485 |
+
"model_module": "@jupyter-widgets/controls",
|
1486 |
+
"model_name": "DescriptionStyleModel",
|
1487 |
+
"model_module_version": "1.5.0",
|
1488 |
+
"state": {
|
1489 |
+
"_model_module": "@jupyter-widgets/controls",
|
1490 |
+
"_model_module_version": "1.5.0",
|
1491 |
+
"_model_name": "DescriptionStyleModel",
|
1492 |
+
"_view_count": null,
|
1493 |
+
"_view_module": "@jupyter-widgets/base",
|
1494 |
+
"_view_module_version": "1.2.0",
|
1495 |
+
"_view_name": "StyleView",
|
1496 |
+
"description_width": ""
|
1497 |
+
}
|
1498 |
+
},
|
1499 |
+
"c68f0fe7a6bb4060afcb05e3f6422288": {
|
1500 |
+
"model_module": "@jupyter-widgets/controls",
|
1501 |
+
"model_name": "HBoxModel",
|
1502 |
+
"model_module_version": "1.5.0",
|
1503 |
+
"state": {
|
1504 |
+
"_dom_classes": [],
|
1505 |
+
"_model_module": "@jupyter-widgets/controls",
|
1506 |
+
"_model_module_version": "1.5.0",
|
1507 |
+
"_model_name": "HBoxModel",
|
1508 |
+
"_view_count": null,
|
1509 |
+
"_view_module": "@jupyter-widgets/controls",
|
1510 |
+
"_view_module_version": "1.5.0",
|
1511 |
+
"_view_name": "HBoxView",
|
1512 |
+
"box_style": "",
|
1513 |
+
"children": [
|
1514 |
+
"IPY_MODEL_fef3c94897fc4ffa86f91aac7a45ac7f",
|
1515 |
+
"IPY_MODEL_92881d2e3f1a438b92a389cc6022f7ad",
|
1516 |
+
"IPY_MODEL_f518ab021bc648f188638fd168879edd"
|
1517 |
+
],
|
1518 |
+
"layout": "IPY_MODEL_1a29c71234d74f08b2645f9383fee126"
|
1519 |
+
}
|
1520 |
+
},
|
1521 |
+
"fef3c94897fc4ffa86f91aac7a45ac7f": {
|
1522 |
+
"model_module": "@jupyter-widgets/controls",
|
1523 |
+
"model_name": "HTMLModel",
|
1524 |
+
"model_module_version": "1.5.0",
|
1525 |
+
"state": {
|
1526 |
+
"_dom_classes": [],
|
1527 |
+
"_model_module": "@jupyter-widgets/controls",
|
1528 |
+
"_model_module_version": "1.5.0",
|
1529 |
+
"_model_name": "HTMLModel",
|
1530 |
+
"_view_count": null,
|
1531 |
+
"_view_module": "@jupyter-widgets/controls",
|
1532 |
+
"_view_module_version": "1.5.0",
|
1533 |
+
"_view_name": "HTMLView",
|
1534 |
+
"description": "",
|
1535 |
+
"description_tooltip": null,
|
1536 |
+
"layout": "IPY_MODEL_f8553ec713ea440eb0208a1012547988",
|
1537 |
+
"placeholder": "",
|
1538 |
+
"style": "IPY_MODEL_25e0373512b747ba8ebe020b8b8ab932",
|
1539 |
+
"value": "Loading checkpoint shards: 100%"
|
1540 |
+
}
|
1541 |
+
},
|
1542 |
+
"92881d2e3f1a438b92a389cc6022f7ad": {
|
1543 |
+
"model_module": "@jupyter-widgets/controls",
|
1544 |
+
"model_name": "FloatProgressModel",
|
1545 |
+
"model_module_version": "1.5.0",
|
1546 |
+
"state": {
|
1547 |
+
"_dom_classes": [],
|
1548 |
+
"_model_module": "@jupyter-widgets/controls",
|
1549 |
+
"_model_module_version": "1.5.0",
|
1550 |
+
"_model_name": "FloatProgressModel",
|
1551 |
+
"_view_count": null,
|
1552 |
+
"_view_module": "@jupyter-widgets/controls",
|
1553 |
+
"_view_module_version": "1.5.0",
|
1554 |
+
"_view_name": "ProgressView",
|
1555 |
+
"bar_style": "success",
|
1556 |
+
"description": "",
|
1557 |
+
"description_tooltip": null,
|
1558 |
+
"layout": "IPY_MODEL_daff4ba27c68441395aa5377111f30f1",
|
1559 |
+
"max": 2,
|
1560 |
+
"min": 0,
|
1561 |
+
"orientation": "horizontal",
|
1562 |
+
"style": "IPY_MODEL_863090b3318e4e0186bd46d3d1479de4",
|
1563 |
+
"value": 2
|
1564 |
+
}
|
1565 |
+
},
|
1566 |
+
"f518ab021bc648f188638fd168879edd": {
|
1567 |
+
"model_module": "@jupyter-widgets/controls",
|
1568 |
+
"model_name": "HTMLModel",
|
1569 |
+
"model_module_version": "1.5.0",
|
1570 |
+
"state": {
|
1571 |
+
"_dom_classes": [],
|
1572 |
+
"_model_module": "@jupyter-widgets/controls",
|
1573 |
+
"_model_module_version": "1.5.0",
|
1574 |
+
"_model_name": "HTMLModel",
|
1575 |
+
"_view_count": null,
|
1576 |
+
"_view_module": "@jupyter-widgets/controls",
|
1577 |
+
"_view_module_version": "1.5.0",
|
1578 |
+
"_view_name": "HTMLView",
|
1579 |
+
"description": "",
|
1580 |
+
"description_tooltip": null,
|
1581 |
+
"layout": "IPY_MODEL_acae1751ff5d4293bb588c2d9c7ab851",
|
1582 |
+
"placeholder": "",
|
1583 |
+
"style": "IPY_MODEL_8859eb8d9c154cb79a302db1568768fa",
|
1584 |
+
"value": " 2/2 [00:05<00:00, 2.39s/it]"
|
1585 |
+
}
|
1586 |
+
},
|
1587 |
+
"1a29c71234d74f08b2645f9383fee126": {
|
1588 |
+
"model_module": "@jupyter-widgets/base",
|
1589 |
+
"model_name": "LayoutModel",
|
1590 |
+
"model_module_version": "1.2.0",
|
1591 |
+
"state": {
|
1592 |
+
"_model_module": "@jupyter-widgets/base",
|
1593 |
+
"_model_module_version": "1.2.0",
|
1594 |
+
"_model_name": "LayoutModel",
|
1595 |
+
"_view_count": null,
|
1596 |
+
"_view_module": "@jupyter-widgets/base",
|
1597 |
+
"_view_module_version": "1.2.0",
|
1598 |
+
"_view_name": "LayoutView",
|
1599 |
+
"align_content": null,
|
1600 |
+
"align_items": null,
|
1601 |
+
"align_self": null,
|
1602 |
+
"border": null,
|
1603 |
+
"bottom": null,
|
1604 |
+
"display": null,
|
1605 |
+
"flex": null,
|
1606 |
+
"flex_flow": null,
|
1607 |
+
"grid_area": null,
|
1608 |
+
"grid_auto_columns": null,
|
1609 |
+
"grid_auto_flow": null,
|
1610 |
+
"grid_auto_rows": null,
|
1611 |
+
"grid_column": null,
|
1612 |
+
"grid_gap": null,
|
1613 |
+
"grid_row": null,
|
1614 |
+
"grid_template_areas": null,
|
1615 |
+
"grid_template_columns": null,
|
1616 |
+
"grid_template_rows": null,
|
1617 |
+
"height": null,
|
1618 |
+
"justify_content": null,
|
1619 |
+
"justify_items": null,
|
1620 |
+
"left": null,
|
1621 |
+
"margin": null,
|
1622 |
+
"max_height": null,
|
1623 |
+
"max_width": null,
|
1624 |
+
"min_height": null,
|
1625 |
+
"min_width": null,
|
1626 |
+
"object_fit": null,
|
1627 |
+
"object_position": null,
|
1628 |
+
"order": null,
|
1629 |
+
"overflow": null,
|
1630 |
+
"overflow_x": null,
|
1631 |
+
"overflow_y": null,
|
1632 |
+
"padding": null,
|
1633 |
+
"right": null,
|
1634 |
+
"top": null,
|
1635 |
+
"visibility": null,
|
1636 |
+
"width": null
|
1637 |
+
}
|
1638 |
+
},
|
1639 |
+
"f8553ec713ea440eb0208a1012547988": {
|
1640 |
+
"model_module": "@jupyter-widgets/base",
|
1641 |
+
"model_name": "LayoutModel",
|
1642 |
+
"model_module_version": "1.2.0",
|
1643 |
+
"state": {
|
1644 |
+
"_model_module": "@jupyter-widgets/base",
|
1645 |
+
"_model_module_version": "1.2.0",
|
1646 |
+
"_model_name": "LayoutModel",
|
1647 |
+
"_view_count": null,
|
1648 |
+
"_view_module": "@jupyter-widgets/base",
|
1649 |
+
"_view_module_version": "1.2.0",
|
1650 |
+
"_view_name": "LayoutView",
|
1651 |
+
"align_content": null,
|
1652 |
+
"align_items": null,
|
1653 |
+
"align_self": null,
|
1654 |
+
"border": null,
|
1655 |
+
"bottom": null,
|
1656 |
+
"display": null,
|
1657 |
+
"flex": null,
|
1658 |
+
"flex_flow": null,
|
1659 |
+
"grid_area": null,
|
1660 |
+
"grid_auto_columns": null,
|
1661 |
+
"grid_auto_flow": null,
|
1662 |
+
"grid_auto_rows": null,
|
1663 |
+
"grid_column": null,
|
1664 |
+
"grid_gap": null,
|
1665 |
+
"grid_row": null,
|
1666 |
+
"grid_template_areas": null,
|
1667 |
+
"grid_template_columns": null,
|
1668 |
+
"grid_template_rows": null,
|
1669 |
+
"height": null,
|
1670 |
+
"justify_content": null,
|
1671 |
+
"justify_items": null,
|
1672 |
+
"left": null,
|
1673 |
+
"margin": null,
|
1674 |
+
"max_height": null,
|
1675 |
+
"max_width": null,
|
1676 |
+
"min_height": null,
|
1677 |
+
"min_width": null,
|
1678 |
+
"object_fit": null,
|
1679 |
+
"object_position": null,
|
1680 |
+
"order": null,
|
1681 |
+
"overflow": null,
|
1682 |
+
"overflow_x": null,
|
1683 |
+
"overflow_y": null,
|
1684 |
+
"padding": null,
|
1685 |
+
"right": null,
|
1686 |
+
"top": null,
|
1687 |
+
"visibility": null,
|
1688 |
+
"width": null
|
1689 |
+
}
|
1690 |
+
},
|
1691 |
+
"25e0373512b747ba8ebe020b8b8ab932": {
|
1692 |
+
"model_module": "@jupyter-widgets/controls",
|
1693 |
+
"model_name": "DescriptionStyleModel",
|
1694 |
+
"model_module_version": "1.5.0",
|
1695 |
+
"state": {
|
1696 |
+
"_model_module": "@jupyter-widgets/controls",
|
1697 |
+
"_model_module_version": "1.5.0",
|
1698 |
+
"_model_name": "DescriptionStyleModel",
|
1699 |
+
"_view_count": null,
|
1700 |
+
"_view_module": "@jupyter-widgets/base",
|
1701 |
+
"_view_module_version": "1.2.0",
|
1702 |
+
"_view_name": "StyleView",
|
1703 |
+
"description_width": ""
|
1704 |
+
}
|
1705 |
+
},
|
1706 |
+
"daff4ba27c68441395aa5377111f30f1": {
|
1707 |
+
"model_module": "@jupyter-widgets/base",
|
1708 |
+
"model_name": "LayoutModel",
|
1709 |
+
"model_module_version": "1.2.0",
|
1710 |
+
"state": {
|
1711 |
+
"_model_module": "@jupyter-widgets/base",
|
1712 |
+
"_model_module_version": "1.2.0",
|
1713 |
+
"_model_name": "LayoutModel",
|
1714 |
+
"_view_count": null,
|
1715 |
+
"_view_module": "@jupyter-widgets/base",
|
1716 |
+
"_view_module_version": "1.2.0",
|
1717 |
+
"_view_name": "LayoutView",
|
1718 |
+
"align_content": null,
|
1719 |
+
"align_items": null,
|
1720 |
+
"align_self": null,
|
1721 |
+
"border": null,
|
1722 |
+
"bottom": null,
|
1723 |
+
"display": null,
|
1724 |
+
"flex": null,
|
1725 |
+
"flex_flow": null,
|
1726 |
+
"grid_area": null,
|
1727 |
+
"grid_auto_columns": null,
|
1728 |
+
"grid_auto_flow": null,
|
1729 |
+
"grid_auto_rows": null,
|
1730 |
+
"grid_column": null,
|
1731 |
+
"grid_gap": null,
|
1732 |
+
"grid_row": null,
|
1733 |
+
"grid_template_areas": null,
|
1734 |
+
"grid_template_columns": null,
|
1735 |
+
"grid_template_rows": null,
|
1736 |
+
"height": null,
|
1737 |
+
"justify_content": null,
|
1738 |
+
"justify_items": null,
|
1739 |
+
"left": null,
|
1740 |
+
"margin": null,
|
1741 |
+
"max_height": null,
|
1742 |
+
"max_width": null,
|
1743 |
+
"min_height": null,
|
1744 |
+
"min_width": null,
|
1745 |
+
"object_fit": null,
|
1746 |
+
"object_position": null,
|
1747 |
+
"order": null,
|
1748 |
+
"overflow": null,
|
1749 |
+
"overflow_x": null,
|
1750 |
+
"overflow_y": null,
|
1751 |
+
"padding": null,
|
1752 |
+
"right": null,
|
1753 |
+
"top": null,
|
1754 |
+
"visibility": null,
|
1755 |
+
"width": null
|
1756 |
+
}
|
1757 |
+
},
|
1758 |
+
"863090b3318e4e0186bd46d3d1479de4": {
|
1759 |
+
"model_module": "@jupyter-widgets/controls",
|
1760 |
+
"model_name": "ProgressStyleModel",
|
1761 |
+
"model_module_version": "1.5.0",
|
1762 |
+
"state": {
|
1763 |
+
"_model_module": "@jupyter-widgets/controls",
|
1764 |
+
"_model_module_version": "1.5.0",
|
1765 |
+
"_model_name": "ProgressStyleModel",
|
1766 |
+
"_view_count": null,
|
1767 |
+
"_view_module": "@jupyter-widgets/base",
|
1768 |
+
"_view_module_version": "1.2.0",
|
1769 |
+
"_view_name": "StyleView",
|
1770 |
+
"bar_color": null,
|
1771 |
+
"description_width": ""
|
1772 |
+
}
|
1773 |
+
},
|
1774 |
+
"acae1751ff5d4293bb588c2d9c7ab851": {
|
1775 |
+
"model_module": "@jupyter-widgets/base",
|
1776 |
+
"model_name": "LayoutModel",
|
1777 |
+
"model_module_version": "1.2.0",
|
1778 |
+
"state": {
|
1779 |
+
"_model_module": "@jupyter-widgets/base",
|
1780 |
+
"_model_module_version": "1.2.0",
|
1781 |
+
"_model_name": "LayoutModel",
|
1782 |
+
"_view_count": null,
|
1783 |
+
"_view_module": "@jupyter-widgets/base",
|
1784 |
+
"_view_module_version": "1.2.0",
|
1785 |
+
"_view_name": "LayoutView",
|
1786 |
+
"align_content": null,
|
1787 |
+
"align_items": null,
|
1788 |
+
"align_self": null,
|
1789 |
+
"border": null,
|
1790 |
+
"bottom": null,
|
1791 |
+
"display": null,
|
1792 |
+
"flex": null,
|
1793 |
+
"flex_flow": null,
|
1794 |
+
"grid_area": null,
|
1795 |
+
"grid_auto_columns": null,
|
1796 |
+
"grid_auto_flow": null,
|
1797 |
+
"grid_auto_rows": null,
|
1798 |
+
"grid_column": null,
|
1799 |
+
"grid_gap": null,
|
1800 |
+
"grid_row": null,
|
1801 |
+
"grid_template_areas": null,
|
1802 |
+
"grid_template_columns": null,
|
1803 |
+
"grid_template_rows": null,
|
1804 |
+
"height": null,
|
1805 |
+
"justify_content": null,
|
1806 |
+
"justify_items": null,
|
1807 |
+
"left": null,
|
1808 |
+
"margin": null,
|
1809 |
+
"max_height": null,
|
1810 |
+
"max_width": null,
|
1811 |
+
"min_height": null,
|
1812 |
+
"min_width": null,
|
1813 |
+
"object_fit": null,
|
1814 |
+
"object_position": null,
|
1815 |
+
"order": null,
|
1816 |
+
"overflow": null,
|
1817 |
+
"overflow_x": null,
|
1818 |
+
"overflow_y": null,
|
1819 |
+
"padding": null,
|
1820 |
+
"right": null,
|
1821 |
+
"top": null,
|
1822 |
+
"visibility": null,
|
1823 |
+
"width": null
|
1824 |
+
}
|
1825 |
+
},
|
1826 |
+
"8859eb8d9c154cb79a302db1568768fa": {
|
1827 |
+
"model_module": "@jupyter-widgets/controls",
|
1828 |
+
"model_name": "DescriptionStyleModel",
|
1829 |
+
"model_module_version": "1.5.0",
|
1830 |
+
"state": {
|
1831 |
+
"_model_module": "@jupyter-widgets/controls",
|
1832 |
+
"_model_module_version": "1.5.0",
|
1833 |
+
"_model_name": "DescriptionStyleModel",
|
1834 |
+
"_view_count": null,
|
1835 |
+
"_view_module": "@jupyter-widgets/base",
|
1836 |
+
"_view_module_version": "1.2.0",
|
1837 |
+
"_view_name": "StyleView",
|
1838 |
+
"description_width": ""
|
1839 |
+
}
|
1840 |
+
}
|
1841 |
+
}
|
1842 |
+
}
|
1843 |
+
},
|
1844 |
+
"nbformat": 4,
|
1845 |
+
"nbformat_minor": 0
|
1846 |
+
}
|
Fine_tune_SmolVLM2_on_Video.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
Finetune_ColPali.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
Fit_in_vision_models_using_quanto.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
Gemma_3_for_Video_Understanding.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
Gemma_3n_Video_Vibe_Tests.ipynb
ADDED
@@ -0,0 +1,1489 @@
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "A100",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
[ lines 21 onward: auto-generated widget-state metadata for the notebook's Hugging Face login form (the logo and token instructions, a "Token:" password field, an "Add token as git credential?" checkbox, a "Login" button, a pro-tip note about reusable 'notebooks' tokens, and a "Connecting..." label), followed by further widget state ]
|
641 |
+
"IPY_MODEL_c518268069244b21810e84380502c190"
|
642 |
+
],
|
643 |
+
"layout": "IPY_MODEL_fee72c1c455549b59092028b855a082a"
|
644 |
+
}
|
645 |
+
},
|
646 |
+
"01dc23faab3d42cda41fdfdd2a7dfed5": {
|
647 |
+
"model_module": "@jupyter-widgets/controls",
|
648 |
+
"model_name": "HTMLModel",
|
649 |
+
"model_module_version": "1.5.0",
|
650 |
+
"state": {
|
651 |
+
"_dom_classes": [],
|
652 |
+
"_model_module": "@jupyter-widgets/controls",
|
653 |
+
"_model_module_version": "1.5.0",
|
654 |
+
"_model_name": "HTMLModel",
|
655 |
+
"_view_count": null,
|
656 |
+
"_view_module": "@jupyter-widgets/controls",
|
657 |
+
"_view_module_version": "1.5.0",
|
658 |
+
"_view_name": "HTMLView",
|
659 |
+
"description": "",
|
660 |
+
"description_tooltip": null,
|
661 |
+
"layout": "IPY_MODEL_ed0fa93199b94fb486c125d4f322d59f",
|
662 |
+
"placeholder": "",
|
663 |
+
"style": "IPY_MODEL_66f82e7ef3694c699e3d4a2bd826392b",
|
664 |
+
"value": "Loading checkpoint shards: 100%"
|
665 |
+
}
|
666 |
+
},
|
667 |
+
"777d7addfb144fd8896b77a1e0d54f25": {
|
668 |
+
"model_module": "@jupyter-widgets/controls",
|
669 |
+
"model_name": "FloatProgressModel",
|
670 |
+
"model_module_version": "1.5.0",
|
671 |
+
"state": {
|
672 |
+
"_dom_classes": [],
|
673 |
+
"_model_module": "@jupyter-widgets/controls",
|
674 |
+
"_model_module_version": "1.5.0",
|
675 |
+
"_model_name": "FloatProgressModel",
|
676 |
+
"_view_count": null,
|
677 |
+
"_view_module": "@jupyter-widgets/controls",
|
678 |
+
"_view_module_version": "1.5.0",
|
679 |
+
"_view_name": "ProgressView",
|
680 |
+
"bar_style": "success",
|
681 |
+
"description": "",
|
682 |
+
"description_tooltip": null,
|
683 |
+
"layout": "IPY_MODEL_2bfd51e3ae954008ae83704c24dbd6cb",
|
684 |
+
"max": 4,
|
685 |
+
"min": 0,
|
686 |
+
"orientation": "horizontal",
|
687 |
+
"style": "IPY_MODEL_f8b84d8c06384680973ef6fe787b5a5d",
|
688 |
+
"value": 4
|
689 |
+
}
|
690 |
+
},
|
691 |
+
"c518268069244b21810e84380502c190": {
|
692 |
+
"model_module": "@jupyter-widgets/controls",
|
693 |
+
"model_name": "HTMLModel",
|
694 |
+
"model_module_version": "1.5.0",
|
695 |
+
"state": {
|
696 |
+
"_dom_classes": [],
|
697 |
+
"_model_module": "@jupyter-widgets/controls",
|
698 |
+
"_model_module_version": "1.5.0",
|
699 |
+
"_model_name": "HTMLModel",
|
700 |
+
"_view_count": null,
|
701 |
+
"_view_module": "@jupyter-widgets/controls",
|
702 |
+
"_view_module_version": "1.5.0",
|
703 |
+
"_view_name": "HTMLView",
|
704 |
+
"description": "",
|
705 |
+
"description_tooltip": null,
|
706 |
+
"layout": "IPY_MODEL_770341dc116148a8b7571cce3a2f2baf",
|
707 |
+
"placeholder": "",
|
708 |
+
"style": "IPY_MODEL_29416122cc0b4a5592668ddced7686ba",
|
709 |
+
"value": " 4/4 [00:00<00:00, 5.03it/s]"
|
710 |
+
}
|
711 |
+
},
|
712 |
+
"fee72c1c455549b59092028b855a082a": {
|
713 |
+
"model_module": "@jupyter-widgets/base",
|
714 |
+
"model_name": "LayoutModel",
|
715 |
+
"model_module_version": "1.2.0",
|
716 |
+
"state": {
|
717 |
+
"_model_module": "@jupyter-widgets/base",
|
718 |
+
"_model_module_version": "1.2.0",
|
719 |
+
"_model_name": "LayoutModel",
|
720 |
+
"_view_count": null,
|
721 |
+
"_view_module": "@jupyter-widgets/base",
|
722 |
+
"_view_module_version": "1.2.0",
|
723 |
+
"_view_name": "LayoutView",
|
724 |
+
"align_content": null,
|
725 |
+
"align_items": null,
|
726 |
+
"align_self": null,
|
727 |
+
"border": null,
|
728 |
+
"bottom": null,
|
729 |
+
"display": null,
|
730 |
+
"flex": null,
|
731 |
+
"flex_flow": null,
|
732 |
+
"grid_area": null,
|
733 |
+
"grid_auto_columns": null,
|
734 |
+
"grid_auto_flow": null,
|
735 |
+
"grid_auto_rows": null,
|
736 |
+
"grid_column": null,
|
737 |
+
"grid_gap": null,
|
738 |
+
"grid_row": null,
|
739 |
+
"grid_template_areas": null,
|
740 |
+
"grid_template_columns": null,
|
741 |
+
"grid_template_rows": null,
|
742 |
+
"height": null,
|
743 |
+
"justify_content": null,
|
744 |
+
"justify_items": null,
|
745 |
+
"left": null,
|
746 |
+
"margin": null,
|
747 |
+
"max_height": null,
|
748 |
+
"max_width": null,
|
749 |
+
"min_height": null,
|
750 |
+
"min_width": null,
|
751 |
+
"object_fit": null,
|
752 |
+
"object_position": null,
|
753 |
+
"order": null,
|
754 |
+
"overflow": null,
|
755 |
+
"overflow_x": null,
|
756 |
+
"overflow_y": null,
|
757 |
+
"padding": null,
|
758 |
+
"right": null,
|
759 |
+
"top": null,
|
760 |
+
"visibility": null,
|
761 |
+
"width": null
|
762 |
+
}
|
763 |
+
},
|
764 |
+
"ed0fa93199b94fb486c125d4f322d59f": {
|
765 |
+
"model_module": "@jupyter-widgets/base",
|
766 |
+
"model_name": "LayoutModel",
|
767 |
+
"model_module_version": "1.2.0",
|
768 |
+
"state": {
|
769 |
+
"_model_module": "@jupyter-widgets/base",
|
770 |
+
"_model_module_version": "1.2.0",
|
771 |
+
"_model_name": "LayoutModel",
|
772 |
+
"_view_count": null,
|
773 |
+
"_view_module": "@jupyter-widgets/base",
|
774 |
+
"_view_module_version": "1.2.0",
|
775 |
+
"_view_name": "LayoutView",
|
776 |
+
"align_content": null,
|
777 |
+
"align_items": null,
|
778 |
+
"align_self": null,
|
779 |
+
"border": null,
|
780 |
+
"bottom": null,
|
781 |
+
"display": null,
|
782 |
+
"flex": null,
|
783 |
+
"flex_flow": null,
|
784 |
+
"grid_area": null,
|
785 |
+
"grid_auto_columns": null,
|
786 |
+
"grid_auto_flow": null,
|
787 |
+
"grid_auto_rows": null,
|
788 |
+
"grid_column": null,
|
789 |
+
"grid_gap": null,
|
790 |
+
"grid_row": null,
|
791 |
+
"grid_template_areas": null,
|
792 |
+
"grid_template_columns": null,
|
793 |
+
"grid_template_rows": null,
|
794 |
+
"height": null,
|
795 |
+
"justify_content": null,
|
796 |
+
"justify_items": null,
|
797 |
+
"left": null,
|
798 |
+
"margin": null,
|
799 |
+
"max_height": null,
|
800 |
+
"max_width": null,
|
801 |
+
"min_height": null,
|
802 |
+
"min_width": null,
|
803 |
+
"object_fit": null,
|
804 |
+
"object_position": null,
|
805 |
+
"order": null,
|
806 |
+
"overflow": null,
|
807 |
+
"overflow_x": null,
|
808 |
+
"overflow_y": null,
|
809 |
+
"padding": null,
|
810 |
+
"right": null,
|
811 |
+
"top": null,
|
812 |
+
"visibility": null,
|
813 |
+
"width": null
|
814 |
+
}
|
815 |
+
},
|
816 |
+
"66f82e7ef3694c699e3d4a2bd826392b": {
|
817 |
+
"model_module": "@jupyter-widgets/controls",
|
818 |
+
"model_name": "DescriptionStyleModel",
|
819 |
+
"model_module_version": "1.5.0",
|
820 |
+
"state": {
|
821 |
+
"_model_module": "@jupyter-widgets/controls",
|
822 |
+
"_model_module_version": "1.5.0",
|
823 |
+
"_model_name": "DescriptionStyleModel",
|
824 |
+
"_view_count": null,
|
825 |
+
"_view_module": "@jupyter-widgets/base",
|
826 |
+
"_view_module_version": "1.2.0",
|
827 |
+
"_view_name": "StyleView",
|
828 |
+
"description_width": ""
|
829 |
+
}
|
830 |
+
},
|
831 |
+
"2bfd51e3ae954008ae83704c24dbd6cb": {
|
832 |
+
"model_module": "@jupyter-widgets/base",
|
833 |
+
"model_name": "LayoutModel",
|
834 |
+
"model_module_version": "1.2.0",
|
835 |
+
"state": {
|
836 |
+
"_model_module": "@jupyter-widgets/base",
|
837 |
+
"_model_module_version": "1.2.0",
|
838 |
+
"_model_name": "LayoutModel",
|
839 |
+
"_view_count": null,
|
840 |
+
"_view_module": "@jupyter-widgets/base",
|
841 |
+
"_view_module_version": "1.2.0",
|
842 |
+
"_view_name": "LayoutView",
|
843 |
+
"align_content": null,
|
844 |
+
"align_items": null,
|
845 |
+
"align_self": null,
|
846 |
+
"border": null,
|
847 |
+
"bottom": null,
|
848 |
+
"display": null,
|
849 |
+
"flex": null,
|
850 |
+
"flex_flow": null,
|
851 |
+
"grid_area": null,
|
852 |
+
"grid_auto_columns": null,
|
853 |
+
"grid_auto_flow": null,
|
854 |
+
"grid_auto_rows": null,
|
855 |
+
"grid_column": null,
|
856 |
+
"grid_gap": null,
|
857 |
+
"grid_row": null,
|
858 |
+
"grid_template_areas": null,
|
859 |
+
"grid_template_columns": null,
|
860 |
+
"grid_template_rows": null,
|
861 |
+
"height": null,
|
862 |
+
"justify_content": null,
|
863 |
+
"justify_items": null,
|
864 |
+
"left": null,
|
865 |
+
"margin": null,
|
866 |
+
"max_height": null,
|
867 |
+
"max_width": null,
|
868 |
+
"min_height": null,
|
869 |
+
"min_width": null,
|
870 |
+
"object_fit": null,
|
871 |
+
"object_position": null,
|
872 |
+
"order": null,
|
873 |
+
"overflow": null,
|
874 |
+
"overflow_x": null,
|
875 |
+
"overflow_y": null,
|
876 |
+
"padding": null,
|
877 |
+
"right": null,
|
878 |
+
"top": null,
|
879 |
+
"visibility": null,
|
880 |
+
"width": null
|
881 |
+
}
|
882 |
+
},
|
883 |
+
"f8b84d8c06384680973ef6fe787b5a5d": {
|
884 |
+
"model_module": "@jupyter-widgets/controls",
|
885 |
+
"model_name": "ProgressStyleModel",
|
886 |
+
"model_module_version": "1.5.0",
|
887 |
+
"state": {
|
888 |
+
"_model_module": "@jupyter-widgets/controls",
|
889 |
+
"_model_module_version": "1.5.0",
|
890 |
+
"_model_name": "ProgressStyleModel",
|
891 |
+
"_view_count": null,
|
892 |
+
"_view_module": "@jupyter-widgets/base",
|
893 |
+
"_view_module_version": "1.2.0",
|
894 |
+
"_view_name": "StyleView",
|
895 |
+
"bar_color": null,
|
896 |
+
"description_width": ""
|
897 |
+
}
|
898 |
+
},
|
899 |
+
"770341dc116148a8b7571cce3a2f2baf": {
|
900 |
+
"model_module": "@jupyter-widgets/base",
|
901 |
+
"model_name": "LayoutModel",
|
902 |
+
"model_module_version": "1.2.0",
|
903 |
+
"state": {
|
904 |
+
"_model_module": "@jupyter-widgets/base",
|
905 |
+
"_model_module_version": "1.2.0",
|
906 |
+
"_model_name": "LayoutModel",
|
907 |
+
"_view_count": null,
|
908 |
+
"_view_module": "@jupyter-widgets/base",
|
909 |
+
"_view_module_version": "1.2.0",
|
910 |
+
"_view_name": "LayoutView",
|
911 |
+
"align_content": null,
|
912 |
+
"align_items": null,
|
913 |
+
"align_self": null,
|
914 |
+
"border": null,
|
915 |
+
"bottom": null,
|
916 |
+
"display": null,
|
917 |
+
"flex": null,
|
918 |
+
"flex_flow": null,
|
919 |
+
"grid_area": null,
|
920 |
+
"grid_auto_columns": null,
|
921 |
+
"grid_auto_flow": null,
|
922 |
+
"grid_auto_rows": null,
|
923 |
+
"grid_column": null,
|
924 |
+
"grid_gap": null,
|
925 |
+
"grid_row": null,
|
926 |
+
"grid_template_areas": null,
|
927 |
+
"grid_template_columns": null,
|
928 |
+
"grid_template_rows": null,
|
929 |
+
"height": null,
|
930 |
+
"justify_content": null,
|
931 |
+
"justify_items": null,
|
932 |
+
"left": null,
|
933 |
+
"margin": null,
|
934 |
+
"max_height": null,
|
935 |
+
"max_width": null,
|
936 |
+
"min_height": null,
|
937 |
+
"min_width": null,
|
938 |
+
"object_fit": null,
|
939 |
+
"object_position": null,
|
940 |
+
"order": null,
|
941 |
+
"overflow": null,
|
942 |
+
"overflow_x": null,
|
943 |
+
"overflow_y": null,
|
944 |
+
"padding": null,
|
945 |
+
"right": null,
|
946 |
+
"top": null,
|
947 |
+
"visibility": null,
|
948 |
+
"width": null
|
949 |
+
}
|
950 |
+
},
|
951 |
+
"29416122cc0b4a5592668ddced7686ba": {
|
952 |
+
"model_module": "@jupyter-widgets/controls",
|
953 |
+
"model_name": "DescriptionStyleModel",
|
954 |
+
"model_module_version": "1.5.0",
|
955 |
+
"state": {
|
956 |
+
"_model_module": "@jupyter-widgets/controls",
|
957 |
+
"_model_module_version": "1.5.0",
|
958 |
+
"_model_name": "DescriptionStyleModel",
|
959 |
+
"_view_count": null,
|
960 |
+
"_view_module": "@jupyter-widgets/base",
|
961 |
+
"_view_module_version": "1.2.0",
|
962 |
+
"_view_name": "StyleView",
|
963 |
+
"description_width": ""
|
964 |
+
}
|
965 |
+
}
|
966 |
+
}
|
967 |
+
}
|
968 |
+
},
|
969 |
+
"cells": [
|
970 |
+
{
|
971 |
+
"cell_type": "markdown",
|
972 |
+
"metadata": {
|
973 |
+
"id": "view-in-github",
|
974 |
+
"colab_type": "text"
|
975 |
+
},
|
976 |
+
"source": [
|
977 |
+
"<a href=\"https://colab.research.google.com/github/merveenoyan/smol-vision/blob/main/Gemma_3n_Video_Vibe_Tests.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
978 |
+
]
|
979 |
+
},
|
980 |
+
{
|
981 |
+
"cell_type": "markdown",
|
982 |
+
"source": [
|
983 |
+
"## Gemma 3n Video with Audio Inference"
|
984 |
+
],
|
985 |
+
"metadata": {
|
986 |
+
"id": "onFz3_7AqnaB"
|
987 |
+
}
|
988 |
+
},
|
989 |
+
{
|
990 |
+
"cell_type": "markdown",
|
991 |
+
"source": [
|
992 |
+
"In this notebook we'll infer Gemma-3n videos with audios inside."
|
993 |
+
],
|
994 |
+
"metadata": {
|
995 |
+
"id": "KKUnhy4JqqAg"
|
996 |
+
}
|
997 |
+
},
|
998 |
+
{
|
999 |
+
"cell_type": "code",
|
1000 |
+
"source": [
|
1001 |
+
"!pip install -U -q transformers timm datasets"
|
1002 |
+
],
|
1003 |
+
"metadata": {
|
1004 |
+
"id": "Vf-VvnrNjuxF"
|
1005 |
+
},
|
1006 |
+
"execution_count": null,
|
1007 |
+
"outputs": []
|
1008 |
+
},
|
1009 |
+
{
|
1010 |
+
"cell_type": "markdown",
|
1011 |
+
"source": [
|
1012 |
+
"We will load three examples from FineVideo dataset and Gemma-3n model so make sure you have access to both and provide access token."
|
1013 |
+
],
|
1014 |
+
"metadata": {
|
1015 |
+
"id": "gcJbxIPLqvjH"
|
1016 |
+
}
|
1017 |
+
},
|
1018 |
+
{
|
1019 |
+
"cell_type": "code",
|
1020 |
+
"source": [
|
1021 |
+
"from huggingface_hub import login\n",
|
1022 |
+
"login()"
|
1023 |
+
],
|
1024 |
+
"metadata": {
|
1025 |
+
"id": "bROdG2-Jj9lT",
|
1026 |
+
"colab": {
|
1027 |
+
"base_uri": "https://localhost:8080/",
|
1028 |
+
"height": 17,
|
1029 |
+
"referenced_widgets": [
|
1030 |
+
"542490f74e974451bc44009a6fa174bd",
|
1031 |
+
"409f985be1134b468b81136fbdb54408",
|
1032 |
+
"57cb1e931c614980a4147cb125524d7d",
|
1033 |
+
"87dc7aaf52e349a7bb43bb1b8bc137ee",
|
1034 |
+
"983ed4cb4eea42daa9ae8c0417021a21",
|
1035 |
+
"40c381fd7bb04b43a879044a4e988cc6",
|
1036 |
+
"8d0e5abdd7c549f1a66ee198c9fa1430",
|
1037 |
+
"c72dd3d6a4c246cfa6590c314783c8f0",
|
1038 |
+
"c0e471e664dd41eab98efe08301ef5e1",
|
1039 |
+
"868f63ea9455442d837dc2c422918800",
|
1040 |
+
"5b7b4707b1bf4159a10bf7e289bde435",
|
1041 |
+
"889d0d1ed24e4de2b89896511d008e60",
|
1042 |
+
"68fc757825dd44a48ab2383db20958db",
|
1043 |
+
"cb76f933e6e640d9a688f7838e5fb0b3",
|
1044 |
+
"8704264bff4d46c9813ac9acf92da962",
|
1045 |
+
"9b5d87960dde401baeaf8b6144fb8bad",
|
1046 |
+
"76e06881e5e94197a24944e07fdf3189",
|
1047 |
+
"f40dd696acc64c6284c6f8f485f3ce9d",
|
1048 |
+
"4488de26dce74cbbb39d99ae09bd21fa",
|
1049 |
+
"ded62e6c032745ec88ca0ab694b0d397"
|
1050 |
+
]
|
1051 |
+
},
|
1052 |
+
"outputId": "1978e9bd-3b52-40b8-e643-418f9872476d"
|
1053 |
+
},
|
1054 |
+
"execution_count": null,
|
1055 |
+
"outputs": [
|
1056 |
+
{
|
1057 |
+
"output_type": "display_data",
|
1058 |
+
"data": {
|
1059 |
+
"text/plain": [
|
1060 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
1061 |
+
],
|
1062 |
+
"application/vnd.jupyter.widget-view+json": {
|
1063 |
+
"version_major": 2,
|
1064 |
+
"version_minor": 0,
|
1065 |
+
"model_id": "542490f74e974451bc44009a6fa174bd"
|
1066 |
+
}
|
1067 |
+
},
|
1068 |
+
"metadata": {}
|
1069 |
+
}
|
1070 |
+
]
|
1071 |
+
},
|
1072 |
+
{
|
1073 |
+
"cell_type": "code",
|
1074 |
+
"execution_count": null,
|
1075 |
+
"metadata": {
|
1076 |
+
"id": "TMiKyRtAjjAc",
|
1077 |
+
"colab": {
|
1078 |
+
"base_uri": "https://localhost:8080/",
|
1079 |
+
"height": 173,
|
1080 |
+
"referenced_widgets": [
|
1081 |
+
"be523e956910487ca263d943a7a58395",
|
1082 |
+
"01dc23faab3d42cda41fdfdd2a7dfed5",
|
1083 |
+
"777d7addfb144fd8896b77a1e0d54f25",
|
1084 |
+
"c518268069244b21810e84380502c190",
|
1085 |
+
"fee72c1c455549b59092028b855a082a",
|
1086 |
+
"ed0fa93199b94fb486c125d4f322d59f",
|
1087 |
+
"66f82e7ef3694c699e3d4a2bd826392b",
|
1088 |
+
"2bfd51e3ae954008ae83704c24dbd6cb",
|
1089 |
+
"f8b84d8c06384680973ef6fe787b5a5d",
|
1090 |
+
"770341dc116148a8b7571cce3a2f2baf",
|
1091 |
+
"29416122cc0b4a5592668ddced7686ba"
|
1092 |
+
]
|
1093 |
+
},
|
1094 |
+
"outputId": "7351e21a-3c82-4d0c-c827-24b66812f181"
|
1095 |
+
},
|
1096 |
+
"outputs": [
|
1097 |
+
{
|
1098 |
+
"output_type": "stream",
|
1099 |
+
"name": "stderr",
|
1100 |
+
"text": [
|
1101 |
+
"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
|
1102 |
+
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
|
1103 |
+
"To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
|
1104 |
+
"You will be able to reuse this secret in all of your notebooks.\n",
|
1105 |
+
"Please note that authentication is recommended but still optional to access public models or datasets.\n",
|
1106 |
+
" warnings.warn(\n"
|
1107 |
+
]
|
1108 |
+
},
|
1109 |
+
{
|
1110 |
+
"output_type": "display_data",
|
1111 |
+
"data": {
|
1112 |
+
"text/plain": [
|
1113 |
+
"Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
|
1114 |
+
],
|
1115 |
+
"application/vnd.jupyter.widget-view+json": {
|
1116 |
+
"version_major": 2,
|
1117 |
+
"version_minor": 0,
|
1118 |
+
"model_id": "be523e956910487ca263d943a7a58395"
|
1119 |
+
}
|
1120 |
+
},
|
1121 |
+
"metadata": {}
|
1122 |
+
}
|
1123 |
+
],
|
1124 |
+
"source": [
|
1125 |
+
"from transformers import AutoProcessor, Gemma3nForConditionalGeneration\n",
|
1126 |
+
"import torch\n",
|
1127 |
+
"model = Gemma3nForConditionalGeneration.from_pretrained(\n",
|
1128 |
+
" \"google/gemma-3n-E4B-it\", torch_dtype=torch.bfloat16,\n",
|
1129 |
+
").to(\"cuda\")\n",
|
1130 |
+
"processor = AutoProcessor.from_pretrained(\n",
|
1131 |
+
" \"google/gemma-3n-E4B-it\",\n",
|
1132 |
+
")\n",
|
1133 |
+
"processor.tokenizer.padding_side = \"right\""
|
1134 |
+
]
|
1135 |
+
},
|
1136 |
+
{
|
1137 |
+
"cell_type": "markdown",
|
1138 |
+
"source": [
|
1139 |
+
"Download video for inference."
|
1140 |
+
],
|
1141 |
+
"metadata": {
|
1142 |
+
"id": "mQzrURJlNRwW"
|
1143 |
+
}
|
1144 |
+
},
|
1145 |
+
{
|
1146 |
+
"cell_type": "code",
|
1147 |
+
"source": [
|
1148 |
+
"!wget https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/IMG_8137.mp4"
|
1149 |
+
],
|
1150 |
+
"metadata": {
|
1151 |
+
"colab": {
|
1152 |
+
"base_uri": "https://localhost:8080/"
|
1153 |
+
},
|
1154 |
+
"id": "PAQ1S2uDMIzj",
|
1155 |
+
"outputId": "c584ee8c-b960-4f82-f2c6-be194709256f"
|
1156 |
+
},
|
1157 |
+
"execution_count": null,
|
1158 |
+
"outputs": [
|
1159 |
+
{
|
1160 |
+
"output_type": "stream",
|
1161 |
+
"name": "stdout",
|
1162 |
+
"text": [
|
1163 |
+
"--2025-07-01 13:39:22-- https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/IMG_8137.mp4\n",
|
1164 |
+
"Resolving huggingface.co (huggingface.co)... 18.172.134.4, 18.172.134.24, 18.172.134.124, ...\n",
|
1165 |
+
"Connecting to huggingface.co (huggingface.co)|18.172.134.4|:443... connected.\n",
|
1166 |
+
"HTTP request sent, awaiting response... 302 Found\n",
|
1167 |
+
"Location: https://cdn-lfs-us-1.hf.co/repos/7b/14/7b14679bb56cefbf7829be71f3f444110ccc308f431bd8596f534e743367ea5c/6331cbb913feb48349e3b7015a7969e04ce3cd594b1bda7278e4e33fe4a3f5f3?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27IMG_8137.mp4%3B+filename%3D%22IMG_8137.mp4%22%3B&response-content-type=video%2Fmp4&Expires=1751380762&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1MTM4MDc2Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzdiLzE0LzdiMTQ2NzliYjU2Y2VmYmY3ODI5YmU3MWYzZjQ0NDExMGNjYzMwOGY0MzFiZDg1OTZmNTM0ZTc0MzM2N2VhNWMvNjMzMWNiYjkxM2ZlYjQ4MzQ5ZTNiNzAxNWE3OTY5ZTA0Y2UzY2Q1OTRiMWJkYTcyNzhlNGUzM2ZlNGEzZjVmMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=MsPaMyO17sK%7Eo3U41ncCYEHd2vpjR6Jvv2IiqrhIy45kp-2WPdIGaYg5F7g9ENDJfFqmYavs6VH26AdLbX3HLPBUoR%7EAV8Iew8V1lFK1SpMkyCkh0SMtYNHqSw27jJ1ZSIhMKnHA7hRGi5b8LAhBiGzmlikz4a%7EtZAjjQZ18ZyN8GxCvTironzCp3uKUExWpRQF%7EwEwqurBb%7EKs-uJ6KDLvshYInzF%7Eo1LEoRNlXdxmDk8Q5Q7ZnBFM5m%7EPvBt-OQ4WWDPQZ86qblHwtoAgf483cdviYLPd8PjGzarQxgrjxbqELMvXM-nvUdXcOuAwhbBzpzSwBGQManPZxOFKTFw__&Key-Pair-Id=K24J24Z295AEI9 [following]\n",
|
1168 |
+
"--2025-07-01 13:39:22-- https://cdn-lfs-us-1.hf.co/repos/7b/14/7b14679bb56cefbf7829be71f3f444110ccc308f431bd8596f534e743367ea5c/6331cbb913feb48349e3b7015a7969e04ce3cd594b1bda7278e4e33fe4a3f5f3?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27IMG_8137.mp4%3B+filename%3D%22IMG_8137.mp4%22%3B&response-content-type=video%2Fmp4&Expires=1751380762&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1MTM4MDc2Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzdiLzE0LzdiMTQ2NzliYjU2Y2VmYmY3ODI5YmU3MWYzZjQ0NDExMGNjYzMwOGY0MzFiZDg1OTZmNTM0ZTc0MzM2N2VhNWMvNjMzMWNiYjkxM2ZlYjQ4MzQ5ZTNiNzAxNWE3OTY5ZTA0Y2UzY2Q1OTRiMWJkYTcyNzhlNGUzM2ZlNGEzZjVmMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=MsPaMyO17sK%7Eo3U41ncCYEHd2vpjR6Jvv2IiqrhIy45kp-2WPdIGaYg5F7g9ENDJfFqmYavs6VH26AdLbX3HLPBUoR%7EAV8Iew8V1lFK1SpMkyCkh0SMtYNHqSw27jJ1ZSIhMKnHA7hRGi5b8LAhBiGzmlikz4a%7EtZAjjQZ18ZyN8GxCvTironzCp3uKUExWpRQF%7EwEwqurBb%7EKs-uJ6KDLvshYInzF%7Eo1LEoRNlXdxmDk8Q5Q7ZnBFM5m%7EPvBt-OQ4WWDPQZ86qblHwtoAgf483cdviYLPd8PjGzarQxgrjxbqELMvXM-nvUdXcOuAwhbBzpzSwBGQManPZxOFKTFw__&Key-Pair-Id=K24J24Z295AEI9\n",
|
1169 |
+
"Resolving cdn-lfs-us-1.hf.co (cdn-lfs-us-1.hf.co)... 3.167.138.114, 3.167.138.90, 3.167.138.39, ...\n",
|
1170 |
+
"Connecting to cdn-lfs-us-1.hf.co (cdn-lfs-us-1.hf.co)|3.167.138.114|:443... connected.\n",
|
1171 |
+
"HTTP request sent, awaiting response... 200 OK\n",
|
1172 |
+
"Length: 5340706 (5.1M) [video/mp4]\n",
|
1173 |
+
"Saving to: ‘IMG_8137.mp4’\n",
|
1174 |
+
"\n",
|
1175 |
+
"IMG_8137.mp4 100%[===================>] 5.09M 27.1MB/s in 0.2s \n",
|
1176 |
+
"\n",
|
1177 |
+
"2025-07-01 13:39:22 (27.1 MB/s) - ‘IMG_8137.mp4’ saved [5340706/5340706]\n",
|
1178 |
+
"\n"
|
1179 |
+
]
|
1180 |
+
}
|
1181 |
+
]
|
1182 |
+
},
|
1183 |
+
{
|
1184 |
+
"cell_type": "markdown",
|
1185 |
+
"source": [
|
1186 |
+
"Strip audios from video."
|
1187 |
+
],
|
1188 |
+
"metadata": {
|
1189 |
+
"id": "KXlBj7dVtUFZ"
|
1190 |
+
}
|
1191 |
+
},
|
1192 |
+
{
|
1193 |
+
"cell_type": "code",
|
1194 |
+
"source": [
|
1195 |
+
"import os\n",
|
1196 |
+
"import subprocess\n",
|
1197 |
+
"filename = \"IMG_8137.mp4\"\n",
|
1198 |
+
"audio_path = os.path.join(\"audios\", f\"audio.wav\")\n",
|
1199 |
+
"\n",
|
1200 |
+
"subprocess.run([\n",
|
1201 |
+
" \"ffmpeg\", \"-i\", filename,\n",
|
1202 |
+
" \"-q:a\", \"0\", \"-map\", \"a\",\n",
|
1203 |
+
" audio_path,\n",
|
1204 |
+
" \"-y\"\n",
|
1205 |
+
"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)"
|
1206 |
+
],
|
1207 |
+
"metadata": {
|
1208 |
+
"colab": {
|
1209 |
+
"base_uri": "https://localhost:8080/"
|
1210 |
+
},
|
1211 |
+
"id": "FQhKimtlMOHe",
|
1212 |
+
"outputId": "ef05231a-ce56-4733-b0be-d6b423a143ae"
|
1213 |
+
},
|
1214 |
+
"execution_count": null,
|
1215 |
+
"outputs": [
|
1216 |
+
{
|
1217 |
+
"output_type": "execute_result",
|
1218 |
+
"data": {
|
1219 |
+
"text/plain": [
|
1220 |
+
"CompletedProcess(args=['ffmpeg', '-i', 'IMG_8137.mp4', '-q:a', '0', '-map', 'a', 'audios/audio.wav', '-y'], returncode=0)"
|
1221 |
+
]
|
1222 |
+
},
|
1223 |
+
"metadata": {},
|
1224 |
+
"execution_count": 57
|
1225 |
+
}
|
1226 |
+
]
|
1227 |
+
},
|
1228 |
+
{
|
1229 |
+
"cell_type": "code",
|
1230 |
+
"source": [
|
1231 |
+
"import cv2\n",
|
1232 |
+
"from PIL import Image\n",
|
1233 |
+
"import numpy as np\n",
|
1234 |
+
"\n",
|
1235 |
+
"def downsample_video(video_path):\n",
|
1236 |
+
" vidcap = cv2.VideoCapture(video_path)\n",
|
1237 |
+
" total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
|
1238 |
+
" fps = vidcap.get(cv2.CAP_PROP_FPS)\n",
|
1239 |
+
"\n",
|
1240 |
+
" frames = []\n",
|
1241 |
+
" frame_indices = np.linspace(0, total_frames - 1, 7, dtype=int)\n",
|
1242 |
+
"\n",
|
1243 |
+
" for i in frame_indices:\n",
|
1244 |
+
" vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)\n",
|
1245 |
+
" success, image = vidcap.read()\n",
|
1246 |
+
" if success:\n",
|
1247 |
+
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert from BGR to RGB\n",
|
1248 |
+
" pil_image = Image.fromarray(image)\n",
|
1249 |
+
" timestamp = round(i / fps, 2)\n",
|
1250 |
+
" frames.append((pil_image, timestamp))\n",
|
1251 |
+
"\n",
|
1252 |
+
" vidcap.release()\n",
|
1253 |
+
" return frames\n"
|
1254 |
+
],
|
1255 |
+
"metadata": {
|
1256 |
+
"id": "6e_cExwMjx7v"
|
1257 |
+
},
|
1258 |
+
"execution_count": null,
|
1259 |
+
"outputs": []
|
1260 |
+
},
|
1261 |
+
{
|
1262 |
+
"cell_type": "markdown",
|
1263 |
+
"source": [
|
1264 |
+
"We will generate descriptions to videos and compare them to irl description in the metadata for the vibecheck.\n",
|
1265 |
+
"\n",
|
1266 |
+
"We need to downsample video to frames."
|
1267 |
+
],
|
1268 |
+
"metadata": {
|
1269 |
+
"id": "mRKCPRabuMs6"
|
1270 |
+
}
|
1271 |
+
},
|
1272 |
+
{
|
1273 |
+
"cell_type": "code",
|
1274 |
+
"source": [
|
1275 |
+
"frames = downsample_video(filename)"
|
1276 |
+
],
|
1277 |
+
"metadata": {
|
1278 |
+
"id": "UMJESbFulYTi"
|
1279 |
+
},
|
1280 |
+
"execution_count": null,
|
1281 |
+
"outputs": []
|
1282 |
+
},
|
1283 |
+
{
|
1284 |
+
"cell_type": "code",
|
1285 |
+
"source": [
|
1286 |
+
"frames"
|
1287 |
+
],
|
1288 |
+
"metadata": {
|
1289 |
+
"colab": {
|
1290 |
+
"base_uri": "https://localhost:8080/"
|
1291 |
+
},
|
1292 |
+
"id": "wJKdYXasMfEG",
|
1293 |
+
"outputId": "2cff578c-df4d-41ca-8d9e-f85b4fed3456"
|
1294 |
+
},
|
1295 |
+
"execution_count": null,
|
1296 |
+
"outputs": [
|
1297 |
+
{
|
1298 |
+
"output_type": "execute_result",
|
1299 |
+
"data": {
|
1300 |
+
"text/plain": [
|
1301 |
+
"[(<PIL.Image.Image image mode=RGB size=1080x1920>, np.float64(0.0)),\n",
|
1302 |
+
" (<PIL.Image.Image image mode=RGB size=1080x1920>, np.float64(1.03)),\n",
|
1303 |
+
" (<PIL.Image.Image image mode=RGB size=1080x1920>, np.float64(2.09)),\n",
|
1304 |
+
" (<PIL.Image.Image image mode=RGB size=1080x1920>, np.float64(3.12)),\n",
|
1305 |
+
" (<PIL.Image.Image image mode=RGB size=1080x1920>, np.float64(4.17)),\n",
|
1306 |
+
" (<PIL.Image.Image image mode=RGB size=1080x1920>, np.float64(5.21)),\n",
|
1307 |
+
" (<PIL.Image.Image image mode=RGB size=1080x1920>, np.float64(6.26))]"
|
1308 |
+
]
|
1309 |
+
},
|
1310 |
+
"metadata": {},
|
1311 |
+
"execution_count": 52
|
1312 |
+
}
|
1313 |
+
]
|
1314 |
+
},
|
1315 |
+
{
|
1316 |
+
"cell_type": "code",
|
1317 |
+
"source": [
|
1318 |
+
"messages = [\n",
|
1319 |
+
" {\n",
|
1320 |
+
" \"role\": \"system\",\n",
|
1321 |
+
" \"content\": [{\"type\": \"text\", \"text\": \"You are a helpful assistant.\"}]\n",
|
1322 |
+
" },\n",
|
1323 |
+
" {\n",
|
1324 |
+
" \"role\": \"user\",\n",
|
1325 |
+
" \"content\": [\n",
|
1326 |
+
" {\"type\": \"text\", \"text\": f\"What is happening in this video? Summarize the events.\"}]\n",
|
1327 |
+
" }\n",
|
1328 |
+
"]\n",
|
1329 |
+
"for frame in frames:\n",
|
1330 |
+
" image, timestamp = frame\n",
|
1331 |
+
" messages[1][\"content\"].append({\"type\": \"text\", \"text\": f\"Frame {timestamp}: \"})\n",
|
1332 |
+
" image.save(f\"image_{timestamp}.png\")\n",
|
1333 |
+
" messages[1][\"content\"].append({\"type\": \"image\", \"url\": f\"./image_{timestamp}.png\"})\n",
|
1334 |
+
"messages[1][\"content\"].append({\"type\": \"audio\", \"audio\": f\"audios/audio.wav\"})"
|
1335 |
+
],
|
1336 |
+
"metadata": {
|
1337 |
+
"id": "u8itVHCflZYQ"
|
1338 |
+
},
|
1339 |
+
"execution_count": null,
|
1340 |
+
"outputs": []
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"cell_type": "code",
|
1344 |
+
"source": [
|
1345 |
+
"messages"
|
1346 |
+
],
|
1347 |
+
"metadata": {
|
1348 |
+
"id": "dBX4mNxXxGoC",
|
1349 |
+
"colab": {
|
1350 |
+
"base_uri": "https://localhost:8080/"
|
1351 |
+
},
|
1352 |
+
"outputId": "b738e828-bf9b-4f13-bbb2-9f38bea50b6a"
|
1353 |
+
},
|
1354 |
+
"execution_count": null,
|
1355 |
+
"outputs": [
|
1356 |
+
{
|
1357 |
+
"output_type": "execute_result",
|
1358 |
+
"data": {
|
1359 |
+
"text/plain": [
|
1360 |
+
"[{'role': 'system',\n",
|
1361 |
+
" 'content': [{'type': 'text', 'text': 'You are a helpful assistant.'}]},\n",
|
1362 |
+
" {'role': 'user',\n",
|
1363 |
+
" 'content': [{'type': 'text',\n",
|
1364 |
+
" 'text': 'What is happening in this video? Summarize the events.'},\n",
|
1365 |
+
" {'type': 'text', 'text': 'Frame 0.0: '},\n",
|
1366 |
+
" {'type': 'image', 'url': './image_0.0.png'},\n",
|
1367 |
+
" {'type': 'text', 'text': 'Frame 1.03: '},\n",
|
1368 |
+
" {'type': 'image', 'url': './image_1.03.png'},\n",
|
1369 |
+
" {'type': 'text', 'text': 'Frame 2.09: '},\n",
|
1370 |
+
" {'type': 'image', 'url': './image_2.09.png'},\n",
|
1371 |
+
" {'type': 'text', 'text': 'Frame 3.12: '},\n",
|
1372 |
+
" {'type': 'image', 'url': './image_3.12.png'},\n",
|
1373 |
+
" {'type': 'text', 'text': 'Frame 4.17: '},\n",
|
1374 |
+
" {'type': 'image', 'url': './image_4.17.png'},\n",
|
1375 |
+
" {'type': 'text', 'text': 'Frame 5.21: '},\n",
|
1376 |
+
" {'type': 'image', 'url': './image_5.21.png'},\n",
|
1377 |
+
" {'type': 'text', 'text': 'Frame 6.26: '},\n",
|
1378 |
+
" {'type': 'image', 'url': './image_6.26.png'},\n",
|
1379 |
+
" {'type': 'audio', 'audio': 'audios/audio.wav'}]}]"
|
1380 |
+
]
|
1381 |
+
},
|
1382 |
+
"metadata": {},
|
1383 |
+
"execution_count": 59
|
1384 |
+
}
|
1385 |
+
]
|
1386 |
+
},
|
1387 |
+
{
|
1388 |
+
"cell_type": "code",
|
1389 |
+
"source": [
|
1390 |
+
"#processor.tokenizer.padding_side = \"right\"\n",
|
1391 |
+
"inputs = processor.apply_chat_template(\n",
|
1392 |
+
" messages, add_generation_prompt=True, tokenize=True,\n",
|
1393 |
+
" return_dict=True, return_tensors=\"pt\"\n",
|
1394 |
+
").to(model.device).to(model.dtype)"
|
1395 |
+
],
|
1396 |
+
"metadata": {
|
1397 |
+
"id": "e4f0qr67lcjo"
|
1398 |
+
},
|
1399 |
+
"execution_count": null,
|
1400 |
+
"outputs": []
|
1401 |
+
},
|
1402 |
+
{
|
1403 |
+
"cell_type": "code",
|
1404 |
+
"source": [
|
1405 |
+
"inputs[\"input_ids\"].shape[-1]"
|
1406 |
+
],
|
1407 |
+
"metadata": {
|
1408 |
+
"colab": {
|
1409 |
+
"base_uri": "https://localhost:8080/"
|
1410 |
+
},
|
1411 |
+
"id": "EOiBpgkI9kXi",
|
1412 |
+
"outputId": "911a6013-f76f-4fed-c402-8039d67b1e05"
|
1413 |
+
},
|
1414 |
+
"execution_count": null,
|
1415 |
+
"outputs": [
|
1416 |
+
{
|
1417 |
+
"output_type": "execute_result",
|
1418 |
+
"data": {
|
1419 |
+
"text/plain": [
|
1420 |
+
"2087"
|
1421 |
+
]
|
1422 |
+
},
|
1423 |
+
"metadata": {},
|
1424 |
+
"execution_count": 61
|
1425 |
+
}
|
1426 |
+
]
|
1427 |
+
},
|
1428 |
+
{
|
1429 |
+
"cell_type": "code",
|
1430 |
+
"source": [
|
1431 |
+
"with torch.inference_mode():\n",
|
1432 |
+
" generation = model.generate(**inputs, max_new_tokens=200, do_sample=False)"
|
1433 |
+
],
|
1434 |
+
"metadata": {
|
1435 |
+
"id": "yJ95UXBqvXPM",
|
1436 |
+
"colab": {
|
1437 |
+
"base_uri": "https://localhost:8080/"
|
1438 |
+
},
|
1439 |
+
"outputId": "721839dc-aa78-401b-e802-b858690980da"
|
1440 |
+
},
|
1441 |
+
"execution_count": null,
|
1442 |
+
"outputs": [
|
1443 |
+
{
|
1444 |
+
"output_type": "stream",
|
1445 |
+
"name": "stderr",
|
1446 |
+
"text": [
|
1447 |
+
"The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n"
|
1448 |
+
]
|
1449 |
+
}
|
1450 |
+
]
|
1451 |
+
},
|
1452 |
+
{
|
1453 |
+
"cell_type": "code",
|
1454 |
+
"source": [
|
1455 |
+
"input_len = inputs[\"input_ids\"].shape[-1]\n",
|
1456 |
+
"\n",
|
1457 |
+
"generation = generation[0][input_len:]\n",
|
1458 |
+
"\n",
|
1459 |
+
"decoded = processor.decode(generation, skip_special_tokens=True)\n",
|
1460 |
+
"print(decoded)"
|
1461 |
+
],
|
1462 |
+
"metadata": {
|
1463 |
+
"colab": {
|
1464 |
+
"base_uri": "https://localhost:8080/"
|
1465 |
+
},
|
1466 |
+
"id": "3ifVZy9c74St",
|
1467 |
+
"outputId": "f8ab51c6-e5a3-4a16-875b-d07404041396"
|
1468 |
+
},
|
1469 |
+
"execution_count": null,
|
1470 |
+
"outputs": [
|
1471 |
+
{
|
1472 |
+
"output_type": "stream",
|
1473 |
+
"name": "stdout",
|
1474 |
+
"text": [
|
1475 |
+
"Here's a summary of what's happening in the video:\n",
|
1476 |
+
"\n",
|
1477 |
+
"The video appears to be taken at a ski resort. The main subject is a person snowboarding down a snowy slope. \n",
|
1478 |
+
"\n",
|
1479 |
+
"**Initial Scene (0.0 - 1.03):** The snowboarder is initially positioned on the slope, seemingly having fallen or stopped. Other skiers and snowboarders are visible in the background, waiting at what looks like a lift station.\n",
|
1480 |
+
"\n",
|
1481 |
+
"**Mid-Video (1.03 - 6.26):** The snowboarder gets back up and continues down the slope. They navigate past other people, including skiers and snowboarders, and eventually reach a lift station. The video shows the snowboarder interacting with others at the lift, possibly waiting for the lift to start or having just gotten off. There are also other skiers and snowboarders around the lift station.\n",
|
1482 |
+
"\n",
|
1483 |
+
"**End Scene (6.26):** The snowboarder is still at the lift station,\n"
|
1484 |
+
]
|
1485 |
+
}
|
1486 |
+
]
|
1487 |
+
}
|
1488 |
+
]
|
1489 |
+
}
|
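For reference, the Gemma-3n cells above (frame sampling, message building, generation, and prompt trimming) can be folded into one reusable helper. The sketch below is a minimal example and not part of the notebook: it assumes the `model`, `processor`, and `downsample_video` objects defined in Gemma_3n_Video_Vibe_Tests.ipynb are already in scope, and the helper name `describe_video` and its arguments are illustrative.

    import torch

    def describe_video(video_path, audio_path,
                       question="What is happening in this video? Summarize the events.",
                       max_new_tokens=200):
        # Sample a handful of frames with timestamps, as in the notebook.
        frames = downsample_video(video_path)

        content = [{"type": "text", "text": question}]
        for image, timestamp in frames:
            frame_path = f"image_{timestamp}.png"
            image.save(frame_path)
            content.append({"type": "text", "text": f"Frame {timestamp}: "})
            content.append({"type": "image", "url": frame_path})
        # Append the audio track stripped from the video (e.g. with ffmpeg, as above).
        content.append({"type": "audio", "audio": audio_path})

        messages = [
            {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
            {"role": "user", "content": content},
        ]

        inputs = processor.apply_chat_template(
            messages, add_generation_prompt=True, tokenize=True,
            return_dict=True, return_tensors="pt",
        ).to(model.device).to(model.dtype)

        with torch.inference_mode():
            generation = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

        # Keep only the newly generated tokens, dropping the prompt.
        input_len = inputs["input_ids"].shape[-1]
        return processor.decode(generation[0][input_len:], skip_special_tokens=True)

    # Example: print(describe_video("IMG_8137.mp4", "audios/audio.wav"))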
Idefics_FT.ipynb
ADDED
@@ -0,0 +1,1866 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {
|
6 |
+
"id": "nc0g2NLpUSGr"
|
7 |
+
},
|
8 |
+
"source": [
|
9 |
+
"# Fine-tune IDEFICS3 on Visual Question Answering\n",
|
10 |
+
"\n",
|
11 |
+
"In this notebook we will fine-tune IDEFICS3 on VQAv2 dataset.\n",
|
12 |
+
"\n",
|
13 |
+
"The transformers PR isn't merged yet so we will install the branch that contains the transformers implementation"
|
14 |
+
]
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"cell_type": "code",
|
18 |
+
"execution_count": null,
|
19 |
+
"metadata": {
|
20 |
+
"colab": {
|
21 |
+
"base_uri": "https://localhost:8080/"
|
22 |
+
},
|
23 |
+
"id": "qttWxowEhlRt",
|
24 |
+
"outputId": "ca8d1fd2-ed88-4aef-f8a4-8f60df269b70"
|
25 |
+
},
|
26 |
+
"outputs": [],
|
27 |
+
"source": [
|
28 |
+
"!git clone https://github.com/andimarafioti/transformers.git"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "code",
|
33 |
+
"execution_count": 2,
|
34 |
+
"metadata": {
|
35 |
+
"colab": {
|
36 |
+
"base_uri": "https://localhost:8080/"
|
37 |
+
},
|
38 |
+
"id": "qttWxowEhlRt",
|
39 |
+
"outputId": "ca8d1fd2-ed88-4aef-f8a4-8f60df269b70"
|
40 |
+
},
|
41 |
+
"outputs": [
|
42 |
+
{
|
43 |
+
"name": "stdout",
|
44 |
+
"output_type": "stream",
|
45 |
+
"text": [
|
46 |
+
"/home/merve/transformers\n"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"name": "stderr",
|
51 |
+
"output_type": "stream",
|
52 |
+
"text": [
|
53 |
+
"/home/merve/anaconda3/envs/py311_env/lib/python3.11/site-packages/IPython/core/magics/osm.py:417: UserWarning: using dhist requires you to install the `pickleshare` library.\n",
|
54 |
+
" self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n"
|
55 |
+
]
|
56 |
+
}
|
57 |
+
],
|
58 |
+
"source": [
|
59 |
+
"%cd transformers"
|
60 |
+
]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"cell_type": "code",
|
64 |
+
"execution_count": 3,
|
65 |
+
"metadata": {
|
66 |
+
"colab": {
|
67 |
+
"base_uri": "https://localhost:8080/"
|
68 |
+
},
|
69 |
+
"id": "qttWxowEhlRt",
|
70 |
+
"outputId": "ca8d1fd2-ed88-4aef-f8a4-8f60df269b70"
|
71 |
+
},
|
72 |
+
"outputs": [
|
73 |
+
{
|
74 |
+
"name": "stdout",
|
75 |
+
"output_type": "stream",
|
76 |
+
"text": [
|
77 |
+
"Previous HEAD position was a72b30fe0 hot fix for merve\n",
|
78 |
+
"Switched to branch 'idefics3'\n",
|
79 |
+
"Your branch is up to date with 'origin/idefics3'.\n"
|
80 |
+
]
|
81 |
+
}
|
82 |
+
],
|
83 |
+
"source": [
|
84 |
+
"!git checkout idefics3"
|
85 |
+
]
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"cell_type": "code",
|
89 |
+
"execution_count": 10,
|
90 |
+
"metadata": {
|
91 |
+
"colab": {
|
92 |
+
"base_uri": "https://localhost:8080/"
|
93 |
+
},
|
94 |
+
"id": "qttWxowEhlRt",
|
95 |
+
"outputId": "ca8d1fd2-ed88-4aef-f8a4-8f60df269b70"
|
96 |
+
},
|
97 |
+
"outputs": [
|
98 |
+
{
|
99 |
+
"name": "stdout",
|
100 |
+
"output_type": "stream",
|
101 |
+
"text": [
|
102 |
+
"Note: switching to 'a72b30fe06bba77d9df4c72fcea48bbdc0d812a5'.\n",
|
103 |
+
"\n",
|
104 |
+
"You are in 'detached HEAD' state. You can look around, make experimental\n",
|
105 |
+
"changes and commit them, and you can discard any commits you make in this\n",
|
106 |
+
"state without impacting any branches by switching back to a branch.\n",
|
107 |
+
"\n",
|
108 |
+
"If you want to create a new branch to retain commits you create, you may\n",
|
109 |
+
"do so (now or later) by using -c with the switch command. Example:\n",
|
110 |
+
"\n",
|
111 |
+
" git switch -c <new-branch-name>\n",
|
112 |
+
"\n",
|
113 |
+
"Or undo this operation with:\n",
|
114 |
+
"\n",
|
115 |
+
" git switch -\n",
|
116 |
+
"\n",
|
117 |
+
"Turn off this advice by setting config variable advice.detachedHead to false\n",
|
118 |
+
"\n",
|
119 |
+
"HEAD is now at a72b30fe0 hot fix for merve\n"
|
120 |
+
]
|
121 |
+
}
|
122 |
+
],
|
123 |
+
"source": [
|
124 |
+
"!git checkout a72b30fe06bba77d9df4c72fcea48bbdc0d812a5"
|
125 |
+
]
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"cell_type": "code",
|
129 |
+
"execution_count": null,
|
130 |
+
"metadata": {
|
131 |
+
"colab": {
|
132 |
+
"base_uri": "https://localhost:8080/"
|
133 |
+
},
|
134 |
+
"id": "qttWxowEhlRt",
|
135 |
+
"outputId": "ca8d1fd2-ed88-4aef-f8a4-8f60df269b70"
|
136 |
+
},
|
137 |
+
"outputs": [],
|
138 |
+
"source": [
|
139 |
+
"!pip install -q \".\""
|
140 |
+
]
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"cell_type": "code",
|
144 |
+
"execution_count": 12,
|
145 |
+
"metadata": {
|
146 |
+
"colab": {
|
147 |
+
"base_uri": "https://localhost:8080/"
|
148 |
+
},
|
149 |
+
"id": "WIhA1lQ7j0kw",
|
150 |
+
"outputId": "75d422a4-e258-455d-9b48-9fba36c060c3"
|
151 |
+
},
|
152 |
+
"outputs": [],
|
153 |
+
"source": [
|
154 |
+
"!pip install -q accelerate datasets peft bitsandbytes"
|
155 |
+
]
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"cell_type": "code",
|
159 |
+
"execution_count": 13,
|
160 |
+
"metadata": {
|
161 |
+
"colab": {
|
162 |
+
"base_uri": "https://localhost:8080/"
|
163 |
+
},
|
164 |
+
"id": "WIhA1lQ7j0kw",
|
165 |
+
"outputId": "75d422a4-e258-455d-9b48-9fba36c060c3"
|
166 |
+
},
|
167 |
+
"outputs": [],
|
168 |
+
"source": [
|
169 |
+
"!pip install -q flash-attn --no-build-isolation"
|
170 |
+
]
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"cell_type": "markdown",
|
174 |
+
"metadata": {
|
175 |
+
"id": "wAeMA0heVBjT"
|
176 |
+
},
|
177 |
+
"source": [
|
178 |
+
"We will push out model to Hub so we need to authenticate ourselves."
|
179 |
+
]
|
180 |
+
},
|
181 |
+
{
|
182 |
+
"cell_type": "code",
|
183 |
+
"execution_count": null,
|
184 |
+
"metadata": {
|
185 |
+
"colab": {
|
186 |
+
"base_uri": "https://localhost:8080/",
|
187 |
+
"height": 145,
|
188 |
+
"referenced_widgets": [
|
189 |
+
"02f82c31a81c4f60a53644ac17e35ffd",
|
190 |
+
"2a08bc280647423188a7da9a87693167",
|
191 |
+
"0d0713e8a8624ac8bf79830c9553ff32",
|
192 |
+
"e01c3514a6904c79b5646688a515ca10",
|
193 |
+
"02adc6bce181453d9d18aea4fb1110be",
|
194 |
+
"224ed6bcd8c04e6fab9ef6c145630e39",
|
195 |
+
"e1063035ef1e42768dd984653d992137",
|
196 |
+
"bc34088935944cc8b02b2386239a3639",
|
197 |
+
"0d62633c4df246abb3d72f8c87d9cfb9",
|
198 |
+
"5bbba35b612247ea946b8844807dbb42",
|
199 |
+
"45242b70a62b4ebfbe4aac00c904bcc8",
|
200 |
+
"893b6342058945448a3861eb2c1c3a41",
|
201 |
+
"f8af58e353b94164b34d2ce064252dc2",
|
202 |
+
"45da78de95c4464d9eb60709ff94cc1a",
|
203 |
+
"4573bad8837142d0b1f063d568a771c6",
|
204 |
+
"cabd977f2993428d91fc75df5a15328e",
|
205 |
+
"bf91e9029f394c35874b4d35d61dd2c8",
|
206 |
+
"010cc98b3522423d86f89140fd7e1222",
|
207 |
+
"9d5a7a4379ce4e3493e7e050bfb173dc",
|
208 |
+
"4988f3cbc5164c499598c83a5b3a665b",
|
209 |
+
"b6ca0bfe87874730907ef1a4c500863a",
|
210 |
+
"8110e462f10b413e8dc59171fb84a13a",
|
211 |
+
"526fda6c78374906b7c1b93e5f973b25",
|
212 |
+
"d00821b88efa4256b29d52fe816a7c89",
|
213 |
+
"142c966c31fe4e5f99031da317e2ff54",
|
214 |
+
"d67b40ceeba8412f91ac885cb816eb01",
|
215 |
+
"a43226fe11eb4ec28c9619d7ee3a4618",
|
216 |
+
"e7878cd9245b4e56b172e40008df453b",
|
217 |
+
"5d09e1657d3e405f98d1a948c5c0c022",
|
218 |
+
"3429b2b924484bd2a45dfb6f186db6bc",
|
219 |
+
"0424a259c1d34333951b757c3c705b6f",
|
220 |
+
"89b4a59adc4942b290a9f3158b89423f"
|
221 |
+
]
|
222 |
+
},
|
223 |
+
"id": "yKd5xtSGj7cm",
|
224 |
+
"outputId": "ca0d369a-8e70-46a6-bb77-50ed3509ab39"
|
225 |
+
},
|
226 |
+
"outputs": [],
|
227 |
+
"source": [
|
228 |
+
"from huggingface_hub import notebook_login\n",
|
229 |
+
"\n",
|
230 |
+
"notebook_login()"
|
231 |
+
]
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"cell_type": "markdown",
|
235 |
+
"metadata": {
|
236 |
+
"id": "WRq8ve-LVAzU"
|
237 |
+
},
|
238 |
+
"source": [
|
239 |
+
"In this notebook we will not do full fine-tuning but use QLoRA method, which loads an adapter to the quantized version of the model, saving space. If you want to do full fine-tuning, set `USE_LORA` and `USE_QLORA` to False. If you want to do LoRA, set `USE_QLORA` to False and `USE_LORA` to True."
|
240 |
+
]
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"cell_type": "code",
|
244 |
+
"execution_count": 1,
|
245 |
+
"metadata": {},
|
246 |
+
"outputs": [],
|
247 |
+
"source": [
|
248 |
+
"import os\n",
|
249 |
+
"os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n",
|
250 |
+
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"4\" # you don't need this unless you work on a multigpu setup and need to use a specific index\n",
|
251 |
+
"# if you want to use multiple GPUs, use e.g. \"2,4\""
|
252 |
+
]
|
253 |
+
},
|
254 |
+
{
|
255 |
+
"cell_type": "code",
|
256 |
+
"execution_count": 8,
|
257 |
+
"metadata": {},
|
258 |
+
"outputs": [],
|
259 |
+
"source": [
|
260 |
+
"for param in model.model.vision_model.parameters():\n",
|
261 |
+
" param.requires_grad = False "
|
262 |
+
]
|
263 |
+
},
|
264 |
+
{
|
265 |
+
"cell_type": "markdown",
|
266 |
+
"metadata": {
|
267 |
+
"id": "QtjggkcTVnSV"
|
268 |
+
},
|
269 |
+
"source": [
|
270 |
+
"We will load VQAv2 dataset. For educational purposes we will load the validation split and split it twice."
|
271 |
+
]
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"cell_type": "code",
|
275 |
+
"execution_count": 9,
|
276 |
+
"metadata": {
|
277 |
+
"colab": {
|
278 |
+
"base_uri": "https://localhost:8080/"
|
279 |
+
},
|
280 |
+
"id": "POOqKqYRka5O",
|
281 |
+
"outputId": "87977922-2c3a-4c96-fffb-7c097b0815fa"
|
282 |
+
},
|
283 |
+
"outputs": [],
|
284 |
+
"source": [
|
285 |
+
"from datasets import load_dataset\n",
|
286 |
+
"ds = load_dataset('merve/vqav2-small', trust_remote_code=True)"
|
287 |
+
]
|
288 |
+
},
|
289 |
+
{
|
290 |
+
"cell_type": "code",
|
291 |
+
"execution_count": 10,
|
292 |
+
"metadata": {
|
293 |
+
"id": "Znf9vMo5rnSd"
|
294 |
+
},
|
295 |
+
"outputs": [],
|
296 |
+
"source": [
|
297 |
+
"split_ds = ds[\"validation\"].train_test_split(test_size=0.8)\n",
|
298 |
+
"train_ds = split_ds[\"train\"]"
|
299 |
+
]
|
300 |
+
},
|
301 |
+
{
|
302 |
+
"cell_type": "code",
|
303 |
+
"execution_count": 11,
|
304 |
+
"metadata": {},
|
305 |
+
"outputs": [
|
306 |
+
{
|
307 |
+
"data": {
|
308 |
+
"text/plain": [
|
309 |
+
"Dataset({\n",
|
310 |
+
" features: ['multiple_choice_answer', 'question', 'image'],\n",
|
311 |
+
" num_rows: 4287\n",
|
312 |
+
"})"
|
313 |
+
]
|
314 |
+
},
|
315 |
+
"execution_count": 11,
|
316 |
+
"metadata": {},
|
317 |
+
"output_type": "execute_result"
|
318 |
+
}
|
319 |
+
],
|
320 |
+
"source": [
|
321 |
+
"train_ds"
|
322 |
+
]
|
323 |
+
},
|
324 |
+
{
|
325 |
+
"cell_type": "code",
|
326 |
+
"execution_count": null,
|
327 |
+
"metadata": {},
|
328 |
+
"outputs": [],
|
329 |
+
"source": [
|
330 |
+
"import torch\n",
|
331 |
+
"from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model\n",
|
332 |
+
"from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration\n",
|
333 |
+
"\n",
|
334 |
+
"USE_LORA = False\n",
|
335 |
+
"USE_QLORA = False\n",
|
336 |
+
"model_id = \"HuggingFaceM4/Idefics3-8B-Llama3\"\n",
|
337 |
+
"\n",
|
338 |
+
"processor = AutoProcessor.from_pretrained(\n",
|
339 |
+
" model_id\n",
|
340 |
+
")\n",
|
341 |
+
"\n",
|
342 |
+
"if USE_QLORA or USE_LORA:\n",
|
343 |
+
" lora_config = LoraConfig(\n",
|
344 |
+
" r=8,\n",
|
345 |
+
" lora_alpha=8,\n",
|
346 |
+
" lora_dropout=0.1,\n",
|
347 |
+
" target_modules=['down_proj','o_proj','k_proj','q_proj','gate_proj','up_proj','v_proj'],\n",
|
348 |
+
" use_dora=False if USE_QLORA else True,\n",
|
349 |
+
" init_lora_weights=\"gaussian\"\n",
|
350 |
+
" )\n",
|
351 |
+
" lora_config.inference_mode = False\n",
|
352 |
+
" if USE_QLORA:\n",
|
353 |
+
" bnb_config = BitsAndBytesConfig(\n",
|
354 |
+
" load_in_4bit=True,\n",
|
355 |
+
" bnb_4bit_use_double_quant=True,\n",
|
356 |
+
" bnb_4bit_quant_type=\"nf4\",\n",
|
357 |
+
" bnb_4bit_compute_dtype=torch.bfloat16\n",
|
358 |
+
" )\n",
|
359 |
+
" \n",
|
360 |
+
" model = Idefics3ForConditionalGeneration.from_pretrained(\n",
|
361 |
+
" model_id,\n",
|
362 |
+
" quantization_config=bnb_config if USE_QLORA else None,\n",
|
363 |
+
" _attn_implementation=\"flash_attention_2\",\n",
|
364 |
+
" device_map=\"auto\"\n",
|
365 |
+
" )\n",
|
366 |
+
" model.add_adapter(lora_config)\n",
|
367 |
+
" model.enable_adapters()\n",
|
368 |
+
" model = prepare_model_for_kbit_training(model)\n",
|
369 |
+
" model = get_peft_model(model, lora_config)\n",
|
370 |
+
" print(model.get_nb_trainable_parameters())\n",
|
371 |
+
"else:\n",
|
372 |
+
" model = Idefics3ForConditionalGeneration.from_pretrained(\n",
|
373 |
+
" model_id,\n",
|
374 |
+
" torch_dtype=torch.bfloat16,\n",
|
375 |
+
" _attn_implementation=\"flash_attention_2\",\n",
|
376 |
+
" ).to(DEVICE)\n",
|
377 |
+
" \n",
|
378 |
+
" # if you'd like to only fine-tune LLM\n",
|
379 |
+
" for param in model.model.vision_model.parameters():\n",
|
380 |
+
" param.requires_grad = False"
|
381 |
+
]
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"cell_type": "markdown",
|
385 |
+
"metadata": {
|
386 |
+
"id": "5nwMO3n0X7Hv"
|
387 |
+
},
|
388 |
+
"source": [
|
389 |
+
"Let's write our data collating function. We will apply prompt template to have questions and answers together so model can learn to answer. Then we pass the formatted prompts and images to the processor which processes both."
|
390 |
+
]
|
391 |
+
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"id": "e0krVLZ-wNMl"
},
"outputs": [],
"source": [
"image_token_id = processor.tokenizer.additional_special_tokens_ids[\n",
"    processor.tokenizer.additional_special_tokens.index(\"<image>\")]\n",
"\n",
"def collate_fn(examples):\n",
"    texts = []\n",
"    images = []\n",
"    for example in examples:\n",
"        image = example[\"image\"]\n",
"        question = example[\"question\"]\n",
"        answer = example[\"multiple_choice_answer\"]\n",
"        messages = [\n",
"            {\n",
"                \"role\": \"user\",\n",
"                \"content\": [\n",
"                    {\"type\": \"text\", \"text\": \"Answer briefly.\"},\n",
"                    {\"type\": \"image\"},\n",
"                    {\"type\": \"text\", \"text\": question}\n",
"                ]\n",
"            },\n",
"            {\n",
"                \"role\": \"assistant\",\n",
"                \"content\": [\n",
"                    {\"type\": \"text\", \"text\": answer}\n",
"                ]\n",
"            }\n",
"        ]\n",
"        text = processor.apply_chat_template(messages, add_generation_prompt=False)\n",
"        texts.append(text.strip())\n",
"        images.append([image])\n",
"\n",
"    batch = processor(text=texts, images=images, return_tensors=\"pt\", padding=True)\n",
"    labels = batch[\"input_ids\"].clone()\n",
"    labels[labels == processor.tokenizer.pad_token_id] = -100\n",
"    labels[labels == image_token_id] = -100\n",
"    batch[\"labels\"] = labels\n",
"\n",
"    return batch\n"
]
},
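{
"cell_type": "markdown",
"metadata": {},
"source": [
"To verify the collator produces what the model expects, we can run it on a couple of training examples and inspect the batch. This is a quick sanity check; the exact keys and shapes depend on the processor version and the image sizes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity-check the collator on two training examples: inspect batch keys and tensor shapes\n",
"sample_batch = collate_fn([train_ds[0], train_ds[1]])\n",
"for key, value in sample_batch.items():\n",
"    print(key, tuple(value.shape))\n",
"# labels mirror input_ids, with pad and image positions masked out as -100\n",
"print(sample_batch[\"labels\"][0][:20])"
]
},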
{
"cell_type": "markdown",
"metadata": {
"id": "QvAs896cdwg8"
},
"source": [
"We can now define `TrainingArguments` and pass them to `Trainer`."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QNE2yWAYrAhD",
"outputId": "2bdefa08-a54b-40e0-cae8-f029ff8312e7"
},
"outputs": [],
"source": [
"from transformers import TrainingArguments, Trainer\n",
"\n",
"training_args = TrainingArguments(\n",
"    num_train_epochs=1,\n",
"    per_device_train_batch_size=2,\n",
"    gradient_accumulation_steps=8,\n",
"    warmup_steps=50,\n",
"    learning_rate=1e-4,\n",
"    weight_decay=0.01,\n",
"    logging_steps=25,\n",
"    save_strategy=\"steps\",\n",
"    save_steps=250,\n",
"    save_total_limit=1,\n",
"    optim=\"adamw_hf\", # for 8-bit, pick paged_adamw_hf\n",
"    #evaluation_strategy=\"epoch\",\n",
"    bf16=True,\n",
"    output_dir=\"./idefics3-llama-vqav2\",\n",
"    hub_model_id=\"idefics3-llama-vqav2\",\n",
"    remove_unused_columns=False,\n",
")\n"
]
},
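{
"cell_type": "markdown",
"metadata": {},
"source": [
"With `per_device_train_batch_size=2` and `gradient_accumulation_steps=8`, the effective batch size is 2 × 8 = 16 examples per optimizer step on each device. If you run out of memory, you can lower the per-device batch size and raise the accumulation steps to keep the effective batch size unchanged."
]
},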
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "oBBSDpBhreJd"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n"
]
}
],
"source": [
"trainer = Trainer(\n",
"    model=model,\n",
"    args=training_args,\n",
"    data_collator=collate_fn,\n",
"    train_dataset=train_ds,\n",
"    #eval_dataset=test_ds,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I'm running standalone scripts inside tmux, so the training logs will not appear here. I will upload my training script to this repository."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trainer.train()"
]
},
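{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before pushing, we can run a quick qualitative check on one held-out example. This is a minimal sketch that reuses the processor and model already in memory; `max_new_tokens=20` is an arbitrary choice, and you may need to adjust device or dtype handling for your setup."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick qualitative check on one held-out example before pushing to the Hub\n",
"example = split_ds[\"test\"][0]\n",
"messages = [\n",
"    {\n",
"        \"role\": \"user\",\n",
"        \"content\": [\n",
"            {\"type\": \"text\", \"text\": \"Answer briefly.\"},\n",
"            {\"type\": \"image\"},\n",
"            {\"type\": \"text\", \"text\": example[\"question\"]}\n",
"        ]\n",
"    }\n",
"]\n",
"prompt = processor.apply_chat_template(messages, add_generation_prompt=True)\n",
"inputs = processor(text=[prompt], images=[[example[\"image\"]]], return_tensors=\"pt\").to(model.device)\n",
"generated_ids = model.generate(**inputs, max_new_tokens=20)\n",
"prediction = processor.batch_decode(generated_ids[:, inputs[\"input_ids\"].shape[1]:], skip_special_tokens=True)[0]\n",
"print(\"Prediction:\", prediction)\n",
"print(\"Ground truth:\", example[\"multiple_choice_answer\"])"
]
},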
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trainer.push_to_hub()"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "A100",
"machine_shape": "hm",
"provenance": []
},
"kernelspec": {
"display_name": "py311_env",
"language": "python",
"name": "py311_env"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
LICENSE
ADDED
@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
PaliGemma_DPO.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
README.md
CHANGED
@@ -1,3 +1,26 @@

# Smol Vision 🐣
Recipes for shrinking, optimizing, customizing cutting edge vision and multimodal AI models.

Latest examples 👇🏻
- [Fine-tuning SmolVLM2 on Video Captioning](https://github.com/merveenoyan/smol-vision/blob/main/Fine_tune_SmolVLM2_on_Video.ipynb)
- [Multimodal RAG using ColPali and Qwen2-VL](https://github.com/merveenoyan/smol-vision/blob/main/ColPali_%2B_Qwen2_VL.ipynb)
- [Fine-tune ColPali for Multimodal RAG](https://github.com/merveenoyan/smol-vision/blob/main/Finetune_ColPali.ipynb)

**Note**: The script and notebook have been updated to fix a few issues related to QLoRA!

| | Notebook | Description |
|------------------------------|---------------------------------------------------------------|--------------------------------------------------------------|
| Quantization/ONNX | [Faster and Smaller Zero-shot Object Detection with Optimum](https://github.com/merveenoyan/smol-vision/blob/main/Faster_Zero_shot_Object_Detection_with_Optimum.ipynb) | Quantize the state-of-the-art zero-shot object detection model OWLv2 using Optimum ONNXRuntime tools. |
| VLM Fine-tuning | [Fine-tune PaliGemma](https://github.com/merveenoyan/smol-vision/blob/main/Fine_tune_PaliGemma.ipynb) | Fine-tune the state-of-the-art vision language backbone PaliGemma using transformers. |
| Intro to Optimum/ORT | [Optimizing DETR with 🤗 Optimum](https://github.com/merveenoyan/smol-vision/blob/main/Reduce_any_model_to_fp16_using_%F0%9F%A4%97_Optimum_DETR.ipynb) | A soft introduction to exporting vision models to ONNX and quantizing them. |
| Model Shrinking | [Knowledge Distillation for Computer Vision](https://huggingface.co/docs/transformers/en/tasks/knowledge_distillation_for_image_classification) | Knowledge distillation for image classification. |
| Quantization | [Fit in vision models using Quanto](https://github.com/merveenoyan/smol-vision/blob/main/Fit_in_vision_models_using_quanto.ipynb) | Fit vision models into smaller hardware using Quanto |
| Speed-up | [Faster foundation models with torch.compile](https://github.com/merveenoyan/smol-vision/blob/main/Faster_foundation_models_with_torch_compile.ipynb) | Improving latency for foundation models using `torch.compile` |
| VLM Fine-tuning | [Fine-tune Florence-2](https://github.com/merveenoyan/smol-vision/blob/main/Fine_tune_Florence_2.ipynb) | Fine-tune Florence-2 on the DocVQA dataset |
| VLM Fine-tuning | [QLoRA/Fine-tune IDEFICS3 or SmolVLM on VQAv2](https://github.com/merveenoyan/smol-vision/blob/main/Smol_VLM_FT.ipynb) | QLoRA/Full fine-tune IDEFICS3 or SmolVLM on the VQAv2 dataset |
| VLM Fine-tuning (Script) | [QLoRA Fine-tune IDEFICS3 on VQAv2](https://github.com/merveenoyan/smol-vision/blob/main/smolvlm.py) | QLoRA/Full fine-tune IDEFICS3 or SmolVLM on the VQAv2 dataset |
| Multimodal RAG | [Multimodal RAG using ColPali and Qwen2-VL](https://github.com/merveenoyan/smol-vision/blob/main/ColPali_%2B_Qwen2_VL.ipynb) | Learn to retrieve documents and build a RAG pipeline without heavy document processing using ColPali through Byaldi, and do the generation with Qwen2-VL |
| Multimodal Retriever Fine-tuning | [Fine-tune ColPali for Multimodal RAG](https://github.com/merveenoyan/smol-vision/blob/main/Finetune_ColPali.ipynb) | Learn to apply contrastive fine-tuning on ColPali to customize it for your own multimodal document RAG use case |
| Speed-up/Memory Optimization | Vision language model serving using TGI (SOON) | Explore speed-ups and memory improvements for vision-language model serving with text-generation inference |
| Quantization/Optimum/ORT | All levels of quantization and graph optimizations for Image Segmentation using Optimum (SOON) | End-to-end model optimization using Optimum |
Reduce_any_model_to_fp16_using_🤗_Optimum_DETR.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
ShieldGemma_2_for_Vision_LM_Safety.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
Smol_VLM_FT.ipynb
ADDED
@@ -0,0 +1,1271 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {
|
6 |
+
"id": "nc0g2NLpUSGr"
|
7 |
+
},
|
8 |
+
"source": [
|
9 |
+
"# Fine-tune SmolVLM on Visual Question Answering using Consumer GPU with QLoRA\n",
|
10 |
+
"\n",
|
11 |
+
"In this notebook we will fine-tune SmolVLM VQAv2 dataset. With this notebook you can also fine-tune Idefics3, since both models have the same model class/architecture.\n",
|
12 |
+
"\n",
|
13 |
+
"We will use some techniques in this notebook that will let you fine-tune the model on L4 with batch size of 4 only using around 16.4 GB of VRAM. We ran this notebook in that setup to test, but because we were able to afford A100 this notebook was last ran on an A100."
|
14 |
+
]
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"cell_type": "code",
|
18 |
+
"execution_count": 1,
|
19 |
+
"metadata": {
|
20 |
+
"colab": {
|
21 |
+
"base_uri": "https://localhost:8080/"
|
22 |
+
},
|
23 |
+
"id": "WIhA1lQ7j0kw",
|
24 |
+
"outputId": "d152531d-8a63-459f-d0b5-f61a47b268d2"
|
25 |
+
},
|
26 |
+
"outputs": [],
|
27 |
+
"source": [
|
28 |
+
"!pip install -q accelerate datasets peft bitsandbytes tensorboard"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "code",
|
33 |
+
"execution_count": 2,
|
34 |
+
"metadata": {
|
35 |
+
"colab": {
|
36 |
+
"base_uri": "https://localhost:8080/"
|
37 |
+
},
|
38 |
+
"id": "XyJaqZZ3uYYl",
|
39 |
+
"outputId": "eff31ad7-7a77-4391-a1ed-6a871e667be5"
|
40 |
+
},
|
41 |
+
"outputs": [],
|
42 |
+
"source": [
|
43 |
+
"!pip install -q flash-attn --no-build-isolation"
|
44 |
+
]
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"cell_type": "markdown",
|
48 |
+
"metadata": {
|
49 |
+
"id": "wAeMA0heVBjT"
|
50 |
+
},
|
51 |
+
"source": [
|
52 |
+
"We will push out model to Hub so we need to authenticate ourselves."
|
53 |
+
]
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"cell_type": "code",
|
57 |
+
"execution_count": null,
|
58 |
+
"metadata": {
|
59 |
+
"colab": {
|
60 |
+
"base_uri": "https://localhost:8080/",
|
61 |
+
"height": 17,
|
62 |
+
"referenced_widgets": [
|
63 |
+
"261a3abc28d74e4ca5af6f9df8cea3e5",
|
64 |
+
"b6284cfacfd642278a7809a154463d69",
|
65 |
+
"62c12672f59349b9ade248bee799fa5a",
|
66 |
+
"9af532f878ab491096358d3bc83250d8",
|
67 |
+
"599303d9f1204c85bca500c859dd0d87",
|
68 |
+
"00617a46b15d45648c4796a91c96ec57",
|
69 |
+
"5492da586f594365afc30ee6da1bf67c",
|
70 |
+
"86aa1abb905346bf8956754a9704f250",
|
71 |
+
"eeb2fbfd6cd54c4aa3983dc334a5377d",
|
72 |
+
"ed34441fca164b389dfea1eabdba6e4a",
|
73 |
+
"99f5b0432c1849128fa181b88925c77b",
|
74 |
+
"5e529d6d6c4e40b4863961ea63bf259a",
|
75 |
+
"ebfcd83e42ec46afb772d53ad7f35d43",
|
76 |
+
"94958be916d6439d87dcd45c59178bec",
|
77 |
+
"31a0c4a7fcff4744be56adf4125ef4e6",
|
78 |
+
"2c975a8158bf49b389d47a5c4e40c97b",
|
79 |
+
"b474bf8f464d40d8865665e4c7f0a411",
|
80 |
+
"f8a75ac273fc408f923bf9d7f7263db8",
|
81 |
+
"dd08ce6386184df38f47348e547738d8",
|
82 |
+
"3aef5e8d5d9e4bd29bd3790ad139c02c"
|
83 |
+
]
|
84 |
+
},
|
85 |
+
"id": "yKd5xtSGj7cm",
|
86 |
+
"outputId": "63b352c0-3f7d-4945-add2-52102246d7b2"
|
87 |
+
},
|
88 |
+
"outputs": [],
|
89 |
+
"source": [
|
90 |
+
"from huggingface_hub import notebook_login\n",
|
91 |
+
"\n",
|
92 |
+
"notebook_login()"
|
93 |
+
]
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"cell_type": "markdown",
|
97 |
+
"metadata": {
|
98 |
+
"id": "WRq8ve-LVAzU"
|
99 |
+
},
|
100 |
+
"source": [
|
101 |
+
"In this notebook we will not do full fine-tuning but use QLoRA method, which loads an adapter to the quantized version of the model, saving space. If you want to do full fine-tuning, set `USE_LORA` and `USE_QLORA` to False. If you want to do LoRA, set `USE_QLORA` to False and `USE_LORA` to True."
|
102 |
+
]
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"cell_type": "code",
|
106 |
+
"execution_count": 1,
|
107 |
+
"metadata": {},
|
108 |
+
"outputs": [],
|
109 |
+
"source": [
|
110 |
+
"import os\n",
|
111 |
+
"os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n",
|
112 |
+
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1, 2\""
|
113 |
+
]
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"cell_type": "code",
|
117 |
+
"execution_count": 2,
|
118 |
+
"metadata": {
|
119 |
+
"colab": {
|
120 |
+
"base_uri": "https://localhost:8080/"
|
121 |
+
},
|
122 |
+
"id": "b9CDMq0duYYn",
|
123 |
+
"outputId": "65a4a5fa-fe4d-4243-b2d7-405a8aa81c04"
|
124 |
+
},
|
125 |
+
"outputs": [
|
126 |
+
{
|
127 |
+
"data": {
|
128 |
+
"application/vnd.jupyter.widget-view+json": {
|
129 |
+
"model_id": "23d3d175e6e642c7abc2bce09b73cf4d",
|
130 |
+
"version_major": 2,
|
131 |
+
"version_minor": 0
|
132 |
+
},
|
133 |
+
"text/plain": [
|
134 |
+
"processor_config.json: 0%| | 0.00/68.0 [00:00<?, ?B/s]"
|
135 |
+
]
|
136 |
+
},
|
137 |
+
"metadata": {},
|
138 |
+
"output_type": "display_data"
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"data": {
|
142 |
+
"application/vnd.jupyter.widget-view+json": {
|
143 |
+
"model_id": "db6ca8f47f274464b135909c907c946a",
|
144 |
+
"version_major": 2,
|
145 |
+
"version_minor": 0
|
146 |
+
},
|
147 |
+
"text/plain": [
|
148 |
+
"chat_template.json: 0%| | 0.00/434 [00:00<?, ?B/s]"
|
149 |
+
]
|
150 |
+
},
|
151 |
+
"metadata": {},
|
152 |
+
"output_type": "display_data"
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"data": {
|
156 |
+
"application/vnd.jupyter.widget-view+json": {
|
157 |
+
"model_id": "d05822c6293c424fbf9df6ec0f6b532b",
|
158 |
+
"version_major": 2,
|
159 |
+
"version_minor": 0
|
160 |
+
},
|
161 |
+
"text/plain": [
|
162 |
+
"preprocessor_config.json: 0%| | 0.00/486 [00:00<?, ?B/s]"
|
163 |
+
]
|
164 |
+
},
|
165 |
+
"metadata": {},
|
166 |
+
"output_type": "display_data"
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"data": {
|
170 |
+
"application/vnd.jupyter.widget-view+json": {
|
171 |
+
"model_id": "05582fca18f443d6965776a721e30e9f",
|
172 |
+
"version_major": 2,
|
173 |
+
"version_minor": 0
|
174 |
+
},
|
175 |
+
"text/plain": [
|
176 |
+
"tokenizer_config.json: 0%| | 0.00/4.04k [00:00<?, ?B/s]"
|
177 |
+
]
|
178 |
+
},
|
179 |
+
"metadata": {},
|
180 |
+
"output_type": "display_data"
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"data": {
|
184 |
+
"application/vnd.jupyter.widget-view+json": {
|
185 |
+
"model_id": "3d8974fd1ba9415c8070c1eab8ad75cb",
|
186 |
+
"version_major": 2,
|
187 |
+
"version_minor": 0
|
188 |
+
},
|
189 |
+
"text/plain": [
|
190 |
+
"vocab.json: 0%| | 0.00/801k [00:00<?, ?B/s]"
|
191 |
+
]
|
192 |
+
},
|
193 |
+
"metadata": {},
|
194 |
+
"output_type": "display_data"
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"data": {
|
198 |
+
"application/vnd.jupyter.widget-view+json": {
|
199 |
+
"model_id": "648257c1b1c24e25a26355bddf75aa41",
|
200 |
+
"version_major": 2,
|
201 |
+
"version_minor": 0
|
202 |
+
},
|
203 |
+
"text/plain": [
|
204 |
+
"merges.txt: 0%| | 0.00/466k [00:00<?, ?B/s]"
|
205 |
+
]
|
206 |
+
},
|
207 |
+
"metadata": {},
|
208 |
+
"output_type": "display_data"
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"data": {
|
212 |
+
"application/vnd.jupyter.widget-view+json": {
|
213 |
+
"model_id": "afa9a31c6b7f45e082ae07dea4a2600e",
|
214 |
+
"version_major": 2,
|
215 |
+
"version_minor": 0
|
216 |
+
},
|
217 |
+
"text/plain": [
|
218 |
+
"tokenizer.json: 0%| | 0.00/3.52M [00:00<?, ?B/s]"
|
219 |
+
]
|
220 |
+
},
|
221 |
+
"metadata": {},
|
222 |
+
"output_type": "display_data"
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"data": {
|
226 |
+
"application/vnd.jupyter.widget-view+json": {
|
227 |
+
"model_id": "92232af543a4446cac53e4fcf3f4b6e1",
|
228 |
+
"version_major": 2,
|
229 |
+
"version_minor": 0
|
230 |
+
},
|
231 |
+
"text/plain": [
|
232 |
+
"added_tokens.json: 0%| | 0.00/92.0 [00:00<?, ?B/s]"
|
233 |
+
]
|
234 |
+
},
|
235 |
+
"metadata": {},
|
236 |
+
"output_type": "display_data"
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"data": {
|
240 |
+
"application/vnd.jupyter.widget-view+json": {
|
241 |
+
"model_id": "a5f06e59634f4edf9f3d9409846a2b31",
|
242 |
+
"version_major": 2,
|
243 |
+
"version_minor": 0
|
244 |
+
},
|
245 |
+
"text/plain": [
|
246 |
+
"special_tokens_map.json: 0%| | 0.00/1.07k [00:00<?, ?B/s]"
|
247 |
+
]
|
248 |
+
},
|
249 |
+
"metadata": {},
|
250 |
+
"output_type": "display_data"
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"name": "stderr",
|
254 |
+
"output_type": "stream",
|
255 |
+
"text": [
|
256 |
+
"Some kwargs in processor config are unused and will not have any effect: image_seq_len. \n"
|
257 |
+
]
|
258 |
+
},
|
259 |
+
{
|
260 |
+
"data": {
|
261 |
+
"application/vnd.jupyter.widget-view+json": {
|
262 |
+
"model_id": "7ddfa8718bc24882ba2b50a899656107",
|
263 |
+
"version_major": 2,
|
264 |
+
"version_minor": 0
|
265 |
+
},
|
266 |
+
"text/plain": [
|
267 |
+
"config.json: 0%| | 0.00/7.08k [00:00<?, ?B/s]"
|
268 |
+
]
|
269 |
+
},
|
270 |
+
"metadata": {},
|
271 |
+
"output_type": "display_data"
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"data": {
|
275 |
+
"application/vnd.jupyter.widget-view+json": {
|
276 |
+
"model_id": "5983728a1c1e43edb4d16bee6ad40171",
|
277 |
+
"version_major": 2,
|
278 |
+
"version_minor": 0
|
279 |
+
},
|
280 |
+
"text/plain": [
|
281 |
+
"model.safetensors: 0%| | 0.00/4.49G [00:00<?, ?B/s]"
|
282 |
+
]
|
283 |
+
},
|
284 |
+
"metadata": {},
|
285 |
+
"output_type": "display_data"
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"data": {
|
289 |
+
"application/vnd.jupyter.widget-view+json": {
|
290 |
+
"model_id": "dff574197f1f4466abb0eb46d36b8378",
|
291 |
+
"version_major": 2,
|
292 |
+
"version_minor": 0
|
293 |
+
},
|
294 |
+
"text/plain": [
|
295 |
+
"generation_config.json: 0%| | 0.00/132 [00:00<?, ?B/s]"
|
296 |
+
]
|
297 |
+
},
|
298 |
+
"metadata": {},
|
299 |
+
"output_type": "display_data"
|
300 |
+
},
|
301 |
+
{
|
302 |
+
"name": "stdout",
|
303 |
+
"output_type": "stream",
|
304 |
+
"text": [
|
305 |
+
"(10536960, 2256809840)\n"
|
306 |
+
]
|
307 |
+
}
|
308 |
+
],
|
309 |
+
"source": [
|
310 |
+
"import torch\n",
|
311 |
+
"from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model\n",
|
312 |
+
"from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration\n",
|
313 |
+
"\n",
|
314 |
+
"USE_LORA = False\n",
|
315 |
+
"USE_QLORA = True\n",
|
316 |
+
"SMOL = True\n",
|
317 |
+
"\n",
|
318 |
+
"model_id = \"HuggingFaceTB/SmolVLM-Base\" if SMOL else \"HuggingFaceM4/Idefics3-8B-Llama3\"\n",
|
319 |
+
"\n",
|
320 |
+
"processor = AutoProcessor.from_pretrained(\n",
|
321 |
+
" model_id\n",
|
322 |
+
")\n",
|
323 |
+
"\n",
|
324 |
+
"if USE_QLORA or USE_LORA:\n",
|
325 |
+
" lora_config = LoraConfig(\n",
|
326 |
+
" r=8,\n",
|
327 |
+
" lora_alpha=8,\n",
|
328 |
+
" lora_dropout=0.1,\n",
|
329 |
+
" target_modules=['down_proj','o_proj','k_proj','q_proj','gate_proj','up_proj','v_proj'],\n",
|
330 |
+
" use_dora=False if USE_QLORA else True,\n",
|
331 |
+
" init_lora_weights=\"gaussian\"\n",
|
332 |
+
" )\n",
|
333 |
+
" lora_config.inference_mode = False\n",
|
334 |
+
" if USE_QLORA:\n",
|
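" # QLoRA setup below: load the base model in 4-bit NF4 with double quantization and compute in bfloat16\n",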
335 |
+
" bnb_config = BitsAndBytesConfig(\n",
|
336 |
+
" load_in_4bit=True,\n",
|
337 |
+
" bnb_4bit_use_double_quant=True,\n",
|
338 |
+
" bnb_4bit_quant_type=\"nf4\",\n",
|
339 |
+
" bnb_4bit_compute_dtype=torch.bfloat16\n",
|
340 |
+
" )\n",
|
341 |
+
"\n",
|
342 |
+
" model = Idefics3ForConditionalGeneration.from_pretrained(\n",
|
343 |
+
" model_id,\n",
|
344 |
+
" quantization_config=bnb_config if USE_QLORA else None,\n",
|
345 |
+
" _attn_implementation=\"flash_attention_2\",\n",
|
346 |
+
" device_map=\"auto\"\n",
|
347 |
+
" )\n",
|
348 |
+
" model.add_adapter(lora_config)\n",
|
349 |
+
" model.enable_adapters()\n",
|
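" # prepare_model_for_kbit_training prepares the quantized model for training (e.g. fp32 norm layers, gradient checkpointing)\n",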
350 |
+
" model = prepare_model_for_kbit_training(model)\n",
|
351 |
+
" model = get_peft_model(model, lora_config)\n",
|
352 |
+
" print(model.get_nb_trainable_parameters())\n",
|
353 |
+
"else:\n",
|
354 |
+
" model = Idefics3ForConditionalGeneration.from_pretrained(\n",
|
355 |
+
" model_id,\n",
|
356 |
+
" torch_dtype=torch.bfloat16,\n",
|
357 |
+
" _attn_implementation=\"flash_attention_2\",\n",
|
358 |
+
" ).to(DEVICE)\n",
|
359 |
+
"\n",
|
360 |
+
" # if you'd like to only fine-tune LLM\n",
|
361 |
+
" for param in model.model.vision_model.parameters():\n",
|
362 |
+
" param.requires_grad = False"
|
363 |
+
]
|
364 |
+
},
|
365 |
+
{
|
366 |
+
"cell_type": "markdown",
|
367 |
+
"metadata": {
|
368 |
+
"id": "WIVhpp0EyZO2"
|
369 |
+
},
|
370 |
+
"source": [
|
371 |
+
"The model as is is holding 2.7 GB of GPU RAM 💗"
|
372 |
+
]
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"cell_type": "markdown",
|
376 |
+
"metadata": {
|
377 |
+
"id": "LMTtg3dl3NX2"
|
378 |
+
},
|
379 |
+
"source": [
|
380 |
+
"## Loading the dataset and Preprocessing"
|
381 |
+
]
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"cell_type": "markdown",
|
385 |
+
"metadata": {
|
386 |
+
"id": "pWHMWTSZ3Pyr"
|
387 |
+
},
|
388 |
+
"source": [
|
389 |
+
"We will load a small portion of the VQAv2 dataset. We are loading a small portion of the model for education purposes."
|
390 |
+
]
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"cell_type": "code",
|
394 |
+
"execution_count": 3,
|
395 |
+
"metadata": {
|
396 |
+
"id": "POOqKqYRka5O"
|
397 |
+
},
|
398 |
+
"outputs": [],
|
399 |
+
"source": [
|
400 |
+
"from datasets import load_dataset\n",
|
401 |
+
"ds = load_dataset('merve/vqav2-small', trust_remote_code=True)"
|
402 |
+
]
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"cell_type": "code",
|
406 |
+
"execution_count": 6,
|
407 |
+
"metadata": {
|
408 |
+
"id": "Znf9vMo5rnSd"
|
409 |
+
},
|
410 |
+
"outputs": [],
|
411 |
+
"source": [
|
412 |
+
"split_ds = ds[\"validation\"].train_test_split(test_size=0.5)\n",
|
413 |
+
"train_ds = split_ds[\"train\"]"
|
414 |
+
]
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"cell_type": "code",
|
418 |
+
"execution_count": 7,
|
419 |
+
"metadata": {
|
420 |
+
"colab": {
|
421 |
+
"base_uri": "https://localhost:8080/"
|
422 |
+
},
|
423 |
+
"id": "FIDioFlRuYYn",
|
424 |
+
"outputId": "79b697a7-d245-4fdc-b0e8-d9ffa8627953"
|
425 |
+
},
|
426 |
+
"outputs": [
|
427 |
+
{
|
428 |
+
"data": {
|
429 |
+
"text/plain": [
|
430 |
+
"Dataset({\n",
|
431 |
+
" features: ['multiple_choice_answer', 'question', 'image'],\n",
|
432 |
+
" num_rows: 10717\n",
|
433 |
+
"})"
|
434 |
+
]
|
435 |
+
},
|
436 |
+
"execution_count": 7,
|
437 |
+
"metadata": {},
|
438 |
+
"output_type": "execute_result"
|
439 |
+
}
|
440 |
+
],
|
441 |
+
"source": [
|
442 |
+
"train_ds"
|
443 |
+
]
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"cell_type": "markdown",
|
447 |
+
"metadata": {
|
448 |
+
"id": "5nwMO3n0X7Hv"
|
449 |
+
},
|
450 |
+
"source": [
|
451 |
+
"Let's write our data collating function. We will apply prompt template to have questions and answers together so model can learn to answer. Then we pass the formatted prompts and images to the processor which processes both."
|
452 |
+
]
|
453 |
+
},
|
454 |
+
{
|
455 |
+
"cell_type": "code",
|
456 |
+
"execution_count": 8,
|
457 |
+
"metadata": {
|
458 |
+
"id": "e0krVLZ-wNMl"
|
459 |
+
},
|
460 |
+
"outputs": [],
|
461 |
+
"source": [
|
462 |
+
"image_token_id = processor.tokenizer.additional_special_tokens_ids[\n",
|
463 |
+
" processor.tokenizer.additional_special_tokens.index(\"<image>\")]\n",
|
464 |
+
"\n",
|
465 |
+
"def collate_fn(examples):\n",
|
466 |
+
" texts = []\n",
|
467 |
+
" images = []\n",
|
468 |
+
" for example in examples:\n",
|
469 |
+
" image = example[\"image\"]\n",
|
470 |
+
" if image.mode != 'RGB':\n",
|
471 |
+
" image = image.convert('RGB')\n",
|
472 |
+
" question = example[\"question\"]\n",
|
473 |
+
" answer = example[\"multiple_choice_answer\"]\n",
|
474 |
+
" messages = [\n",
|
475 |
+
" {\n",
|
476 |
+
" \"role\": \"user\",\n",
|
477 |
+
" \"content\": [\n",
|
478 |
+
" {\"type\": \"text\", \"text\": \"Answer briefly.\"},\n",
|
479 |
+
" {\"type\": \"image\"},\n",
|
480 |
+
" {\"type\": \"text\", \"text\": question}\n",
|
481 |
+
" ]\n",
|
482 |
+
" },\n",
|
483 |
+
" {\n",
|
484 |
+
" \"role\": \"assistant\",\n",
|
485 |
+
" \"content\": [\n",
|
486 |
+
" {\"type\": \"text\", \"text\": answer}\n",
|
487 |
+
" ]\n",
|
488 |
+
" }\n",
|
489 |
+
" ]\n",
|
490 |
+
" text = processor.apply_chat_template(messages, add_generation_prompt=False)\n",
|
491 |
+
" texts.append(text.strip())\n",
|
492 |
+
" images.append([image])\n",
|
493 |
+
"\n",
|
494 |
+
" batch = processor(text=texts, images=images, return_tensors=\"pt\", padding=True)\n",
|
495 |
+
" labels = batch[\"input_ids\"].clone()\n",
|
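" # mask padding and image tokens with -100 so they are ignored by the loss\n",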
496 |
+
" labels[labels == processor.tokenizer.pad_token_id] = -100\n",
|
497 |
+
" labels[labels == image_token_id] = -100\n",
|
498 |
+
" batch[\"labels\"] = labels\n",
|
499 |
+
"\n",
|
500 |
+
" return batch"
|
501 |
+
]
|
502 |
+
},
|
503 |
+
{
|
504 |
+
"cell_type": "markdown",
|
505 |
+
"metadata": {
|
506 |
+
"id": "kEYDjWpE3LD5"
|
507 |
+
},
|
508 |
+
"source": [
|
509 |
+
"## Training"
|
510 |
+
]
|
511 |
+
},
|
512 |
+
{
|
513 |
+
"cell_type": "markdown",
|
514 |
+
"metadata": {
|
515 |
+
"id": "QvAs896cdwg8"
|
516 |
+
},
|
517 |
+
"source": [
|
518 |
+
"We can now initialize `Trainer` and initialize `TrainingArguments` to pass to `Trainer`.\n",
|
519 |
+
"\n",
|
520 |
+
"Some notes:\n",
|
521 |
+
"- If you use 8-bit QLoRA with the below setup it uses around 16.4 GB VRAM (beautiful, fits comfortably inside L4, Colab free tier)\n",
|
522 |
+
"- We use gradient accumulation to simulate a larger batch size.\n",
|
523 |
+
"- We also save up on memory from intermediate activations by using gradient checkpointing.\n",
|
524 |
+
"\n",
|
525 |
+
"**Disclaimer:** \n",
|
526 |
+
"The techniques here aren't free lunch. The latter two will add additional compute to the training, thus slow down a bit (for reference on two A100s with bsz of 16, we were able to train for 2 hrs 43 mins with the gradient accumulation steps of 4, disabling it reduced it with 2 hr 35 mins). \n",
|
527 |
+
"If you want to speed-up, you might play around, reduce to 4-bit precision and have a higher batch size. Note that 4-bit might result in model learning less."
|
528 |
+
]
|
529 |
+
},
|
530 |
+
{
|
531 |
+
"cell_type": "code",
|
532 |
+
"execution_count": 15,
|
533 |
+
"metadata": {
|
534 |
+
"id": "QNE2yWAYrAhD"
|
535 |
+
},
|
536 |
+
"outputs": [],
|
537 |
+
"source": [
|
538 |
+
"from transformers import TrainingArguments, Trainer\n",
|
539 |
+
"\n",
|
540 |
+
"model_name = model_id.split(\"/\")[-1]\n",
|
541 |
+
"\n",
|
542 |
+
"training_args = TrainingArguments(\n",
|
543 |
+
" num_train_epochs=1,\n",
|
544 |
+
" per_device_train_batch_size=16,\n",
|
545 |
+
" gradient_accumulation_steps=4,\n",
|
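" # effective batch size per device = 16 x 4 (gradient accumulation) = 64\n",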
546 |
+
" warmup_steps=50,\n",
|
547 |
+
" learning_rate=1e-4,\n",
|
548 |
+
" weight_decay=0.01,\n",
|
549 |
+
" logging_steps=25,\n",
|
550 |
+
" save_strategy=\"steps\",\n",
|
551 |
+
" save_steps=250,\n",
|
552 |
+
" save_total_limit=1,\n",
|
553 |
+
" optim=\"paged_adamw_8bit\", # for 8-bit, keep this, else adamw_hf\n",
|
554 |
+
" bf16=True, # underlying precision for 8bit\n",
|
555 |
+
" output_dir=f\"./{model_name}-vqav2\",\n",
|
556 |
+
" hub_model_id=f\"{model_name}-vqav2\",\n",
|
557 |
+
" report_to=\"tensorboard\",\n",
|
558 |
+
" remove_unused_columns=False,\n",
|
559 |
+
" gradient_checkpointing=True\n",
|
560 |
+
")\n"
|
561 |
+
]
|
562 |
+
},
|
563 |
+
{
|
564 |
+
"cell_type": "code",
|
565 |
+
"execution_count": 16,
|
566 |
+
"metadata": {
|
567 |
+
"id": "oBBSDpBhreJd"
|
568 |
+
},
|
569 |
+
"outputs": [
|
570 |
+
{
|
571 |
+
"name": "stderr",
|
572 |
+
"output_type": "stream",
|
573 |
+
"text": [
|
574 |
+
"Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n"
|
575 |
+
]
|
576 |
+
}
|
577 |
+
],
|
578 |
+
"source": [
|
579 |
+
"trainer = Trainer(\n",
|
580 |
+
" model=model,\n",
|
581 |
+
" args=training_args,\n",
|
582 |
+
" data_collator=collate_fn,\n",
|
583 |
+
" train_dataset=train_ds,\n",
|
584 |
+
")"
|
585 |
+
]
|
586 |
+
},
|
587 |
+
{
|
588 |
+
"cell_type": "code",
|
589 |
+
"execution_count": null,
|
590 |
+
"metadata": {
|
591 |
+
"id": "_QOCpw_-uYYo"
|
592 |
+
},
|
593 |
+
"outputs": [
|
594 |
+
{
|
595 |
+
"data": {
|
596 |
+
"text/html": [
|
597 |
+
"\n",
|
598 |
+
" <div>\n",
|
599 |
+
" \n",
|
600 |
+
" <progress value='9' max='670' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
601 |
+
" [ 9/670 01:41 < 2:39:41, 0.07 it/s, Epoch 0.01/1]\n",
|
602 |
+
" </div>\n",
|
603 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
604 |
+
" <thead>\n",
|
605 |
+
" <tr style=\"text-align: left;\">\n",
|
606 |
+
" <th>Step</th>\n",
|
607 |
+
" <th>Training Loss</th>\n",
|
608 |
+
" </tr>\n",
|
609 |
+
" </thead>\n",
|
610 |
+
" <tbody>\n",
|
611 |
+
" </tbody>\n",
|
612 |
+
"</table><p>"
|
613 |
+
],
|
614 |
+
"text/plain": [
|
615 |
+
"<IPython.core.display.HTML object>"
|
616 |
+
]
|
617 |
+
},
|
618 |
+
"metadata": {},
|
619 |
+
"output_type": "display_data"
|
620 |
+
}
|
621 |
+
],
|
622 |
+
"source": [
|
623 |
+
"trainer.train()"
|
624 |
+
]
|
625 |
+
},
|
626 |
+
{
|
627 |
+
"cell_type": "code",
|
628 |
+
"execution_count": null,
|
629 |
+
"metadata": {
|
630 |
+
"id": "0hN0QD9_uYYo"
|
631 |
+
},
|
632 |
+
"outputs": [],
|
633 |
+
"source": [
|
634 |
+
"trainer.push_to_hub()"
|
635 |
+
]
|
636 |
+
}
|
637 |
+
],
|
638 |
+
"metadata": {
|
639 |
+
"accelerator": "GPU",
|
640 |
+
"colab": {
|
641 |
+
"gpuType": "A100",
|
642 |
+
"provenance": []
|
643 |
+
},
|
644 |
+
"kernelspec": {
|
645 |
+
"display_name": "Python 3 (ipykernel)",
|
646 |
+
"language": "python",
|
647 |
+
"name": "python3"
|
648 |
+
},
|
649 |
+
"language_info": {
|
650 |
+
"codemirror_mode": {
|
651 |
+
"name": "ipython",
|
652 |
+
"version": 3
|
653 |
+
},
|
654 |
+
"file_extension": ".py",
|
655 |
+
"mimetype": "text/x-python",
|
656 |
+
"name": "python",
|
657 |
+
"nbconvert_exporter": "python",
|
658 |
+
"pygments_lexer": "ipython3",
|
659 |
+
"version": "3.12.4"
|
660 |
+
},
|
661 |
+
"widgets": {
|
662 |
+
"application/vnd.jupyter.widget-state+json": {
|
663 |
+
"00617a46b15d45648c4796a91c96ec57": {
|
664 |
+
"model_module": "@jupyter-widgets/controls",
|
665 |
+
"model_module_version": "1.5.0",
|
666 |
+
"model_name": "HTMLModel",
|
667 |
+
"state": {
|
668 |
+
"_dom_classes": [],
|
669 |
+
"_model_module": "@jupyter-widgets/controls",
|
670 |
+
"_model_module_version": "1.5.0",
|
671 |
+
"_model_name": "HTMLModel",
|
672 |
+
"_view_count": null,
|
673 |
+
"_view_module": "@jupyter-widgets/controls",
|
674 |
+
"_view_module_version": "1.5.0",
|
675 |
+
"_view_name": "HTMLView",
|
676 |
+
"description": "",
|
677 |
+
"description_tooltip": null,
|
678 |
+
"layout": "IPY_MODEL_2c975a8158bf49b389d47a5c4e40c97b",
|
679 |
+
"placeholder": "",
|
680 |
+
"style": "IPY_MODEL_b474bf8f464d40d8865665e4c7f0a411",
|
681 |
+
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
|
682 |
+
}
|
683 |
+
},
|
684 |
+
"261a3abc28d74e4ca5af6f9df8cea3e5": {
|
685 |
+
"model_module": "@jupyter-widgets/controls",
|
686 |
+
"model_module_version": "1.5.0",
|
687 |
+
"model_name": "VBoxModel",
|
688 |
+
"state": {
|
689 |
+
"_dom_classes": [],
|
690 |
+
"_model_module": "@jupyter-widgets/controls",
|
691 |
+
"_model_module_version": "1.5.0",
|
692 |
+
"_model_name": "VBoxModel",
|
693 |
+
"_view_count": null,
|
694 |
+
"_view_module": "@jupyter-widgets/controls",
|
695 |
+
"_view_module_version": "1.5.0",
|
696 |
+
"_view_name": "VBoxView",
|
697 |
+
"box_style": "",
|
698 |
+
"children": [],
|
699 |
+
"layout": "IPY_MODEL_5492da586f594365afc30ee6da1bf67c"
|
700 |
+
}
|
701 |
+
},
|
702 |
+
"2c975a8158bf49b389d47a5c4e40c97b": {
|
703 |
+
"model_module": "@jupyter-widgets/base",
|
704 |
+
"model_module_version": "1.2.0",
|
705 |
+
"model_name": "LayoutModel",
|
706 |
+
"state": {
|
707 |
+
"_model_module": "@jupyter-widgets/base",
|
708 |
+
"_model_module_version": "1.2.0",
|
709 |
+
"_model_name": "LayoutModel",
|
710 |
+
"_view_count": null,
|
711 |
+
"_view_module": "@jupyter-widgets/base",
|
712 |
+
"_view_module_version": "1.2.0",
|
713 |
+
"_view_name": "LayoutView",
|
714 |
+
"align_content": null,
|
715 |
+
"align_items": null,
|
716 |
+
"align_self": null,
|
717 |
+
"border": null,
|
718 |
+
"bottom": null,
|
719 |
+
"display": null,
|
720 |
+
"flex": null,
|
721 |
+
"flex_flow": null,
|
722 |
+
"grid_area": null,
|
723 |
+
"grid_auto_columns": null,
|
724 |
+
"grid_auto_flow": null,
|
725 |
+
"grid_auto_rows": null,
|
726 |
+
"grid_column": null,
|
727 |
+
"grid_gap": null,
|
728 |
+
"grid_row": null,
|
729 |
+
"grid_template_areas": null,
|
730 |
+
"grid_template_columns": null,
|
731 |
+
"grid_template_rows": null,
|
732 |
+
"height": null,
|
733 |
+
"justify_content": null,
|
734 |
+
"justify_items": null,
|
735 |
+
"left": null,
|
736 |
+
"margin": null,
|
737 |
+
"max_height": null,
|
738 |
+
"max_width": null,
|
739 |
+
"min_height": null,
|
740 |
+
"min_width": null,
|
741 |
+
"object_fit": null,
|
742 |
+
"object_position": null,
|
743 |
+
"order": null,
|
744 |
+
"overflow": null,
|
745 |
+
"overflow_x": null,
|
746 |
+
"overflow_y": null,
|
747 |
+
"padding": null,
|
748 |
+
"right": null,
|
749 |
+
"top": null,
|
750 |
+
"visibility": null,
|
751 |
+
"width": null
|
752 |
+
}
|
753 |
+
},
|
754 |
+
"31a0c4a7fcff4744be56adf4125ef4e6": {
|
755 |
+
"model_module": "@jupyter-widgets/controls",
|
756 |
+
"model_module_version": "1.5.0",
|
757 |
+
"model_name": "ButtonStyleModel",
|
758 |
+
"state": {
|
759 |
+
"_model_module": "@jupyter-widgets/controls",
|
760 |
+
"_model_module_version": "1.5.0",
|
761 |
+
"_model_name": "ButtonStyleModel",
|
762 |
+
"_view_count": null,
|
763 |
+
"_view_module": "@jupyter-widgets/base",
|
764 |
+
"_view_module_version": "1.2.0",
|
765 |
+
"_view_name": "StyleView",
|
766 |
+
"button_color": null,
|
767 |
+
"font_weight": ""
|
768 |
+
}
|
769 |
+
},
|
770 |
+
"3aef5e8d5d9e4bd29bd3790ad139c02c": {
|
771 |
+
"model_module": "@jupyter-widgets/controls",
|
772 |
+
"model_module_version": "1.5.0",
|
773 |
+
"model_name": "DescriptionStyleModel",
|
774 |
+
"state": {
|
775 |
+
"_model_module": "@jupyter-widgets/controls",
|
776 |
+
"_model_module_version": "1.5.0",
|
777 |
+
"_model_name": "DescriptionStyleModel",
|
778 |
+
"_view_count": null,
|
779 |
+
"_view_module": "@jupyter-widgets/base",
|
780 |
+
"_view_module_version": "1.2.0",
|
781 |
+
"_view_name": "StyleView",
|
782 |
+
"description_width": ""
|
783 |
+
}
|
784 |
+
},
|
785 |
+
"5492da586f594365afc30ee6da1bf67c": {
|
786 |
+
"model_module": "@jupyter-widgets/base",
|
787 |
+
"model_module_version": "1.2.0",
|
788 |
+
"model_name": "LayoutModel",
|
789 |
+
"state": {
|
790 |
+
"_model_module": "@jupyter-widgets/base",
|
791 |
+
"_model_module_version": "1.2.0",
|
792 |
+
"_model_name": "LayoutModel",
|
793 |
+
"_view_count": null,
|
794 |
+
"_view_module": "@jupyter-widgets/base",
|
795 |
+
"_view_module_version": "1.2.0",
|
796 |
+
"_view_name": "LayoutView",
|
797 |
+
"align_content": null,
|
798 |
+
"align_items": "center",
|
799 |
+
"align_self": null,
|
800 |
+
"border": null,
|
801 |
+
"bottom": null,
|
802 |
+
"display": "flex",
|
803 |
+
"flex": null,
|
804 |
+
"flex_flow": "column",
|
805 |
+
"grid_area": null,
|
806 |
+
"grid_auto_columns": null,
|
807 |
+
"grid_auto_flow": null,
|
808 |
+
"grid_auto_rows": null,
|
809 |
+
"grid_column": null,
|
810 |
+
"grid_gap": null,
|
811 |
+
"grid_row": null,
|
812 |
+
"grid_template_areas": null,
|
813 |
+
"grid_template_columns": null,
|
814 |
+
"grid_template_rows": null,
|
815 |
+
"height": null,
|
816 |
+
"justify_content": null,
|
817 |
+
"justify_items": null,
|
818 |
+
"left": null,
|
819 |
+
"margin": null,
|
820 |
+
"max_height": null,
|
821 |
+
"max_width": null,
|
822 |
+
"min_height": null,
|
823 |
+
"min_width": null,
|
824 |
+
"object_fit": null,
|
825 |
+
"object_position": null,
|
826 |
+
"order": null,
|
827 |
+
"overflow": null,
|
828 |
+
"overflow_x": null,
|
829 |
+
"overflow_y": null,
|
830 |
+
"padding": null,
|
831 |
+
"right": null,
|
832 |
+
"top": null,
|
833 |
+
"visibility": null,
|
834 |
+
"width": "50%"
|
835 |
+
}
|
836 |
+
},
|
837 |
+
"599303d9f1204c85bca500c859dd0d87": {
|
838 |
+
"model_module": "@jupyter-widgets/controls",
|
839 |
+
"model_module_version": "1.5.0",
|
840 |
+
"model_name": "ButtonModel",
|
841 |
+
"state": {
|
842 |
+
"_dom_classes": [],
|
843 |
+
"_model_module": "@jupyter-widgets/controls",
|
844 |
+
"_model_module_version": "1.5.0",
|
845 |
+
"_model_name": "ButtonModel",
|
846 |
+
"_view_count": null,
|
847 |
+
"_view_module": "@jupyter-widgets/controls",
|
848 |
+
"_view_module_version": "1.5.0",
|
849 |
+
"_view_name": "ButtonView",
|
850 |
+
"button_style": "",
|
851 |
+
"description": "Login",
|
852 |
+
"disabled": false,
|
853 |
+
"icon": "",
|
854 |
+
"layout": "IPY_MODEL_94958be916d6439d87dcd45c59178bec",
|
855 |
+
"style": "IPY_MODEL_31a0c4a7fcff4744be56adf4125ef4e6",
|
856 |
+
"tooltip": ""
|
857 |
+
}
|
858 |
+
},
|
859 |
+
"5e529d6d6c4e40b4863961ea63bf259a": {
|
860 |
+
"model_module": "@jupyter-widgets/base",
|
861 |
+
"model_module_version": "1.2.0",
|
862 |
+
"model_name": "LayoutModel",
|
863 |
+
"state": {
|
864 |
+
"_model_module": "@jupyter-widgets/base",
|
865 |
+
"_model_module_version": "1.2.0",
|
866 |
+
"_model_name": "LayoutModel",
|
867 |
+
"_view_count": null,
|
868 |
+
"_view_module": "@jupyter-widgets/base",
|
869 |
+
"_view_module_version": "1.2.0",
|
870 |
+
"_view_name": "LayoutView",
|
871 |
+
"align_content": null,
|
872 |
+
"align_items": null,
|
873 |
+
"align_self": null,
|
874 |
+
"border": null,
|
875 |
+
"bottom": null,
|
876 |
+
"display": null,
|
877 |
+
"flex": null,
|
878 |
+
"flex_flow": null,
|
879 |
+
"grid_area": null,
|
880 |
+
"grid_auto_columns": null,
|
881 |
+
"grid_auto_flow": null,
|
882 |
+
"grid_auto_rows": null,
|
883 |
+
"grid_column": null,
|
884 |
+
"grid_gap": null,
|
885 |
+
"grid_row": null,
|
886 |
+
"grid_template_areas": null,
|
887 |
+
"grid_template_columns": null,
|
888 |
+
"grid_template_rows": null,
|
889 |
+
"height": null,
|
890 |
+
"justify_content": null,
|
891 |
+
"justify_items": null,
|
892 |
+
"left": null,
|
893 |
+
"margin": null,
|
894 |
+
"max_height": null,
|
895 |
+
"max_width": null,
|
896 |
+
"min_height": null,
|
897 |
+
"min_width": null,
|
898 |
+
"object_fit": null,
|
899 |
+
"object_position": null,
|
900 |
+
"order": null,
|
901 |
+
"overflow": null,
|
902 |
+
"overflow_x": null,
|
903 |
+
"overflow_y": null,
|
904 |
+
"padding": null,
|
905 |
+
"right": null,
|
906 |
+
"top": null,
|
907 |
+
"visibility": null,
|
908 |
+
"width": null
|
909 |
+
}
|
910 |
+
},
|
911 |
+
"62c12672f59349b9ade248bee799fa5a": {
|
912 |
+
"model_module": "@jupyter-widgets/controls",
|
913 |
+
"model_module_version": "1.5.0",
|
914 |
+
"model_name": "PasswordModel",
|
915 |
+
"state": {
|
916 |
+
"_dom_classes": [],
|
917 |
+
"_model_module": "@jupyter-widgets/controls",
|
918 |
+
"_model_module_version": "1.5.0",
|
919 |
+
"_model_name": "PasswordModel",
|
920 |
+
"_view_count": null,
|
921 |
+
"_view_module": "@jupyter-widgets/controls",
|
922 |
+
"_view_module_version": "1.5.0",
|
923 |
+
"_view_name": "PasswordView",
|
924 |
+
"continuous_update": true,
|
925 |
+
"description": "Token:",
|
926 |
+
"description_tooltip": null,
|
927 |
+
"disabled": false,
|
928 |
+
"layout": "IPY_MODEL_ed34441fca164b389dfea1eabdba6e4a",
|
929 |
+
"placeholder": "",
|
930 |
+
"style": "IPY_MODEL_99f5b0432c1849128fa181b88925c77b",
|
931 |
+
"value": ""
|
932 |
+
}
|
933 |
+
},
|
934 |
+
"86aa1abb905346bf8956754a9704f250": {
|
935 |
+
"model_module": "@jupyter-widgets/base",
|
936 |
+
"model_module_version": "1.2.0",
|
937 |
+
"model_name": "LayoutModel",
|
938 |
+
"state": {
|
939 |
+
"_model_module": "@jupyter-widgets/base",
|
940 |
+
"_model_module_version": "1.2.0",
|
941 |
+
"_model_name": "LayoutModel",
|
942 |
+
"_view_count": null,
|
943 |
+
"_view_module": "@jupyter-widgets/base",
|
944 |
+
"_view_module_version": "1.2.0",
|
945 |
+
"_view_name": "LayoutView",
|
946 |
+
"align_content": null,
|
947 |
+
"align_items": null,
|
948 |
+
"align_self": null,
|
949 |
+
"border": null,
|
950 |
+
"bottom": null,
|
951 |
+
"display": null,
|
952 |
+
"flex": null,
|
953 |
+
"flex_flow": null,
|
954 |
+
"grid_area": null,
|
955 |
+
"grid_auto_columns": null,
|
956 |
+
"grid_auto_flow": null,
|
957 |
+
"grid_auto_rows": null,
|
958 |
+
"grid_column": null,
|
959 |
+
"grid_gap": null,
|
960 |
+
"grid_row": null,
|
961 |
+
"grid_template_areas": null,
|
962 |
+
"grid_template_columns": null,
|
963 |
+
"grid_template_rows": null,
|
964 |
+
"height": null,
|
965 |
+
"justify_content": null,
|
966 |
+
"justify_items": null,
|
967 |
+
"left": null,
|
968 |
+
"margin": null,
|
969 |
+
"max_height": null,
|
970 |
+
"max_width": null,
|
971 |
+
"min_height": null,
|
972 |
+
"min_width": null,
|
973 |
+
"object_fit": null,
|
974 |
+
"object_position": null,
|
975 |
+
"order": null,
|
976 |
+
"overflow": null,
|
977 |
+
"overflow_x": null,
|
978 |
+
"overflow_y": null,
|
979 |
+
"padding": null,
|
980 |
+
"right": null,
|
981 |
+
"top": null,
|
982 |
+
"visibility": null,
|
983 |
+
"width": null
|
984 |
+
}
|
985 |
+
},
|
986 |
+
"94958be916d6439d87dcd45c59178bec": {
|
987 |
+
"model_module": "@jupyter-widgets/base",
|
988 |
+
"model_module_version": "1.2.0",
|
989 |
+
"model_name": "LayoutModel",
|
990 |
+
"state": {
|
991 |
+
"_model_module": "@jupyter-widgets/base",
|
992 |
+
"_model_module_version": "1.2.0",
|
993 |
+
"_model_name": "LayoutModel",
|
994 |
+
"_view_count": null,
|
995 |
+
"_view_module": "@jupyter-widgets/base",
|
996 |
+
"_view_module_version": "1.2.0",
|
997 |
+
"_view_name": "LayoutView",
|
998 |
+
"align_content": null,
|
999 |
+
"align_items": null,
|
1000 |
+
"align_self": null,
|
1001 |
+
"border": null,
|
1002 |
+
"bottom": null,
|
1003 |
+
"display": null,
|
1004 |
+
"flex": null,
|
1005 |
+
"flex_flow": null,
|
1006 |
+
"grid_area": null,
|
1007 |
+
"grid_auto_columns": null,
|
1008 |
+
"grid_auto_flow": null,
|
1009 |
+
"grid_auto_rows": null,
|
1010 |
+
"grid_column": null,
|
1011 |
+
"grid_gap": null,
|
1012 |
+
"grid_row": null,
|
1013 |
+
"grid_template_areas": null,
|
1014 |
+
"grid_template_columns": null,
|
1015 |
+
"grid_template_rows": null,
|
1016 |
+
"height": null,
|
1017 |
+
"justify_content": null,
|
1018 |
+
"justify_items": null,
|
1019 |
+
"left": null,
|
1020 |
+
"margin": null,
|
1021 |
+
"max_height": null,
|
1022 |
+
"max_width": null,
|
1023 |
+
"min_height": null,
|
1024 |
+
"min_width": null,
|
1025 |
+
"object_fit": null,
|
1026 |
+
"object_position": null,
|
1027 |
+
"order": null,
|
1028 |
+
"overflow": null,
|
1029 |
+
"overflow_x": null,
|
1030 |
+
"overflow_y": null,
|
1031 |
+
"padding": null,
|
1032 |
+
"right": null,
|
1033 |
+
"top": null,
|
1034 |
+
"visibility": null,
|
1035 |
+
"width": null
|
1036 |
+
}
|
1037 |
+
},
|
1038 |
+
"99f5b0432c1849128fa181b88925c77b": {
|
1039 |
+
"model_module": "@jupyter-widgets/controls",
|
1040 |
+
"model_module_version": "1.5.0",
|
1041 |
+
"model_name": "DescriptionStyleModel",
|
1042 |
+
"state": {
|
1043 |
+
"_model_module": "@jupyter-widgets/controls",
|
1044 |
+
"_model_module_version": "1.5.0",
|
1045 |
+
"_model_name": "DescriptionStyleModel",
|
1046 |
+
"_view_count": null,
|
1047 |
+
"_view_module": "@jupyter-widgets/base",
|
1048 |
+
"_view_module_version": "1.2.0",
|
1049 |
+
"_view_name": "StyleView",
|
1050 |
+
"description_width": ""
|
1051 |
+
}
|
1052 |
+
},
|
1053 |
+
"9af532f878ab491096358d3bc83250d8": {
|
1054 |
+
"model_module": "@jupyter-widgets/controls",
|
1055 |
+
"model_module_version": "1.5.0",
|
1056 |
+
"model_name": "CheckboxModel",
|
1057 |
+
"state": {
|
1058 |
+
"_dom_classes": [],
|
1059 |
+
"_model_module": "@jupyter-widgets/controls",
|
1060 |
+
"_model_module_version": "1.5.0",
|
1061 |
+
"_model_name": "CheckboxModel",
|
1062 |
+
"_view_count": null,
|
1063 |
+
"_view_module": "@jupyter-widgets/controls",
|
1064 |
+
"_view_module_version": "1.5.0",
|
1065 |
+
"_view_name": "CheckboxView",
|
1066 |
+
"description": "Add token as git credential?",
|
1067 |
+
"description_tooltip": null,
|
1068 |
+
"disabled": false,
|
1069 |
+
"indent": true,
|
1070 |
+
"layout": "IPY_MODEL_5e529d6d6c4e40b4863961ea63bf259a",
|
1071 |
+
"style": "IPY_MODEL_ebfcd83e42ec46afb772d53ad7f35d43",
|
1072 |
+
"value": true
|
1073 |
+
}
|
1074 |
+
},
|
1075 |
+
"b474bf8f464d40d8865665e4c7f0a411": {
|
1076 |
+
"model_module": "@jupyter-widgets/controls",
|
1077 |
+
"model_module_version": "1.5.0",
|
1078 |
+
"model_name": "DescriptionStyleModel",
|
1079 |
+
"state": {
|
1080 |
+
"_model_module": "@jupyter-widgets/controls",
|
1081 |
+
"_model_module_version": "1.5.0",
|
1082 |
+
"_model_name": "DescriptionStyleModel",
|
1083 |
+
"_view_count": null,
|
1084 |
+
"_view_module": "@jupyter-widgets/base",
|
1085 |
+
"_view_module_version": "1.2.0",
|
1086 |
+
"_view_name": "StyleView",
|
1087 |
+
"description_width": ""
|
1088 |
+
}
|
1089 |
+
},
|
1090 |
+
"b6284cfacfd642278a7809a154463d69": {
|
1091 |
+
"model_module": "@jupyter-widgets/controls",
|
1092 |
+
"model_module_version": "1.5.0",
|
1093 |
+
"model_name": "HTMLModel",
|
1094 |
+
"state": {
|
1095 |
+
"_dom_classes": [],
|
1096 |
+
"_model_module": "@jupyter-widgets/controls",
|
1097 |
+
"_model_module_version": "1.5.0",
|
1098 |
+
"_model_name": "HTMLModel",
|
1099 |
+
"_view_count": null,
|
1100 |
+
"_view_module": "@jupyter-widgets/controls",
|
1101 |
+
"_view_module_version": "1.5.0",
|
1102 |
+
"_view_name": "HTMLView",
|
1103 |
+
"description": "",
|
1104 |
+
"description_tooltip": null,
|
1105 |
+
"layout": "IPY_MODEL_86aa1abb905346bf8956754a9704f250",
|
1106 |
+
"placeholder": "",
|
1107 |
+
"style": "IPY_MODEL_eeb2fbfd6cd54c4aa3983dc334a5377d",
|
1108 |
+
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
|
1109 |
+
}
|
1110 |
+
},
|
1111 |
+
"dd08ce6386184df38f47348e547738d8": {
|
1112 |
+
"model_module": "@jupyter-widgets/base",
|
1113 |
+
"model_module_version": "1.2.0",
|
1114 |
+
"model_name": "LayoutModel",
|
1115 |
+
"state": {
|
1116 |
+
"_model_module": "@jupyter-widgets/base",
|
1117 |
+
"_model_module_version": "1.2.0",
|
1118 |
+
"_model_name": "LayoutModel",
|
1119 |
+
"_view_count": null,
|
1120 |
+
"_view_module": "@jupyter-widgets/base",
|
1121 |
+
"_view_module_version": "1.2.0",
|
1122 |
+
"_view_name": "LayoutView",
|
1123 |
+
"align_content": null,
|
1124 |
+
"align_items": null,
|
1125 |
+
"align_self": null,
|
1126 |
+
"border": null,
|
1127 |
+
"bottom": null,
|
1128 |
+
"display": null,
|
1129 |
+
"flex": null,
|
1130 |
+
"flex_flow": null,
|
1131 |
+
"grid_area": null,
|
1132 |
+
"grid_auto_columns": null,
|
1133 |
+
"grid_auto_flow": null,
|
1134 |
+
"grid_auto_rows": null,
|
1135 |
+
"grid_column": null,
|
1136 |
+
"grid_gap": null,
|
1137 |
+
"grid_row": null,
|
1138 |
+
"grid_template_areas": null,
|
1139 |
+
"grid_template_columns": null,
|
1140 |
+
"grid_template_rows": null,
|
1141 |
+
"height": null,
|
1142 |
+
"justify_content": null,
|
1143 |
+
"justify_items": null,
|
1144 |
+
"left": null,
|
1145 |
+
"margin": null,
|
1146 |
+
"max_height": null,
|
1147 |
+
"max_width": null,
|
1148 |
+
"min_height": null,
|
1149 |
+
"min_width": null,
|
1150 |
+
"object_fit": null,
|
1151 |
+
"object_position": null,
|
1152 |
+
"order": null,
|
1153 |
+
"overflow": null,
|
1154 |
+
"overflow_x": null,
|
1155 |
+
"overflow_y": null,
|
1156 |
+
"padding": null,
|
1157 |
+
"right": null,
|
1158 |
+
"top": null,
|
1159 |
+
"visibility": null,
|
1160 |
+
"width": null
|
1161 |
+
}
|
1162 |
+
},
|
1163 |
+
"ebfcd83e42ec46afb772d53ad7f35d43": {
|
1164 |
+
"model_module": "@jupyter-widgets/controls",
|
1165 |
+
"model_module_version": "1.5.0",
|
1166 |
+
"model_name": "DescriptionStyleModel",
|
1167 |
+
"state": {
|
1168 |
+
"_model_module": "@jupyter-widgets/controls",
|
1169 |
+
"_model_module_version": "1.5.0",
|
1170 |
+
"_model_name": "DescriptionStyleModel",
|
1171 |
+
"_view_count": null,
|
1172 |
+
"_view_module": "@jupyter-widgets/base",
|
1173 |
+
"_view_module_version": "1.2.0",
|
1174 |
+
"_view_name": "StyleView",
|
1175 |
+
"description_width": ""
|
1176 |
+
}
|
1177 |
+
},
|
1178 |
+
"ed34441fca164b389dfea1eabdba6e4a": {
|
1179 |
+
"model_module": "@jupyter-widgets/base",
|
1180 |
+
"model_module_version": "1.2.0",
|
1181 |
+
"model_name": "LayoutModel",
|
1182 |
+
"state": {
|
1183 |
+
"_model_module": "@jupyter-widgets/base",
|
1184 |
+
"_model_module_version": "1.2.0",
|
1185 |
+
"_model_name": "LayoutModel",
|
1186 |
+
"_view_count": null,
|
1187 |
+
"_view_module": "@jupyter-widgets/base",
|
1188 |
+
"_view_module_version": "1.2.0",
|
1189 |
+
"_view_name": "LayoutView",
|
1190 |
+
"align_content": null,
|
1191 |
+
"align_items": null,
|
1192 |
+
"align_self": null,
|
1193 |
+
"border": null,
|
1194 |
+
"bottom": null,
|
1195 |
+
"display": null,
|
1196 |
+
"flex": null,
|
1197 |
+
"flex_flow": null,
|
1198 |
+
"grid_area": null,
|
1199 |
+
"grid_auto_columns": null,
|
1200 |
+
"grid_auto_flow": null,
|
1201 |
+
"grid_auto_rows": null,
|
1202 |
+
"grid_column": null,
|
1203 |
+
"grid_gap": null,
|
1204 |
+
"grid_row": null,
|
1205 |
+
"grid_template_areas": null,
|
1206 |
+
"grid_template_columns": null,
|
1207 |
+
"grid_template_rows": null,
|
1208 |
+
"height": null,
|
1209 |
+
"justify_content": null,
|
1210 |
+
"justify_items": null,
|
1211 |
+
"left": null,
|
1212 |
+
"margin": null,
|
1213 |
+
"max_height": null,
|
1214 |
+
"max_width": null,
|
1215 |
+
"min_height": null,
|
1216 |
+
"min_width": null,
|
1217 |
+
"object_fit": null,
|
1218 |
+
"object_position": null,
|
1219 |
+
"order": null,
|
1220 |
+
"overflow": null,
|
1221 |
+
"overflow_x": null,
|
1222 |
+
"overflow_y": null,
|
1223 |
+
"padding": null,
|
1224 |
+
"right": null,
|
1225 |
+
"top": null,
|
1226 |
+
"visibility": null,
|
1227 |
+
"width": null
|
1228 |
+
}
|
1229 |
+
},
|
1230 |
+
"eeb2fbfd6cd54c4aa3983dc334a5377d": {
|
1231 |
+
"model_module": "@jupyter-widgets/controls",
|
1232 |
+
"model_module_version": "1.5.0",
|
1233 |
+
"model_name": "DescriptionStyleModel",
|
1234 |
+
"state": {
|
1235 |
+
"_model_module": "@jupyter-widgets/controls",
|
1236 |
+
"_model_module_version": "1.5.0",
|
1237 |
+
"_model_name": "DescriptionStyleModel",
|
1238 |
+
"_view_count": null,
|
1239 |
+
"_view_module": "@jupyter-widgets/base",
|
1240 |
+
"_view_module_version": "1.2.0",
|
1241 |
+
"_view_name": "StyleView",
|
1242 |
+
"description_width": ""
|
1243 |
+
}
|
1244 |
+
},
|
1245 |
+
"f8a75ac273fc408f923bf9d7f7263db8": {
|
1246 |
+
"model_module": "@jupyter-widgets/controls",
|
1247 |
+
"model_module_version": "1.5.0",
|
1248 |
+
"model_name": "LabelModel",
|
1249 |
+
"state": {
|
1250 |
+
"_dom_classes": [],
|
1251 |
+
"_model_module": "@jupyter-widgets/controls",
|
1252 |
+
"_model_module_version": "1.5.0",
|
1253 |
+
"_model_name": "LabelModel",
|
1254 |
+
"_view_count": null,
|
1255 |
+
"_view_module": "@jupyter-widgets/controls",
|
1256 |
+
"_view_module_version": "1.5.0",
|
1257 |
+
"_view_name": "LabelView",
|
1258 |
+
"description": "",
|
1259 |
+
"description_tooltip": null,
|
1260 |
+
"layout": "IPY_MODEL_dd08ce6386184df38f47348e547738d8",
|
1261 |
+
"placeholder": "",
|
1262 |
+
"style": "IPY_MODEL_3aef5e8d5d9e4bd29bd3790ad139c02c",
|
1263 |
+
"value": "Connecting..."
|
1264 |
+
}
|
1265 |
+
}
|
1266 |
+
}
|
1267 |
+
}
|
1268 |
+
},
|
1269 |
+
"nbformat": 4,
|
1270 |
+
"nbformat_minor": 4
|
1271 |
+
}
|
inference_gists/Aria_Inference.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
inference_gists/ColQwen2.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
inference_gists/IBM_Granite_Vision.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
inference_gists/InternVL3_Gist.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
knowledge_distillation.md
ADDED
@@ -0,0 +1,186 @@
1 |
+
<!--Copyright 2023 The HuggingFace Team. All rights reserved.
|
2 |
+
|
3 |
+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
4 |
+
the License. You may obtain a copy of the License at
|
5 |
+
|
6 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
7 |
+
|
8 |
+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
9 |
+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
10 |
+
specific language governing permissions and limitations under the License.
|
11 |
+
|
12 |
+
⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
|
13 |
+
rendered properly in your Markdown viewer.
|
14 |
+
|
15 |
+
-->
|
16 |
+
# Knowledge Distillation for Computer Vision
|
17 |
+
|
18 |
+
[[open-in-colab]]
|
19 |
+
|
20 |
+
Knowledge distillation is a technique used to transfer knowledge from a larger, more complex model (the teacher) to a smaller, simpler model (the student). To distill knowledge from one model to another, we take a pre-trained teacher model trained on a certain task (image classification in this case) and randomly initialize a student model to be trained on image classification. Next, we train the student model to minimize the difference between its outputs and the teacher's outputs, thus making it mimic the teacher's behavior. It was first introduced in [Distilling the Knowledge in a Neural Network by Hinton et al](https://arxiv.org/abs/1503.02531). In this guide, we will do task-specific knowledge distillation. We will use the [beans dataset](https://huggingface.co/datasets/beans) for this.
|
21 |
+
|
22 |
+
This guide demonstrates how you can distill a [fine-tuned ViT model](https://huggingface.co/merve/vit-mobilenet-beans-224) (teacher model) to a [MobileNet](https://huggingface.co/google/mobilenet_v2_1.4_224) (student model) using the [Trainer API](https://huggingface.co/docs/transformers/en/main_classes/trainer#trainer) of 🤗 Transformers.
|
23 |
+
|
24 |
+
Let's install the libraries needed for distillation and evaluating the process.
|
25 |
+
|
26 |
+
```bash
|
27 |
+
pip install transformers datasets accelerate tensorboard evaluate --upgrade
|
28 |
+
```
|
29 |
+
|
30 |
+
In this example, we are using the `merve/beans-vit-224` model as the teacher model. It's an image classification model based on `google/vit-base-patch16-224-in21k`, fine-tuned on the beans dataset. We will distill this model to a randomly initialized MobileNetV2.
|
31 |
+
|
32 |
+
We will now load the dataset.
|
33 |
+
|
34 |
+
```python
|
35 |
+
from datasets import load_dataset
|
36 |
+
|
37 |
+
dataset = load_dataset("beans")
|
38 |
+
```
|
39 |
+
|
40 |
+
We can use an image processor from either of the models, since in this case they return the same output at the same resolution. We will use the `map()` method of `dataset` to apply the preprocessing to every split of the dataset.
|
41 |
+
|
42 |
+
```python
|
43 |
+
from transformers import AutoImageProcessor
|
44 |
+
teacher_processor = AutoImageProcessor.from_pretrained("merve/beans-vit-224")
|
45 |
+
|
46 |
+
def process(examples):
|
47 |
+
processed_inputs = teacher_processor(examples["image"])
|
48 |
+
return processed_inputs
|
49 |
+
|
50 |
+
processed_datasets = dataset.map(process, batched=True)
|
51 |
+
```
|
52 |
+
|
53 |
+
Essentially, we want the student model (a randomly initialized MobileNet) to mimic the teacher model (a fine-tuned vision transformer). To achieve this, we first get the logits from the teacher and the student. Then, we divide each of them by the parameter `temperature`, which controls the importance of each soft target. A parameter called `lambda` weighs the importance of the distillation loss. In this example, we will use `temperature=5` and `lambda=0.5`. We will use the Kullback-Leibler divergence loss to compute the divergence between the student and the teacher. Given two distributions P and Q, KL divergence measures how much extra information is needed to represent P using Q. If the two are identical, their KL divergence is zero, as no extra information is needed to explain P from Q. This makes KL divergence a natural fit for knowledge distillation.
|
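To make this concrete, the combined objective implemented by the trainer below can be written as follows, with $z_s$ and $z_t$ the student and teacher logits, $y$ the ground-truth labels, $T$ the temperature and $\lambda$ the weighting parameter:

$$\mathcal{L} = (1 - \lambda)\,\mathcal{L}_{\text{CE}}(z_s, y) + \lambda\, T^{2}\, \mathrm{KL}\big(\mathrm{softmax}(z_t / T)\,\|\,\mathrm{softmax}(z_s / T)\big)$$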
54 |
+
|
55 |
+
|
56 |
+
```python
|
57 |
+
from transformers import TrainingArguments, Trainer
|
58 |
+
import torch
|
59 |
+
import torch.nn as nn
|
60 |
+
import torch.nn.functional as F
|
61 |
+
|
62 |
+
|
63 |
+
class ImageDistilTrainer(Trainer):
|
64 |
+
def __init__(self, teacher_model=None, student_model=None, temperature=None, lambda_param=None, *args, **kwargs):
|
65 |
+
super().__init__(model=student_model, *args, **kwargs)
|
66 |
+
self.teacher = teacher_model
|
67 |
+
self.student = student_model
|
68 |
+
self.loss_function = nn.KLDivLoss(reduction="batchmean")
|
69 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
70 |
+
self.teacher.to(device)
|
71 |
+
self.teacher.eval()
|
72 |
+
self.temperature = temperature
|
73 |
+
self.lambda_param = lambda_param
|
74 |
+
|
75 |
+
def compute_loss(self, student, inputs, return_outputs=False, **kwargs):
|
76 |
+
student_output = self.student(**inputs)
|
77 |
+
|
78 |
+
with torch.no_grad():
|
79 |
+
teacher_output = self.teacher(**inputs)
|
80 |
+
|
81 |
+
# Compute soft targets for teacher and student
|
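# nn.KLDivLoss expects log-probabilities as its input (the student) and probabilities as its target
# (the teacher), hence log_softmax for the student and softmax for the teacher below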
82 |
+
soft_teacher = F.softmax(teacher_output.logits / self.temperature, dim=-1)
|
83 |
+
soft_student = F.log_softmax(student_output.logits / self.temperature, dim=-1)
|
84 |
+
|
85 |
+
# Compute the loss
|
86 |
+
distillation_loss = self.loss_function(soft_student, soft_teacher) * (self.temperature ** 2)
|
87 |
+
|
88 |
+
# Compute the true label loss
|
89 |
+
student_target_loss = student_output.loss
|
90 |
+
|
91 |
+
# Calculate final loss
|
92 |
+
loss = (1. - self.lambda_param) * student_target_loss + self.lambda_param * distillation_loss
|
93 |
+
return (loss, student_output) if return_outputs else loss
|
94 |
+
```
|
95 |
+
|
96 |
+
We will now log in to the Hugging Face Hub so we can push our model there through the `Trainer`.
|
97 |
+
|
98 |
+
```python
|
99 |
+
from huggingface_hub import notebook_login
|
100 |
+
|
101 |
+
notebook_login()
|
102 |
+
```
|
103 |
+
|
104 |
+
Let's set the `TrainingArguments`, the teacher model and the student model.
|
105 |
+
|
106 |
+
```python
|
107 |
+
from transformers import AutoModelForImageClassification, MobileNetV2Config, MobileNetV2ForImageClassification
|
108 |
+
|
109 |
+
training_args = TrainingArguments(
|
110 |
+
output_dir="my-awesome-model",
|
111 |
+
num_train_epochs=30,
|
112 |
+
fp16=True,
|
113 |
+
logging_dir="my-awesome-model/logs",
|
114 |
+
logging_strategy="epoch",
|
115 |
+
eval_strategy="epoch",
|
116 |
+
save_strategy="epoch",
|
117 |
+
load_best_model_at_end=True,
|
118 |
+
metric_for_best_model="accuracy",
|
119 |
+
report_to="tensorboard",
|
120 |
+
push_to_hub=True,
|
121 |
+
hub_strategy="every_save",
|
122 |
+
hub_model_id="my-awesome-model",
|
123 |
+
)
|
124 |
+
|
125 |
+
num_labels = len(processed_datasets["train"].features["labels"].names)
|
126 |
+
|
127 |
+
# initialize models
|
128 |
+
teacher_model = AutoModelForImageClassification.from_pretrained(
|
129 |
+
"merve/beans-vit-224",
|
130 |
+
num_labels=num_labels,
|
131 |
+
ignore_mismatched_sizes=True
|
132 |
+
)
|
133 |
+
|
134 |
+
# training MobileNetV2 from scratch
|
135 |
+
student_config = MobileNetV2Config()
|
136 |
+
student_config.num_labels = num_labels
|
137 |
+
student_model = MobileNetV2ForImageClassification(student_config)
|
138 |
+
```
|
139 |
+
|
140 |
+
We can use the `compute_metrics` function to evaluate our model on the test set. This function will be used during the training process to compute the `accuracy` of our model.
|
141 |
+
|
142 |
+
```python
|
143 |
+
import evaluate
|
144 |
+
import numpy as np
|
145 |
+
|
146 |
+
accuracy = evaluate.load("accuracy")
|
147 |
+
|
148 |
+
def compute_metrics(eval_pred):
|
149 |
+
predictions, labels = eval_pred
|
150 |
+
acc = accuracy.compute(references=labels, predictions=np.argmax(predictions, axis=1))
|
151 |
+
return {"accuracy": acc["accuracy"]}
|
152 |
+
```
|
153 |
+
|
154 |
+
Let's initialize the `Trainer` with the training arguments we defined. We will also initialize our data collator.
|
155 |
+
|
156 |
+
```python
|
157 |
+
from transformers import DefaultDataCollator
|
158 |
+
|
159 |
+
data_collator = DefaultDataCollator()
|
160 |
+
trainer = ImageDistilTrainer(
|
161 |
+
student_model=student_model,
|
162 |
+
teacher_model=teacher_model,
|
163 |
+
training_args=training_args,
|
164 |
+
train_dataset=processed_datasets["train"],
|
165 |
+
eval_dataset=processed_datasets["validation"],
|
166 |
+
data_collator=data_collator,
|
167 |
+
tokenizer=teacher_processor,
|
168 |
+
compute_metrics=compute_metrics,
|
169 |
+
temperature=5,
|
170 |
+
lambda_param=0.5
|
171 |
+
)
|
172 |
+
```
|
173 |
+
|
174 |
+
We can now train our model.
|
175 |
+
|
176 |
+
```python
|
177 |
+
trainer.train()
|
178 |
+
```
|
179 |
+
|
180 |
+
We can evaluate the model on the test set.
|
181 |
+
|
182 |
+
```python
|
183 |
+
trainer.evaluate(processed_datasets["test"])
|
184 |
+
```
|
185 |
+
|
186 |
+
On the test set, our model reaches 72 percent accuracy. As a sanity check on the efficiency of distillation, we also trained MobileNet on the beans dataset from scratch with the same hyperparameters and observed 63 percent accuracy on the test set. We invite readers to try different pre-trained teacher models, student architectures and distillation parameters, and to report their findings. The training logs and checkpoints for the distilled model can be found in [this repository](https://huggingface.co/merve/vit-mobilenet-beans-224), and the MobileNetV2 trained from scratch can be found in this [repository](https://huggingface.co/merve/resnet-mobilenet-beans-5).
|
paligemma.py
ADDED
@@ -0,0 +1,91 @@
1 |
+
from datasets import load_dataset
|
2 |
+
import torch
|
3 |
+
from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration, Trainer, TrainingArguments, BitsAndBytesConfig
|
4 |
+
from peft import get_peft_model, LoraConfig
|
5 |
+
import os
|
6 |
+
|
7 |
+
USE_LORA = False
|
8 |
+
USE_QLORA = False
|
9 |
+
FREEZE_VISION = False
|
10 |
+
|
11 |
+
ds = load_dataset('merve/vqav2-small', split="validation")
|
12 |
+
ds = ds.train_test_split(test_size=0.5)["train"]
|
13 |
+
|
14 |
+
model_id = "google/paligemma2-3b-pt-448"
|
15 |
+
processor = PaliGemmaProcessor.from_pretrained(model_id)
|
16 |
+
|
17 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
+
|
19 |
+
image_token = processor.tokenizer.convert_tokens_to_ids("<image>")
|
20 |
+
|
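# The processor builds PaliGemma's prefix/suffix format: the "<image>answer en <question>" prefix is the prompt,
# and passing the answers via `suffix=` makes the processor return `labels` in which the prefix and padding are
# masked, so the loss is computed only on the answer tokens.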
21 |
+
def collate_fn(examples):
|
22 |
+
texts = ["<image>answer en " + example["question"] for example in examples]
|
23 |
+
labels = [example["multiple_choice_answer"] for example in examples]
|
24 |
+
images = [example["image"].convert("RGB") for example in examples]
|
25 |
+
tokens = processor(text=texts, images=images, suffix=labels,
|
26 |
+
return_tensors="pt", padding="longest")
|
27 |
+
|
28 |
+
tokens = tokens.to(torch.bfloat16).to(device)
|
29 |
+
return tokens
|
30 |
+
|
31 |
+
|
32 |
+
if USE_LORA or USE_QLORA:
|
33 |
+
lora_config = LoraConfig(
|
34 |
+
r=8,
|
35 |
+
target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
|
36 |
+
task_type="CAUSAL_LM",
|
37 |
+
)
|
38 |
+
if USE_QLORA:
|
39 |
+
bnb_config = BitsAndBytesConfig(
|
40 |
+
load_in_4bit=True,
|
41 |
+
bnb_4bit_quant_type="nf4",
|
42 |
+
bnb_4bit_compute_dtype=torch.bfloat16
|
43 |
+
)
|
44 |
+
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, device_map="auto",
|
45 |
+
quantization_config=bnb_config if USE_QLORA else None,
|
46 |
+
torch_dtype=torch.bfloat16)
|
47 |
+
model = get_peft_model(model, lora_config)
|
48 |
+
model = model.to(device) if not USE_QLORA else model  # .to() is not supported on 4-bit quantized models; device_map="auto" already placed them
|
49 |
+
model.print_trainable_parameters()
|
50 |
+
else:
|
51 |
+
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, device_map="auto").to(device)
|
52 |
+
model = model.to(device)
|
53 |
+
|
54 |
+
if FREEZE_VISION:
|
55 |
+
for param in model.vision_tower.parameters():
|
56 |
+
param.requires_grad = False
|
57 |
+
|
58 |
+
for param in model.multi_modal_projector.parameters():
|
59 |
+
param.requires_grad = False
|
60 |
+
|
61 |
+
|
62 |
+
args=TrainingArguments(
|
63 |
+
num_train_epochs=3,
|
64 |
+
remove_unused_columns=False,
|
65 |
+
per_device_train_batch_size=4,
|
66 |
+
gradient_accumulation_steps=4,
|
67 |
+
warmup_steps=2,
|
68 |
+
learning_rate=2e-5,
|
69 |
+
weight_decay=1e-6,
|
70 |
+
adam_beta2=0.999,
|
71 |
+
logging_steps=100,
|
72 |
+
optim="adamw_hf",
|
73 |
+
save_strategy="steps",
|
74 |
+
save_steps=1000,
|
75 |
+
save_total_limit=1,
|
76 |
+
push_to_hub=True,
|
77 |
+
output_dir="paligemma_vqav2",
|
78 |
+
bf16=True,
|
79 |
+
report_to=["tensorboard"],
|
80 |
+
dataloader_pin_memory=False
|
81 |
+
)
|
82 |
+
|
83 |
+
|
84 |
+
trainer = Trainer(
|
85 |
+
model=model,
|
86 |
+
train_dataset=ds,
|
87 |
+
data_collator=collate_fn,
|
88 |
+
args=args
|
89 |
+
)
|
90 |
+
|
91 |
+
trainer.train()
|
smolvlm.py
ADDED
@@ -0,0 +1,137 @@
1 |
+
import torch
|
2 |
+
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
|
3 |
+
from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration
|
4 |
+
from transformers import TrainingArguments, Trainer
|
5 |
+
from datasets import load_dataset
|
6 |
+
import os
|
7 |
+
from PIL import Image
|
8 |
+
from transformers.image_utils import load_image
|
9 |
+
|
10 |
+
USE_LORA = False
|
11 |
+
USE_QLORA = True
|
12 |
+
SMOL = True
|
13 |
+
|
14 |
+
model_id = "HuggingFaceTB/SmolVLM-Base" if SMOL else "HuggingFaceM4/Idefics3-8B-Llama3"
|
15 |
+
|
16 |
+
processor = AutoProcessor.from_pretrained(
|
17 |
+
model_id
|
18 |
+
)
|
19 |
+
|
20 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
21 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "1, 4"
|
22 |
+
if USE_QLORA or USE_LORA:
|
23 |
+
lora_config = LoraConfig(
|
24 |
+
r=8,
|
25 |
+
lora_alpha=8,
|
26 |
+
lora_dropout=0.1,
|
27 |
+
target_modules=['down_proj','o_proj','k_proj','q_proj','gate_proj','up_proj','v_proj'],
|
28 |
+
use_dora=False if USE_QLORA else True,
|
29 |
+
init_lora_weights="gaussian"
|
30 |
+
)
|
31 |
+
lora_config.inference_mode = False
|
32 |
+
if USE_QLORA:
|
33 |
+
bnb_config = BitsAndBytesConfig(
|
34 |
+
load_in_4bit=True,
|
35 |
+
bnb_4bit_use_double_quant=True,
|
36 |
+
bnb_4bit_quant_type="nf4",
|
37 |
+
bnb_4bit_compute_dtype=torch.bfloat16
|
38 |
+
)
|
39 |
+
|
40 |
+
model = Idefics3ForConditionalGeneration.from_pretrained(
|
41 |
+
model_id,
|
42 |
+
quantization_config=bnb_config if USE_QLORA else None,
|
43 |
+
_attn_implementation="flash_attention_2",
|
44 |
+
device_map="auto"
|
45 |
+
)
|
46 |
+
model.add_adapter(lora_config)
|
47 |
+
model.enable_adapters()
|
48 |
+
model = prepare_model_for_kbit_training(model)
|
49 |
+
model = get_peft_model(model, lora_config)
|
50 |
+
print(model.get_nb_trainable_parameters())
|
51 |
+
else:
|
52 |
+
model = Idefics3ForConditionalGeneration.from_pretrained(
|
53 |
+
model_id,
|
54 |
+
torch_dtype=torch.bfloat16,
|
55 |
+
_attn_implementation="flash_attention_2",
|
56 |
+
).to("cuda" if torch.cuda.is_available() else "cpu")
|
57 |
+
|
58 |
+
# if you'd like to only fine-tune LLM
|
59 |
+
for param in model.model.vision_model.parameters():
|
60 |
+
param.requires_grad = False
|
61 |
+
|
62 |
+
ds = load_dataset('merve/vqav2-small', trust_remote_code=True)
|
63 |
+
|
64 |
+
split_ds = ds["validation"].train_test_split(test_size=0.8)
|
65 |
+
train_ds = split_ds["train"]
|
66 |
+
|
67 |
+
|
68 |
+
image_token_id = processor.tokenizer.additional_special_tokens_ids[
|
69 |
+
processor.tokenizer.additional_special_tokens.index("<image>")]
|
70 |
+
def collate_fn(examples):
|
71 |
+
texts = []
|
72 |
+
images = []
|
73 |
+
for example in examples:
|
74 |
+
image = example["image"]
|
75 |
+
if image.mode != 'RGB':
|
76 |
+
image = image.convert('RGB')
|
77 |
+
question = example["question"]
|
78 |
+
answer = example["multiple_choice_answer"]
|
79 |
+
messages = [
|
80 |
+
{
|
81 |
+
"role": "user",
|
82 |
+
"content": [
|
83 |
+
{"type": "text", "text": "Answer briefly."},
|
84 |
+
{"type": "image"},
|
85 |
+
{"type": "text", "text": question}
|
86 |
+
]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"role": "assistant",
|
90 |
+
"content": [
|
91 |
+
{"type": "text", "text": answer}
|
92 |
+
]
|
93 |
+
}
|
94 |
+
]
|
95 |
+
text = processor.apply_chat_template(messages, add_generation_prompt=False)
|
96 |
+
texts.append(text.strip())
|
97 |
+
images.append([image])
|
98 |
+
|
99 |
+
batch = processor(text=texts, images=images, return_tensors="pt", padding=True)
|
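# Build labels from input_ids and mask padding and <image> placeholder positions with -100,
# the ignore index of the cross-entropy loss, so only real text tokens contribute to training.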
100 |
+
labels = batch["input_ids"].clone()
|
101 |
+
labels[labels == processor.tokenizer.pad_token_id] = -100
|
102 |
+
labels[labels == image_token_id] = -100
|
103 |
+
batch["labels"] = labels
|
104 |
+
|
105 |
+
return batch
|
106 |
+
|
107 |
+
|
108 |
+
model_name = model_id.split("/")[-1]
|
109 |
+
|
110 |
+
training_args = TrainingArguments(
|
111 |
+
num_train_epochs=1,
|
112 |
+
per_device_train_batch_size=8,
|
113 |
+
gradient_accumulation_steps=4,
|
114 |
+
warmup_steps=50,
|
115 |
+
learning_rate=1e-4,
|
116 |
+
weight_decay=0.01,
|
117 |
+
logging_steps=25,
|
118 |
+
save_strategy="steps",
|
119 |
+
save_steps=250,
|
120 |
+
save_total_limit=1,
|
121 |
+
optim="paged_adamw_8bit", # for 8-bit, keep this, else adamw_hf
|
122 |
+
bf16=True, # compute precision for the quantized base model
|
123 |
+
output_dir=f"./{model_name}-vqav2",
|
124 |
+
hub_model_id=f"{model_name}-vqav2",
|
125 |
+
report_to="tensorboard",
|
126 |
+
remove_unused_columns=False,
|
127 |
+
gradient_checkpointing=True
|
128 |
+
)
|
129 |
+
trainer = Trainer(
|
130 |
+
model=model,
|
131 |
+
args=training_args,
|
132 |
+
data_collator=collate_fn,
|
133 |
+
train_dataset=train_ds,
|
134 |
+
)
|
135 |
+
|
136 |
+
trainer.train()
|
137 |
+
trainer.push_to_hub()
|
train_idefics2.py
ADDED
@@ -0,0 +1,132 @@
1 |
+
import torch
|
2 |
+
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
|
3 |
+
from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration
|
4 |
+
from datasets import load_dataset
import os
|
5 |
+
|
6 |
+
|
7 |
+
DEVICE = "cuda:4"
|
8 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
9 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "4"
|
10 |
+
USE_LORA = False
|
11 |
+
USE_QLORA = True
|
12 |
+
model_id = "HuggingFaceM4/Idefics3-8B-Llama3"
|
13 |
+
|
14 |
+
processor = AutoProcessor.from_pretrained(
|
15 |
+
model_id
|
16 |
+
)
|
17 |
+
|
18 |
+
|
19 |
+
if USE_QLORA or USE_LORA:
|
20 |
+
lora_config = LoraConfig(
|
21 |
+
r=8,
|
22 |
+
lora_alpha=8,
|
23 |
+
lora_dropout=0.1,
|
24 |
+
target_modules=['down_proj','o_proj','k_proj','q_proj','gate_proj','up_proj','v_proj'],
|
25 |
+
use_dora=False if USE_QLORA else True,
|
26 |
+
init_lora_weights="gaussian"
|
27 |
+
)
|
28 |
+
lora_config.inference_mode = False
|
29 |
+
if USE_QLORA:
|
30 |
+
bnb_config = BitsAndBytesConfig(
|
31 |
+
load_in_4bit=True,
|
32 |
+
bnb_4bit_use_double_quant=True,
|
33 |
+
bnb_4bit_quant_type="nf4",
|
34 |
+
bnb_4bit_compute_dtype=torch.bfloat16
|
35 |
+
)
|
36 |
+
|
37 |
+
model = Idefics3ForConditionalGeneration.from_pretrained(
|
38 |
+
model_id,
|
39 |
+
quantization_config=bnb_config if USE_QLORA else None,
|
40 |
+
_attn_implementation="flash_attention_2",
|
41 |
+
device_map="auto"
|
42 |
+
)
|
43 |
+
model.add_adapter(lora_config)
|
44 |
+
model.enable_adapters()
|
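# prepare_model_for_kbit_training freezes the quantized base weights, upcasts layer norms to fp32 and
# enables input gradients so that training (and gradient checkpointing) works on the 4-bit model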
45 |
+
model = prepare_model_for_kbit_training(model)
|
46 |
+
model = get_peft_model(model, lora_config)
|
47 |
+
print(model.get_nb_trainable_parameters())
|
48 |
+
|
49 |
+
|
50 |
+
else:
|
51 |
+
model = Idefics3ForConditionalGeneration.from_pretrained(
|
52 |
+
model_id,
|
53 |
+
torch_dtype=torch.bfloat16,
|
54 |
+
_attn_implementation="flash_attention_2",
|
55 |
+
).to(DEVICE)
|
56 |
+
|
57 |
+
# if you'd like to only fine-tune LLM
|
58 |
+
for param in model.model.vision_model.parameters():
|
59 |
+
param.requires_grad = False
|
60 |
+
|
61 |
+
ds = load_dataset('merve/vqav2-small', trust_remote_code=True)
|
62 |
+
split_ds = ds["validation"].train_test_split(test_size=0.8)
|
63 |
+
train_ds = split_ds["train"]
|
64 |
+
|
65 |
+
image_token_id = processor.tokenizer.additional_special_tokens_ids[
|
66 |
+
processor.tokenizer.additional_special_tokens.index("<image>")]
|
67 |
+
|
68 |
+
def collate_fn(examples):
|
69 |
+
texts = []
|
70 |
+
images = []
|
71 |
+
for example in examples:
|
72 |
+
image = example["image"]
|
73 |
+
question = example["question"]
|
74 |
+
answer = example["multiple_choice_answer"]
|
75 |
+
messages = [
|
76 |
+
{
|
77 |
+
"role": "user",
|
78 |
+
"content": [
|
79 |
+
{"type": "text", "text": "Answer briefly."},
|
80 |
+
{"type": "image"},
|
81 |
+
{"type": "text", "text": question}
|
82 |
+
]
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"role": "assistant",
|
86 |
+
"content": [
|
87 |
+
{"type": "text", "text": answer}
|
88 |
+
]
|
89 |
+
}
|
90 |
+
]
|
91 |
+
text = processor.apply_chat_template(messages, add_generation_prompt=False)
|
92 |
+
texts.append(text.strip())
|
93 |
+
images.append([image])
|
94 |
+
|
95 |
+
batch = processor(text=texts, images=images, return_tensors="pt", padding=True)
|
96 |
+
labels = batch["input_ids"].clone()
|
97 |
+
labels[labels == processor.tokenizer.pad_token_id] = -100
|
98 |
+
labels[labels == image_token_id] = -100
|
99 |
+
batch["labels"] = labels
|
100 |
+
|
101 |
+
return batch
|
102 |
+
|
103 |
+
from transformers import TrainingArguments, Trainer
|
104 |
+
|
105 |
+
training_args = TrainingArguments(
|
106 |
+
num_train_epochs=1,
|
107 |
+
per_device_train_batch_size=1, # increase for QLoRA
|
108 |
+
gradient_accumulation_steps=8,
|
109 |
+
warmup_steps=50,
|
110 |
+
learning_rate=1e-4,
|
111 |
+
weight_decay=0.01,
|
112 |
+
logging_steps=25,
|
113 |
+
save_strategy="steps",
|
114 |
+
save_steps=250,
|
115 |
+
save_total_limit=1,
|
116 |
+
optim="adamw_hf", # for 8-bit, pick paged_adamw_hf
|
117 |
+
#evaluation_strategy="epoch",
|
118 |
+
bf16=True,
|
119 |
+
output_dir="./idefics3-llama-vqav2",
|
120 |
+
hub_model_id="idefics3-llama-vqav2",
|
121 |
+
remove_unused_columns=False,
|
122 |
+
)
|
123 |
+
|
124 |
+
trainer = Trainer(
|
125 |
+
model=model,
|
126 |
+
args=training_args,
|
127 |
+
data_collator=collate_fn,
|
128 |
+
train_dataset=train_ds,
|
129 |
+
)
|
130 |
+
|
131 |
+
trainer.train()
|
132 |
+
trainer.push_to_hub()
|