{ "cells": [ { "cell_type": "markdown", "source": [ "# Fine-tune Gemma3n on FineVideo\n", "\n", "In this notebook, we will see how to fine-tune Gemma3n an videos with audios inside.\n", "Using all three modalities is very costly compute-wise, so keep in mind that this is an educational tutorial to fit the model in 40GB VRAM." ], "metadata": { "id": "0eVo7Mc5GMyL" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "BLv-NJRZzHiA", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "bb4e4b32-5000-42e0-889d-90648e335a41" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.8/10.8 MB\u001b[0m \u001b[31m114.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m376.2/376.2 kB\u001b[0m \u001b[31m33.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m494.8/494.8 kB\u001b[0m \u001b[31m38.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m193.6/193.6 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m126.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m92.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m58.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m42.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m114.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2025.3.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0m" ] } ], "source": [ "!pip install -U -q timm transformers trl peft datasets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "UxE2vzKsbov0" }, "outputs": [], "source": [ "import io\n", "import os\n", "import zipfile\n", "\n", "import torch\n", "from datasets import load_dataset\n", "from PIL import Image\n", "from transformers import AutoProcessor, Gemma3nForConditionalGeneration\n", "\n", "from trl import (\n", " SFTConfig,\n", " SFTTrainer,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "T06yJvcMiqO6" }, "source": [ "## Download videos and preprocessing\n", "\n", "FineVideo is a quite large dataset, we don't need a ton of examples, so we stream the dataset, check the duration and download the videos shorter than 30 secs." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "wBFfYgLxmg7b" }, "outputs": [], "source": [ "from datasets import load_dataset\n", "import json\n", "import os\n", "\n", "dataset = load_dataset(\"HuggingFaceFV/finevideo\", split=\"train\", streaming=True)\n", "\n", "\n", "os.makedirs(\"videos\", exist_ok=True)\n", "os.makedirs(\"metadata\", exist_ok=True)\n", "\n", "for idx, sample in enumerate(dataset):\n", " data = sample[\"json\"]\n", " duration = data.get(\"duration_seconds\", 0)\n", " if duration < 30:\n", " video_filename = f\"videos/sample_{idx}.mp4\"\n", " with open(video_filename, 'wb') as video_file:\n", " video_file.write(sample['mp4'])\n", "\n", " json_filename = f\"metadata/sample_{idx}.json\"\n", " with open(json_filename, 'w') as json_file:\n", " json.dump(sample['json'], json_file)\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "K48dmmZTdZ1l", "outputId": "31c7c32b-1c40-4df4-eb51-11857d7b4da9" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Number of items in content/videos: 871\n" ] } ], "source": [ " print(f\"Number of items in content/videos: {len(os.listdir('videos'))}\")" ] }, { "cell_type": "markdown", "source": [ "In FineVideo some frames are dark so we downsample 6 frames and if we can't get meaningful videos we remove them." ], "metadata": { "id": "QbkDI03qHMog" } }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "0UMZi3tHb-BC" }, "outputs": [], "source": [ "import cv2\n", "from PIL import Image\n", "import numpy as np\n", "\n", "def is_dark(frame, threshold=10):\n", " return np.max(frame) < threshold # all pixels are very close to 0\n", "\n", "def downsample_video(video_path):\n", " vidcap = cv2.VideoCapture(video_path)\n", " total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))\n", " fps = vidcap.get(cv2.CAP_PROP_FPS)\n", "\n", " frames = []\n", "\n", " # Generate 8 evenly spaced indices, skip first and last\n", " full_indices = np.linspace(0, total_frames - 1, 8, dtype=int)[1:-1]\n", "\n", " for i in full_indices:\n", " found_valid = False\n", " for offset in [0, -1, 1, -2, 2]: # Try nearby frames if original is dark\n", " candidate_idx = i + offset\n", " if 0 <= candidate_idx < total_frames:\n", " vidcap.set(cv2.CAP_PROP_POS_FRAMES, candidate_idx)\n", " success, image = vidcap.read()\n", " if success:\n", " if not is_dark(image):\n", " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", " pil_image = Image.fromarray(image)\n", " timestamp = round(candidate_idx / fps, 2)\n", " frames.append((pil_image, timestamp))\n", " found_valid = True\n", " break\n", " if not found_valid:\n", " print(f\"Warning: Could not find non-dark frame near index {i}\")\n", "\n", " vidcap.release()\n", "\n", " # If still fewer than 8, try to top off by scanning more frames\n", " if len(frames) < 6:\n", " print(\"Trying to top off with additional non-dark frames...\")\n", " idx = 0\n", " while len(frames) < 8 and idx < total_frames:\n", " vidcap.set(cv2.CAP_PROP_POS_FRAMES, idx)\n", " success, image = vidcap.read()\n", " if success and not is_dark(image):\n", " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", " pil_image = Image.fromarray(image)\n", " timestamp = round(idx / fps, 2)\n", " # Avoid adding duplicate timestamps\n", " if not any(ts == timestamp for _, ts in frames):\n", " frames.append((pil_image, timestamp))\n", " idx += 1\n", "\n", " return frames[:8] # Ensure exactly 8 frames\n", "\n", "import os\n", "import glob\n", "\n", "def remove_dark_videos(video_dir, metadata_dir, audio_dir):\n", " \"\"\"\n", " Remove videos (and their metadata/audio files) if all frames are dark.\n", " \"\"\"\n", " video_paths = glob.glob(os.path.join(video_dir, \"*.mp4\"))\n", "\n", " for video_path in video_paths:\n", " filename = os.path.basename(video_path)\n", " base_name = os.path.splitext(filename)[0]\n", "\n", " frames = downsample_video(video_path)\n", " if len(frames) < 6:\n", " try:\n", " os.remove(video_path)\n", " print(f\"Deleted: {video_path}\")\n", " except Exception as e:\n", " print(f\"Failed to delete {video_path}: {e}\")\n", "\n", " metadata_path = os.path.join(metadata_dir, f\"{base_name}.json\")\n", " if os.path.exists(metadata_path):\n", " os.remove(metadata_path)\n", "\n", " # Remove audio\n", " audio_path = os.path.join(audio_dir, f\"{base_name}.wav\")\n", " if os.path.exists(audio_path):\n", " os.remove(audio_path)\n", "\n" ] }, { "cell_type": "code", "source": [ "remove_dark_videos(\n", " video_dir=\"videos\",\n", " metadata_dir=\"metadata\",\n", " audio_dir=\"audios\"\n", " )" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pA6iIR38l66-", "outputId": "78f81f41-5e70-4900-e33c-cd918aaed67d" }, "execution_count": 12, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Warning: Could not find non-dark frame near index 208\n", "Trying to top off with additional non-dark frames...\n", "Deleted: videos/sample_9650.mp4\n", "Warning: Could not find non-dark frame near index 432\n", "Trying to top off with additional non-dark frames...\n", "Deleted: videos/sample_31965.mp4\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "-qa4Tf8PwITC" }, "source": [ "Gemma-3n accepts video (image frames) and audio separately, so we strip audio from video." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "OR7bhnCawHrF" }, "outputs": [], "source": [ "import os\n", "import subprocess\n", "\n", "video_dir = \"videos\"\n", "audio_dir = \"audios\"\n", "os.makedirs(audio_dir, exist_ok=True)\n", "\n", "for filename in os.listdir(video_dir):\n", " if not filename.endswith(\".mp4\"):\n", " continue\n", "\n", " idx = filename.split(\"_\")[1].split(\".\")[0]\n", " video_path = os.path.join(video_dir, filename)\n", " audio_path = os.path.join(audio_dir, f\"sample_{idx}.wav\")\n", "\n", " subprocess.run([\n", " \"ffmpeg\", \"-i\", video_path,\n", " \"-q:a\", \"0\", \"-map\", \"a\",\n", " audio_path,\n", " \"-y\"\n", " ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "uIlVtxDcwQcy" }, "source": [ "Construct a new dataset with audio, video, metadata (video categories). This dataset is very cool, it has some questions and answers, captions and more so get creative if you have the GPU VRAM to do so. Here we solve an easier task for educational purposes." ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "4eb3613e8efa4fd9adf2cfe27bfbd699", "c15cc5cb9d7947a99a01a30e430d0459", "1801493cd54742fd99752b2f605af1cb", "e5e518d8cf5f4aa5a0ecad6583f0d317", "425f9f26bd0647b1989ecb704414aa9f", "5eeff3de00c5488db1817328e83bb992", "4846c29045294042b8d916cb0fd8f9d6", "20b59cdc19684e1c97517e36f5bf8d6a", "143d6079d1744eedb41e2e1182bd0f33", "c022d8fabedc43ef9db0c8aca82d215e", "464ffcc84f48468b8f5d3f08412c6101" ] }, "id": "erYr3SdmuS4m", "outputId": "0c95ff77-7976-4641-9a51-b7f24f36270d" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Generating train split: 0 examples [00:00, ? examples/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "4eb3613e8efa4fd9adf2cfe27bfbd699" } }, "metadata": {} } ], "source": [ "from datasets import Dataset\n", "import json\n", "\n", "def gen():\n", " meta_dir = \"metadata\"\n", " for filename in os.listdir(meta_dir):\n", " if not filename.endswith(\".json\"):\n", " continue\n", "\n", " idx = filename.split(\"_\")[1].split(\".\")[0]\n", " if os.path.exists(f\"videos/sample_{idx}.mp4\"):\n", " video_filename = f\"sample_{idx}.mp4\"\n", " audio_filename = f\"sample_{idx}.wav\"\n", " json_path = os.path.join(meta_dir, filename)\n", "\n", " with open(json_path, \"r\") as f:\n", " metadata = json.load(f)\n", "\n", "\n", " yield {\n", " \"video\": video_filename,\n", " \"audio\": audio_filename,\n", " \"content_parent_category\": metadata[\"content_parent_category\"],\n", " \"sample_index\": int(idx)\n", " }\n", " else:\n", " pass\n", "\n", "dataset = Dataset.from_generator(gen)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "CjtgRoSEd9TV" }, "source": [ "We will speed-up and downsample the audios to save space during training." ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "8DDaQ86MD1Y3" }, "outputs": [], "source": [ "import torchaudio\n", "from torchaudio.transforms import Resample\n", "import os\n", "import torch\n", "\n", "def preprocess_audio(audio_path, target_sample_rate=16000, max_duration_sec=5, speedup_factor=1.25):\n", " waveform, sample_rate = torchaudio.load(audio_path)\n", "\n", " if waveform.shape[0] > 1:\n", " waveform = waveform.mean(dim=0, keepdim=True)\n", "\n", " if sample_rate != target_sample_rate:\n", " resampler = Resample(orig_freq=sample_rate, new_freq=target_sample_rate)\n", " waveform = resampler(waveform)\n", " sample_rate = target_sample_rate\n", "\n", " if speedup_factor > 1.0:\n", " indices = torch.arange(0, waveform.shape[1], step=speedup_factor).long()\n", " if indices[-1] >= waveform.shape[1]:\n", " indices = indices[:-1]\n", " waveform = waveform[:, indices]\n", "\n", " max_length = int(target_sample_rate * max_duration_sec)\n", " if waveform.shape[1] > max_length:\n", " waveform = waveform[:, :max_length]\n", "\n", " torchaudio.save(audio_path, waveform, sample_rate)\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "id": "IQ7L2_0bI1tP" }, "outputs": [], "source": [ "for file_name in os.listdir(\"audios\"):\n", " if file_name.lower().endswith(\".wav\"):\n", " audio_path = os.path.join(\"audios\", file_name)\n", " preprocess_audio(audio_path)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "pspaO2Lv4SxG" }, "outputs": [], "source": [ "dataset = dataset.train_test_split(test_size=0.10, seed=42)" ] }, { "cell_type": "markdown", "source": [ "### Load the model\n", "\n", "Make sure you have your Hugging Face token in your Colab secrets." ], "metadata": { "id": "hrvYdvQ9Hye4" } }, { "cell_type": "code", "execution_count": 57, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "a33fedc485b346b1b9d4fb8b18e8ac64", "94d5d3b00449488caa6d8badc443a74f", "a60a111fc7c24bd7b21fed3f3dd64f29", "e830732fc2bc4848847ea85c772d0b98", "3e25db05674d4d2f8fd839a0ec63e7d8", "3262178b8baf4741b06250d7416df1f3", "2e9d5cf7a5c6466a9e1de6d4f403cd95", "9d2631150d5c4089bcc95f22a6698287", "9c0857a4034f4780ab5e7fdd9aa9d09d", "073975370eab45d9abc4f69f2b7b3d48", "0d1dfc47d0704506bc6e521c07162b4b" ] }, "id": "UQaaLBCVzXH-", "outputId": "a6244057-777b-4f48-e89e-0d3c945e06e8" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/3 [00:00" ], "text/html": [ "\n", "
\n", " \n", " \n", " [588/588 1:28:09, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation Loss
11.3635003.557561
20.9818003.502365
30.8442003.512452

" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=588, training_loss=1.369473821451875, metrics={'train_runtime': 5299.3753, 'train_samples_per_second': 0.443, 'train_steps_per_second': 0.111, 'total_flos': 7.490494981503706e+16, 'train_loss': 1.369473821451875})" ] }, "metadata": {}, "execution_count": 63 } ], "source": [ "trainer.train()" ] }, { "cell_type": "markdown", "source": [ "Test the model with a video of snowboarding." ], "metadata": { "id": "qKtWUXVoUyKE" } }, { "cell_type": "code", "execution_count": 67, "metadata": { "id": "X5fOWf2bRERq", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "5daa499e-56c9-4241-eb04-c8c29864ee9e" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2025-07-16 13:18:33-- https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/IMG_8137.mp4\n", "Resolving huggingface.co (huggingface.co)... 18.160.143.99, 18.160.143.32, 18.160.143.75, ...\n", "Connecting to huggingface.co (huggingface.co)|18.160.143.99|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://cdn-lfs-us-1.hf.co/repos/7b/14/7b14679bb56cefbf7829be71f3f444110ccc308f431bd8596f534e743367ea5c/6331cbb913feb48349e3b7015a7969e04ce3cd594b1bda7278e4e33fe4a3f5f3?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27IMG_8137.mp4%3B+filename%3D%22IMG_8137.mp4%22%3B&response-content-type=video%2Fmp4&Expires=1752675513&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1MjY3NTUxM319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzdiLzE0LzdiMTQ2NzliYjU2Y2VmYmY3ODI5YmU3MWYzZjQ0NDExMGNjYzMwOGY0MzFiZDg1OTZmNTM0ZTc0MzM2N2VhNWMvNjMzMWNiYjkxM2ZlYjQ4MzQ5ZTNiNzAxNWE3OTY5ZTA0Y2UzY2Q1OTRiMWJkYTcyNzhlNGUzM2ZlNGEzZjVmMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=dKwm2ee9rdtmzuZ8tVMOOJWndfV85S9dKaTwiZbVQt3N6-1dtWkDKXbIsjuD%7Eyriu1dnXNDSjXSDIn-s7ypd8Ie-U1ABXw5Ou6CZ03Z9U4JIQDWBMwEGGEZ6HFCx0mR3royc3u-AKekcIw7zEOFtfAZ%7Eo0XT7l3BiAAV3IVu94m1ONONU779D1gSgPo1sWfuqWydAefPe2NVmSxY1HvH7DHxVOVRuGTfegXN59hvZKhSfZ0Dk0WqBjhReYVdEVxl5j-5pynjo-G%7EUsvldEcxxQpPdcD1DuOGQvYc0KyWw2Tyv3ibU7vhT%7EwVpvdG6tdIi2QOACJ4rfeaVWn5twIHxw__&Key-Pair-Id=K24J24Z295AEI9 [following]\n", "--2025-07-16 13:18:33-- https://cdn-lfs-us-1.hf.co/repos/7b/14/7b14679bb56cefbf7829be71f3f444110ccc308f431bd8596f534e743367ea5c/6331cbb913feb48349e3b7015a7969e04ce3cd594b1bda7278e4e33fe4a3f5f3?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27IMG_8137.mp4%3B+filename%3D%22IMG_8137.mp4%22%3B&response-content-type=video%2Fmp4&Expires=1752675513&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1MjY3NTUxM319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzdiLzE0LzdiMTQ2NzliYjU2Y2VmYmY3ODI5YmU3MWYzZjQ0NDExMGNjYzMwOGY0MzFiZDg1OTZmNTM0ZTc0MzM2N2VhNWMvNjMzMWNiYjkxM2ZlYjQ4MzQ5ZTNiNzAxNWE3OTY5ZTA0Y2UzY2Q1OTRiMWJkYTcyNzhlNGUzM2ZlNGEzZjVmMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=dKwm2ee9rdtmzuZ8tVMOOJWndfV85S9dKaTwiZbVQt3N6-1dtWkDKXbIsjuD%7Eyriu1dnXNDSjXSDIn-s7ypd8Ie-U1ABXw5Ou6CZ03Z9U4JIQDWBMwEGGEZ6HFCx0mR3royc3u-AKekcIw7zEOFtfAZ%7Eo0XT7l3BiAAV3IVu94m1ONONU779D1gSgPo1sWfuqWydAefPe2NVmSxY1HvH7DHxVOVRuGTfegXN59hvZKhSfZ0Dk0WqBjhReYVdEVxl5j-5pynjo-G%7EUsvldEcxxQpPdcD1DuOGQvYc0KyWw2Tyv3ibU7vhT%7EwVpvdG6tdIi2QOACJ4rfeaVWn5twIHxw__&Key-Pair-Id=K24J24Z295AEI9\n", "Resolving cdn-lfs-us-1.hf.co (cdn-lfs-us-1.hf.co)... 3.169.202.18, 3.169.202.35, 3.169.202.26, ...\n", "Connecting to cdn-lfs-us-1.hf.co (cdn-lfs-us-1.hf.co)|3.169.202.18|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5340706 (5.1M) [video/mp4]\n", "Saving to: ‘IMG_8137.mp4’\n", "\n", "IMG_8137.mp4 100%[===================>] 5.09M --.-KB/s in 0.1s \n", "\n", "2025-07-16 13:18:33 (38.9 MB/s) - ‘IMG_8137.mp4’ saved [5340706/5340706]\n", "\n" ] } ], "source": [ "!wget https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/IMG_8137.mp4" ] }, { "cell_type": "code", "source": [ "model = trainer.model # trainer has the adapter" ], "metadata": { "id": "KBfMiUChc2Ky" }, "execution_count": 89, "outputs": [] }, { "cell_type": "markdown", "source": [ "Strip audio and downsample video." ], "metadata": { "id": "R14WzyjbZCwI" } }, { "cell_type": "code", "source": [ "audio_path = \"/content/test_audio.wav\"\n", "subprocess.run([\n", " \"ffmpeg\", \"-i\", \"/content/IMG_8137.mp4\",\n", " \"-q:a\", \"0\", \"-map\", \"a\",\n", " f\"{audio_path}\",\n", " \"-y\"\n", " ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RnJZ-QNJaOqp", "outputId": "c2f42e28-d427-4da7-cf86-6c3b70e6ee02" }, "execution_count": 97, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "CompletedProcess(args=['ffmpeg', '-i', '/content/IMG_8137.mp4', '-q:a', '0', '-map', 'a', '/content/test_audio.wav', '-y'], returncode=0)" ] }, "metadata": {}, "execution_count": 97 } ] }, { "cell_type": "code", "source": [ "frames = downsample_video(\"/content/IMG_8137.mp4\")\n", "\n", "# repeat the chat template\n", "text = \"Based on the video, predict the category of it.\"\n", "message = [\n", " {\n", " \"role\": \"user\",\n", " \"content\": [\n", " {\"type\": \"text\", \"text\": text}\n", " ],\n", " },\n", "]\n", "for frame in frames:\n", " image, timestamp = frame\n", " message[0][\"content\"].append({\"type\": \"text\", \"text\": f\"Frame {timestamp}:\"})\n", " timestamp = str(timestamp).replace(\".\", \"_\")\n", " image.save(f\"test_frame_{timestamp}.png\")\n", " message[0][\"content\"].append({\"type\": \"image\", \"url\": f\"test_frame_{timestamp}.png\"})\n", "\n", "message[0][\"content\"].append({\"type\": \"audio\", \"audio\": f\"{audio_path}\"})" ], "metadata": { "id": "9drrCnfRYi6O" }, "execution_count": 98, "outputs": [] }, { "cell_type": "code", "source": [ "message" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7s1Dhxf_Z3xU", "outputId": "1eba1e9e-d859-4aa7-ff4e-992ef272df7c" }, "execution_count": 99, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'role': 'user',\n", " 'content': [{'type': 'text',\n", " 'text': 'Based on the video, predict the category of it.'},\n", " {'type': 'text', 'text': 'Frame 0.88:'},\n", " {'type': 'image', 'url': 'test_frame_0_88.png'},\n", " {'type': 'text', 'text': 'Frame 1.79:'},\n", " {'type': 'image', 'url': 'test_frame_1_79.png'},\n", " {'type': 'text', 'text': 'Frame 2.67:'},\n", " {'type': 'image', 'url': 'test_frame_2_67.png'},\n", " {'type': 'text', 'text': 'Frame 3.57:'},\n", " {'type': 'image', 'url': 'test_frame_3_57.png'},\n", " {'type': 'text', 'text': 'Frame 4.45:'},\n", " {'type': 'image', 'url': 'test_frame_4_45.png'},\n", " {'type': 'text', 'text': 'Frame 5.36:'},\n", " {'type': 'image', 'url': 'test_frame_5_36.png'},\n", " {'type': 'audio', 'audio': '/content/test_audio.wav'}]}]" ] }, "metadata": {}, "execution_count": 99 } ] }, { "cell_type": "code", "source": [ "inputs = processor.apply_chat_template(\n", " message,\n", " add_generation_prompt=True,\n", " tokenize=True,\n", " return_dict=True,\n", " return_tensors=\"pt\",\n", " padding=True,\n", ").to(model.device).to(model.dtype)" ], "metadata": { "id": "xNTQRMzsZyQz" }, "execution_count": 100, "outputs": [] }, { "cell_type": "code", "source": [ "input_len = inputs[\"input_ids\"].shape[-1]\n", "\n", "with torch.inference_mode():\n", " generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)\n", " generation = generation[0][input_len:]\n", "\n", "decoded = processor.decode(generation, skip_special_tokens=True)\n", "print(decoded)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "WNfnannnZ5-S", "outputId": "0afca313-a4f7-4c02-872e-665a853a19df" }, "execution_count": 101, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Snowboarding\n" ] } ] }, { "cell_type": "markdown", "source": [ "Thanks a lot for reading! Keep training the model further with more data or unfreeze the layers for better performance 💗" ], "metadata": { "id": "LOUBj5dgeddG" } }, { "cell_type": "code", "source": [], "metadata": { "id": "4KnNR6lneuKm" }, "execution_count": null, "outputs": [] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "A100", "machine_shape": "hm", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "4eb3613e8efa4fd9adf2cfe27bfbd699": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c15cc5cb9d7947a99a01a30e430d0459", "IPY_MODEL_1801493cd54742fd99752b2f605af1cb", "IPY_MODEL_e5e518d8cf5f4aa5a0ecad6583f0d317" ], "layout": "IPY_MODEL_425f9f26bd0647b1989ecb704414aa9f" } }, "c15cc5cb9d7947a99a01a30e430d0459": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5eeff3de00c5488db1817328e83bb992", "placeholder": "​", "style": "IPY_MODEL_4846c29045294042b8d916cb0fd8f9d6", "value": "Generating train split: " } }, "1801493cd54742fd99752b2f605af1cb": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_20b59cdc19684e1c97517e36f5bf8d6a", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_143d6079d1744eedb41e2e1182bd0f33", "value": 1 } }, "e5e518d8cf5f4aa5a0ecad6583f0d317": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c022d8fabedc43ef9db0c8aca82d215e", "placeholder": "​", "style": "IPY_MODEL_464ffcc84f48468b8f5d3f08412c6101", "value": " 869/0 [00:00<00:00, 8490.20 examples/s]" } }, "425f9f26bd0647b1989ecb704414aa9f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5eeff3de00c5488db1817328e83bb992": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4846c29045294042b8d916cb0fd8f9d6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "20b59cdc19684e1c97517e36f5bf8d6a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "143d6079d1744eedb41e2e1182bd0f33": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "c022d8fabedc43ef9db0c8aca82d215e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "464ffcc84f48468b8f5d3f08412c6101": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a33fedc485b346b1b9d4fb8b18e8ac64": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_94d5d3b00449488caa6d8badc443a74f", "IPY_MODEL_a60a111fc7c24bd7b21fed3f3dd64f29", "IPY_MODEL_e830732fc2bc4848847ea85c772d0b98" ], "layout": "IPY_MODEL_3e25db05674d4d2f8fd839a0ec63e7d8" } }, "94d5d3b00449488caa6d8badc443a74f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3262178b8baf4741b06250d7416df1f3", "placeholder": "​", "style": "IPY_MODEL_2e9d5cf7a5c6466a9e1de6d4f403cd95", "value": "Loading checkpoint shards: 100%" } }, "a60a111fc7c24bd7b21fed3f3dd64f29": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9d2631150d5c4089bcc95f22a6698287", "max": 3, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9c0857a4034f4780ab5e7fdd9aa9d09d", "value": 3 } }, "e830732fc2bc4848847ea85c772d0b98": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_073975370eab45d9abc4f69f2b7b3d48", "placeholder": "​", "style": "IPY_MODEL_0d1dfc47d0704506bc6e521c07162b4b", "value": " 3/3 [00:00<00:00,  3.91it/s]" } }, "3e25db05674d4d2f8fd839a0ec63e7d8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3262178b8baf4741b06250d7416df1f3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2e9d5cf7a5c6466a9e1de6d4f403cd95": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9d2631150d5c4089bcc95f22a6698287": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9c0857a4034f4780ab5e7fdd9aa9d09d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "073975370eab45d9abc4f69f2b7b3d48": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0d1dfc47d0704506bc6e521c07162b4b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 0 }