diff --git "a/Fine_tune_SmolVLM2_on_Video.ipynb" "b/Fine_tune_SmolVLM2_on_Video.ipynb"
new file mode 100644--- /dev/null
+++ "b/Fine_tune_SmolVLM2_on_Video.ipynb"
@@ -0,0 +1,2909 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/merveenoyan/smol-vision/blob/main/Fine_tune_SmolVLM2_on_Video.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nc0g2NLpUSGr"
+      },
+      "source": [
+        "# Fine-tune SmolVLM2 on Video Captioning\n",
+        "In this notebook we will fine-tune SmolVLM2-500M-Video-Instruct on the VideoFeedback dataset. It was run on a Colab A100 for full fine-tuning, but you can squeeze it onto an L4 with QLoRA."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "WIhA1lQ7j0kw",
+        "outputId": "928f2f4e-6cd8-452b-d621-605550fdd33c"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m163.5/163.5 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25h  Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install -q accelerate datasets peft bitsandbytes tensorboard pyav num2words"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install -q git+https://github.com/huggingface/transformers.git"
+      ],
+      "metadata": {
+        "id": "FCYgmJtDRElR"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "XyJaqZZ3uYYl"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install -q flash-attn --no-build-isolation"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wAeMA0heVBjT"
+      },
+      "source": [
+        "We will push our model to the Hub, so we need to authenticate ourselves."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 17,
+          "referenced_widgets": [
+            "112da28d935543069e7a1a2abc22f9f4",
+            "0d22c009aa584ca1a71e32336a7985e0",
+            "ad17e30049cb4b5aa4046d94690f87d3",
+            "e77d3520a2d64f9a840652669c9a0ba1",
+            "1852745b0de44f4281cea0cbb3508459",
+            "166c19ec6d9f4455a56a0f146d1c0abc",
+            "f6362bc7b5b24dd592d35a76a1fbf26b",
+            "e99fbdfc8a22408a8c728a36c8744b24",
+            "0fee30c9bf2b4bdfad7a37261f92db64",
+            "4cd8babc92cc4aeba74d2147f28dee7d",
+            "a4fbf37fe0fe44cfbf72ca1e82af3467",
+            "be50e04c5629463eb18d029d045f25b3",
+            "5490c69c251144c4979e346c66ac1e53",
+            "44d0e1db5f664b3fb7c146c216566776",
+            "7af918a10ec745d7a3f4a883dbdc8b6a",
+            "4156b6897089446984196606ef0d3461",
+            "cf4b5a9cefe84fd9a4d120ab1da6f3f4",
+            "484155e67e36453c9d1ebd2ea1768eca",
+            "48bb89c434284b639f45b5929cf8d1a9",
+            "0ead4ab9bb7648c69352094bfbcb8800"
+          ]
+        },
+        "id": "yKd5xtSGj7cm",
+        "outputId": "a6e841d8-f2d6-44a8-d44d-c0c244d95f9b"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "112da28d935543069e7a1a2abc22f9f4"
+            }
+          },
+          "metadata": {}
+        }
+      ],
+      "source": [
+        "from huggingface_hub import notebook_login\n",
+        "\n",
+        "notebook_login()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WRq8ve-LVAzU"
+      },
+      "source": [
+        "In this notebook we will do full fine-tuning on the 500M variant. You can also apply QLoRA or LoRA on the 2.2B variant (set `SMOL` to False); QLoRA loads an adapter on top of the quantized version of the model, saving space. If you want to do full fine-tuning, set `USE_LORA` and `USE_QLORA` to False. If you want to do LoRA, set `USE_QLORA` to False and `USE_LORA` to True. If you want to do QLoRA, set `USE_QLORA` to True.\n",
+        "\n",
+        "The small model needs to learn more, so we suggest disabling QLoRA and LoRA when fine-tuning it."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "b9CDMq0duYYn",
+        "outputId": "e7f53726-e6c3-4f7b-e98b-009bc213ca40"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
+            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+            "You will be able to reuse this secret in all of your notebooks.\n",
+            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+            "  warnings.warn(\n",
+            "You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "The model as is is holding: 0.97 of GPU RAM\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch\n",
+        "from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model\n",
+        "from transformers import AutoProcessor, BitsAndBytesConfig, AutoModelForImageTextToText\n",
+        "import os\n",
+        "\n",
+        "\n",
+        "# Training recipe switches:\n",
+        "#   USE_LORA  -> train DoRA-style LoRA adapters on top of the bf16 base model\n",
+        "#   USE_QLORA -> train LoRA adapters on top of a 4-bit quantized base model\n",
+        "#   both False -> full fine-tuning\n",
+        "USE_LORA = False\n",
+        "USE_QLORA = False\n",
+        "# True selects the 500M video checkpoint, False the 2.2B checkpoint.\n",
+        "SMOL = True\n",
+        "\n",
+        "model_id = \"HuggingFaceTB/SmolVLM2-500M-Video-Instruct\" if SMOL else \"HuggingFaceTB/SmolVLM2-2.2B-Instruct\"\n",
+        "\n",
+        "processor = AutoProcessor.from_pretrained(\n",
+        "    model_id\n",
+        ")\n",
+        "\n",
+        "if USE_QLORA or USE_LORA:\n",
+        "    lora_config = LoraConfig(\n",
+        "        r=8,\n",
+        "        lora_alpha=8,\n",
+        "        lora_dropout=0.1,\n",
+        "        target_modules=['down_proj','o_proj','k_proj','q_proj','gate_proj','up_proj','v_proj'],\n",
+        "        use_dora=not USE_QLORA,\n",
+        "        init_lora_weights=\"gaussian\"\n",
+        "    )\n",
+        "    lora_config.inference_mode = False\n",
+        "\n",
+        "    # Only QLoRA quantizes the base model; plain LoRA loads it unquantized.\n",
+        "    bnb_config = None\n",
+        "    if USE_QLORA:\n",
+        "        bnb_config = BitsAndBytesConfig(\n",
+        "            load_in_4bit=True,\n",
+        "            bnb_4bit_use_double_quant=True,\n",
+        "            bnb_4bit_quant_type=\"nf4\",\n",
+        "            bnb_4bit_compute_dtype=torch.bfloat16\n",
+        "        )\n",
+        "\n",
+        "    # Load in bf16 like the full fine-tuning path below, since\n",
+        "    # FlashAttention-2 is requested and training runs with bf16=True.\n",
+        "    model = AutoModelForImageTextToText.from_pretrained(\n",
+        "        model_id,\n",
+        "        quantization_config=bnb_config,\n",
+        "        torch_dtype=torch.bfloat16,\n",
+        "        _attn_implementation=\"flash_attention_2\",\n",
+        "        device_map=\"auto\"\n",
+        "    )\n",
+        "    # k-bit preparation is only meant for quantized base models.\n",
+        "    if USE_QLORA:\n",
+        "        model = prepare_model_for_kbit_training(model)\n",
+        "    # Attach the adapter exactly once; calling model.add_adapter() and then\n",
+        "    # get_peft_model() with the same config would apply it a second time.\n",
+        "    model = get_peft_model(model, lora_config)\n",
+        "    print(model.get_nb_trainable_parameters())\n",
+        "else:\n",
+        "    model = AutoModelForImageTextToText.from_pretrained(\n",
+        "        model_id,\n",
+        "        torch_dtype=torch.bfloat16,\n",
+        "        _attn_implementation=\"flash_attention_2\",\n",
+        "    ).to(\"cuda\")\n",
+        "\n",
+        "    # Freeze the vision encoder so only the rest of the model is trained;\n",
+        "    # remove this loop to fine-tune the vision encoder as well.\n",
+        "    for param in model.model.vision_model.parameters():\n",
+        "        param.requires_grad = False\n",
+        "\n",
+        "# Peak GPU memory allocated so far, converted from bytes to GiB.\n",
+        "peak_mem = torch.cuda.max_memory_allocated()\n",
+        "print(f\"The model as is is holding: {peak_mem / 1024**3:.2f} of GPU RAM\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "LMTtg3dl3NX2"
+      },
+      "source": [
+        "## Loading the dataset and Preprocessing"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "pWHMWTSZ3Pyr"
+      },
+      "source": [
+        "We will load a dataset of about 4k examples that contains generated videos and their super short captions. We keep a random half of it for training and discard the other half."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "yASWDFUMPsV5"
+      },
+      "outputs": [],
+      "source": [
+        "from datasets import load_dataset\n",
+        "\n",
+        "ds = load_dataset(\"TIGER-Lab/VideoFeedback\", \"real\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Znf9vMo5rnSd"
+      },
+      "outputs": [],
+      "source": [
+        "# Keep a random half of the data for training; the fixed seed makes the split reproducible across runs.\n",
+        "split_ds = ds[\"train\"].train_test_split(test_size=0.5, seed=42)\n",
+        "train_ds = split_ds[\"train\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "del split_ds, ds"
+      ],
+      "metadata": {
+        "id": "KKEZPwinSwTr"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "lQDEvNquPsV6"
+      },
+      "source": [
+        "Take a sneak peek."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "41LV2UwxPsV6",
+        "outputId": "7cc91603-2cf8-42fe-b735-7af4aaa33b4f"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "prompt:  A dog inside of a dog kennel on a patio., video: https://huggingface.co/datasets/hexuan21/VideoFeedback-videos-mp4/resolve/main/p/p110924.mp4\n"
+          ]
+        }
+      ],
+      "source": [
+        "print(f\"prompt:  {train_ds[0]['text prompt']}, video: {train_ds[0]['video link']}\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5nwMO3n0X7Hv"
+      },
+      "source": [
+        "Let's write our data collating function. We will apply the chat template to put each video and its caption together so the model can learn to caption. The processor's chat template tokenizes the text and processes the video in a single call; we then pad the results so all examples in a batch have the same shape."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "e0krVLZ-wNMl"
+      },
+      "outputs": [],
+      "source": [
+        "from torch.nn.utils.rnn import pad_sequence\n",
+        "\n",
+        "image_token_id = processor.tokenizer.additional_special_tokens_ids[\n",
+        "    processor.tokenizer.additional_special_tokens.index(\"<image>\")\n",
+        "]\n",
+        "\n",
+        "def collate_fn(examples):\n",
+        "    \"\"\"Build one padded training batch from raw dataset rows.\n",
+        "\n",
+        "    Each example must provide the caption under \"text prompt\" and the video\n",
+        "    location under \"video link\". The video and caption are rendered through the\n",
+        "    processor's chat template, then text tensors are padded to the longest\n",
+        "    sequence and pixel tensors are zero-padded to the largest frame count,\n",
+        "    height and width in the batch.\n",
+        "\n",
+        "    Returns:\n",
+        "        dict with \"input_ids\", \"attention_mask\", \"labels\" (padding and image\n",
+        "        tokens set to -100) and \"pixel_values\".\n",
+        "    \"\"\"\n",
+        "    instances = []\n",
+        "    for example in examples:\n",
+        "        prompt = example[\"text prompt\"]\n",
+        "\n",
+        "        user_content = [{\"type\": \"text\", \"text\": \"Caption the video.\"}]\n",
+        "        user_content.append({\"type\": \"video\", \"path\": example[\"video link\"]})\n",
+        "\n",
+        "        messages = [\n",
+        "            {\"role\": \"user\", \"content\": user_content},\n",
+        "            {\"role\": \"assistant\", \"content\": [{\"type\": \"text\", \"text\": f\"{prompt}\"}]}\n",
+        "        ]\n",
+        "\n",
+        "        # One call tokenizes the conversation and processes the video frames.\n",
+        "        instance = processor.apply_chat_template(messages, add_generation_prompt=False,\n",
+        "                                                 tokenize=True, return_dict=True, return_tensors=\"pt\").to(\"cuda\").to(model.dtype)\n",
+        "        instances.append(instance)\n",
+        "\n",
+        "\n",
+        "    input_ids = pad_sequence(\n",
+        "        [inst[\"input_ids\"].squeeze(0) for inst in instances],\n",
+        "        batch_first=True,\n",
+        "        padding_value=processor.tokenizer.pad_token_id\n",
+        "    )\n",
+        "    attention_mask = pad_sequence(\n",
+        "        [inst[\"attention_mask\"].squeeze(0) for inst in instances],\n",
+        "        batch_first=True,\n",
+        "        padding_value=0\n",
+        "    )\n",
+        "    # Labels start as a copy of the inputs; padded positions get -100.\n",
+        "    labels = pad_sequence(\n",
+        "        [inst[\"input_ids\"].squeeze(0).clone() for inst in instances],\n",
+        "        batch_first=True,\n",
+        "        padding_value=-100\n",
+        "    )\n",
+        "\n",
+        "    # Image placeholder tokens are not text to predict, so mask them as well.\n",
+        "    labels[labels == image_token_id] = -100\n",
+        "\n",
+        "    out = {\n",
+        "        \"input_ids\": input_ids,\n",
+        "        \"attention_mask\": attention_mask,\n",
+        "        \"labels\": labels\n",
+        "    }\n",
+        "\n",
+        "\n",
+        "    # Step 1: drop the leading batch dim of every pixel tensor; None marks an\n",
+        "    # instance without visual input (previously this crashed on .squeeze).\n",
+        "    pixel_values = [\n",
+        "        inst[\"pixel_values\"].squeeze(0) if \"pixel_values\" in inst else None\n",
+        "        for inst in instances\n",
+        "    ]\n",
+        "\n",
+        "    # Step 2: figure out maximum frames, height, width across the batch\n",
+        "    pvs = [pv for pv in pixel_values if pv is not None]\n",
+        "    if pvs:  # there is at least one non-None pixel_values\n",
+        "        max_frames = max(pv.shape[0] for pv in pvs)\n",
+        "        max_h = max(pv.shape[-2] for pv in pvs)\n",
+        "        max_w = max(pv.shape[-1] for pv in pvs)\n",
+        "    else:\n",
+        "        max_h = max_w = processor.video_size['longest_edge']\n",
+        "        max_frames = 1\n",
+        "\n",
+        "    # Step 3: zero-pad every instance to (max_frames, C, max_h, max_w)\n",
+        "    padded_pixel_values_list = []\n",
+        "    for pv in pixel_values:\n",
+        "        if pv is None:\n",
+        "            # text-only => all-zero pixel data; match the dtype/device of the\n",
+        "            # real tensors so torch.stack below does not fail\n",
+        "            padded_pv = torch.zeros(\n",
+        "                (max_frames, 3, max_h, max_w),\n",
+        "                dtype=model.dtype,\n",
+        "                device=input_ids.device\n",
+        "            )\n",
+        "        else:\n",
+        "            f, c, h, w = pv.shape\n",
+        "            # Prepare final storage, then copy the real data into the corner\n",
+        "            padded_pv = torch.zeros(\n",
+        "                (max_frames, c, max_h, max_w),\n",
+        "                dtype=pv.dtype,\n",
+        "                device=pv.device\n",
+        "            )\n",
+        "            padded_pv[:f, :, :h, :w] = pv\n",
+        "        padded_pixel_values_list.append(padded_pv)\n",
+        "\n",
+        "    out[\"pixel_values\"] = torch.stack(padded_pixel_values_list, dim=0)\n",
+        "    return out"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "kEYDjWpE3LD5"
+      },
+      "source": [
+        "## Training"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "QvAs896cdwg8"
+      },
+      "source": [
+        "We can now initialize `Trainer` and initialize `TrainingArguments` to pass to `Trainer`.\n",
+        "\n",
+        "Some notes:\n",
+        "- If you use 8-bit QLoRA with the below setup it uses around 16.4 GB VRAM (beautiful, fits comfortably inside L4, Colab free tier)\n",
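+        "- You can use gradient accumulation (`gradient_accumulation_steps`) to simulate a larger batch size; it is set to 1 in the configuration below.\n",
+        "- You can also save memory on intermediate activations by enabling gradient checkpointing (`gradient_checkpointing=True`); it is not enabled in the configuration below.\n",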
+        "\n",
+        "**Disclaimer:**\n",
+        "The techniques here aren't a free lunch. The latter two add extra compute to the training and thus slow it down a bit (for reference, on two A100s with a batch size of 16, training took 2 hrs 43 mins with 4 gradient accumulation steps; disabling it reduced that to 2 hrs 35 mins).\n",
+        "If you want to speed things up, you can experiment with reducing to 4-bit precision and using a larger batch size. Note that 4-bit might result in the model learning less."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "QNE2yWAYrAhD"
+      },
+      "outputs": [],
+      "source": [
+        "from transformers import TrainingArguments, Trainer\n",
+        "\n",
+        "# Name the output directory and Hub repo after the base checkpoint.\n",
+        "model_name = model_id.split(\"/\")[-1]\n",
+        "\n",
+        "training_args = TrainingArguments(\n",
+        "    num_train_epochs=1,\n",
+        "    per_device_train_batch_size=2,\n",
+        "    gradient_accumulation_steps=1,\n",
+        "    warmup_steps=50,\n",
+        "    learning_rate=1e-4,\n",
+        "    weight_decay=0.01,\n",
+        "    logging_steps=25,\n",
+        "    save_strategy=\"steps\",\n",
+        "    save_steps=250,\n",
+        "    save_total_limit=1,\n",
+        "    # adamw_hf is deprecated in favor of the PyTorch AdamW; for 8-bit, use paged_adamw_8bit\n",
+        "    optim=\"adamw_torch\",\n",
+        "    bf16=True,\n",
+        "    output_dir=f\"./{model_name}-video-feedback\",\n",
+        "    hub_model_id=f\"{model_name}-video-feedback\",\n",
+        "    # collate_fn reads the raw \"text prompt\" / \"video link\" columns, so keep them\n",
+        "    remove_unused_columns=False,\n",
+        "    report_to=\"tensorboard\",\n",
+        "    # collate_fn already returns CUDA tensors, which cannot be pinned\n",
+        "    dataloader_pin_memory=False\n",
+        ")\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "oBBSDpBhreJd"
+      },
+      "outputs": [],
+      "source": [
+        "trainer = Trainer(\n",
+        "    model=model,\n",
+        "    args=training_args,\n",
+        "    data_collator=collate_fn,\n",
+        "    train_dataset=train_ds,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "_QOCpw_-uYYo",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "outputId": "ad1fd1f6-41f9-4fa2-ae89-e75c9876cd65"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.11/dist-packages/transformers/optimization.py:640: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+            "  warnings.warn(\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "\n",
+              "    <div>\n",
+              "      \n",
+              "      <progress value='1000' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+              "      [1000/1000 19:51, Epoch 1/1]\n",
+              "    </div>\n",
+              "    <table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              " <tr style=\"text-align: left;\">\n",
+              "      <th>Step</th>\n",
+              "      <th>Training Loss</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <td>25</td>\n",
+              "      <td>3.345600</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>50</td>\n",
+              "      <td>0.709500</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>75</td>\n",
+              "      <td>0.341000</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>100</td>\n",
+              "      <td>0.272200</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>125</td>\n",
+              "      <td>0.250600</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>150</td>\n",
+              "      <td>0.290400</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>175</td>\n",
+              "      <td>0.261100</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>200</td>\n",
+              "      <td>0.258000</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>225</td>\n",
+              "      <td>0.276500</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>250</td>\n",
+              "      <td>0.265900</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>275</td>\n",
+              "      <td>0.301500</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>300</td>\n",
+              "      <td>0.277900</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>325</td>\n",
+              "      <td>0.282800</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>350</td>\n",
+              "      <td>0.264100</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>375</td>\n",
+              "      <td>0.235500</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>400</td>\n",
+              "      <td>0.251400</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>425</td>\n",
+              "      <td>0.242500</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>450</td>\n",
+              "      <td>0.281100</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>475</td>\n",
+              "      <td>0.261000</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>500</td>\n",
+              "      <td>0.231800</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>525</td>\n",
+              "      <td>0.232200</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>550</td>\n",
+              "      <td>0.268100</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>575</td>\n",
+              "      <td>0.222400</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>600</td>\n",
+              "      <td>0.246600</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>625</td>\n",
+              "      <td>0.251700</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>650</td>\n",
+              "      <td>0.257800</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>675</td>\n",
+              "      <td>0.241000</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>700</td>\n",
+              "      <td>0.229000</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>725</td>\n",
+              "      <td>0.236600</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>750</td>\n",
+              "      <td>0.220900</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>775</td>\n",
+              "      <td>0.271400</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>800</td>\n",
+              "      <td>0.259900</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>825</td>\n",
+              "      <td>0.243900</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>850</td>\n",
+              "      <td>0.236400</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>875</td>\n",
+              "      <td>0.227200</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>900</td>\n",
+              "      <td>0.227900</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>925</td>\n",
+              "      <td>0.263300</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>950</td>\n",
+              "      <td>0.255200</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>975</td>\n",
+              "      <td>0.250000</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>1000</td>\n",
+              "      <td>0.244400</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table><p>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "TrainOutput(global_step=1000, training_loss=0.3446595501899719, metrics={'train_runtime': 1194.5916, 'train_samples_per_second': 1.674, 'train_steps_per_second': 0.837, 'total_flos': 1550232912784896.0, 'train_loss': 0.3446595501899719, 'epoch': 1.0})"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 9
+        }
+      ],
+      "source": [
+        "trainer.train()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0hN0QD9_uYYo",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 214,
+          "referenced_widgets": [
+            "50f23a17be4f46b687b1b1df1e70238a",
+            "f1a0aa50142044f5817f8676103bff58",
+            "02cb2b35986f4ea294fbf6b2490972d5",
+            "ecc5cc7a30fa48f2afc708f8a40f50eb",
+            "6e80b0bf0aa2433fa97d6f43dd21e2cf",
+            "d7d628d4ef7c4b888fd2f70e472dac10",
+            "c42bbb0b19a544f58a5737acb1d97a85",
+            "7cd065e777f54efb857efb92415997b2",
+            "970c3d13cbbf47da986503c6ad99f506",
+            "96ba1124ba7642fe9d616b348499d0ef",
+            "484d11b997454f88902fe507c1156698",
+            "03e863ab55424bdabbc87bb965562b9b",
+            "62c1056ac5f14b1caa235010d33f241a",
+            "a415d0d029ec4225864ed59db18c20b3",
+            "56cda03822db434bb17b7cadbbaeb81b",
+            "4e085c242353430b8e32afa3bb260aa9",
+            "c55a0063346d4c429cbc000bbd612287",
+            "ba8348a627bc41149e888f0deae68a51",
+            "bef10393380046c3a842dc979ed8c01f",
+            "052ce17694bd4d3291ff5a10d2702b4b",
+            "11775cc8d35442c3a31452d66f6104e7",
+            "af6707547c5243eb9227efc0eb76134e",
+            "3d6e35795ba24eed96f2fa842b265e5b",
+            "84376aa81cae42a18fc49bdded395187",
+            "805784ce9e65411dbf35373db3680920",
+            "308ab776682a4e93ac05e06aa98a77f1",
+            "13ece6fbb1d84f03ae434119de486f07",
+            "88ef32028fd640de85d75c197eca36eb",
+            "d926557788ba45c0bef88d9e8a4b56aa",
+            "a5faec577a9844ea921c2bce1d472b23",
+            "f1e2134eb4624735842db7c112b515a0",
+            "0bd50b2853324f5c832821b7174c5ce2",
+            "077f3bcf99044d168250d1e6c4abbcae",
+            "bc2065597db04146a6df7ed10de7b93c",
+            "24cf286de1cf40f299bf0797f74c85eb",
+            "c349943637234fbc96a2a9f325d3c9f1",
+            "2d4d2f5ffae5451ebf1583362da3e9a9",
+            "3cd8e1e9fc234219b8fbd4161799640f",
+            "9e604469cb34439c944e84238b1ec055",
+            "ba94df0961c44b6d974ef882297731d8",
+            "419572dbd59a4583831961b7d8ecfa4a",
+            "88cf901fb47a4925930b7deffe98a9ce",
+            "9a6a8a5bf8f1479eb478a8c81b58aa69",
+            "3a51fdcde7984422b7a0925057f6cc37"
+          ]
+        },
+        "outputId": "20daaa82-d090-4a7b-c655-60eccf851f47"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "50f23a17be4f46b687b1b1df1e70238a"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "events.out.tfevents.1740055910.82ea94387a47.41010.0:   0%|          | 0.00/17.1k [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "03e863ab55424bdabbc87bb965562b9b"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "model.safetensors:   0%|          | 0.00/1.02G [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "3d6e35795ba24eed96f2fa842b265e5b"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "bc2065597db04146a6df7ed10de7b93c"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "CommitInfo(commit_url='https://huggingface.co/merve/SmolVLM2-500M-Video-Instruct-video-feedback/commit/2f33b0685d991475ac091593e224f3e5e7b7cac7', commit_message='End of training', commit_description='', oid='2f33b0685d991475ac091593e224f3e5e7b7cac7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/merve/SmolVLM2-500M-Video-Instruct-video-feedback', endpoint='https://huggingface.co', repo_type='model', repo_id='merve/SmolVLM2-500M-Video-Instruct-video-feedback'), pr_revision=None, pr_num=None)"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 10
+        }
+      ],
+      "source": [
+        "trainer.push_to_hub()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The test example is a video of a woman walking by; you can download and view it [here](https://huggingface.co/datasets/hexuan21/VideoFeedback-videos-mp4/blob/main/p/p000304.mp4)."
+      ],
+      "metadata": {
+        "id": "4dewIZzjfpNx"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "2dkZlDPtPsV7",
+        "outputId": "d37f856a-5873-4b7c-e807-e0f2a706be94"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "User: Caption the video.You are provided the following series of three frames from a 0:00:03 [H:MM:SS] video.\n",
+            "\n",
+            "Frame from 00:00:\n",
+            "Frame from 00:01:\n",
+            "Frame from 00:02:\n",
+            "\n",
+            "\n",
+            "Assistant: woman in white shirt walks by\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Single-turn chat request: a text instruction plus the test video.\n",
+        "messages = [\n",
+        "    {\n",
+        "        \"role\": \"user\",\n",
+        "        \"content\": [\n",
+        "            {\"type\": \"text\", \"text\": \"Caption the video.\"},\n",
+        "            {\"type\": \"video\", \"path\": \"https://huggingface.co/datasets/hexuan21/VideoFeedback-videos-mp4/resolve/main/p/p000304.mp4\"},\n",
+        "        ],\n",
+        "    }\n",
+        "]\n",
+        "\n",
+        "# Tokenize through the chat template, then place the inputs on the same\n",
+        "# device and dtype as the model so generation works wherever the model lives.\n",
+        "inputs = processor.apply_chat_template(\n",
+        "    messages,\n",
+        "    add_generation_prompt=True,\n",
+        "    tokenize=True,\n",
+        "    return_dict=True,\n",
+        "    return_tensors=\"pt\",\n",
+        ").to(model.device, dtype=model.dtype)\n",
+        "\n",
+        "# Greedy decoding (do_sample=False), capped at 64 new tokens.\n",
+        "generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)\n",
+        "generated_texts = processor.batch_decode(\n",
+        "    generated_ids,\n",
+        "    skip_special_tokens=True,\n",
+        ")\n",
+        "\n",
+        "# The decoded text contains the prompt followed by the generated caption.\n",
+        "print(generated_texts[0])"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "A100",
+      "provenance": [],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.10.16"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "112da28d935543069e7a1a2abc22f9f4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "VBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "VBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "VBoxView",
+            "box_style": "",
+            "children": [],
+            "layout": "IPY_MODEL_f6362bc7b5b24dd592d35a76a1fbf26b"
+          }
+        },
+        "0d22c009aa584ca1a71e32336a7985e0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e99fbdfc8a22408a8c728a36c8744b24",
+            "placeholder": "​",
+            "style": "IPY_MODEL_0fee30c9bf2b4bdfad7a37261f92db64",
+            "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
+          }
+        },
+        "ad17e30049cb4b5aa4046d94690f87d3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "PasswordModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "PasswordModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "PasswordView",
+            "continuous_update": true,
+            "description": "Token:",
+            "description_tooltip": null,
+            "disabled": false,
+            "layout": "IPY_MODEL_4cd8babc92cc4aeba74d2147f28dee7d",
+            "placeholder": "​",
+            "style": "IPY_MODEL_a4fbf37fe0fe44cfbf72ca1e82af3467",
+            "value": ""
+          }
+        },
+        "e77d3520a2d64f9a840652669c9a0ba1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "CheckboxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "CheckboxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "CheckboxView",
+            "description": "Add token as git credential?",
+            "description_tooltip": null,
+            "disabled": false,
+            "indent": true,
+            "layout": "IPY_MODEL_be50e04c5629463eb18d029d045f25b3",
+            "style": "IPY_MODEL_5490c69c251144c4979e346c66ac1e53",
+            "value": true
+          }
+        },
+        "1852745b0de44f4281cea0cbb3508459": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ButtonModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ButtonModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ButtonView",
+            "button_style": "",
+            "description": "Login",
+            "disabled": false,
+            "icon": "",
+            "layout": "IPY_MODEL_44d0e1db5f664b3fb7c146c216566776",
+            "style": "IPY_MODEL_7af918a10ec745d7a3f4a883dbdc8b6a",
+            "tooltip": ""
+          }
+        },
+        "166c19ec6d9f4455a56a0f146d1c0abc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_4156b6897089446984196606ef0d3461",
+            "placeholder": "​",
+            "style": "IPY_MODEL_cf4b5a9cefe84fd9a4d120ab1da6f3f4",
+            "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
+          }
+        },
+        "f6362bc7b5b24dd592d35a76a1fbf26b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": "center",
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": "flex",
+            "flex": null,
+            "flex_flow": "column",
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": "50%"
+          }
+        },
+        "e99fbdfc8a22408a8c728a36c8744b24": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0fee30c9bf2b4bdfad7a37261f92db64": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "4cd8babc92cc4aeba74d2147f28dee7d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a4fbf37fe0fe44cfbf72ca1e82af3467": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "be50e04c5629463eb18d029d045f25b3": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "5490c69c251144c4979e346c66ac1e53": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "44d0e1db5f664b3fb7c146c216566776": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "7af918a10ec745d7a3f4a883dbdc8b6a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ButtonStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ButtonStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "button_color": null,
+            "font_weight": ""
+          }
+        },
+        "4156b6897089446984196606ef0d3461": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "cf4b5a9cefe84fd9a4d120ab1da6f3f4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "484155e67e36453c9d1ebd2ea1768eca": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "LabelModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "LabelModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "LabelView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_48bb89c434284b639f45b5929cf8d1a9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_0ead4ab9bb7648c69352094bfbcb8800",
+            "value": "Connecting..."
+          }
+        },
+        "48bb89c434284b639f45b5929cf8d1a9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0ead4ab9bb7648c69352094bfbcb8800": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "50f23a17be4f46b687b1b1df1e70238a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_f1a0aa50142044f5817f8676103bff58",
+              "IPY_MODEL_02cb2b35986f4ea294fbf6b2490972d5",
+              "IPY_MODEL_ecc5cc7a30fa48f2afc708f8a40f50eb"
+            ],
+            "layout": "IPY_MODEL_6e80b0bf0aa2433fa97d6f43dd21e2cf"
+          }
+        },
+        "f1a0aa50142044f5817f8676103bff58": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_d7d628d4ef7c4b888fd2f70e472dac10",
+            "placeholder": "​",
+            "style": "IPY_MODEL_c42bbb0b19a544f58a5737acb1d97a85",
+            "value": "Upload 3 LFS files: 100%"
+          }
+        },
+        "02cb2b35986f4ea294fbf6b2490972d5": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_7cd065e777f54efb857efb92415997b2",
+            "max": 3,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_970c3d13cbbf47da986503c6ad99f506",
+            "value": 3
+          }
+        },
+        "ecc5cc7a30fa48f2afc708f8a40f50eb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_96ba1124ba7642fe9d616b348499d0ef",
+            "placeholder": "​",
+            "style": "IPY_MODEL_484d11b997454f88902fe507c1156698",
+            "value": " 3/3 [00:24&lt;00:00, 24.17s/it]"
+          }
+        },
+        "6e80b0bf0aa2433fa97d6f43dd21e2cf": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d7d628d4ef7c4b888fd2f70e472dac10": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c42bbb0b19a544f58a5737acb1d97a85": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "7cd065e777f54efb857efb92415997b2": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "970c3d13cbbf47da986503c6ad99f506": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "96ba1124ba7642fe9d616b348499d0ef": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "484d11b997454f88902fe507c1156698": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "03e863ab55424bdabbc87bb965562b9b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_62c1056ac5f14b1caa235010d33f241a",
+              "IPY_MODEL_a415d0d029ec4225864ed59db18c20b3",
+              "IPY_MODEL_56cda03822db434bb17b7cadbbaeb81b"
+            ],
+            "layout": "IPY_MODEL_4e085c242353430b8e32afa3bb260aa9"
+          }
+        },
+        "62c1056ac5f14b1caa235010d33f241a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c55a0063346d4c429cbc000bbd612287",
+            "placeholder": "​",
+            "style": "IPY_MODEL_ba8348a627bc41149e888f0deae68a51",
+            "value": "events.out.tfevents.1740055910.82ea94387a47.41010.0: 100%"
+          }
+        },
+        "a415d0d029ec4225864ed59db18c20b3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_bef10393380046c3a842dc979ed8c01f",
+            "max": 17132,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_052ce17694bd4d3291ff5a10d2702b4b",
+            "value": 17132
+          }
+        },
+        "56cda03822db434bb17b7cadbbaeb81b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_11775cc8d35442c3a31452d66f6104e7",
+            "placeholder": "​",
+            "style": "IPY_MODEL_af6707547c5243eb9227efc0eb76134e",
+            "value": " 17.1k/17.1k [00:00&lt;00:00, 121kB/s]"
+          }
+        },
+        "4e085c242353430b8e32afa3bb260aa9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c55a0063346d4c429cbc000bbd612287": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ba8348a627bc41149e888f0deae68a51": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "bef10393380046c3a842dc979ed8c01f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "052ce17694bd4d3291ff5a10d2702b4b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "11775cc8d35442c3a31452d66f6104e7": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "af6707547c5243eb9227efc0eb76134e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "3d6e35795ba24eed96f2fa842b265e5b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_84376aa81cae42a18fc49bdded395187",
+              "IPY_MODEL_805784ce9e65411dbf35373db3680920",
+              "IPY_MODEL_308ab776682a4e93ac05e06aa98a77f1"
+            ],
+            "layout": "IPY_MODEL_13ece6fbb1d84f03ae434119de486f07"
+          }
+        },
+        "84376aa81cae42a18fc49bdded395187": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_88ef32028fd640de85d75c197eca36eb",
+            "placeholder": "​",
+            "style": "IPY_MODEL_d926557788ba45c0bef88d9e8a4b56aa",
+            "value": "model.safetensors: 100%"
+          }
+        },
+        "805784ce9e65411dbf35373db3680920": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_a5faec577a9844ea921c2bce1d472b23",
+            "max": 1015025832,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_f1e2134eb4624735842db7c112b515a0",
+            "value": 1015025832
+          }
+        },
+        "308ab776682a4e93ac05e06aa98a77f1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_0bd50b2853324f5c832821b7174c5ce2",
+            "placeholder": "​",
+            "style": "IPY_MODEL_077f3bcf99044d168250d1e6c4abbcae",
+            "value": " 1.02G/1.02G [00:23&lt;00:00, 46.4MB/s]"
+          }
+        },
+        "13ece6fbb1d84f03ae434119de486f07": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "88ef32028fd640de85d75c197eca36eb": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d926557788ba45c0bef88d9e8a4b56aa": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "a5faec577a9844ea921c2bce1d472b23": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f1e2134eb4624735842db7c112b515a0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "0bd50b2853324f5c832821b7174c5ce2": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "077f3bcf99044d168250d1e6c4abbcae": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "bc2065597db04146a6df7ed10de7b93c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_24cf286de1cf40f299bf0797f74c85eb",
+              "IPY_MODEL_c349943637234fbc96a2a9f325d3c9f1",
+              "IPY_MODEL_2d4d2f5ffae5451ebf1583362da3e9a9"
+            ],
+            "layout": "IPY_MODEL_3cd8e1e9fc234219b8fbd4161799640f"
+          }
+        },
+        "24cf286de1cf40f299bf0797f74c85eb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_9e604469cb34439c944e84238b1ec055",
+            "placeholder": "​",
+            "style": "IPY_MODEL_ba94df0961c44b6d974ef882297731d8",
+            "value": "training_args.bin: 100%"
+          }
+        },
+        "c349943637234fbc96a2a9f325d3c9f1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_419572dbd59a4583831961b7d8ecfa4a",
+            "max": 5432,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_88cf901fb47a4925930b7deffe98a9ce",
+            "value": 5432
+          }
+        },
+        "2d4d2f5ffae5451ebf1583362da3e9a9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_9a6a8a5bf8f1479eb478a8c81b58aa69",
+            "placeholder": "​",
+            "style": "IPY_MODEL_3a51fdcde7984422b7a0925057f6cc37",
+            "value": " 5.43k/5.43k [00:00&lt;00:00, 40.2kB/s]"
+          }
+        },
+        "3cd8e1e9fc234219b8fbd4161799640f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "9e604469cb34439c944e84238b1ec055": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ba94df0961c44b6d974ef882297731d8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "419572dbd59a4583831961b7d8ecfa4a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "88cf901fb47a4925930b7deffe98a9ce": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "9a6a8a5bf8f1479eb478a8c81b58aa69": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "3a51fdcde7984422b7a0925057f6cc37": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file

Step	Training Loss
25	3.345600
50	0.709500
75	0.341000
100	0.272200
125	0.250600
150	0.290400
175	0.261100
200	0.258000
225	0.276500
250	0.265900
275	0.301500
300	0.277900
325	0.282800
350	0.264100
375	0.235500
400	0.251400
425	0.242500
450	0.281100
475	0.261000
500	0.231800
525	0.232200
550	0.268100
575	0.222400
600	0.246600
625	0.251700
650	0.257800
675	0.241000
700	0.229000
725	0.236600
750	0.220900
775	0.271400
800	0.259900
825	0.243900
850	0.236400
875	0.227200
900	0.227900
925	0.263300
950	0.255200
975	0.250000
1000	0.244400