{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "execution": { "iopub.execute_input": "2025-06-12T17:04:41.889457Z", "iopub.status.busy": "2025-06-12T17:04:41.889269Z", "iopub.status.idle": "2025-06-12T17:04:43.013740Z", "shell.execute_reply": "2025-06-12T17:04:43.012978Z", "shell.execute_reply.started": "2025-06-12T17:04:41.889440Z" }, "trusted": true }, "outputs": [], "source": [ "# This Python 3 environment comes with many helpful analytics libraries installed\n", "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n", "# For example, here's several helpful packages to load\n", "\n", "import numpy as np # linear algebra\n", "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", "\n", "# Input data files are available in the read-only \"../input/\" directory\n", "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", "\n", "import os\n", "for dirname, _, filenames in os.walk('/kaggle/input'):\n", " for filename in filenames:\n", " print(os.path.join(dirname, filename))\n", "\n", "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n", "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:04:43.015611Z", "iopub.status.busy": "2025-06-12T17:04:43.015277Z", "iopub.status.idle": "2025-06-12T17:04:47.753149Z", "shell.execute_reply": "2025-06-12T17:04:47.752453Z", "shell.execute_reply.started": "2025-06-12T17:04:43.015591Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (3.6.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets) (3.18.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (1.26.4)\n", "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (19.0.1)\n", "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.3.8)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets) (2.2.3)\n", "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.32.3)\n", "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.11/dist-packages (from datasets) (4.67.1)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets) (3.5.0)\n", "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.70.16)\n", "Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)\n", " Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: huggingface-hub>=0.24.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.31.1)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets) (25.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets) (6.0.2)\n", "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (3.11.18)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.24.0->datasets) (4.13.2)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.24.0->datasets) (1.1.0)\n", "Requirement already satisfied: mkl_fft in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->datasets) (1.3.8)\n", "Requirement already satisfied: mkl_random in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->datasets) (1.2.4)\n", "Requirement already satisfied: mkl_umath in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->datasets) (0.1.1)\n", "Requirement already satisfied: mkl in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->datasets) (2025.1.0)\n", "Requirement already satisfied: tbb4py in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->datasets) (2022.1.0)\n", "Requirement already satisfied: mkl-service in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->datasets) (2.4.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.4.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2.4.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2025.4.26)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.2)\n", "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2.6.1)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.3.2)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (25.3.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.6.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (6.4.3)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (0.3.1)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.20.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n", "Requirement already satisfied: intel-openmp<2026,>=2024 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.17->datasets) (2024.2.0)\n", "Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.17->datasets) (2022.1.0)\n", "Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.11/dist-packages (from tbb==2022.*->mkl->numpy>=1.17->datasets) (1.3.0)\n", "Requirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.11/dist-packages (from mkl_umath->numpy>=1.17->datasets) (2024.2.0)\n", "Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.11/dist-packages (from intel-openmp<2026,>=2024->mkl->numpy>=1.17->datasets) (2024.2.0)\n", "Downloading fsspec-2025.3.0-py3-none-any.whl (193 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m193.6/193.6 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hInstalling collected packages: fsspec\n", " Attempting uninstall: fsspec\n", " Found existing installation: fsspec 2025.3.2\n", " Uninstalling fsspec-2025.3.2:\n", " Successfully uninstalled fsspec-2025.3.2\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.\n", "bigframes 1.42.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.\n", "torch 2.6.0+cu124 requires nvidia-cublas-cu12==12.4.5.8; platform_system == \"Linux\" and platform_machine == \"x86_64\", but you have nvidia-cublas-cu12 12.9.0.13 which is incompatible.\n", "torch 2.6.0+cu124 requires nvidia-cudnn-cu12==9.1.0.70; platform_system == \"Linux\" and platform_machine == \"x86_64\", but you have nvidia-cudnn-cu12 9.3.0.75 which is incompatible.\n", "torch 2.6.0+cu124 requires nvidia-cufft-cu12==11.2.1.3; platform_system == \"Linux\" and platform_machine == \"x86_64\", but you have nvidia-cufft-cu12 11.4.0.6 which is incompatible.\n", "torch 2.6.0+cu124 requires nvidia-curand-cu12==10.3.5.147; platform_system == \"Linux\" and platform_machine == \"x86_64\", but you have nvidia-curand-cu12 10.3.10.19 which is incompatible.\n", "torch 2.6.0+cu124 requires nvidia-cusolver-cu12==11.6.1.9; platform_system == \"Linux\" and platform_machine == \"x86_64\", but you have nvidia-cusolver-cu12 11.7.4.40 which is incompatible.\n", "torch 2.6.0+cu124 requires nvidia-cusparse-cu12==12.3.1.170; platform_system == \"Linux\" and platform_machine == \"x86_64\", but you have nvidia-cusparse-cu12 12.5.9.5 which is incompatible.\n", "torch 2.6.0+cu124 requires nvidia-nvjitlink-cu12==12.4.127; platform_system == \"Linux\" and platform_machine == \"x86_64\", but you have nvidia-nvjitlink-cu12 12.9.41 which is incompatible.\n", "gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2025.3.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed fsspec-2025.3.0\n" ] } ], "source": [ "!pip install datasets\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:04:47.754432Z", "iopub.status.busy": "2025-06-12T17:04:47.754136Z", "iopub.status.idle": "2025-06-12T17:04:51.067474Z", "shell.execute_reply": "2025-06-12T17:04:51.066953Z", "shell.execute_reply.started": "2025-06-12T17:04:47.754407Z" }, "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1493ba48468f46648af0bfb811beee06", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/4.65k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d146713524544f65af5a9c2c7bc33d70", "version_major": 2, "version_minor": 0 }, "text/plain": [ "train.csv: 0%| | 0.00/11.3M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b83b84e119a64c14b78c97549f709b0a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "validation.csv: 0%| | 0.00/442k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bd01c9577b4646d1a16cc3d8f508d610", "version_major": 2, "version_minor": 0 }, "text/plain": [ "test.csv: 0%| | 0.00/1.35M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e486ee8329dc4d29924e3edfb7df2479", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0%| | 0/12460 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "366c9a3006034d439092c1a1bc12ace9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating validation split: 0%| | 0/500 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "70d73fc03dcb446e886abfce07d50b4e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating test split: 0%| | 0/1500 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from datasets import load_dataset\n", "ds = load_dataset(\"knkarthick/dialogsum\")\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:04:51.068492Z", "iopub.status.busy": "2025-06-12T17:04:51.068151Z", "iopub.status.idle": "2025-06-12T17:04:52.772955Z", "shell.execute_reply": "2025-06-12T17:04:52.772321Z", "shell.execute_reply.started": "2025-06-12T17:04:51.068474Z" }, "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1814d5ef014147d0ac7e1a9b2ddc6560", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/12460 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ea9b9978b1b24eb188251a9908682166", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/500 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9f0ca8f8df98458381d20cd0908c507f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/1500 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import re\n", "def clean_text(text):\n", " text=re.sub(r'\\s+', ' ', text).strip()\n", " return text \n", "def preprocess(example):\n", " example[\"dialogue\"] = clean_text(example[\"dialogue\"])\n", " example[\"summary\"] = clean_text(example[\"summary\"])\n", " return example\n", "ds[\"train\"] = ds[\"train\"].map(preprocess)\n", "ds[\"validation\"] = ds[\"validation\"].map(preprocess)\n", "ds[\"test\"] = ds[\"test\"].map(preprocess)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:04:52.774820Z", "iopub.status.busy": "2025-06-12T17:04:52.774612Z", "iopub.status.idle": "2025-06-12T17:05:15.376798Z", "shell.execute_reply": "2025-06-12T17:05:15.375993Z", "shell.execute_reply.started": "2025-06-12T17:04:52.774804Z" }, "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c9843e22bdeb412f89c8ac4deb2fd6c6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/26.0 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6345b5f45bd245499f288926d631543e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/1.80k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c73e268748254c1ab6376245698e649a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "vocab.json: 0%| | 0.00/899k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9deb932fd0c247568e7fa87f909f1413", "version_major": 2, "version_minor": 0 }, "text/plain": [ "merges.txt: 0%| | 0.00/456k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dd000a5f19dd423eb8e3ba0071ee8ed4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/12460 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.11/dist-packages/transformers/tokenization_utils_base.py:3980: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b50d71bf97ae48e2b6a90a0606380024", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/500 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c3b5f103b9644a1294886e5f32480dc3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/1500 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from transformers import AutoTokenizer\n", "\n", "# Load the tokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(\"sshleifer/distilbart-cnn-12-6\")\n", "\n", "def tokenize_function(example):\n", " inputs = tokenizer(\n", " example[\"dialogue\"],\n", " max_length=1024,\n", " padding=\"max_length\",\n", " truncation=True\n", " )\n", " with tokenizer.as_target_tokenizer():\n", " labels = tokenizer(\n", " example[\"summary\"],\n", " max_length=128,\n", " padding=\"max_length\",\n", " truncation=True\n", " )\n", " inputs[\"labels\"] = labels[\"input_ids\"]\n", " return inputs\n", "tokenized_ds = ds.map(\n", " tokenize_function,\n", " batched=True,\n", " remove_columns=ds[\"train\"].column_names\n", ")\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:05:15.378134Z", "iopub.status.busy": "2025-06-12T17:05:15.377670Z", "iopub.status.idle": "2025-06-12T17:05:15.384805Z", "shell.execute_reply": "2025-06-12T17:05:15.384102Z", "shell.execute_reply.started": "2025-06-12T17:05:15.378114Z" }, "trusted": true }, "outputs": [], "source": [ "import torch \n", "from torch.utils.data import DataLoader\n", "\n", "for split in [\"train\", \"validation\", \"test\"]:\n", " tokenized_ds[split].set_format(type=\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n", "train_loader = DataLoader(tokenized_ds[\"train\"], batch_size=8, shuffle=True,num_workers=4)\n", "val_loader = DataLoader(tokenized_ds[\"validation\"], batch_size=8,num_workers=4)\n", "test_loader = DataLoader(tokenized_ds[\"test\"], batch_size=8,num_workers=4)\n", "\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:05:15.385961Z", "iopub.status.busy": "2025-06-12T17:05:15.385696Z", "iopub.status.idle": "2025-06-12T17:05:35.597065Z", "shell.execute_reply": "2025-06-12T17:05:35.596176Z", "shell.execute_reply.started": "2025-06-12T17:05:15.385922Z" }, "trusted": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025-06-12 17:05:18.048160: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1749747918.257111 35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1749747918.316434 35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d89f33cc54124a7e9aa9ee425c7a81ac", "version_major": 2, "version_minor": 0 }, "text/plain": [ "pytorch_model.bin: 0%| | 0.00/1.22G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "53e22b661e6a46029ef1f0db2b69d58d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/1.22G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Using device: cuda\n" ] }, { "data": { "text/plain": [ "BartForConditionalGeneration(\n", " (model): BartModel(\n", " (shared): BartScaledWordEmbedding(50264, 1024, padding_idx=1)\n", " (encoder): BartEncoder(\n", " (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)\n", " (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)\n", " (layers): ModuleList(\n", " (0-11): 12 x BartEncoderLayer(\n", " (self_attn): BartSdpaAttention(\n", " (k_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (v_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (q_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (out_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " )\n", " (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n", " (activation_fn): GELUActivation()\n", " (fc1): Linear(in_features=1024, out_features=4096, bias=True)\n", " (fc2): Linear(in_features=4096, out_features=1024, bias=True)\n", " (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n", " )\n", " )\n", " (layernorm_embedding): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n", " )\n", " (decoder): BartDecoder(\n", " (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)\n", " (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)\n", " (layers): ModuleList(\n", " (0-5): 6 x BartDecoderLayer(\n", " (self_attn): BartSdpaAttention(\n", " (k_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (v_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (q_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (out_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " )\n", " (activation_fn): GELUActivation()\n", " (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n", " (encoder_attn): BartSdpaAttention(\n", " (k_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (v_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (q_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " (out_proj): Linear(in_features=1024, out_features=1024, bias=True)\n", " )\n", " (encoder_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n", " (fc1): Linear(in_features=1024, out_features=4096, bias=True)\n", " (fc2): Linear(in_features=4096, out_features=1024, bias=True)\n", " (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n", " )\n", " )\n", " (layernorm_embedding): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n", " )\n", " )\n", " (lm_head): Linear(in_features=1024, out_features=50264, bias=False)\n", ")" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "import torch\n", "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n", "\n", "model_name = \"sshleifer/distilbart-cnn-12-6\"\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "print(f\"Using device: {device}\")\n", "\n", "model.to(device)\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:05:35.598725Z", "iopub.status.busy": "2025-06-12T17:05:35.598092Z", "iopub.status.idle": "2025-06-12T17:05:35.634550Z", "shell.execute_reply": "2025-06-12T17:05:35.633637Z", "shell.execute_reply.started": "2025-06-12T17:05:35.598704Z" }, "trusted": true }, "outputs": [], "source": [ "from torch.optim import AdamW\n", "from transformers import get_scheduler\n", "\n", "optimizer = AdamW(model.parameters(), lr=5e-5, weight_decay=0.01)\n", "num_epochs=3\n", "\n", "num_training_steps = num_epochs * len(train_loader)\n", "lr_scheduler = get_scheduler(\n", " name=\"linear\", \n", " optimizer=optimizer,\n", " num_warmup_steps=0, \n", " num_training_steps=num_training_steps,\n", ")\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:05:35.635812Z", "iopub.status.busy": "2025-06-12T17:05:35.635590Z", "iopub.status.idle": "2025-06-12T17:05:35.670246Z", "shell.execute_reply": "2025-06-12T17:05:35.669407Z", "shell.execute_reply.started": "2025-06-12T17:05:35.635795Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CUDA Available: True\n", "Device Name: Tesla T4\n", "Model is on: cuda:0\n" ] } ], "source": [ "import torch\n", "\n", "print(\"CUDA Available:\", torch.cuda.is_available())\n", "print(\"Device Name:\", torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU only\")\n", "print(\"Model is on:\", next(model.parameters()).device)\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:05:35.671585Z", "iopub.status.busy": "2025-06-12T17:05:35.671257Z", "iopub.status.idle": "2025-06-12T17:05:39.563479Z", "shell.execute_reply": "2025-06-12T17:05:39.562626Z", "shell.execute_reply.started": "2025-06-12T17:05:35.671526Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "--- Dialogue ---\n", "\n", "#Person1#: Ms. Dawson, I need you to take a dictation for me.\n", "#Person2#: Yes, sir...\n", "#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?\n", "#Person2#: Yes, sir. Go ahead.\n", "#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.\n", "#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?\n", "#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.\n", "#Person2#: But sir, many employees use Instant Messaging to communicate with their clients.\n", "#Person1#: They will just have to change their communication methods. I don't want any - one using Instant Messaging in this office. It wastes too much time! Now, please continue with the memo. Where were we?\n", "#Person2#: This applies to internal and external communications.\n", "#Person1#: Yes. Any employee who persists in using Instant Messaging will first receive a warning and be placed on probation. At second offense, the employee will face termination. Any questions regarding this new policy may be directed to department heads.\n", "#Person2#: Is that all?\n", "#Person1#: Yes. Please get this memo typed up and distributed to all employees before 4 pm.\n", "\n", "--- Summary ---\n", "\n", " The use of Instant Message programs by employees during working hours is strictly prohibited . Any employee who persists in using Instant Messaging will first receive a warning and be placed on probation . At second offense, the employee will face termination . Any questions regarding this new policy may be directed to department heads .\n" ] } ], "source": [ "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n", "from datasets import load_dataset\n", "import torch\n", "\n", "# 1. Load dataset\n", "ds = load_dataset(\"knkarthick/dialogsum\")\n", "test_dialogues = ds[\"test\"][\"dialogue\"]\n", "\n", "# 2. Load a lightweight pretrained summarization model\n", "tokenizer = AutoTokenizer.from_pretrained(\"sshleifer/distilbart-cnn-12-6\")\n", "model = AutoModelForSeq2SeqLM.from_pretrained(\"sshleifer/distilbart-cnn-12-6\")\n", "\n", "# 3. Move model to device (GPU if available)\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "model.to(device)\n", "model.eval()\n", "\n", "# 4. Pick a test example to summarize\n", "example_dialogue = test_dialogues[0]\n", "\n", "# 5. Tokenize and summarize\n", "inputs = tokenizer(example_dialogue, return_tensors=\"pt\", max_length=1024, truncation=True).to(device)\n", "summary_ids = model.generate(inputs[\"input_ids\"], max_length=128, num_beams=4, early_stopping=True)\n", "summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)\n", "\n", "# 6. Output\n", "print(\"\\n--- Dialogue ---\\n\")\n", "print(example_dialogue)\n", "\n", "print(\"\\n--- Summary ---\\n\")\n", "print(summary)\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:05:39.564906Z", "iopub.status.busy": "2025-06-12T17:05:39.564625Z", "iopub.status.idle": "2025-06-12T17:05:43.195555Z", "shell.execute_reply": "2025-06-12T17:05:43.194726Z", "shell.execute_reply.started": "2025-06-12T17:05:39.564883Z" }, "trusted": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Collecting evaluate\n", " Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)\n", "Requirement already satisfied: datasets>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from evaluate) (3.6.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from evaluate) (1.26.4)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.11/dist-packages (from evaluate) (0.3.8)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from evaluate) (2.2.3)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.11/dist-packages (from evaluate) (2.32.3)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.11/dist-packages (from evaluate) (4.67.1)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from evaluate) (3.5.0)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.11/dist-packages (from evaluate) (0.70.16)\n", "Requirement already satisfied: fsspec>=2021.05.0 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]>=2021.05.0->evaluate) (2025.3.0)\n", "Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from evaluate) (0.31.1)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from evaluate) (25.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets>=2.0.0->evaluate) (3.18.0)\n", "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.0.0->evaluate) (19.0.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets>=2.0.0->evaluate) (6.0.2)\n", "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]>=2021.05.0->evaluate) (3.11.18)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.7.0->evaluate) (4.13.2)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.7.0->evaluate) (1.1.0)\n", "Requirement already satisfied: mkl_fft in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->evaluate) (1.3.8)\n", "Requirement already satisfied: mkl_random in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->evaluate) (1.2.4)\n", "Requirement already satisfied: mkl_umath in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->evaluate) (0.1.1)\n", "Requirement already satisfied: mkl in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->evaluate) (2025.1.0)\n", "Requirement already satisfied: tbb4py in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->evaluate) (2022.1.0)\n", "Requirement already satisfied: mkl-service in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->evaluate) (2.4.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->evaluate) (3.4.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->evaluate) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->evaluate) (2.4.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.19.0->evaluate) (2025.4.26)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->evaluate) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->evaluate) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->evaluate) (2025.2)\n", "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2021.05.0->evaluate) (2.6.1)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2021.05.0->evaluate) (1.3.2)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2021.05.0->evaluate) (25.3.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2021.05.0->evaluate) (1.6.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2021.05.0->evaluate) (6.4.3)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2021.05.0->evaluate) (0.3.1)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2021.05.0->evaluate) (1.20.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->evaluate) (1.17.0)\n", "Requirement already satisfied: intel-openmp<2026,>=2024 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.17->evaluate) (2024.2.0)\n", "Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.17->evaluate) (2022.1.0)\n", "Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.11/dist-packages (from tbb==2022.*->mkl->numpy>=1.17->evaluate) (1.3.0)\n", "Requirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.11/dist-packages (from mkl_umath->numpy>=1.17->evaluate) (2024.2.0)\n", "Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.11/dist-packages (from intel-openmp<2026,>=2024->mkl->numpy>=1.17->evaluate) (2024.2.0)\n", "Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.0/84.0 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: evaluate\n", "Successfully installed evaluate-0.4.3\n" ] } ], "source": [ "!pip install evaluate\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true, "execution": { "iopub.execute_input": "2025-06-12T17:05:43.196795Z", "iopub.status.busy": "2025-06-12T17:05:43.196573Z", "iopub.status.idle": "2025-06-12T17:05:56.234877Z", "shell.execute_reply": "2025-06-12T17:05:56.234133Z", "shell.execute_reply.started": "2025-06-12T17:05:43.196773Z" }, "jupyter": { "outputs_hidden": true }, "trusted": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Collecting rouge_score\n", " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: absl-py in /usr/local/lib/python3.11/dist-packages (from rouge_score) (1.4.0)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.11/dist-packages (from rouge_score) (3.9.1)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from rouge_score) (1.26.4)\n", "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.11/dist-packages (from rouge_score) (1.17.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk->rouge_score) (8.1.8)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk->rouge_score) (1.5.0)\n", "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.11/dist-packages (from nltk->rouge_score) (2024.11.6)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from nltk->rouge_score) (4.67.1)\n", "Requirement already satisfied: mkl_fft in /usr/local/lib/python3.11/dist-packages (from numpy->rouge_score) (1.3.8)\n", "Requirement already satisfied: mkl_random in /usr/local/lib/python3.11/dist-packages (from numpy->rouge_score) (1.2.4)\n", "Requirement already satisfied: mkl_umath in /usr/local/lib/python3.11/dist-packages (from numpy->rouge_score) (0.1.1)\n", "Requirement already satisfied: mkl in /usr/local/lib/python3.11/dist-packages (from numpy->rouge_score) (2025.1.0)\n", "Requirement already satisfied: tbb4py in /usr/local/lib/python3.11/dist-packages (from numpy->rouge_score) (2022.1.0)\n", "Requirement already satisfied: mkl-service in /usr/local/lib/python3.11/dist-packages (from numpy->rouge_score) (2.4.1)\n", "Requirement already satisfied: intel-openmp<2026,>=2024 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy->rouge_score) (2024.2.0)\n", "Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.11/dist-packages (from mkl->numpy->rouge_score) (2022.1.0)\n", "Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.11/dist-packages (from tbb==2022.*->mkl->numpy->rouge_score) (1.3.0)\n", "Requirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.11/dist-packages (from mkl_umath->numpy->rouge_score) (2024.2.0)\n", "Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.11/dist-packages (from intel-openmp<2026,>=2024->mkl->numpy->rouge_score) (2024.2.0)\n", "Building wheels for collected packages: rouge_score\n", " Building wheel for rouge_score (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=b66536c4a57b6d602a59f82536abf9cdb3d83ae75b752f815f298def1a2891ee\n", " Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148\n", "Successfully built rouge_score\n", "Installing collected packages: rouge_score\n", "Successfully installed rouge_score-0.1.2\n" ] } ], "source": [ "!pip install rouge_score" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:05:56.236436Z", "iopub.status.busy": "2025-06-12T17:05:56.236159Z", "iopub.status.idle": "2025-06-12T17:05:57.972541Z", "shell.execute_reply": "2025-06-12T17:05:57.971744Z", "shell.execute_reply.started": "2025-06-12T17:05:56.236413Z" }, "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0a0de9e1e5ee4a9d9c1d705fad7fc03d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading builder script: 0%| | 0.00/6.27k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "{'rouge1': 0.6666666666666666, 'rouge2': 0.42105263157894735, 'rougeL': 0.6666666666666666, 'rougeLsum': 0.6666666666666666}\n" ] } ], "source": [ "import evaluate\n", "\n", "rouge = evaluate.load(\"rouge\")\n", "\n", "predictions = [\"The use of Instant Messaging is prohibited...\"]\n", "references = [\"The use of Instant Messaging programs by employees during working hours is strictly prohibited...\"]\n", "\n", "results = rouge.compute(predictions=predictions, references=references)\n", "print(results)\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "execution": { "iopub.execute_input": "2025-06-12T17:19:07.923722Z", "iopub.status.busy": "2025-06-12T17:19:07.922912Z", "iopub.status.idle": "2025-06-12T17:19:07.938514Z", "shell.execute_reply": "2025-06-12T17:19:07.937755Z", "shell.execute_reply.started": "2025-06-12T17:19:07.923698Z" }, "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a89c2a8663494ab0a27750205864dc51", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='