{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0c4ecb49-ce58-4b65-849a-760980576e48",
   "metadata": {},
   "source": [
    "# Poro34B LoRA fine-tuning with S-Group's data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b686006-65a7-43af-8207-1c7309a5e423",
   "metadata": {},
   "outputs": [],
   "source": [
    "# This script fine-tunes the Poro34B model on 185 question-and-answer pairs"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "defcdb6f-3b69-4b03-b2dc-07c4b3027fd6",
   "metadata": {},
   "source": [
    "## Initialization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "67f730e6-3467-4a19-ab76-e8baace8e02e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: peft in /opt/conda/lib/python3.10/site-packages (0.9.0)\n",
      "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from peft) (1.26.3)\n",
      "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from peft) (23.2)\n",
      "Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from peft) (5.9.8)\n",
      "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from peft) (6.0.1)\n",
      "Requirement already satisfied: torch>=1.13.0 in /opt/conda/lib/python3.10/site-packages (from peft) (2.0.0.post101)\n",
      "Requirement already satisfied: transformers in /opt/conda/lib/python3.10/site-packages (from peft) (4.31.0)\n",
      "Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from peft) (4.66.1)\n",
      "Requirement already satisfied: accelerate>=0.21.0 in /opt/conda/lib/python3.10/site-packages (from peft) (0.21.0)\n",
      "Requirement already satisfied: safetensors in /opt/conda/lib/python3.10/site-packages (from peft) (0.3.3)\n",
      "Requirement already satisfied: huggingface-hub>=0.17.0 in /opt/conda/lib/python3.10/site-packages (from peft) (0.20.2)\n",
      "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.17.0->peft) (3.13.1)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.17.0->peft) (2023.6.0)\n",
      "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.17.0->peft) (2.31.0)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.17.0->peft) (4.5.0)\n",
      "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft) (1.12)\n",
      "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft) (3.2.1)\n",
      "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft) (3.1.3)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers->peft) (2023.12.25)\n",
      "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /opt/conda/lib/python3.10/site-packages (from transformers->peft) (0.13.3)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.13.0->peft) (2.1.4)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.6)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.17.0->peft) (1.26.18)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.17.0->peft) (2023.11.17)\n",
      "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.13.0->peft) (1.3.0)\n"
     ]
    }
   ],
   "source": [
    "# peft is the only extra dependency; all other Python libraries are already in the AWS image.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# The pin matches the peft version this notebook was last executed with.\n",
    "%pip install peft==0.9.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "80b24df2-140b-4792-aaf1-6f6aff92ece8",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-02-29 16:17:22.775245: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import json\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer \n",
    "from transformers import TrainingArguments, Trainer\n",
    "from transformers import pipeline\n",
    "from peft import  get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit\n",
    "from datasets import load_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d31adfc6-a460-419e-871b-d0437501b026",
   "metadata": {},
   "outputs": [],
   "source": [
    "# this checks wether we have GPU\n",
    "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2c5a9b07-c92b-4d1d-b5b5-96e8c234e14f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cpu\n"
     ]
    }
   ],
   "source": [
    "print(device)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6ea88a10-f5f1-4342-939b-60d2b9c5bb91",
   "metadata": {},
   "source": [
    "## Foundation model import"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2c0f7b3a-9d56-46ce-9dc8-5fe40b2628a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Foundation model\n",
    "model_name='LumiOpen/Poro-34B'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4e4c9089-a195-4fd7-91b2-6240cafb4989",
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(model_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "a42e0fb6-40d4-483b-a034-84ff351c021d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "34a67d537808415ab77b583333186ae3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load the pretrained causal-LM weights identified by `model_name`.\n",
    "# `revision` picks which revision of the model repository to load; here the one\n",
    "# named \"1000B\" (presumably a training checkpoint - confirm on the model card).\n",
    "branch = \"1000B\"\n",
    "model = AutoModelForCausalLM.from_pretrained(model_name,\n",
    "    torch_dtype=torch.bfloat16,  # load the weights as bfloat16\n",
    "    revision=branch,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "258df7e6-27c1-48a2-b20a-d377dc885884",
   "metadata": {},
   "source": [
    "## Setting up the LoRA parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "63151e65-6bff-4b65-a8ae-af4d6c53036f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from peft import LoraConfig, get_peft_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "f35f934f-23c6-47db-95bb-df20526e29e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LoRA adapter settings; consumed by get_peft_model() in the next cell.\n",
    "config = LoraConfig(\n",
    "    r=8,  # rank of the low-rank update matrices\n",
    "    lora_alpha=8,  # LoRA scaling factor; with r=8 the alpha/r scale is 1\n",
    "    target_modules=[\"query_key_value\"],  # only modules with this name receive an adapter\n",
    "    lora_dropout=0.05,  # dropout applied on the LoRA branch\n",
    "    bias=\"none\",  # bias parameters are not trained\n",
    "    task_type=\"CAUSAL_LM\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a62cb983-6e28-40f1-9f8d-64a1a6ccd0f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "peft_model = get_peft_model(model, config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fe7e7078-2998-4f17-abda-f03a32e04735",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "trainable params: 12386304\n",
      "all params: 34229336064\n",
      "trainable: 0.04%\n"
     ]
    }
   ],
   "source": [
    "# Walk the parameters once, recording (size, is_trainable) for each tensor.\n",
    "param_stats = [(p.numel(), p.requires_grad) for _, p in peft_model.named_parameters()]\n",
    "\n",
    "# Every parameter counts towards the total ...\n",
    "all_param = sum(size for size, _ in param_stats)\n",
    "# ... but only those that require a gradient count as trainable.\n",
    "trainable_params = sum(size for size, needs_grad in param_stats if needs_grad)\n",
    "\n",
    "# Report the absolute counts and the trainable share.\n",
    "print(f\"trainable params: {trainable_params}\")\n",
    "print(f\"all params: {all_param}\")\n",
    "print(f\"trainable: {100 * trainable_params / all_param:.2f}%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d029921d-60db-43ad-ae8b-2ab9910bd490",
   "metadata": {},
   "source": [
    "## Preparing the training data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "216aa90b-a87d-4a37-b178-e7e83bf987ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepare_train_data(data):\n",
    "    \"\"\"Tokenize a batch of examples for causal-LM training.\n",
    "\n",
    "    Reads the 'text' field of `data`, tokenizes it with the notebook-level\n",
    "    `tokenizer` (padded, returned as PyTorch tensors) and reuses the input\n",
    "    ids as the labels.\n",
    "\n",
    "    Returns the tokenizer output with an added 'labels' entry.\n",
    "    \"\"\"\n",
    "    encoded = tokenizer(data['text'], return_tensors='pt', padding=True)\n",
    "    # The labels are the very same ids as the inputs.\n",
    "    encoded['labels'] = encoded['input_ids']\n",
    "    return encoded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "5551fac2-9a26-4a86-b8b4-d2f572ecfaa9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "711294e3ec68409abdc6a3f29c271d3a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "dataset = load_dataset(\"json\", data_files=\"prompts_185.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "dfa8382a-3e66-4433-b6dd-97d59a7945f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "72acd5c60a62468c86206a93b17a87ab",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/185 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "train_dataset = dataset['train'].map(prepare_train_data, batched=True, remove_columns=[\"text\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "319de4fe-f5e0-4ea6-9a61-6f18c241bd02",
   "metadata": {},
   "source": [
    "## Setting up the training parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "9bb42764-af93-463f-8cf6-68707f21151b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import DataCollatorForLanguageModeling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "4b8a94c9-2648-4626-9238-4475645aa695",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Trainer for the LoRA-wrapped model on the tokenized training set.\n",
    "trainer = Trainer(\n",
    "    model=peft_model,\n",
    "    train_dataset=train_dataset,\n",
    "    args=TrainingArguments(\n",
    "        per_device_train_batch_size=4,\n",
    "        gradient_accumulation_steps=4,  # 4 x 4 = 16 examples per optimizer step per device\n",
    "        # NOTE(review): warmup_steps equals max_steps, so the whole run is warm-up - confirm this is intended\n",
    "        warmup_steps=20,\n",
    "        max_steps=20,\n",
    "        learning_rate=1e-3,\n",
    "        logging_steps=1,  # log the training loss at every step\n",
    "        output_dir='outputs',\n",
    "    ),\n",
    "    # mlm=False: causal language modelling rather than masked language modelling\n",
    "    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbd62db8-65e3-4d05-b11e-179aaf8f0e65",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.10/site-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
      "  warnings.warn(\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mtimo-au-laine\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "wandb version 0.16.3 is available!  To upgrade, please run:\n",
       " $ pip install wandb --upgrade"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.2"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>/home/sagemaker-user/wandb/run-20240229_162718-2nyysvnx</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/timo-au-laine/huggingface/runs/2nyysvnx' target=\"_blank\">trim-dust-9</a></strong> to <a href='https://wandb.ai/timo-au-laine/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/timo-au-laine/huggingface' target=\"_blank\">https://wandb.ai/timo-au-laine/huggingface</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/timo-au-laine/huggingface/runs/2nyysvnx' target=\"_blank\">https://wandb.ai/timo-au-laine/huggingface/runs/2nyysvnx</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='19' max='20' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [19/20 5:10:38 < 18:16, 0.00 it/s, Epoch 1.53/2]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>2.511700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>2.543000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>2.566400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>2.472700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>2.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>2.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>2.625000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>2.277300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>2.359400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>2.175800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>2.293000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>2.132800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>1.974600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>2.076200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>1.869100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>1.640600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>1.769500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer.train()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1ed2cf09-3683-4016-88d9-9ada1ddb4345",
   "metadata": {},
   "source": [
    "## Saving the finetuned model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c37902bf-47e5-4f89-9128-a6b7d91cb437",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_id185 = \"Poro-34B-Lora-185\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "163b54c4-3027-4e0d-9d52-7e3d698020da",
   "metadata": {},
   "outputs": [],
   "source": [
    "peft_model.save_pretrained(model_id185)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec432db5-4f0c-43c7-b4e4-ef087f057bd0",
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls -lh {model_id185}  # Lora parameters file size"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "11460d4e-3e11-4fdb-b134-61b45bb84018",
   "metadata": {},
   "source": [
    "## Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "eb6a1213-a7ab-4bb5-8ffc-0e2666286dc6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_output(model, inputs, max_new_tokens=100):\n",
    "    outputs = model.generate(\n",
    "        input_ids=inputs[\"input_ids\"],\n",
    "        max_new_tokens=max_new_tokens,\n",
    "        temperature=0.1,\n",
    "    )\n",
    "    return outputs"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0a844312-2a1e-4c76-9078-96506b252522",
   "metadata": {},
   "source": [
    "### Original model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d38bbed0-e938-43ef-b816-b5e0f9d066fd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Given the question delimited by triple backticks ```{ Mikä on osuusmaksu ja miksi se pitää maksaa? }```, what is the answer? Answer: Osuusmaksu on osuuskaupan osuusmaksu, joka maksetaan liittymisen yhteydessä. Osuusmaksu on sijoitus osuuskauppaan ja se palautetaan, kun jäsen eroaa osuuskaupasta. Osuusmaksun suuruus vaihtelee osuuskaupoittain. Osuusmaksun suuruus on 100 euroa. Osuusmaksu on sijoitus osuuskauppaan ja se palautetaan, kun jäsen eroaa osuuskaupasta. Osuusmaksun suuruus vaihtelee osuuskaupoittain. Osuusmaksun suuruus on 100 euroa. Osuusmaksu on sijoitus osuuskauppaan ja se palautetaan, kun jäsen eroaa osuuskaupasta. Osuusmaksun suuruus vaihtelee osuuskaupoittain.']\n"
     ]
    }
   ],
   "source": [
    "prompt = tokenizer('Given the question delimited by triple backticks ```{ Mikä on osuusmaksu ja miksi se pitää maksaa? }```, what is the answer? Answer:', return_tensors=\"pt\")\n",
    "# get_peft_model() modifies `model` in place, so once the cells above have run the\n",
    "# bare `model` already contains the trained LoRA layers. Switch the adapter off\n",
    "# while generating so this cell shows the true baseline on a top-to-bottom run.\n",
    "with peft_model.disable_adapter():\n",
    "    result = generate_output(model,prompt)\n",
    "print(tokenizer.batch_decode(result, skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d6091480-e399-4890-bd32-7a51d1cbb50f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Given the question delimited by triple backticks ```{ Mistä näen S-Tilini tapahtumat ja saldon? }```, what is the answer? Answer: S-Pankin verkkopankissa. The answer is the first sentence in the paragraph. The answer is the first sentence in the paragraph. The answer is the first sentence in the paragraph. The answer is the first sentence in the paragraph. The answer is the first sentence in the paragraph. The answer is the first sentence in the paragraph. The answer is the first sentence in the paragraph. The answer is the first sentence in the paragraph. The answer is the first sentence in the paragraph. The answer is the']\n"
     ]
    }
   ],
   "source": [
    "prompt = tokenizer('Given the question delimited by triple backticks ```{ Mistä näen S-Tilini tapahtumat ja saldon? }```, what is the answer? Answer:', return_tensors=\"pt\")\n",
    "# get_peft_model() modifies `model` in place, so once the cells above have run the\n",
    "# bare `model` already contains the trained LoRA layers. Switch the adapter off\n",
    "# while generating so this cell shows the true baseline on a top-to-bottom run.\n",
    "with peft_model.disable_adapter():\n",
    "    result = generate_output(model,prompt)\n",
    "print(tokenizer.batch_decode(result, skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ae3c3d6a-2b07-4e46-9ddc-dccadfd07196",
   "metadata": {},
   "source": [
    "### Finetuned model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "4cf53f39-ad3f-43e2-8daa-79853b054cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from peft import PeftModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "142bf57d-cffc-47b2-ae91-8a5420c46d32",
   "metadata": {},
   "outputs": [],
   "source": [
    "loaded_model = PeftModel.from_pretrained(model,model_id185,is_trainable=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "c3cacd26-edff-494c-9428-55b7659988de",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Given the question delimited by triple backticks ```{ Mikä on osuusmaksu ja miksi se pitää maksaa? }```, what is the answer? Answer: {Osuusmaksun suuruus on 100 euroa. Se maksetaan vain kerran, ja se palautetaan osuuskaupan asiakasomistajuudesta luovuttaessa. Osuusmaksun voi maksaa kerralla kokonaan tai kerryttää sitä Bonuksilla. Osuusmaksun voi maksaa myös S-Etukortilla, jolloin se veloitetaan S-Etukorttiin liitetyltä maksuvälineeltä.}\\n\\n{Osuusmaksun voi maksaa myös S-ryhmän lahjakortilla. Lahjakortin saldo ei kuitenkaan kerrytä Bonusta.}\\n\\n{Osuusmaksun voi maksaa myös S-ryhmän lahjak']\n"
     ]
    }
   ],
   "source": [
    "prompt = tokenizer('Given the question delimited by triple backticks ```{ Mikä on osuusmaksu ja miksi se pitää maksaa? }```, what is the answer? Answer:', return_tensors=\"pt\")\n",
    "result = generate_output(loaded_model,prompt)\n",
    "print(tokenizer.batch_decode(result, skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "57029580-6429-4c43-8482-1a052839bc05",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Given the question delimited by triple backticks ```{ Mistä näen S-Tilini tapahtumat ja saldon? }```, what is the answer? Answer: {S-Tilin tapahtumat ja saldo ovat nähtävissä S-mobiilissa ja S-Pankin verkkopankissa.}\\n\\n{S-mobiilissa S-Tilin tapahtumat ja saldo ovat nähtävissä S-Etukortti-osiossa.}\\n\\n{S-Pankin verkkopankissa S-Tilin tapahtumat ja saldo ovat nähtävissä Tilit-osiossa.}\\n\\n{S-Tilin tapahtumat ja saldo ovat nähtävissä myös S-mobiilin ja S-Pankin verkkopankin S-Etukortti- ja Tilit-osiossa, kun olet']\n"
     ]
    }
   ],
   "source": [
    "prompt = tokenizer('Given the question delimited by triple backticks ```{ Mistä näen S-Tilini tapahtumat ja saldon? }```, what is the answer? Answer:', return_tensors=\"pt\")\n",
    "result = generate_output(loaded_model,prompt)\n",
    "print(tokenizer.batch_decode(result, skip_special_tokens=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "166c476c-01a2-49cc-b03f-6cb1d9ae6136",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "897448ea-f680-4a5b-a148-f65df6704bac",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}