kingabzpro
/

DeepSeek-R1-0528-Qwen3-8B-Medical-Reasoning

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setting Up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "# Install everything in a single pip call so the resolver enforces the\n",
+    "# transformers pin across all packages; separate `-U` installs of trl/peft\n",
+    "# could otherwise pull in a newer transformers as a dependency.\n",
+    "%pip install -U transformers==4.52.1 datasets accelerate peft trl bitsandbytes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "from huggingface_hub import login\n",
+    "\n",
+    "# Read the access token from the environment so it is never hardcoded here.\n",
+    "hf_token = os.getenv(\"HF_TOKEN\")\n",
+    "login(token=hf_token)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading the model and tokenizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "_cell_guid": "82e08da0-4ee2-4235-a54f-c2bed3609a2b",
+    "_uuid": "059b3da3-4a60-40ed-8a3c-2c395b2c3a8d",
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
+    "import torch\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bnb_config = BitsAndBytesConfig(\n",
+    "    load_in_4bit=True,\n",
+    "    bnb_4bit_use_double_quant=False,\n",
+    "    bnb_4bit_quant_type=\"nf4\",\n",
+    "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9ebe60e4563942ddb0d3674d6788bceb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Tokenizer and quantized base model both come from the same Hub repo\n",
+    "\n",
+    "model_dir = \"deepseek-ai/DeepSeek-R1-0528-Qwen3-8B\"\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)\n",
+    "\n",
+    "# Keyword arguments for the quantized load, collected in one place\n",
+    "model_load_kwargs = dict(\n",
+    "    torch_dtype=torch.bfloat16,\n",
+    "    device_map=\"auto\",\n",
+    "    quantization_config=bnb_config,\n",
+    "    trust_remote_code=True,\n",
+    ")\n",
+    "model = AutoModelForCausalLM.from_pretrained(model_dir, **model_load_kwargs)\n",
+    "\n",
+    "# KV cache is switched off on the config; the generation cells re-enable it\n",
+    "# per call with use_cache=True.\n",
+    "model.config.use_cache = False\n",
+    "# NOTE(review): pretraining_tp looks like a Llama-style config field and is\n",
+    "# presumably unused by this architecture - confirm before relying on it.\n",
+    "model.config.pretraining_tp = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fri May 30 11:42:48 2025       \n",
+      "+-----------------------------------------------------------------------------------------+\n",
+      "| NVIDIA-SMI 565.57.01              Driver Version: 565.57.01      CUDA Version: 12.7     |\n",
+      "|-----------------------------------------+------------------------+----------------------+\n",
+      "| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |\n",
+      "| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |\n",
+      "|                                         |                        |               MIG M. |\n",
+      "|=========================================+========================+======================|\n",
+      "|   0  NVIDIA GeForce RTX 4090        On  |   00000000:06:00.0 Off |                  Off |\n",
+      "|  0%   27C    P0             58W /  450W |    8360MiB /  24564MiB |      0%      Default |\n",
+      "|                                         |                        |                  N/A |\n",
+      "+-----------------------------------------+------------------------+----------------------+\n",
+      "                                                                                         \n",
+      "+-----------------------------------------------------------------------------------------+\n",
+      "| Processes:                                                                              |\n",
+      "|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |\n",
+      "|        ID   ID                                                               Usage      |\n",
+      "|=========================================================================================|\n",
+      "+-----------------------------------------------------------------------------------------+\n"
+     ]
+    }
+   ],
+   "source": [
+    "!nvidia-smi"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading and processing the dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_prompt_style = \"\"\"\n",
+    "Please answer with one of the options in the bracket. Write reasoning in between <analysis></analysis>. Write the answer in between <answer></answer>.\n",
+    "### Question:\n",
+    "{}\n",
+    "\n",
+    "### Response:\n",
+    "{}\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN\n",
+    "\n",
+    "def formatting_prompts_func(examples):\n",
+    "    \"\"\"Build the training text for a batch of examples.\n",
+    "\n",
+    "    Args:\n",
+    "        examples: batch mapping with parallel \"input\" (question) and\n",
+    "            \"output\" (reference response) lists.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with a single \"text\" list: train_prompt_style filled with the\n",
+    "        cleaned question and the EOS-terminated response.\n",
+    "    \"\"\"\n",
+    "    texts = []\n",
+    "    for question, response in zip(examples[\"input\"], examples[\"output\"]):\n",
+    "        # Strip only the first \"Q:\" marker; an unbounded replace would also\n",
+    "        # delete any later \"Q:\" that is part of the question text itself.\n",
+    "        question = question.replace(\"Q:\", \"\", 1)\n",
+    "\n",
+    "        # Terminate the response with EOS exactly once\n",
+    "        if not response.endswith(EOS_TOKEN):\n",
+    "            response += EOS_TOKEN\n",
+    "\n",
+    "        texts.append(train_prompt_style.format(question, response))\n",
+    "    return {\"text\": texts}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Please answer with one of the options in the bracket. Write reasoning in between <analysis></analysis>. Write the answer in between <answer></answer>.\n",
+      "### Question:\n",
+      "A research group wants to assess the relationship between childhood diet and cardiovascular disease in adulthood. A prospective cohort study of 500 children between 10 to 15 years of age is conducted in which the participants' diets are recorded for 1 year and then the patients are assessed 20 years later for the presence of cardiovascular disease. A statistically significant association is found between childhood consumption of vegetables and decreased risk of hyperlipidemia and exercise tolerance. When these findings are submitted to a scientific journal, a peer reviewer comments that the researchers did not discuss the study's validity. Which of the following additional analyses would most likely address the concerns about this study's design?? \n",
+      "{'A': 'Blinding', 'B': 'Crossover', 'C': 'Matching', 'D': 'Stratification', 'E': 'Randomization'},\n",
+      "\n",
+      "### Response:\n",
+      "<analysis>\n",
+      "\n",
+      "This is a question about assessing the validity of a prospective cohort study. The study found an association between childhood diet and cardiovascular disease in adulthood. The peer reviewer is concerned that the researchers did not discuss the validity of the study design. \n",
+      "\n",
+      "To address concerns about validity in a prospective cohort study, we need to consider potential confounding factors that could influence the results. The additional analysis suggested should help control for confounding.\n",
+      "</analysis>\n",
+      "<answer>\n",
+      "D: Stratification\n",
+      "</answer><｜end▁of▁sentence｜>\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "dataset = load_dataset(\n",
+    "    \"mamachang/medical-reasoning\",\n",
+    "    split=\"train\",\n",
+    "    trust_remote_code=True,\n",
+    ")\n",
+    "# Adds a \"text\" column holding the fully formatted training prompt\n",
+    "dataset = dataset.map(\n",
+    "    formatting_prompts_func,\n",
+    "    batched=True,\n",
+    ")\n",
+    "# Index the row first: dataset[\"text\"] can load the whole column just to\n",
+    "# show a single sample.\n",
+    "print(dataset[10][\"text\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import DataCollatorForLanguageModeling\n",
+    "\n",
+    "data_collator = DataCollatorForLanguageModeling(\n",
+    "    tokenizer=tokenizer,\n",
+    "    mlm=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Model inference before fine-tuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The instruction and question layout must mirror train_prompt_style exactly\n",
+    "# (no blank line before \"### Question:\"); only the response slot differs,\n",
+    "# pre-seeded with the opening <analysis> tag for the model to continue.\n",
+    "inference_prompt_style = \"\"\"\n",
+    "Please answer with one of the options in the bracket. Write reasoning in between <analysis></analysis>. Write the answer in between <answer></answer>.\n",
+    "### Question:\n",
+    "{}\n",
+    "\n",
+    "### Response:\n",
+    "<analysis>\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "<analysis>\n",
+      "<think>\n",
+      "First, the question is about a prospective cohort study. In this study, 500 children aged 10 to 15 are followed for 20 years, with their diets recorded for one year, and then they're assessed for cardiovascular disease. A statistically significant association is found between vegetable consumption and decreased risk of hyperlipidemia and better exercise tolerance.\n",
+      "\n",
+      "The peer reviewer is concerned about the study's validity, and I need to figure out which additional analysis would most likely address those concerns. The options are: A) Blinding, B) Crossover, C) Matching, D) Stratification, E) Randomization.\n",
+      "\n",
+      "Validity in a cohort study often refers to how well the study is designed to minimize bias and confounding factors. Since it's a prospective cohort study, the main issue might be confounding or selection bias.\n",
+      "\n",
+      "The study is assessing diet and cardiovascular disease. Diet is a potential confounder because it might be related to other factors that affect cardiovascular disease. For example, children who eat more vegetables might also have better overall health, exercise habits, or socioeconomic status, which could influence outcomes later.\n",
+      "\n",
+      "The reviewer didn't specify what the concern is, but based on the context, it's likely about confounding or bias in the study design. The options are all methods that can be used to address validity issues.\n",
+      "\n",
+      "Let me review each option:\n",
+      "\n",
+      "- A) Blinding: This is typically used in randomized controlled trials to prevent bias in outcome assessment. In a cohort study, blinding might not be directly applicable, especially if the exposure is diet, which is recorded at the beginning and not something that can be blinded during assessment.\n",
+      "\n",
+      "- B) Crossover: This is a design where participants switch between different interventions or conditions. But this is a cohort study, not an intervention study, so crossover might not be relevant. The study is about natural history, not randomizing interventions.\n",
+      "\n",
+      "- C) Matching: In cohort studies, matching is often used in case-control studies to control for confounding, but in a prospective cohort study, matching is not typically done. Cohort studies usually don't involve matching; it's more for case-control.\n",
+      "\n",
+      "- D) Stratification: This is a method to control for confounding by dividing the study population into strata based on potential confounders and analyzing within each stratum. For example, stratifying by age, sex, or other factors to see if the association holds.\n",
+      "\n",
+      "- E) Randomization: This is key in randomized controlled trials to assign participants to groups randomly, reducing selection bias. In a cohort study, randomization isn't used because it's observational; participants are followed based on their existing exposures.\n",
+      "\n",
+      "The study is a prospective cohort study, so it's observational. The researchers recorded diets and then followed up. The concern is about validity, which could be due to confounding factors.\n",
+      "\n",
+      "In cohort studies, to address validity, we often use methods to control for confounding, like stratification or adjustment in statistical models.\n",
+      "\n",
+      "But the question asks for an \"additional analysis\" that would address the concerns. So, it's not about the design itself, but an analysis that could be added.\n",
+      "\n",
+      "Now, looking at the options:\n",
+      "\n",
+      "- Blinding: Not really applicable to a cohort study for diet exposure.\n",
+      "\n",
+      "- Crossover: Doesn't fit, as it's not an intervention study.\n",
+      "\n",
+      "- Matching: In cohort studies, matching is not standard; it's more for case-control. But sometimes, in cohort studies, matching can be used if it's a sub-study or something, but generally, it's not.\n",
+      "\n",
+      "- Stratification: This is a common analysis in cohort studies to check for confounding. For example, if there's an association, stratifying by other factors can show if it's consistent or biased.\n",
+      "\n",
+      "- Randomization: In a cohort study, randomization isn't part of the design; it's retrospective or something. But the study is prospective, meaning they start from the beginning, but it's still observational.\n",
+      "\n",
+      "The peer reviewer might be concerned that the association is due to confounding, and without randomization, it's hard to establish causality.\n",
+      "\n",
+      "But randomization is for the design, not an analysis. The question says \"additional analyses,\" so randomization might not be the right choice because it's a method to be implemented during the study, not an analysis.\n",
+      "\n",
+      "Let's read the question carefully: \"which of the following additional analyses would most likely address the concerns about this study's design?\"\n",
+      "\n",
+      "So, it's specifically about analyses, not design changes.\n",
+      "\n",
+      "In the context of a cohort study, analyses to address validity often include controlling for confounders.\n",
+      "\n",
+      "Stratification is an analysis method where you divide the data into strata and compare.\n",
+      "\n",
+      "Randomization is a design element, not an analysis.\n",
+      "\n",
+      "But option E is \"Randomization,\" which might be confusing.\n",
+      "\n",
+      "Perhaps the reviewer is concerned about selection bias or something, and randomization could be a way to address it if the study were designed differently, but it's not.\n",
+      "\n",
+      "Let's think about what the concerns might be.\n",
+      "\n",
+      "The study is a prospective cohort, so they probably randomized the assignment of diet recording or something? No, the diet is recorded for one year, and then they follow up. But the association is found, and validity might be questioned if there were unmeasured confounders.\n",
+      "\n",
+      "Common concerns in cohort studies include:\n",
+      "\n",
+      "- Confounding by factors not measured\n",
+      "\n",
+      "- Loss to follow-up\n",
+      "\n",
+      "- Measurement error\n",
+      "\n",
+      "- Selection bias if the cohort wasn't representative\n",
+      "\n",
+      "But the options are given, so I need to see which one is most relevant.\n",
+      "\n",
+      "Let's consider each option:\n",
+      "\n",
+      "- A) Blinding: In a cohort study, blinding could be for the outcome assessors to reduce bias, but the exposure is diet, which is recorded at the beginning, so blinding might not help with confounding.\n",
+      "\n",
+      "- B) Crossover: Not applicable, as it's not an intervention study\n"
+     ]
+    }
+   ],
+   "source": [
+    "question = dataset[10]['input']\n",
+    "# Strip only the leading \"Q:\" marker, not later occurrences in the text\n",
+    "question = question.replace(\"Q:\", \"\", 1)\n",
+    "\n",
+    "# The prompt must stay open-ended: an EOS token appended here would mark the\n",
+    "# sequence as finished right after <analysis>, a context the training texts\n",
+    "# never contain (EOS only follows the response).\n",
+    "inputs = tokenizer(\n",
+    "    [inference_prompt_style.format(question)],\n",
+    "    return_tensors=\"pt\"\n",
+    ").to(\"cuda\")\n",
+    "\n",
+    "outputs = model.generate(\n",
+    "    input_ids=inputs.input_ids,\n",
+    "    attention_mask=inputs.attention_mask,\n",
+    "    max_new_tokens=1200,\n",
+    "    eos_token_id=tokenizer.eos_token_id,\n",
+    "    use_cache=True,\n",
+    ")\n",
+    "response = tokenizer.batch_decode(outputs, skip_special_tokens=True)\n",
+    "# Show only what follows the response header (seeded <analysis> tag + generation)\n",
+    "print(response[0].split(\"### Response:\")[1])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setting up the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from peft import LoraConfig, get_peft_model\n",
+    "\n",
+    "# Names of the modules that receive LoRA adapters\n",
+    "lora_target_modules = [\n",
+    "    \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "    \"gate_proj\", \"up_proj\", \"down_proj\",\n",
+    "]\n",
+    "\n",
+    "# LoRA config\n",
+    "peft_config = LoraConfig(\n",
+    "    task_type=\"CAUSAL_LM\",               # causal language modeling\n",
+    "    r=64,                                # rank of the LoRA update matrices\n",
+    "    lora_alpha=16,                       # LoRA scaling factor\n",
+    "    lora_dropout=0.05,                   # light dropout as regularization\n",
+    "    bias=\"none\",                         # bias terms are not adapted\n",
+    "    target_modules=lora_target_modules,\n",
+    ")\n",
+    "\n",
+    "# Wrap the base model with the adapters described above\n",
+    "model = get_peft_model(model, peft_config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from transformers import TrainingArguments\n",
+    "from trl import SFTTrainer\n",
+    "\n",
+    "\n",
+    "# Training Arguments\n",
+    "training_arguments = TrainingArguments(\n",
+    "    output_dir=\"DeepSeek-R1-0528-Qwen3-8B-Medical-Reasoning\",\n",
+    "    report_to=\"none\",\n",
+    "    # Schedule and optimizer\n",
+    "    num_train_epochs=1,\n",
+    "    learning_rate=2e-4,\n",
+    "    warmup_steps=10,\n",
+    "    optim=\"paged_adamw_32bit\",\n",
+    "    # Batching: one sample per device, gradients accumulated over 2 steps\n",
+    "    per_device_train_batch_size=1,\n",
+    "    per_device_eval_batch_size=1,\n",
+    "    gradient_accumulation_steps=2,\n",
+    "    group_by_length=True,\n",
+    "    # Both mixed-precision flags are left off\n",
+    "    fp16=False,\n",
+    "    bf16=False,\n",
+    "    # A float below 1 is read as a fraction of the total training steps\n",
+    "    logging_strategy=\"steps\",\n",
+    "    logging_steps=0.2,\n",
+    ")\n",
+    "\n",
+    "# Initialize the Trainer\n",
+    "trainer = SFTTrainer(\n",
+    "    model=model,\n",
+    "    peft_config=peft_config,\n",
+    "    train_dataset=dataset,\n",
+    "    data_collator=data_collator,\n",
+    "    args=training_arguments,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Model Training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='1851' max='1851' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [1851/1851 15:16, Epoch 1/1]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Step</th>\n",
+       "      <th>Training Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>371</td>\n",
+       "      <td>1.003000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>742</td>\n",
+       "      <td>0.942100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>1113</td>\n",
+       "      <td>0.918800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>1484</td>\n",
+       "      <td>0.895600</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}\n",
+      "Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}\n",
+      "Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}\n",
+      "Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "TrainOutput(global_step=1851, training_loss=0.9323031581588077, metrics={'train_runtime': 917.8344, 'train_samples_per_second': 4.033, 'train_steps_per_second': 2.017, 'total_flos': 7.739078550532915e+16, 'train_loss': 0.9323031581588077})"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import gc, torch\n",
+    "gc.collect()\n",
+    "torch.cuda.empty_cache()\n",
+    "model.config.use_cache = False\n",
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Model inference after fine-tuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "<analysis>\n",
+      "This is a question about evaluating the validity of a prospective cohort study design. The study looked at childhood diet and cardiovascular disease in adulthood. The peer reviewer raised concerns about the study's validity, likely because it was an observational study without randomization or control group. \n",
+      "\n",
+      "The question asks which additional analysis would most likely address the concerns about the study's design. The choices include blinding, crossover, matching, stratification, and randomization. \n",
+      "\n",
+      "Randomization is the key to reducing bias in observational studies. By randomly assigning participants to different groups, you can control for confounding factors. The other choices do not directly address the lack of randomization in the original study design.\n",
+      "</analysis>\n",
+      "<answer>\n",
+      "E: Randomization\n",
+      "</answer>\n"
+     ]
+    }
+   ],
+   "source": [
+    "question = dataset[10]['input']\n",
+    "# Strip only the leading \"Q:\" marker, not later occurrences in the text\n",
+    "question = question.replace(\"Q:\", \"\", 1)\n",
+    "\n",
+    "# The prompt must stay open-ended: an EOS token appended here would mark the\n",
+    "# sequence as finished right after <analysis>, a context the training texts\n",
+    "# never contain (EOS only follows the response).\n",
+    "inputs = tokenizer(\n",
+    "    [inference_prompt_style.format(question)],\n",
+    "    return_tensors=\"pt\"\n",
+    ").to(\"cuda\")\n",
+    "\n",
+    "outputs = model.generate(\n",
+    "    input_ids=inputs.input_ids,\n",
+    "    attention_mask=inputs.attention_mask,\n",
+    "    max_new_tokens=1200,\n",
+    "    eos_token_id=tokenizer.eos_token_id,\n",
+    "    use_cache=True,\n",
+    ")\n",
+    "response = tokenizer.batch_decode(outputs, skip_special_tokens=True)\n",
+    "# Show only what follows the response header (seeded <analysis> tag + generation)\n",
+    "print(response[0].split(\"### Response:\")[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<analysis>\n",
+      "\n",
+      "This is a question about assessing the validity of a prospective cohort study. The study found an association between childhood diet and cardiovascular disease in adulthood. The peer reviewer is concerned that the researchers did not discuss the validity of the study design. \n",
+      "\n",
+      "To address concerns about validity in a prospective cohort study, we need to consider potential confounding factors that could influence the results. The additional analysis suggested should help control for confounding.\n",
+      "</analysis>\n",
+      "<answer>\n",
+      "D: Stratification\n",
+      "</answer>\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(dataset[10]['output'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "<analysis>\n",
+      "<answer>\n",
+      "D: Distal symmetric sensorimotor polyneuropathy\n",
+      "</answer> \n",
+      "</think>\n",
+      "<analysis>\n",
+      "\n",
+      "This is a clinical vignette describing a 55-year-old man with burning and shooting pain in his feet and lower legs that worsens at night. He has a history of type 2 diabetes mellitus and hypertension. The description of the pain being burning and shooting in a symmetric distribution in the lower extremities, worsening at night, along with his history of diabetes, is most consistent with distal symmetric sensorimotor polyneuropathy. Autonomic neuropathy would not cause sensory symptoms. Isolated cranial nerve or peripheral nerve neuropathy would not explain the symmetric distribution. Radiculopathy would not explain the sensory symptoms.\n",
+      "</analysis>\n",
+      "<answer>\n",
+      "D: Distal symmetric sensorimotor polyneuropathy\n",
+      "</answer>\n"
+     ]
+    }
+   ],
+   "source": [
+    "question = dataset[100]['input']\n",
+    "# Strip only the leading \"Q:\" marker, not later occurrences in the text\n",
+    "question = question.replace(\"Q:\", \"\", 1)\n",
+    "\n",
+    "# The prompt must stay open-ended: an EOS token appended here would mark the\n",
+    "# sequence as finished right after <analysis>, a context the training texts\n",
+    "# never contain (EOS only follows the response).\n",
+    "inputs = tokenizer(\n",
+    "    [inference_prompt_style.format(question)],\n",
+    "    return_tensors=\"pt\"\n",
+    ").to(\"cuda\")\n",
+    "\n",
+    "outputs = model.generate(\n",
+    "    input_ids=inputs.input_ids,\n",
+    "    attention_mask=inputs.attention_mask,\n",
+    "    max_new_tokens=1200,\n",
+    "    eos_token_id=tokenizer.eos_token_id,\n",
+    "    use_cache=True,\n",
+    ")\n",
+    "response = tokenizer.batch_decode(outputs, skip_special_tokens=True)\n",
+    "# Show only what follows the response header (seeded <analysis> tag + generation)\n",
+    "print(response[0].split(\"### Response:\")[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<analysis>\n",
+      "\n",
+      "This patient has a history of type 2 diabetes mellitus and is experiencing burning and shooting pains in his feet and lower legs that are worse at night and have progressed over the past 6 months. This presentation is most consistent with distal symmetric sensorimotor polyneuropathy, a type of diabetic neuropathy that affects the distal extremities in a length-dependent pattern. Autonomic neuropathy, cranial nerve neuropathy, and radiculopathy would not explain the symmetric distal distribution. Isolated peripheral neuropathy would not be expected in the setting of longstanding diabetes.\n",
+      "</analysis>\n",
+      "<answer>\n",
+      "D: Distal symmetric sensorimotor polyneuropathy\n",
+      "</answer>\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(dataset[100]['output'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Saving the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a83548e89c4743ab8d356440e50f5944",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "README.md:   0%|          | 0.00/5.90k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7e13d58a863e4f8fba3bf430b3c906a6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Uploading...:   0%|          | 0.00/698M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3dbd46371b5549039c903a11d39953fb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Uploading...:   0%|          | 0.00/11.4M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No files have been modified since last commit. Skipping to prevent empty commit.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "CommitInfo(commit_url='https://huggingface.co/kingabzpro/DeepSeek-R1-0528-Qwen3-8B-Medical-Reasoning/commit/93da153613461499f6f4fe7576689126adfd0a95', commit_message='Upload tokenizer', commit_description='', oid='93da153613461499f6f4fe7576689126adfd0a95', pr_url=None, repo_url=RepoUrl('https://huggingface.co/kingabzpro/DeepSeek-R1-0528-Qwen3-8B-Medical-Reasoning', endpoint='https://huggingface.co', repo_type='model', repo_id='kingabzpro/DeepSeek-R1-0528-Qwen3-8B-Medical-Reasoning'), pr_revision=None, pr_num=None)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Hub repository that receives the fine-tuned artifacts.\n",
+    "new_model_name = \"DeepSeek-R1-0528-Qwen3-8B-Medical-Reasoning\"\n",
+    "\n",
+    "# Upload the trainer's model first, then its processing class, to that repository.\n",
+    "trainer.model.push_to_hub(repo_id=new_model_name)\n",
+    "trainer.processing_class.push_to_hub(repo_id=new_model_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading the Adapter and testing the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gc\n",
+    "\n",
+    "# Drop the training-time references, then force a garbage-collection pass so\n",
+    "# objects kept alive only by reference cycles are actually destroyed.\n",
+    "# torch.cuda.empty_cache() can only release cached GPU memory that no live\n",
+    "# tensor still occupies, so it has to run after the collection.\n",
+    "del model\n",
+    "del trainer\n",
+    "gc.collect()\n",
+    "torch.cuda.empty_cache()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0c1fdc4ef0d3446e83fd86105230fe92",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "04e8fd839dc94247bfdfd47b36458ba2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "adapter_config.json:   0%|          | 0.00/870 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "559929904fc3436faff5b28ece76f727",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "adapter_model.safetensors:   0%|          | 0.00/698M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
+    "from peft import PeftModel\n",
+    "import torch\n",
+    "\n",
+    "# Hub ids: the base checkpoint and the fine-tuned LoRA adapter applied on top of it\n",
+    "base_model_id = \"deepseek-ai/DeepSeek-R1-0528-Qwen3-8B\"\n",
+    "lora_adapter_id = \"kingabzpro/DeepSeek-R1-0528-Qwen3-8B-Medical-Reasoning\"\n",
+    "\n",
+    "# 4-bit NF4 quantization (double quantization off) with bfloat16 compute\n",
+    "bnb_config = BitsAndBytesConfig(\n",
+    "    load_in_4bit=True,\n",
+    "    bnb_4bit_quant_type=\"nf4\",\n",
+    "    bnb_4bit_use_double_quant=False,\n",
+    "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
+    ")\n",
+    "\n",
+    "# Quantized base model with automatic device placement\n",
+    "base_model = AutoModelForCausalLM.from_pretrained(\n",
+    "    base_model_id,\n",
+    "    quantization_config=bnb_config,\n",
+    "    torch_dtype=torch.bfloat16,\n",
+    "    device_map=\"auto\",\n",
+    "    trust_remote_code=True,\n",
+    ")\n",
+    "\n",
+    "# Wrap the base model with the adapter weights fetched from the Hub\n",
+    "model = PeftModel.from_pretrained(\n",
+    "    model=base_model,\n",
+    "    model_id=lora_adapter_id,\n",
+    "    device_map=\"auto\",\n",
+    "    trust_remote_code=True,\n",
+    ")\n",
+    "\n",
+    "# The tokenizer is taken from the base checkpoint\n",
+    "tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "<analysis>\n",
+      "\n",
+      "This is a question about evaluating the validity of a prospective cohort study design. The study looked at childhood diet and cardiovascular disease in adulthood. The peer reviewer was concerned about the study's validity. \n",
+      "\n",
+      "To address concerns about validity in a prospective cohort study, we need to consider potential confounding factors. The choices given are different statistical methods that can help control for confounding. \n",
+      "\n",
+      "Blinding and crossover designs are not applicable to a prospective cohort study. Matching and stratification can help control for confounding by balancing the distribution of confounders between groups. Randomization is the best way to minimize confounding by randomly assigning participants to different exposure groups.\n",
+      "</think>\n",
+      "<analysis>\n",
+      "This is a question about evaluating the validity of a prospective cohort study design. The study looked at childhood diet and cardiovascular disease in adulthood. The peer reviewer was concerned about the study's validity. \n",
+      "\n",
+      "To address concerns about validity in a prospective cohort study, we need to consider potential confounding factors. The choices given are different statistical methods that can help control for confounding. \n",
+      "\n",
+      "Blinding and crossover designs are not applicable to a prospective cohort study. Matching and stratification can help control for confounding by balancing the distribution of confounders between groups. Randomization is the best way to minimize confounding by randomly assigning participants to different exposure groups.\n",
+      "</analysis>\n",
+      "<answer>\n",
+      "E: Randomization\n",
+      "</answer>\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Inference example\n",
+    "prompt = \"\"\"\n",
+    "Please answer with one of the options in the bracket. Write reasoning in between <analysis></analysis>. Write the answer in between <answer></answer>.\n",
+    "\n",
+    "### Question:\n",
+    "A research group wants to assess the relationship between childhood diet and cardiovascular disease in adulthood.\n",
+    "A prospective cohort study of 500 children between 10 to 15 years of age is conducted in which the participants' diets are recorded for 1 year and then the patients are assessed 20 years later for the presence of cardiovascular disease.\n",
+    "A statistically significant association is found between childhood consumption of vegetables and decreased risk of hyperlipidemia and exercise tolerance.\n",
+    "When these findings are submitted to a scientific journal, a peer reviewer comments that the researchers did not discuss the study's validity.\n",
+    "Which of the following additional analyses would most likely address the concerns about this study's design? \n",
+    "{'A': 'Blinding', 'B': 'Crossover', 'C': 'Matching', 'D': 'Stratification', 'E': 'Randomization'},\n",
+    "### Response:\n",
+    "<analysis>\n",
+    "\n",
+    "\"\"\"\n",
+    "\n",
+    "# Tokenize the prompt exactly as written. No EOS token is appended: an\n",
+    "# end-of-sequence marker after the prompt would tell the model the text is\n",
+    "# already finished before it has generated anything.\n",
+    "# The tensors follow the model's own device instead of a hard-coded one,\n",
+    "# because the model was loaded with automatic device placement.\n",
+    "inputs = tokenizer(\n",
+    "    [prompt],\n",
+    "    return_tensors=\"pt\"\n",
+    ").to(model.device)\n",
+    "\n",
+    "outputs = model.generate(\n",
+    "    input_ids=inputs.input_ids,\n",
+    "    attention_mask=inputs.attention_mask,\n",
+    "    max_new_tokens=1200,\n",
+    "    eos_token_id=tokenizer.eos_token_id,\n",
+    "    # Explicit padding id: generate() otherwise falls back to the EOS id and logs a warning.\n",
+    "    pad_token_id=tokenizer.eos_token_id,\n",
+    "    use_cache=True,\n",
+    ")\n",
+    "response = tokenizer.batch_decode(outputs, skip_special_tokens=True)\n",
+    "# The decoded text still contains the prompt; print only what follows the response marker.\n",
+    "print(response[0].split(\"### Response:\")[1])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kaggle": {
+   "accelerator": "nvidiaTeslaT4",
+   "dataSources": [
+    {
+     "modelId": 322000,
+     "modelInstanceId": 301517,
+     "sourceId": 363139,
+     "sourceType": "modelInstanceVersion"
+    }
+   ],
+   "dockerImageVersionId": 31011,
+   "isGpuEnabled": true,
+   "isInternetEnabled": true,
+   "language": "python",
+   "sourceType": "notebook"
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}