In [2]:
from unsloth import FastLanguageModel
import torch
# Model-loading configuration. `max_seq_length` is reused later when the trainer is built.
max_seq_length = 8192 # Maximum sequence length passed to the loader (and later to the trainer).
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = False # 4-bit quantization is disabled here; set True to reduce memory usage.

# Load the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    # Other checkpoints that can be selected instead:
    # "unsloth/Qwen2.5-0.5B", "unsloth/Qwen2.5-1.5B", "unsloth/Qwen2.5-3B"
    # "unsloth/Qwen2.5-14B",  "unsloth/Qwen2.5-32B",  "unsloth/Qwen2.5-72B",
    # as well as all Instruct, Math and Coding versions.
    model_name = "unsloth/Qwen2.5-1.5B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # only needed for gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.14: Fast Qwen2 patching. Transformers: 4.49.0.
   \\   /|    NVIDIA A100-PCIE-40GB. Num GPUs = 1. Max memory: 39.394 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 8.0. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

In [3]:
# Attach LoRA adapters to the loaded model. `model` is rebound to the wrapped model,
# so every later cell that uses `model` sees the LoRA version.
# NOTE(review): because this cell reads and overwrites `model`, re-running it without
# re-running the loading cell would wrap an already-wrapped model — confirm that is acceptable.
model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # LoRA rank. Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    # Attention projections (q/k/v/o) and MLP projections (gate/up/down) receive adapters.
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    # NOTE(review): lora_alpha (16) is half of r (32); presumably the effective
    # scale is lora_alpha / r = 0.5 with use_rslora = False — confirm this is intended.
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407, # Fixed seed; the same value is used as the trainer seed.
    use_rslora = False,  # Rank-stabilized LoRA is available but not enabled here
    loftq_config = None, # LoftQ is available but not enabled here
)

Unsloth 2025.3.14 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:
from datasets import load_dataset, Dataset
import json

def convert_format(messages):
    """
    Convert a single entry from the 'messages' column to the target format.

    Every usable message is rendered as one ChatML turn: the
    ``<|im_start|>`` marker followed by the role, a newline, the content,
    the ``<|im_end|>`` marker and a trailing newline.

    Args:
        messages: Iterable of dict-like messages exposing ``.get('role')``
            and ``.get('content')``. Messages whose role or content is
            missing or empty are skipped.

    Returns:
        str: The concatenated turns. The default Qwen system turn is
        prepended unless the first usable message is itself a ``system``
        message, so the result never starts with two system turns.
    """
    default_system_turn = '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n'

    # Keep only messages that carry both a role and some content.
    turns = []
    for message in messages:
        role = message.get('role')
        content = message.get('content')
        if role and content:
            turns.append((role, content))

    # Only fall back to the default system prompt when the conversation does
    # not already open with its own system message; otherwise the rendered
    # text would contain two consecutive system turns.
    parts = []
    if not turns or turns[0][0] != 'system':
        parts.append(default_system_turn)

    # Collect the pieces and join once instead of growing a string with `+=`.
    parts.extend(
        f'<|im_start|>{role}\n{content}<|im_end|>\n' for role, content in turns
    )
    return ''.join(parts)

def process_dataset(dataset_name="oumi-ai/MetaMathQA-R1", max_examples=10000):
    """
    Load a chat dataset from the Hugging Face Hub and convert it to plain text.

    For every split that has a 'messages' column, the first ``max_examples``
    rows are rendered with ``convert_format`` and stored in a new ``Dataset``
    with a single 'text' column. Splits without a 'messages' column are
    reported and skipped.

    Args:
        dataset_name: Hub identifier passed to ``load_dataset``. Defaults to
            the dataset this notebook was written for.
        max_examples: Upper bound on rows converted per split. ``None``
            converts every row. Defaults to 10000.

    Returns:
        dict: Maps split name to the converted ``Dataset``.
    """
    # Load the dataset from Hugging Face
    dataset = load_dataset(dataset_name)

    converted_datasets = {}

    for split_name in dataset.keys():
        print(f"\nProcessing split: {split_name}")
        split = dataset[split_name]

        # Guard clause: nothing to convert without a 'messages' column.
        if 'messages' not in split.column_names:
            print(f"'messages' column not found in {split_name} split")
            continue

        total_rows = len(split)
        print(f"Number of examples in {split_name}: {total_rows}")

        if max_examples is None:
            num_examples = total_rows
        else:
            num_examples = max(0, min(max_examples, total_rows))

        # Select the rows BEFORE reading the column, so that only the rows that
        # will actually be converted are loaded, not the whole split.
        messages_list = split.select(range(num_examples))['messages']

        converted_texts = []
        for i, messages in enumerate(messages_list):
            converted_texts.append(convert_format(messages))

            # Print progress
            if i % 1000 == 0:
                print(f"Processed {i} examples")

        # Create a new dataset with a 'text' column containing the converted data
        converted_datasets[split_name] = Dataset.from_dict({"text": converted_texts})

        print(f"Conversion complete for {split_name}, processed {num_examples} examples")

    return converted_datasets

if __name__ == "__main__":
    # Build the converted splits: split name -> Dataset with a 'text' column.
    converted_data = process_dataset()

    # Show a short preview of the first training example when a 'train' split exists.
    if 'train' in converted_data:
        for header in ("\nExample of accessing converted data:", "Dataset[0]['text']:"):
            print(header)
        preview = converted_data['train'][0]['text'][:500]  # first 500 chars only
        print(preview + "...")

    # Persist every converted split to its own directory.
    # Keep the loop variable named `dataset`: it stays defined at notebook scope
    # after the loop, and later cells may read it.
    for split_name in converted_data:
        dataset = converted_data[split_name]
        dataset.save_to_disk(f"converted_{split_name}")
        print(f"Saved {split_name} dataset to disk as 'converted_{split_name}'")

README.md:   0%|          | 0.00/5.60k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/40 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/40 [00:00<?, ?files/s]

train-000-of-040.parquet:   0%|          | 0.00/59.5M [00:00<?, ?B/s]

train-001-of-040.parquet:   0%|          | 0.00/59.3M [00:00<?, ?B/s]

train-002-of-040.parquet:   0%|          | 0.00/60.7M [00:00<?, ?B/s]

train-003-of-040.parquet:   0%|          | 0.00/60.7M [00:00<?, ?B/s]

train-004-of-040.parquet:   0%|          | 0.00/60.3M [00:00<?, ?B/s]

train-005-of-040.parquet:   0%|          | 0.00/58.8M [00:00<?, ?B/s]

train-006-of-040.parquet:   0%|          | 0.00/60.9M [00:00<?, ?B/s]

train-007-of-040.parquet:   0%|          | 0.00/59.5M [00:00<?, ?B/s]

train-008-of-040.parquet:   0%|          | 0.00/60.7M [00:00<?, ?B/s]

train-009-of-040.parquet:   0%|          | 0.00/60.1M [00:00<?, ?B/s]

train-010-of-040.parquet:   0%|          | 0.00/60.2M [00:00<?, ?B/s]

train-011-of-040.parquet:   0%|          | 0.00/60.5M [00:00<?, ?B/s]

train-012-of-040.parquet:   0%|          | 0.00/59.4M [00:00<?, ?B/s]

train-013-of-040.parquet:   0%|          | 0.00/59.9M [00:00<?, ?B/s]

train-014-of-040.parquet:   0%|          | 0.00/60.9M [00:00<?, ?B/s]

train-015-of-040.parquet:   0%|          | 0.00/59.3M [00:00<?, ?B/s]

train-016-of-040.parquet:   0%|          | 0.00/60.3M [00:00<?, ?B/s]

train-017-of-040.parquet:   0%|          | 0.00/61.1M [00:00<?, ?B/s]

train-018-of-040.parquet:   0%|          | 0.00/60.4M [00:00<?, ?B/s]

train-019-of-040.parquet:   0%|          | 0.00/61.0M [00:00<?, ?B/s]

train-020-of-040.parquet:   0%|          | 0.00/60.1M [00:00<?, ?B/s]

train-021-of-040.parquet:   0%|          | 0.00/60.0M [00:00<?, ?B/s]

train-022-of-040.parquet:   0%|          | 0.00/60.5M [00:00<?, ?B/s]

train-023-of-040.parquet:   0%|          | 0.00/59.8M [00:00<?, ?B/s]

train-024-of-040.parquet:   0%|          | 0.00/61.0M [00:00<?, ?B/s]

train-025-of-040.parquet:   0%|          | 0.00/60.4M [00:00<?, ?B/s]

train-026-of-040.parquet:   0%|          | 0.00/60.0M [00:00<?, ?B/s]

train-027-of-040.parquet:   0%|          | 0.00/58.4M [00:00<?, ?B/s]

train-028-of-040.parquet:   0%|          | 0.00/60.7M [00:00<?, ?B/s]

train-029-of-040.parquet:   0%|          | 0.00/60.8M [00:00<?, ?B/s]

train-030-of-040.parquet:   0%|          | 0.00/60.5M [00:00<?, ?B/s]

train-031-of-040.parquet:   0%|          | 0.00/60.1M [00:00<?, ?B/s]

train-032-of-040.parquet:   0%|          | 0.00/61.0M [00:00<?, ?B/s]

train-033-of-040.parquet:   0%|          | 0.00/59.9M [00:00<?, ?B/s]

train-034-of-040.parquet:   0%|          | 0.00/61.0M [00:00<?, ?B/s]

train-035-of-040.parquet:   0%|          | 0.00/60.1M [00:00<?, ?B/s]

train-036-of-040.parquet:   0%|          | 0.00/59.2M [00:00<?, ?B/s]

train-037-of-040.parquet:   0%|          | 0.00/59.2M [00:00<?, ?B/s]

train-038-of-040.parquet:   0%|          | 0.00/61.0M [00:00<?, ?B/s]

train-039-of-040.parquet:   0%|          | 0.00/30.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/394995 [00:00<?, ? examples/s]


Processing split: train
Number of examples in train: 394995
Processed 0 examples
Processed 1000 examples
Processed 2000 examples
Processed 3000 examples
Processed 4000 examples
Processed 5000 examples
Processed 6000 examples
Processed 7000 examples
Processed 8000 examples
Processed 9000 examples
Conversion complete for train, processed 10000 examples

Example of accessing converted data:
Dataset[0]['text']:
<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
The cost of five pencils and one pen is $\$2.50$, and the cost of one pencil and two pens is $\$1.85$. What is the cost of two pencils and one pen?<|im_end|>
<|im_start|>assistant
<think>
Alright, let's try to solve this problem step by step. So, we have two equations here:

1. The cost of five pencils and one pen is $2.50.
2. The cost of one pencil and two pens is $1.85.

We need t...


Saving the dataset (0/1 shards):   0%|          | 0/10000 [00:00<?, ? examples/s]

Saved train dataset to disk as 'converted_train'


In [5]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer over the converted 'text' column.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    # Use the converted 'train' split explicitly. Relying on a bare `dataset`
    # name would pick up whatever a previous loop variable happens to hold
    # (the last split iterated), which breaks as soon as there is more than
    # one split or the cells are run in a different order.
    train_dataset = converted_data["train"],
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        # Effective batch size = 16 per device x 4 accumulation steps = 64 on one GPU.
        per_device_train_batch_size = 16,
        gradient_accumulation_steps = 4,
        warmup_steps = 100,
        num_train_epochs = 6, # Number of full passes over the training set.
        #max_steps = 60,
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 50,
        save_steps = 100,  # Checkpoint saving interval (in steps)
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Change to enable WandB etc.
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/10000 [00:00<?, ? examples/s]

In [6]:
from unsloth.chat_templates import train_on_responses_only
# Re-wrap the trainer so that only assistant responses are used as training targets.
# The two markers below are the ChatML turn headers produced by `convert_format`;
# they tell the helper where a user turn and an assistant turn begin.
# NOTE(review): `trainer` is rebound here, so this cell should run exactly once
# after the trainer is constructed — confirm re-running is harmless.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|im_start|>user\n",
    response_part = "<|im_start|>assistant\n",
)

Map (num_proc=128):   0%|          | 0/10000 [00:00<?, ? examples/s]

In [7]:
# Sanity check: decode the token ids of the first training example back to text.
tokenizer.decode(trainer.train_dataset[0]["input_ids"])

'<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nThe cost of five pencils and one pen is $\\$2.50$, and the cost of one pencil and two pens is $\\$1.85$. What is the cost of two pencils and one pen?<|im_end|>\n<|im_start|>assistant\n<think>\nAlright, let\'s try to solve this problem step by step. So, we have two equations here:\n\n1. The cost of five pencils and one pen is $2.50.\n2. The cost of one pencil and two pens is $1.85.\n\nWe need to find the cost of two pencils and one pen. Hmm, okay. So first off, let me assign variables to these things to make it easier. Let\'s say the cost of one pencil is "p" dollars, and the cost of one pen is "n" dollars. Then we can translate the given information into equations.\n\nFirst equation: Five pencils and one pen cost $2.50. That would be 5 times p plus 1 times n equals 2.50. So, mathematically, that\'s:\n\n5p + n = 2.50\n\nSecond equation: One pencil and two pens cost $1.8

In [8]:
# Sanity check on the label mask: decode the labels of the first training example,
# substituting the token id of a single space for every ignored position (-100)
# so that masked spans show up as blank padding in the decoded text.
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
first_labels = trainer.train_dataset[0]["labels"]
tokenizer.decode([label_id if label_id != -100 else space for label_id in first_labels])

'                                                                         <think>\nAlright, let\'s try to solve this problem step by step. So, we have two equations here:\n\n1. The cost of five pencils and one pen is $2.50.\n2. The cost of one pencil and two pens is $1.85.\n\nWe need to find the cost of two pencils and one pen. Hmm, okay. So first off, let me assign variables to these things to make it easier. Let\'s say the cost of one pencil is "p" dollars, and the cost of one pen is "n" dollars. Then we can translate the given information into equations.\n\nFirst equation: Five pencils and one pen cost $2.50. That would be 5 times p plus 1 times n equals 2.50. So, mathematically, that\'s:\n\n5p + n = 2.50\n\nSecond equation: One pencil and two pens cost $1.85. That translates to 1 times p plus 2 times n equals 1.85. So:\n\np + 2n = 1.85\n\nOkay, so now we have a system of two equations with two variables. We need to solve for p and n, and once we have those, we can figure out what 2

In [9]:
# Run fine-tuning and keep the returned object in `trainer_stats` for later inspection.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 10,000 | Num Epochs = 6 | Total steps = 936
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 4 x 1) = 64
 "-____-"     Trainable parameters = 36,929,536/1,580,643,840 (2.34% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
50,1.074
100,0.9146
150,0.8534
200,0.8235
250,0.826
300,0.831
350,0.811
400,0.8053
450,0.8033
500,0.7903


In [None]:
# Authenticate with the Hugging Face Hub.
# NOTE(review): nothing visible in this notebook uses the Hub after this point unless
# the commented-out `push_to_hub` calls in the saving cell are enabled — confirm this
# cell is still needed.
from huggingface_hub import login
login()

In [10]:
# Save the fine-tuned model and the tokenizer into the local "lora_model" directory.
# NOTE(review): `model` is the LoRA-wrapped model here, so presumably only the adapter
# weights are written rather than the full base model — TODO confirm.
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")
# To publish instead, uncomment and fill in a repository name and token:
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/vocab.json',
 'lora_model/merges.txt',
 'lora_model/added_tokens.json',
 'lora_model/tokenizer.json')