import modal

app = modal.App("qwen-law-finetuning")
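
# Container image: CUDA 12.1 devel base (nvcc is required to compile
# flash-attn) with Python 3.10 and the training stack layered in cacheable
# pip_install steps.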
image = (
    modal.Image.from_registry(
        "nvidia/cuda:12.1.0-devel-ubuntu22.04",
        add_python="3.10",
    )
    .apt_install(["git", "build-essential", "ninja-build"])
    .pip_install("unsloth", "datasets")
    .pip_install("torch>=2.0.1", "transformers>=4.33.0")
    .pip_install("peft>=0.5.0", "trl>=0.7.1", "tensorboard")
    .pip_install("bitsandbytes>=0.41.1", "accelerate>=0.23.0")
    .pip_install("xformers>=0.0.21", "einops", "sentencepiece", "protobuf")
    # flash-attn compiles against the torch installed above, so pip's build
    # isolation (which would hide torch from the build) must be disabled.
    .pip_install("flash-attn>=2.3.0", extra_options="--no-build-isolation")
    # Add local source last so code edits don't invalidate the cached layers.
    .add_local_dir(".", remote_path="/root/code")
)
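
# Persistent Modal volume: survives across runs, so the trained adapter can be
# reloaded later by the inference function below.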
volume = modal.Volume.from_name("finetune-volume", create_if_missing=True)
VOLUME_PATH = "/data"
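
# Training runs on a single A100-40GB with a 5-hour timeout; the volume is
# mounted at /data so checkpoints and the final adapter persist.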
@app.function(
    image=image,
    gpu="A100-40GB",
    timeout=60 * 60 * 5,
    volumes={VOLUME_PATH: volume},
)
def finetune_qwen():
    # Import unsloth before transformers/trl so its runtime patches apply.
    from unsloth import FastLanguageModel

    import os

    import torch
    from datasets import load_dataset
    from transformers import TrainingArguments
    from trl import SFTTrainer

    os.chdir("/root/code")

    output_dir = os.path.join(VOLUME_PATH, "JurisQwen")
    os.makedirs(output_dir, exist_ok=True)
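
    # The dataset exposes plain Instruction/Response columns; wrap each pair in
    # Qwen's ChatML markers so training matches the base model's chat format.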
    print("Loading dataset...")
    ds = load_dataset("viber1/indian-law-dataset")

    def format_instruction(example):
        return {
            "text": f"<|im_start|>user\n{example['Instruction']}<|im_end|>\n"
                    f"<|im_start|>assistant\n{example['Response']}<|im_end|>"
        }

    formatted_ds = ds.map(format_instruction)
    train_dataset = formatted_ds["train"]

    max_seq_length = 4096
    model_id = "Qwen/Qwen2.5-7B"
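
    # Load the base model quantized to 4-bit (QLoRA-style) so the 7B weights
    # and activations fit on a single A100-40GB at a 4096-token context.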
    print("Loading model...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_id,
        max_seq_length=max_seq_length,
        load_in_4bit=True,
        attn_implementation="flash_attention_2",
        dtype=torch.bfloat16,
    )
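
    # Attach LoRA adapters to every attention and MLP projection. r=32 with
    # lora_alpha=64 (alpha = 2r) is a common, moderately expressive setting.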
    model = FastLanguageModel.get_peft_model(
        model,
        r=32,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=64,
        lora_dropout=0.05,
        bias="none",
        use_gradient_checkpointing="unsloth",
    )
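
    # Effective batch size is 16 x 2 (gradient accumulation) = 32 sequences.
    # 8-bit AdamW plus bf16/tf32 keeps optimizer and matmul memory in check.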
    training_args = TrainingArguments(
        output_dir=os.path.join(VOLUME_PATH, "checkpoints"),
        num_train_epochs=3,
        per_device_train_batch_size=16,
        gradient_accumulation_steps=2,
        optim="adamw_8bit",
        learning_rate=2e-4,
        weight_decay=0.001,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        bf16=True,
        fp16=False,
        logging_steps=10,
        save_strategy="epoch",
        report_to="tensorboard",
        tf32=True,
    )
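
    # packing=True concatenates short examples into full-length sequences,
    # which markedly improves GPU utilization on short legal Q&A pairs.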
    print("Preparing trainer...")
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        args=training_args,
        packing=True,
    )
    print("Starting training...")
    trainer.train()
    print("Training completed!")

    # save_pretrained on a PEFT model writes only the LoRA adapter weights.
    print(f"Saving model to {output_dir}")
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    volume.commit()  # persist the saved files to the Modal volume
    print("Testing inference...")
    FastLanguageModel.for_inference(model)
    # End the prompt with an opening assistant turn so the model completes the
    # answer rather than generating a new user turn.
    test_prompt = (
        "<|im_start|>user\nWhat are the key provisions of the "
        "Indian Contract Act?<|im_end|>\n<|im_start|>assistant\n"
    )
    inputs = tokenizer([test_prompt], return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=512)
    print("Generated response:")
    print(tokenizer.decode(outputs[0]))

    return f"Model successfully trained and saved to {output_dir}"
@app.function(
    image=image,
    gpu="A100-40GB",
    timeout=60 * 10,
    volumes={VOLUME_PATH: volume},
)
def test_inference(prompt: str):
    # Import unsloth before transformers so its runtime patches apply.
    from unsloth import FastLanguageModel

    import os

    import torch

    model_path = os.path.join(VOLUME_PATH, "JurisQwen")

    print(f"Loading model from {model_path}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_path,
        max_seq_length=4096,
        load_in_4bit=True,  # match the 4-bit base used during training
        attn_implementation="flash_attention_2",
        dtype=torch.bfloat16,
    )

    FastLanguageModel.for_inference(model)

    # Close the user turn and open the assistant turn before generating.
    formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")

    outputs = model.generate(**inputs, max_new_tokens=512)
    response = tokenizer.decode(outputs[0])

    return response
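
# Local entrypoint: run with `modal run <this file>.py`. main() executes on the
# local machine and dispatches finetune_qwen to a remote GPU container, so no
# separate app.deploy() call or __main__ guard is needed.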
@app.local_entrypoint()
def main():
    print("Starting fine-tuning process...")
    result = finetune_qwen.remote()
    print(f"Fine-tuning result: {result}")