import os

import gradio as gr
import torch
from datasets import load_dataset
from huggingface_hub import HfApi
from transformers import (
    AutoModelForVision2Seq,
    AutoProcessor,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

# --------------------------
# CONFIGURATION
# --------------------------
MODEL_NAME = "reducto/RolmOCR"
OUTPUT_DIR = "./rolmocr-finetuned"
HF_MODEL_REPO = "HiteshKamwal/Rolmft"  # 🔴 change this to your HF repo

# Make sure the HF token is set as an environment variable in Spaces (Settings -> Variables)
HF_TOKEN = os.getenv("HF_TOKEN")

# --------------------------
# DATASET LOADING + PREPROCESSING
# --------------------------
def load_and_preprocess_dataset(dataset_name="mamun1113/doctors-handwritten-prescription-bd-dataset"):
    """Load a dataset from the Hugging Face Hub. It must contain `image` and `text` columns."""
    dataset = load_dataset(dataset_name)
    processor = AutoProcessor.from_pretrained(MODEL_NAME)

    def preprocess(batch):
        images = [
            processor.image_processor(image.convert("RGB"), return_tensors="pt").pixel_values[0]
            for image in batch["image"]
        ]
        labels = processor.tokenizer(
            batch["text"],
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt",
        ).input_ids
        # Mask padding tokens with -100 so they are ignored by the loss.
        labels[labels == processor.tokenizer.pad_token_id] = -100
        return {"pixel_values": images, "labels": labels}

    # Drop the raw columns so only model inputs remain after mapping.
    dataset = dataset.map(preprocess, batched=True, remove_columns=dataset["train"].column_names)
    return dataset, processor

# --------------------------
# TRAINING FUNCTION
# --------------------------
def train_model(dataset_name, epochs, lr, batch_size):
    dataset, processor = load_and_preprocess_dataset(dataset_name)
    model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME)

    training_args = Seq2SeqTrainingArguments(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size=batch_size,
        gradient_accumulation_steps=2,
        num_train_epochs=epochs,
        learning_rate=lr,
        save_strategy="epoch",
        evaluation_strategy="epoch",
        fp16=torch.cuda.is_available(),
        logging_dir="./logs",
        push_to_hub=True,
        hub_model_id=HF_MODEL_REPO,
        hub_token=HF_TOKEN,
    )

    # Prefer a "test" split for evaluation; fall back to "validation".
    eval_split = dataset.get("test", dataset.get("validation"))

    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=eval_split,
        tokenizer=processor.tokenizer,
    )

    trainer.train()
    trainer.save_model(OUTPUT_DIR)
    processor.save_pretrained(OUTPUT_DIR)

    # Push the final model and processor to the Hub.
    api = HfApi()
    api.upload_folder(
        folder_path=OUTPUT_DIR,
        repo_id=HF_MODEL_REPO,
        repo_type="model",
        token=HF_TOKEN,
    )

    return f"✅ Training complete. Model pushed to {HF_MODEL_REPO}"

# --------------------------
# GRADIO UI
# --------------------------
def gradio_train(dataset_name, epochs, lr, batch_size):
    return train_model(dataset_name, int(epochs), float(lr), int(batch_size))

with gr.Blocks() as demo:
    gr.Markdown("## 🚀 Fine-tune RolmOCR on Doctor Handwriting Dataset")

    with gr.Row():
        dataset_name = gr.Textbox(
            value="mamun1113/doctors-handwritten-prescription-bd-dataset",
            label="HF Dataset name (must include image + text fields)",
        )

    with gr.Row():
        epochs = gr.Number(value=3, label="Epochs")
        lr = gr.Number(value=2e-5, label="Learning Rate")
        batch_size = gr.Number(value=2, label="Batch Size (per device)")

    output = gr.Textbox(label="Training Status")
    btn = gr.Button("Start Training")
    btn.click(gradio_train, [dataset_name, epochs, lr, batch_size], output)

demo.launch()
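
# --------------------------
# OPTIONAL: INFERENCE SKETCH
# --------------------------
# A minimal sketch of running OCR with the fine-tuned checkpoint, assuming
# training above has populated OUTPUT_DIR and the model follows the same
# pixel_values -> text interface used during training. The function name
# `run_ocr` and the 128-token generation limit are illustrative choices,
# not part of the original app. Left commented out so it does not interfere
# with the Gradio app; uncomment to try it locally after training.
#
# from PIL import Image
#
# def run_ocr(image_path: str) -> str:
#     processor = AutoProcessor.from_pretrained(OUTPUT_DIR)
#     model = AutoModelForVision2Seq.from_pretrained(OUTPUT_DIR)
#     pixel_values = processor.image_processor(
#         Image.open(image_path).convert("RGB"), return_tensors="pt"
#     ).pixel_values
#     generated_ids = model.generate(pixel_values, max_new_tokens=128)
#     return processor.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]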