#!/usr/bin/env python3
"""
Fine-tuning script for SmolLM2-135M model using Unsloth.

This script demonstrates how to:
1. Install and configure Unsloth
2. Prepare and format training data
3. Configure and run the training process
4. Save and evaluate the model

To run this script:
1. Install dependencies: pip install -r requirements.txt
2. Run: python train.py
"""

import json
import logging
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Union

import hydra
from omegaconf import DictConfig, OmegaConf

# isort: off
from unsloth import FastLanguageModel, FastModel, is_bfloat16_supported  # noqa: E402
from unsloth.chat_templates import get_chat_template  # noqa: E402

# isort: on

import torch
from datasets import (
    Dataset,
    DatasetDict,
    IterableDataset,
    IterableDatasetDict,
    load_dataset,
)
from smolagents import CodeAgent, LiteLLMModel, Model, TransformersModel, VLLMModel
from smolagents.monitoring import LogLevel
from transformers import (
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from trl import SFTTrainer

from tools.smart_search.tool import SmartSearchTool


# Setup logging
def setup_logging():
    """Configure logging for the training process."""
    # Create logs directory if it doesn't exist
    log_dir = Path("logs")
    log_dir.mkdir(exist_ok=True)

    # Create a unique log file name with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = log_dir / f"training_{timestamp}.log"

    # Configure logging to both the file and the console
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    )
    logger = logging.getLogger(__name__)
    logger.info(f"Logging initialized. Log file: {log_file}")
    return logger


logger = setup_logging()
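
# The Hydra config at conf/config.yaml is not shown in this file. The sketch
# below is a hypothetical minimal example covering the keys this script reads
# (values are illustrative placeholders, not the project's actual settings):
#
#   train: true
#   test: false
#   model:
#     name: HuggingFaceTB/SmolLM2-135M
#     provider: openai        # used to build the LiteLLM model_id during testing
#     max_seq_length: 2048
#     dtype: null             # let Unsloth choose (bf16 when supported)
#     load_in_4bit: true
#   peft:
#     r: 16
#     lora_alpha: 16
#     lora_dropout: 0.0
#     bias: none
#     target_modules: [q_proj, k_proj, v_proj, o_proj]
#     use_gradient_checkpointing: unsloth
#     random_state: 42
#     use_rslora: false
#     loftq_config: null
#   dataset:
#     validation_split: 0.1
#     seed: 42
#   training:
#     args: {...}                           # passed straight to TrainingArguments
#     sft: {data_collator: {mlm: false}}    # extra SFTTrainer kwargs
#   output:
#     dir: outputs/smollm2-135m-codeact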
def install_dependencies():
    """Install required dependencies."""
    logger.info("Installing dependencies...")
    try:
        # Use subprocess instead of os.system so a non-zero exit code actually
        # raises and is caught below (os.system would fail silently).
        subprocess.check_call(
            [
                sys.executable,
                "-m",
                "pip",
                "install",
                "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git",
            ]
        )
        subprocess.check_call(
            [
                sys.executable,
                "-m",
                "pip",
                "install",
                "--no-deps",
                "xformers",
                "trl",
                "peft",
                "accelerate",
                "bitsandbytes",
            ]
        )
        logger.info("Dependencies installed successfully")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error installing dependencies: {e}")
        raise


def load_model(cfg: DictConfig) -> tuple[FastLanguageModel, AutoTokenizer]:
    """Load and configure the model."""
    logger.info("Loading model and tokenizer...")
    try:
        model, tokenizer = FastModel.from_pretrained(
            model_name=cfg.model.name,
            max_seq_length=cfg.model.max_seq_length,
            dtype=cfg.model.dtype,
            load_in_4bit=cfg.model.load_in_4bit,
        )
        logger.info("Base model loaded successfully")

        # Configure LoRA
        model = FastModel.get_peft_model(
            model,
            r=cfg.peft.r,
            target_modules=cfg.peft.target_modules,
            lora_alpha=cfg.peft.lora_alpha,
            lora_dropout=cfg.peft.lora_dropout,
            bias=cfg.peft.bias,
            use_gradient_checkpointing=cfg.peft.use_gradient_checkpointing,
            random_state=cfg.peft.random_state,
            use_rslora=cfg.peft.use_rslora,
            loftq_config=cfg.peft.loftq_config,
        )
        logger.info("LoRA configuration applied successfully")

        return model, tokenizer
    except Exception as e:
        logger.error(f"Error loading model: {e}")
        raise
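
# Optional sanity check (a sketch, not called anywhere below): the object
# returned by load_model() is PEFT-wrapped, so the standard peft API can
# report how few parameters the LoRA adapters actually train:
#
#   model, tokenizer = load_model(cfg)
#   model.print_trainable_parameters()  # prints trainable vs. total param counts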
def load_and_format_dataset(
    tokenizer: AutoTokenizer,
    cfg: DictConfig,
) -> tuple[
    Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset], AutoTokenizer
]:
    """Load and format the training dataset."""
    logger.info("Loading and formatting dataset...")
    try:
        # Load the code-act dataset
        dataset = load_dataset("xingyaoww/code-act", split="codeact")
        logger.info(f"Dataset loaded successfully. Size: {len(dataset)} examples")

        # Split into train and validation sets
        dataset = dataset.train_test_split(
            test_size=cfg.dataset.validation_split, seed=cfg.dataset.seed
        )
        logger.info(
            f"Dataset split into train ({len(dataset['train'])} examples) "
            f"and validation ({len(dataset['test'])} examples) sets"
        )

        # Configure chat template
        tokenizer = get_chat_template(
            tokenizer,
            chat_template="chatml",  # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
            mapping={
                "role": "from",
                "content": "value",
                "user": "human",
                "assistant": "gpt",
            },  # ShareGPT style
            map_eos_token=True,  # Maps <|im_end|> to </s> instead
        )
        logger.info("Chat template configured successfully")

        def formatting_prompts_func(examples):
            convos = examples["conversations"]
            texts = [
                tokenizer.apply_chat_template(
                    convo, tokenize=False, add_generation_prompt=False
                )
                for convo in convos
            ]
            return {"text": texts}

        # Apply formatting to both train and validation sets
        dataset = DatasetDict(
            {
                "train": dataset["train"].map(formatting_prompts_func, batched=True),
                "validation": dataset["test"].map(
                    formatting_prompts_func, batched=True
                ),
            }
        )
        logger.info("Dataset formatting completed successfully")

        return dataset, tokenizer
    except Exception as e:
        logger.error(f"Error loading/formatting dataset: {e}")
        raise


def create_trainer(
    model: FastLanguageModel,
    tokenizer: AutoTokenizer,
    dataset: Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset],
    cfg: DictConfig,
) -> Trainer:
    """Create and configure the SFTTrainer."""
    logger.info("Creating trainer...")
    try:
        # Create TrainingArguments from config
        training_args_dict = OmegaConf.to_container(cfg.training.args, resolve=True)
        # Add dynamic precision settings: prefer bf16 where the GPU supports
        # it, otherwise fall back to fp16
        training_args_dict.update(
            {
                "fp16": not is_bfloat16_supported(),
                "bf16": is_bfloat16_supported(),
            }
        )
        training_args = TrainingArguments(**training_args_dict)

        # Create data collator from config
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            **cfg.training.sft.data_collator,
        )

        # Create SFT config without data_collator to avoid duplication
        sft_config = OmegaConf.to_container(cfg.training.sft, resolve=True)
        sft_config.pop("data_collator", None)  # Remove data_collator from config

        trainer = SFTTrainer(
            model=model,
            tokenizer=tokenizer,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            args=training_args,
            data_collator=data_collator,
            **sft_config,
        )
        logger.info("Trainer created successfully")
        return trainer
    except Exception as e:
        logger.error(f"Error creating trainer: {e}")
        raise
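
# For reference, the "chatml" template configured in load_and_format_dataset()
# renders each ShareGPT-style conversation roughly like this (illustrative):
#
#   <|im_start|>user
#   How do I reverse a list in Python?<|im_end|>
#   <|im_start|>assistant
#   Use list.reverse() in place, or slicing [::-1] for a copy.<|im_end|>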
@hydra.main(version_base=None, config_path="conf", config_name="config")
def main(cfg: DictConfig) -> None:
    """Main training function."""
    try:
        logger.info("Starting training process...")
        logger.info(f"Configuration:\n{OmegaConf.to_yaml(cfg)}")

        # Install dependencies
        # install_dependencies()

        # Train if requested
        if cfg.train:
            # Load model and tokenizer
            model, tokenizer = load_model(cfg)

            # Load and prepare dataset
            dataset, tokenizer = load_and_format_dataset(tokenizer, cfg)

            # Create trainer
            trainer: Trainer = create_trainer(model, tokenizer, dataset, cfg)

            logger.info("Starting training...")
            trainer.train()

            # Save model
            logger.info(f"Saving final model to {cfg.output.dir}...")
            trainer.save_model(cfg.output.dir)

            # Save model in VLLM format
            logger.info("Saving model in VLLM format...")
            model.save_pretrained_merged(
                cfg.output.dir, tokenizer, save_method="merged_16bit"
            )

            # Print final metrics
            final_metrics = trainer.state.log_history[-1]
            logger.info("\nTraining completed!")
            logger.info(f"Final training loss: {final_metrics.get('loss', 'N/A')}")
            logger.info(
                f"Final validation loss: {final_metrics.get('eval_loss', 'N/A')}"
            )
        else:
            logger.info("Training skipped as train=False")
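
        # With the merged 16-bit weights saved above, the output directory can
        # be served behind an OpenAI-compatible endpoint matching the api_base
        # used below, e.g. with vLLM (hypothetical invocation):
        #
        #   vllm serve <cfg.output.dir> --port 8000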
        # Test if requested
        if cfg.test:
            logger.info("\nStarting testing...")
            try:
                # Set memory allocation configuration. NOTE: to be fully
                # effective this must be set before the first CUDA allocation,
                # so it is applied before memory tracking starts; this late in
                # the process it is best-effort.
                os.environ["PYTORCH_CUDA_ALLOC_CONF"] = (
                    "expandable_segments:True,max_split_size_mb:128"
                )

                # Enable memory history tracking
                torch.cuda.memory._record_memory_history()

                # Load test dataset
                test_dataset = load_dataset(
                    cfg.test_dataset.name,
                    cfg.test_dataset.config,
                    split=cfg.test_dataset.split,
                    trust_remote_code=True,
                )
                logger.info(f"Loaded test dataset with {len(test_dataset)} examples")
                logger.info(f"Dataset features: {test_dataset.features}")

                # Clear CUDA cache before loading model
                torch.cuda.empty_cache()

                # Initialize model
                model: Model = LiteLLMModel(
                    api_base="http://localhost:8000/v1",
                    api_key="not-needed",
                    model_id=f"{cfg.model.provider}/{cfg.model.name}",
                    # model_id=cfg.model.name,
                    # model_id=cfg.output.dir,
                )
                # model: Model = TransformersModel(
                #     model_id=cfg.model.name,
                #     # model_id=cfg.output.dir,
                # )
                # model: Model = VLLMModel(
                #     model_id=cfg.model.name,
                #     # model_id=cfg.output.dir,
                # )

                # Create CodeAgent with SmartSearchTool
                agent = CodeAgent(
                    model=model,
                    tools=[SmartSearchTool()],
                    verbosity_level=LogLevel.ERROR,
                )

                # Format task to get a succinct answer
                def format_task(question):
                    return f"""Please provide two answers to the following question:

1. A succinct answer that follows these rules:
- Contains ONLY the answer, nothing else
- Does not repeat the question
- Does not include explanations, reasoning, or context
- Does not include source attribution or references
- Does not use phrases like "The answer is" or "I found that"
- Does not include formatting, bullet points, or line breaks
- If the answer is a number, return only the number
- If the answer requires multiple items, separate them with commas
- If the answer requires ordering, maintain the specified order
- Uses the most direct and succinct form possible

2. A verbose answer that includes:
- The complete answer with all relevant details
- Explanations and reasoning
- Context and background information
- Source attribution where appropriate

Question: {question}

Please format your response as a JSON object with two keys:
- "succinct_answer": The concise answer following the rules above
- "verbose_answer": The detailed explanation with context"""

                # Run inference on test samples
                logger.info("Running inference on test samples...")
                for i, example in enumerate(test_dataset):
                    try:
                        # Clear CUDA cache before each sample
                        torch.cuda.empty_cache()

                        # Format the task
                        task = format_task(example["Question"])

                        # Run the agent
                        result = agent.run(
                            task=task,
                            max_steps=3,
                            reset=True,
                            stream=False,
                        )

                        # Parse the JSON object out of the model's response
                        json_str = result[result.find("{") : result.rfind("}") + 1]
                        parsed_result = json.loads(json_str)
                        answer = parsed_result["succinct_answer"]

                        logger.info(f"\nTest Sample {i+1}:")
                        logger.info(f"Question: {example['Question']}")
                        logger.info(f"Model Response: {answer}")
                        logger.info("-" * 80)

                        # Log memory usage after each sample
                        logger.info(f"Memory usage after sample {i+1}:")
                        logger.info(
                            f"Allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
                        )
                        logger.info(
                            f"Reserved: {torch.cuda.memory_reserved() / 1024**2:.2f} MB"
                        )
                    except Exception as e:
                        logger.error(f"Error processing test sample {i+1}: {str(e)}")
                        continue

                # Dump memory snapshot for analysis
                torch.cuda.memory._dump_snapshot("memory_snapshot.pickle")
                logger.info("Memory snapshot saved to memory_snapshot.pickle")

            except Exception as e:
                logger.error(f"Error during testing: {e}")
                raise

    except Exception as e:
        logger.error(f"Error in main training process: {e}")
        raise


if __name__ == "__main__":
    main()  # uv run python train.py
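
# Example invocations (standard Hydra key=value overrides for the flags read
# in main(), assuming the config sketch near the top of this file):
#   uv run python train.py                         # train per conf/config.yaml
#   uv run python train.py train=false test=true   # skip training, run agent eval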