"""
Multi-Dataset Configuration for HarpoonNet Modular Training
Combines multiple drone datasets for comprehensive training
"""

import torch
import os
from pathlib import Path

class MultiDatasetConfig:
    """Configuration for training with multiple drone datasets"""
    
    # Base paths
    DOWNLOADS_PATH = os.path.expanduser("~/Downloads")
    PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
    
    # Available datasets with their info
    DATASETS = {
        'drone_v1': {
            'path': os.path.join(DOWNLOADS_PATH, 'Drone.v1i.yolov8'),
            'name': 'Drone.v1i.yolov8',
            'description': 'Large drone dataset - 17K+ images',
            'priority': 1,  # Primary dataset
            'train_images': 15687,
            'valid_images': 1342,
            'test_images': 677
        },
        'drone_detection_v5': {
            'path': os.path.join(DOWNLOADS_PATH, 'droneDetection.v5i.yolov8'),
            'name': 'droneDetection.v5i.yolov8',
            'description': 'Drone detection dataset - 1.3K images',
            'priority': 2,
            'train_images': 1143,
            'valid_images': 109,
            'test_images': 54
        },
        'drone_detection_v5_2': {
            'path': os.path.join(DOWNLOADS_PATH, 'droneDetection.v5i (2).yolov8'),
            'name': 'droneDetection.v5i (2).yolov8',
            'description': 'Drone detection dataset v2 - 1.3K images',
            'priority': 3,
            'train_images': 1143,
            'valid_images': 109,
            'test_images': 54
        },
        'miranda_ot1': {
            'path': os.path.join(DOWNLOADS_PATH, 'Miranda OT1.v3i.yolov8'),
            'name': 'Miranda OT1.v3i.yolov8',
            'description': 'Miranda drone dataset - 933 images',
            'priority': 4,
            'train_images': 933,
            'valid_images': 0,  # No validation split
            'test_images': 0    # No test split
        },
        # Background (negative) dataset – smoke & clouds only (no labels)
        'smokecloud_v1': {
            'path': os.path.join(DOWNLOADS_PATH, 'Smoke_Cloud.v1i (2).yolov8'),
            'name': 'Smoke_Cloud.v1i (2).yolov8',
            'description': 'Smoke & cloud negative dataset - 5.5K images, label-free',
            'priority': 5,
            'is_negative_dataset': True,  # Flag to indicate this is for negative examples
            # Actual counts from the dataset
            'train_images': 4500,
            'valid_images': 639,
            'test_images': 326
        },
        
        # Additional negative datasets for reducing false positives
        'doors_v1': {
            'path': os.path.join(DOWNLOADS_PATH, 'door.v1i.yolov8'),
            'name': 'door.v1i.yolov8',
            'description': 'Door/wall negative dataset - 50 images, label-free',
            'priority': 6,
            'is_negative_dataset': True,
            'train_images': 35,
            'valid_images': 10,
            'test_images': 5
        },
        
        'birds_v1': {
            'path': os.path.join(DOWNLOADS_PATH, 'birds label.v1i.yolov8'),
            'name': 'birds label.v1i.yolov8',
            'description': 'Birds negative dataset - 100 images, label-free',
            'priority': 7,
            'is_negative_dataset': True,
            'train_images': 70,
            'valid_images': 20,
            'test_images': 10
        },
        
        'airplanes_v1': {
            'path': os.path.join(DOWNLOADS_PATH, 'Airplane detection.v1i.yolov8'),
            'name': 'Airplane detection.v1i.yolov8',
            'description': 'Airplane negative dataset - 4.5K images, label-free',
            'priority': 8,
            'is_negative_dataset': True,
            'train_images': 3200,
            'valid_images': 900,
            'test_images': 454
        }
    }
    
    # Validate dataset availability
    AVAILABLE_DATASETS = {}
    for key, dataset in DATASETS.items():
        if os.path.exists(dataset['path']):
            AVAILABLE_DATASETS[key] = dataset
            print(f"✅ Found dataset: {dataset['name']} ({dataset['description']})")
        else:
            print(f"❌ Missing dataset: {dataset['name']} at {dataset['path']}")
    
    # Calculate total dataset size
    TOTAL_TRAIN_IMAGES = sum(d['train_images'] for d in AVAILABLE_DATASETS.values())
    TOTAL_VALID_IMAGES = sum(d['valid_images'] for d in AVAILABLE_DATASETS.values())
    TOTAL_TEST_IMAGES = sum(d['test_images'] for d in AVAILABLE_DATASETS.values())
    TOTAL_IMAGES = TOTAL_TRAIN_IMAGES + TOTAL_VALID_IMAGES + TOTAL_TEST_IMAGES
    
    print(f"\n📊 Combined Dataset Stats:")
    print(f"   Train: {TOTAL_TRAIN_IMAGES:,} images")
    print(f"   Valid: {TOTAL_VALID_IMAGES:,} images") 
    print(f"   Test: {TOTAL_TEST_IMAGES:,} images")
    print(f"   Total: {TOTAL_IMAGES:,} images")
    
    # Model parameters for HarpoonNet Modular
    NUM_CLASSES = 1  # Single class: drone
    IMAGE_SIZE = 416  # Optimized for edge inference
    
    # Training parameters - optimized for large multi-dataset training
    BATCH_SIZE = 16      # Reduced due to larger dataset size
    EPOCHS = 50          # More epochs for comprehensive learning
    LEARNING_RATE = 0.001 # Conservative LR for stable multi-dataset training
    WEIGHT_DECAY = 0.0001
    
    # Loss parameters for HarpoonNet Modular (single-scale)
    LAMBDA_COORD = 5.0   # Coordinate loss weight
    LAMBDA_OBJ = 1.0     # Objectness loss weight  
    LAMBDA_NOOBJ = 0.5   # No-object loss weight
    LAMBDA_CLASS = 1.0   # Classification loss weight
    
    # Regularization
    DROPOUT_RATE = 0.1
    GRADIENT_CLIP_VALUE = 10.0
    
    # Device
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Inference parameters
    CONF_THRESHOLD = 0.25  # Confidence threshold
    NMS_THRESHOLD = 0.4    # NMS threshold
    MAX_DETECTIONS = 100   # Max detections per image
    
    # Data augmentation - balanced for multi-dataset training
    AUGMENTATION = True
    AUG_PARAMS = {
        'flip_lr': 0.5,      # Horizontal flip
        'flip_ud': 0.1,      # Vertical flip (minimal for drones)
        'rotation': 15,      # Max rotation degrees
        'scale': 0.2,        # Scale variation
        'brightness': 0.2,   # Brightness variation
        'contrast': 0.2,     # Contrast variation
        'saturation': 0.3,   # Saturation variation
        'hue': 0.01          # Minimal hue variation
    }
    
    # Checkpoints and logging
    CHECKPOINT_PATH = "checkpoints"
    EXPERIMENT_NAME = f"harpoon_multi_dataset_{TOTAL_IMAGES//1000}k"
    
    # Early stopping
    EARLY_STOPPING_PATIENCE = 10
    EARLY_STOPPING_MIN_DELTA = 0.001
    
    # Learning rate scheduler
    LR_SCHEDULER = 'reduce_on_plateau'
    LR_SCHEDULER_PATIENCE = 5
    LR_SCHEDULER_FACTOR = 0.5
    LR_SCHEDULER_MIN_LR = 1e-6
    
    @classmethod
    def get_dataset_paths(cls, dataset_key, split='train'):
        """Get paths for a specific dataset and split"""
        if dataset_key not in cls.AVAILABLE_DATASETS:
            raise ValueError(f"Dataset {dataset_key} not available")
        
        dataset = cls.AVAILABLE_DATASETS[dataset_key]
        base_path = dataset['path']
        
        if split == 'train':
            images_path = os.path.join(base_path, 'train', 'images')
            labels_path = os.path.join(base_path, 'train', 'labels')
        elif split == 'valid':
            images_path = os.path.join(base_path, 'valid', 'images')
            labels_path = os.path.join(base_path, 'valid', 'labels')
        elif split == 'test':
            images_path = os.path.join(base_path, 'test', 'images')
            labels_path = os.path.join(base_path, 'test', 'labels')
        else:
            raise ValueError(f"Invalid split: {split}")
        
        return images_path, labels_path
    
    @classmethod
    def get_all_dataset_paths(cls, split='train'):
        """Get paths for all available datasets for a specific split"""
        all_paths = []
        
        for dataset_key in cls.AVAILABLE_DATASETS.keys():
            try:
                images_path, labels_path = cls.get_dataset_paths(dataset_key, split)
                dataset_info = cls.AVAILABLE_DATASETS[dataset_key]
                is_negative = dataset_info.get('is_negative_dataset', False)
                
                # For negative datasets, only check images path (no labels needed)
                if is_negative:
                    if os.path.exists(images_path):
                        all_paths.append((images_path, labels_path, dataset_key))
                        print(f"✅ Added {split} path for {dataset_key} (negative dataset)")
                    else:
                        print(f"⚠️  Skipping {dataset_key} {split} - images path not found")
                else:
                    # For regular datasets, check both images and labels
                    if os.path.exists(images_path) and os.path.exists(labels_path):
                        all_paths.append((images_path, labels_path, dataset_key))
                        print(f"✅ Added {split} path for {dataset_key}")
                    else:
                        print(f"⚠️  Skipping {dataset_key} {split} - path not found")
            except ValueError as e:
                print(f"⚠️  Skipping {dataset_key} {split}: {e}")
        
        return all_paths
    
    @classmethod
    def print_config(cls):
        """Print the multi-dataset configuration"""
        print("\n" + "="*60)
        print("🚀 HarpoonNet Multi-Dataset Training Configuration")
        print("="*60)
        print(f"Available Datasets: {len(cls.AVAILABLE_DATASETS)}")
        
        for key, dataset in cls.AVAILABLE_DATASETS.items():
            print(f"  📁 {dataset['name']}")
            print(f"     Train: {dataset['train_images']:,} | Valid: {dataset['valid_images']:,} | Test: {dataset['test_images']:,}")
        
        print(f"\n📊 Combined Statistics:")
        print(f"   Total Images: {cls.TOTAL_IMAGES:,}")
        print(f"   Training Images: {cls.TOTAL_TRAIN_IMAGES:,}")
        print(f"   Validation Images: {cls.TOTAL_VALID_IMAGES:,}")
        
        print(f"\n🏗️ Architecture:")
        print(f"   Model: HarpoonNet Modular (EfficientNet-B0 + HarpoonHead)")
        print(f"   Input Size: {cls.IMAGE_SIZE}x{cls.IMAGE_SIZE}")
        print(f"   Classes: {cls.NUM_CLASSES} (drone detection)")
        
        print(f"\n🎯 Training Parameters:")
        print(f"   Batch Size: {cls.BATCH_SIZE}")
        print(f"   Epochs: {cls.EPOCHS}")
        print(f"   Learning Rate: {cls.LEARNING_RATE}")
        print(f"   Device: {cls.DEVICE}")
        print(f"   Experiment: {cls.EXPERIMENT_NAME}")
        
        print("="*60)


if __name__ == "__main__":
    # Test the configuration
    config = MultiDatasetConfig()
    config.print_config()
    
    # Test path retrieval
    print(f"\n🧪 Testing path retrieval:")
    train_paths = config.get_all_dataset_paths('train')
    print(f"Found {len(train_paths)} training dataset paths")
    
    valid_paths = config.get_all_dataset_paths('valid')
    print(f"Found {len(valid_paths)} validation dataset paths")